1# IMPORTANT: the same tests are run from "test_xml_etree_c" in order 2# to ensure consistency between the C implementation and the Python 3# implementation. 4# 5# For this purpose, the module-level "ET" symbol is temporarily 6# monkey-patched when running the "test_xml_etree_c" test suite. 7 8import copy 9import functools 10import html 11import io 12import itertools 13import locale 14import operator 15import os 16import pickle 17import sys 18import textwrap 19import types 20import unittest 21import warnings 22import weakref 23 24from functools import partial 25from itertools import product, islice 26from test import support 27from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr 28 29# pyET is the pure-Python implementation. 30# 31# ET is pyET in test_xml_etree and is the C accelerated version in 32# test_xml_etree_c. 33pyET = None 34ET = None 35 36SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") 37try: 38 SIMPLE_XMLFILE.encode("utf-8") 39except UnicodeEncodeError: 40 raise unittest.SkipTest("filename is not encodable to utf8") 41SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") 42UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata") 43 44SAMPLE_XML = """\ 45<body> 46 <tag class='a'>text</tag> 47 <tag class='b' /> 48 <section> 49 <tag class='b' id='inner'>subtext</tag> 50 </section> 51</body> 52""" 53 54SAMPLE_SECTION = """\ 55<section> 56 <tag class='b' id='inner'>subtext</tag> 57 <nexttag /> 58 <nextsection> 59 <tag /> 60 </nextsection> 61</section> 62""" 63 64SAMPLE_XML_NS = """ 65<body xmlns="http://effbot.org/ns"> 66 <tag>text</tag> 67 <tag /> 68 <section> 69 <tag>subtext</tag> 70 </section> 71</body> 72""" 73 74SAMPLE_XML_NS_ELEMS = """ 75<root> 76<h:table xmlns:h="hello"> 77 <h:tr> 78 <h:td>Apples</h:td> 79 <h:td>Bananas</h:td> 80 </h:tr> 81</h:table> 82 83<f:table xmlns:f="foo"> 84 <f:name>African Coffee Table</f:name> 85 <f:width>80</f:width> 86 <f:length>120</f:length> 
87</f:table> 88</root> 89""" 90 91ENTITY_XML = """\ 92<!DOCTYPE points [ 93<!ENTITY % user-entities SYSTEM 'user-entities.xml'> 94%user-entities; 95]> 96<document>&entity;</document> 97""" 98 99EXTERNAL_ENTITY_XML = """\ 100<!DOCTYPE points [ 101<!ENTITY entity SYSTEM "file:///non-existing-file.xml"> 102]> 103<document>&entity;</document> 104""" 105 106def checkwarnings(*filters, quiet=False): 107 def decorator(test): 108 def newtest(*args, **kwargs): 109 with support.check_warnings(*filters, quiet=quiet): 110 test(*args, **kwargs) 111 functools.update_wrapper(newtest, test) 112 return newtest 113 return decorator 114 115 116class ModuleTest(unittest.TestCase): 117 def test_sanity(self): 118 # Import sanity. 119 120 from xml.etree import ElementTree 121 from xml.etree import ElementInclude 122 from xml.etree import ElementPath 123 124 def test_all(self): 125 names = ("xml.etree.ElementTree", "_elementtree") 126 support.check__all__(self, ET, names, blacklist=("HTML_EMPTY",)) 127 128 129def serialize(elem, to_string=True, encoding='unicode', **options): 130 if encoding != 'unicode': 131 file = io.BytesIO() 132 else: 133 file = io.StringIO() 134 tree = ET.ElementTree(elem) 135 tree.write(file, encoding=encoding, **options) 136 if to_string: 137 return file.getvalue() 138 else: 139 file.seek(0) 140 return file 141 142def summarize_list(seq): 143 return [elem.tag for elem in seq] 144 145 146class ElementTestCase: 147 @classmethod 148 def setUpClass(cls): 149 cls.modules = {pyET, ET} 150 151 def pickleRoundTrip(self, obj, name, dumper, loader, proto): 152 save_m = sys.modules[name] 153 try: 154 sys.modules[name] = dumper 155 temp = pickle.dumps(obj, proto) 156 sys.modules[name] = loader 157 result = pickle.loads(temp) 158 except pickle.PicklingError as pe: 159 # pyET must be second, because pyET may be (equal to) ET. 
160 human = dict([(ET, "cET"), (pyET, "pyET")]) 161 raise support.TestFailed("Failed to round-trip %r from %r to %r" 162 % (obj, 163 human.get(dumper, dumper), 164 human.get(loader, loader))) from pe 165 finally: 166 sys.modules[name] = save_m 167 return result 168 169 def assertEqualElements(self, alice, bob): 170 self.assertIsInstance(alice, (ET.Element, pyET.Element)) 171 self.assertIsInstance(bob, (ET.Element, pyET.Element)) 172 self.assertEqual(len(list(alice)), len(list(bob))) 173 for x, y in zip(alice, bob): 174 self.assertEqualElements(x, y) 175 properties = operator.attrgetter('tag', 'tail', 'text', 'attrib') 176 self.assertEqual(properties(alice), properties(bob)) 177 178# -------------------------------------------------------------------- 179# element tree tests 180 181class ElementTreeTest(unittest.TestCase): 182 183 def serialize_check(self, elem, expected): 184 self.assertEqual(serialize(elem), expected) 185 186 def test_interface(self): 187 # Test element tree interface. 
188 189 def check_string(string): 190 len(string) 191 for char in string: 192 self.assertEqual(len(char), 1, 193 msg="expected one-character string, got %r" % char) 194 new_string = string + "" 195 new_string = string + " " 196 string[:0] 197 198 def check_mapping(mapping): 199 len(mapping) 200 keys = mapping.keys() 201 items = mapping.items() 202 for key in keys: 203 item = mapping[key] 204 mapping["key"] = "value" 205 self.assertEqual(mapping["key"], "value", 206 msg="expected value string, got %r" % mapping["key"]) 207 208 def check_element(element): 209 self.assertTrue(ET.iselement(element), msg="not an element") 210 direlem = dir(element) 211 for attr in 'tag', 'attrib', 'text', 'tail': 212 self.assertTrue(hasattr(element, attr), 213 msg='no %s member' % attr) 214 self.assertIn(attr, direlem, 215 msg='no %s visible by dir' % attr) 216 217 check_string(element.tag) 218 check_mapping(element.attrib) 219 if element.text is not None: 220 check_string(element.text) 221 if element.tail is not None: 222 check_string(element.tail) 223 for elem in element: 224 check_element(elem) 225 226 element = ET.Element("tag") 227 check_element(element) 228 tree = ET.ElementTree(element) 229 check_element(tree.getroot()) 230 element = ET.Element("t\xe4g", key="value") 231 tree = ET.ElementTree(element) 232 self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$") 233 element = ET.Element("tag", key="value") 234 235 # Make sure all standard element methods exist. 
236 237 def check_method(method): 238 self.assertTrue(hasattr(method, '__call__'), 239 msg="%s not callable" % method) 240 241 check_method(element.append) 242 check_method(element.extend) 243 check_method(element.insert) 244 check_method(element.remove) 245 check_method(element.getchildren) 246 check_method(element.find) 247 check_method(element.iterfind) 248 check_method(element.findall) 249 check_method(element.findtext) 250 check_method(element.clear) 251 check_method(element.get) 252 check_method(element.set) 253 check_method(element.keys) 254 check_method(element.items) 255 check_method(element.iter) 256 check_method(element.itertext) 257 check_method(element.getiterator) 258 259 # These methods return an iterable. See bug 6472. 260 261 def check_iter(it): 262 check_method(it.__next__) 263 264 check_iter(element.iterfind("tag")) 265 check_iter(element.iterfind("*")) 266 check_iter(tree.iterfind("tag")) 267 check_iter(tree.iterfind("*")) 268 269 # These aliases are provided: 270 271 self.assertEqual(ET.XML, ET.fromstring) 272 self.assertEqual(ET.PI, ET.ProcessingInstruction) 273 274 def test_set_attribute(self): 275 element = ET.Element('tag') 276 277 self.assertEqual(element.tag, 'tag') 278 element.tag = 'Tag' 279 self.assertEqual(element.tag, 'Tag') 280 element.tag = 'TAG' 281 self.assertEqual(element.tag, 'TAG') 282 283 self.assertIsNone(element.text) 284 element.text = 'Text' 285 self.assertEqual(element.text, 'Text') 286 element.text = 'TEXT' 287 self.assertEqual(element.text, 'TEXT') 288 289 self.assertIsNone(element.tail) 290 element.tail = 'Tail' 291 self.assertEqual(element.tail, 'Tail') 292 element.tail = 'TAIL' 293 self.assertEqual(element.tail, 'TAIL') 294 295 self.assertEqual(element.attrib, {}) 296 element.attrib = {'a': 'b', 'c': 'd'} 297 self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'}) 298 element.attrib = {'A': 'B', 'C': 'D'} 299 self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'}) 300 301 def test_simpleops(self): 302 # Basic method 
sanity checks. 303 304 elem = ET.XML("<body><tag/></body>") 305 self.serialize_check(elem, '<body><tag /></body>') 306 e = ET.Element("tag2") 307 elem.append(e) 308 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 309 elem.remove(e) 310 self.serialize_check(elem, '<body><tag /></body>') 311 elem.insert(0, e) 312 self.serialize_check(elem, '<body><tag2 /><tag /></body>') 313 elem.remove(e) 314 elem.extend([e]) 315 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 316 elem.remove(e) 317 318 element = ET.Element("tag", key="value") 319 self.serialize_check(element, '<tag key="value" />') # 1 320 subelement = ET.Element("subtag") 321 element.append(subelement) 322 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2 323 element.insert(0, subelement) 324 self.serialize_check(element, 325 '<tag key="value"><subtag /><subtag /></tag>') # 3 326 element.remove(subelement) 327 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4 328 element.remove(subelement) 329 self.serialize_check(element, '<tag key="value" />') # 5 330 with self.assertRaises(ValueError) as cm: 331 element.remove(subelement) 332 self.assertEqual(str(cm.exception), 'list.remove(x): x not in list') 333 self.serialize_check(element, '<tag key="value" />') # 6 334 element[0:0] = [subelement, subelement, subelement] 335 self.serialize_check(element[1], '<subtag />') 336 self.assertEqual(element[1:9], [element[1], element[2]]) 337 self.assertEqual(element[:9:2], [element[0], element[2]]) 338 del element[1:2] 339 self.serialize_check(element, 340 '<tag key="value"><subtag /><subtag /></tag>') 341 342 def test_cdata(self): 343 # Test CDATA handling (etc). 
344 345 self.serialize_check(ET.XML("<tag>hello</tag>"), 346 '<tag>hello</tag>') 347 self.serialize_check(ET.XML("<tag>hello</tag>"), 348 '<tag>hello</tag>') 349 self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"), 350 '<tag>hello</tag>') 351 352 def test_file_init(self): 353 stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8")) 354 tree = ET.ElementTree(file=stringfile) 355 self.assertEqual(tree.find("tag").tag, 'tag') 356 self.assertEqual(tree.find("section/tag").tag, 'tag') 357 358 tree = ET.ElementTree(file=SIMPLE_XMLFILE) 359 self.assertEqual(tree.find("element").tag, 'element') 360 self.assertEqual(tree.find("element/../empty-element").tag, 361 'empty-element') 362 363 def test_path_cache(self): 364 # Check that the path cache behaves sanely. 365 366 from xml.etree import ElementPath 367 368 elem = ET.XML(SAMPLE_XML) 369 for i in range(10): ET.ElementTree(elem).find('./'+str(i)) 370 cache_len_10 = len(ElementPath._cache) 371 for i in range(10): ET.ElementTree(elem).find('./'+str(i)) 372 self.assertEqual(len(ElementPath._cache), cache_len_10) 373 for i in range(20): ET.ElementTree(elem).find('./'+str(i)) 374 self.assertGreater(len(ElementPath._cache), cache_len_10) 375 for i in range(600): ET.ElementTree(elem).find('./'+str(i)) 376 self.assertLess(len(ElementPath._cache), 500) 377 378 def test_copy(self): 379 # Test copy handling (etc). 380 381 import copy 382 e1 = ET.XML("<tag>hello<foo/></tag>") 383 e2 = copy.copy(e1) 384 e3 = copy.deepcopy(e1) 385 e1.find("foo").tag = "bar" 386 self.serialize_check(e1, '<tag>hello<bar /></tag>') 387 self.serialize_check(e2, '<tag>hello<bar /></tag>') 388 self.serialize_check(e3, '<tag>hello<foo /></tag>') 389 390 def test_attrib(self): 391 # Test attribute handling. 
392 393 elem = ET.Element("tag") 394 elem.get("key") # 1.1 395 self.assertEqual(elem.get("key", "default"), 'default') # 1.2 396 397 elem.set("key", "value") 398 self.assertEqual(elem.get("key"), 'value') # 1.3 399 400 elem = ET.Element("tag", key="value") 401 self.assertEqual(elem.get("key"), 'value') # 2.1 402 self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2 403 404 attrib = {"key": "value"} 405 elem = ET.Element("tag", attrib) 406 attrib.clear() # check for aliasing issues 407 self.assertEqual(elem.get("key"), 'value') # 3.1 408 self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2 409 410 attrib = {"key": "value"} 411 elem = ET.Element("tag", **attrib) 412 attrib.clear() # check for aliasing issues 413 self.assertEqual(elem.get("key"), 'value') # 4.1 414 self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2 415 416 elem = ET.Element("tag", {"key": "other"}, key="value") 417 self.assertEqual(elem.get("key"), 'value') # 5.1 418 self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2 419 420 elem = ET.Element('test') 421 elem.text = "aa" 422 elem.set('testa', 'testval') 423 elem.set('testb', 'test2') 424 self.assertEqual(ET.tostring(elem), 425 b'<test testa="testval" testb="test2">aa</test>') 426 self.assertEqual(sorted(elem.keys()), ['testa', 'testb']) 427 self.assertEqual(sorted(elem.items()), 428 [('testa', 'testval'), ('testb', 'test2')]) 429 self.assertEqual(elem.attrib['testb'], 'test2') 430 elem.attrib['testb'] = 'test1' 431 elem.attrib['testc'] = 'test2' 432 self.assertEqual(ET.tostring(elem), 433 b'<test testa="testval" testb="test1" testc="test2">aa</test>') 434 435 elem = ET.Element('test') 436 elem.set('a', '\r') 437 elem.set('b', '\r\n') 438 elem.set('c', '\t\n\r ') 439 elem.set('d', '\n\n') 440 self.assertEqual(ET.tostring(elem), 441 b'<test a=" " b=" " c="	 " d=" " />') 442 443 def test_makeelement(self): 444 # Test makeelement handling. 
445 446 elem = ET.Element("tag") 447 attrib = {"key": "value"} 448 subelem = elem.makeelement("subtag", attrib) 449 self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing") 450 elem.append(subelem) 451 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 452 453 elem.clear() 454 self.serialize_check(elem, '<tag />') 455 elem.append(subelem) 456 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 457 elem.extend([subelem, subelem]) 458 self.serialize_check(elem, 459 '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>') 460 elem[:] = [subelem] 461 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 462 elem[:] = tuple([subelem]) 463 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 464 465 def test_parsefile(self): 466 # Test parsing from file. 467 468 tree = ET.parse(SIMPLE_XMLFILE) 469 stream = io.StringIO() 470 tree.write(stream, encoding='unicode') 471 self.assertEqual(stream.getvalue(), 472 '<root>\n' 473 ' <element key="value">text</element>\n' 474 ' <element>text</element>tail\n' 475 ' <empty-element />\n' 476 '</root>') 477 tree = ET.parse(SIMPLE_NS_XMLFILE) 478 stream = io.StringIO() 479 tree.write(stream, encoding='unicode') 480 self.assertEqual(stream.getvalue(), 481 '<ns0:root xmlns:ns0="namespace">\n' 482 ' <ns0:element key="value">text</ns0:element>\n' 483 ' <ns0:element>text</ns0:element>tail\n' 484 ' <ns0:empty-element />\n' 485 '</ns0:root>') 486 487 with open(SIMPLE_XMLFILE) as f: 488 data = f.read() 489 490 parser = ET.XMLParser() 491 self.assertRegex(parser.version, r'^Expat ') 492 parser.feed(data) 493 self.serialize_check(parser.close(), 494 '<root>\n' 495 ' <element key="value">text</element>\n' 496 ' <element>text</element>tail\n' 497 ' <empty-element />\n' 498 '</root>') 499 500 target = ET.TreeBuilder() 501 parser = ET.XMLParser(target=target) 502 parser.feed(data) 503 self.serialize_check(parser.close(), 504 '<root>\n' 505 ' <element 
key="value">text</element>\n' 506 ' <element>text</element>tail\n' 507 ' <empty-element />\n' 508 '</root>') 509 510 def test_parseliteral(self): 511 element = ET.XML("<html><body>text</body></html>") 512 self.assertEqual(ET.tostring(element, encoding='unicode'), 513 '<html><body>text</body></html>') 514 element = ET.fromstring("<html><body>text</body></html>") 515 self.assertEqual(ET.tostring(element, encoding='unicode'), 516 '<html><body>text</body></html>') 517 sequence = ["<html><body>", "text</bo", "dy></html>"] 518 element = ET.fromstringlist(sequence) 519 self.assertEqual(ET.tostring(element), 520 b'<html><body>text</body></html>') 521 self.assertEqual(b"".join(ET.tostringlist(element)), 522 b'<html><body>text</body></html>') 523 self.assertEqual(ET.tostring(element, "ascii"), 524 b"<?xml version='1.0' encoding='ascii'?>\n" 525 b"<html><body>text</body></html>") 526 _, ids = ET.XMLID("<html><body>text</body></html>") 527 self.assertEqual(len(ids), 0) 528 _, ids = ET.XMLID("<html><body id='body'>text</body></html>") 529 self.assertEqual(len(ids), 1) 530 self.assertEqual(ids["body"].tag, 'body') 531 532 def test_iterparse(self): 533 # Test iterparse interface. 
534 535 iterparse = ET.iterparse 536 537 context = iterparse(SIMPLE_XMLFILE) 538 action, elem = next(context) 539 self.assertEqual((action, elem.tag), ('end', 'element')) 540 self.assertEqual([(action, elem.tag) for action, elem in context], [ 541 ('end', 'element'), 542 ('end', 'empty-element'), 543 ('end', 'root'), 544 ]) 545 self.assertEqual(context.root.tag, 'root') 546 547 context = iterparse(SIMPLE_NS_XMLFILE) 548 self.assertEqual([(action, elem.tag) for action, elem in context], [ 549 ('end', '{namespace}element'), 550 ('end', '{namespace}element'), 551 ('end', '{namespace}empty-element'), 552 ('end', '{namespace}root'), 553 ]) 554 555 events = () 556 context = iterparse(SIMPLE_XMLFILE, events) 557 self.assertEqual([(action, elem.tag) for action, elem in context], []) 558 559 events = () 560 context = iterparse(SIMPLE_XMLFILE, events=events) 561 self.assertEqual([(action, elem.tag) for action, elem in context], []) 562 563 events = ("start", "end") 564 context = iterparse(SIMPLE_XMLFILE, events) 565 self.assertEqual([(action, elem.tag) for action, elem in context], [ 566 ('start', 'root'), 567 ('start', 'element'), 568 ('end', 'element'), 569 ('start', 'element'), 570 ('end', 'element'), 571 ('start', 'empty-element'), 572 ('end', 'empty-element'), 573 ('end', 'root'), 574 ]) 575 576 events = ("start", "end", "start-ns", "end-ns") 577 context = iterparse(SIMPLE_NS_XMLFILE, events) 578 self.assertEqual([(action, elem.tag) if action in ("start", "end") 579 else (action, elem) 580 for action, elem in context], [ 581 ('start-ns', ('', 'namespace')), 582 ('start', '{namespace}root'), 583 ('start', '{namespace}element'), 584 ('end', '{namespace}element'), 585 ('start', '{namespace}element'), 586 ('end', '{namespace}element'), 587 ('start', '{namespace}empty-element'), 588 ('end', '{namespace}empty-element'), 589 ('end', '{namespace}root'), 590 ('end-ns', None), 591 ]) 592 593 events = ('start-ns', 'end-ns') 594 context = iterparse(io.StringIO(r"<root xmlns=''/>"), 
events) 595 res = [action for action, elem in context] 596 self.assertEqual(res, ['start-ns', 'end-ns']) 597 598 events = ("start", "end", "bogus") 599 with open(SIMPLE_XMLFILE, "rb") as f: 600 with self.assertRaises(ValueError) as cm: 601 iterparse(f, events) 602 self.assertFalse(f.closed) 603 self.assertEqual(str(cm.exception), "unknown event 'bogus'") 604 605 with support.check_no_resource_warning(self): 606 with self.assertRaises(ValueError) as cm: 607 iterparse(SIMPLE_XMLFILE, events) 608 self.assertEqual(str(cm.exception), "unknown event 'bogus'") 609 del cm 610 611 source = io.BytesIO( 612 b"<?xml version='1.0' encoding='iso-8859-1'?>\n" 613 b"<body xmlns='http://éffbot.org/ns'\n" 614 b" xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n") 615 events = ("start-ns",) 616 context = iterparse(source, events) 617 self.assertEqual([(action, elem) for action, elem in context], [ 618 ('start-ns', ('', 'http://\xe9ffbot.org/ns')), 619 ('start-ns', ('cl\xe9', 'http://effbot.org/ns')), 620 ]) 621 622 source = io.StringIO("<document />junk") 623 it = iterparse(source) 624 action, elem = next(it) 625 self.assertEqual((action, elem.tag), ('end', 'document')) 626 with self.assertRaises(ET.ParseError) as cm: 627 next(it) 628 self.assertEqual(str(cm.exception), 629 'junk after document element: line 1, column 12') 630 631 self.addCleanup(support.unlink, TESTFN) 632 with open(TESTFN, "wb") as f: 633 f.write(b"<document />junk") 634 it = iterparse(TESTFN) 635 action, elem = next(it) 636 self.assertEqual((action, elem.tag), ('end', 'document')) 637 with support.check_no_resource_warning(self): 638 with self.assertRaises(ET.ParseError) as cm: 639 next(it) 640 self.assertEqual(str(cm.exception), 641 'junk after document element: line 1, column 12') 642 del cm, it 643 644 def test_writefile(self): 645 elem = ET.Element("tag") 646 elem.text = "text" 647 self.serialize_check(elem, '<tag>text</tag>') 648 ET.SubElement(elem, "subtag").text = "subtext" 649 self.serialize_check(elem, 
'<tag>text<subtag>subtext</subtag></tag>') 650 651 # Test tag suppression 652 elem.tag = None 653 self.serialize_check(elem, 'text<subtag>subtext</subtag>') 654 elem.insert(0, ET.Comment("comment")) 655 self.serialize_check(elem, 656 'text<!--comment--><subtag>subtext</subtag>') # assumes 1.3 657 658 elem[0] = ET.PI("key", "value") 659 self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>') 660 661 def test_custom_builder(self): 662 # Test parser w. custom builder. 663 664 with open(SIMPLE_XMLFILE) as f: 665 data = f.read() 666 class Builder(list): 667 def start(self, tag, attrib): 668 self.append(("start", tag)) 669 def end(self, tag): 670 self.append(("end", tag)) 671 def data(self, text): 672 pass 673 builder = Builder() 674 parser = ET.XMLParser(target=builder) 675 parser.feed(data) 676 self.assertEqual(builder, [ 677 ('start', 'root'), 678 ('start', 'element'), 679 ('end', 'element'), 680 ('start', 'element'), 681 ('end', 'element'), 682 ('start', 'empty-element'), 683 ('end', 'empty-element'), 684 ('end', 'root'), 685 ]) 686 687 with open(SIMPLE_NS_XMLFILE) as f: 688 data = f.read() 689 class Builder(list): 690 def start(self, tag, attrib): 691 self.append(("start", tag)) 692 def end(self, tag): 693 self.append(("end", tag)) 694 def data(self, text): 695 pass 696 def pi(self, target, data): 697 self.append(("pi", target, data)) 698 def comment(self, data): 699 self.append(("comment", data)) 700 def start_ns(self, prefix, uri): 701 self.append(("start-ns", prefix, uri)) 702 def end_ns(self, prefix): 703 self.append(("end-ns", prefix)) 704 builder = Builder() 705 parser = ET.XMLParser(target=builder) 706 parser.feed(data) 707 self.assertEqual(builder, [ 708 ('pi', 'pi', 'data'), 709 ('comment', ' comment '), 710 ('start-ns', '', 'namespace'), 711 ('start', '{namespace}root'), 712 ('start', '{namespace}element'), 713 ('end', '{namespace}element'), 714 ('start', '{namespace}element'), 715 ('end', '{namespace}element'), 716 ('start', 
'{namespace}empty-element'), 717 ('end', '{namespace}empty-element'), 718 ('end', '{namespace}root'), 719 ('end-ns', ''), 720 ]) 721 722 def test_custom_builder_only_end_ns(self): 723 class Builder(list): 724 def end_ns(self, prefix): 725 self.append(("end-ns", prefix)) 726 727 builder = Builder() 728 parser = ET.XMLParser(target=builder) 729 parser.feed(textwrap.dedent("""\ 730 <?pi data?> 731 <!-- comment --> 732 <root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'> 733 <a:element key='value'>text</a:element> 734 <p:element>text</p:element>tail 735 <empty-element/> 736 </root> 737 """)) 738 self.assertEqual(builder, [ 739 ('end-ns', 'a'), 740 ('end-ns', 'p'), 741 ('end-ns', ''), 742 ]) 743 744 # Element.getchildren() and ElementTree.getiterator() are deprecated. 745 @checkwarnings(("This method will be removed in future versions. " 746 "Use .+ instead.", 747 DeprecationWarning)) 748 def test_getchildren(self): 749 # Test Element.getchildren() 750 751 with open(SIMPLE_XMLFILE, "rb") as f: 752 tree = ET.parse(f) 753 self.assertEqual([summarize_list(elem.getchildren()) 754 for elem in tree.getroot().iter()], [ 755 ['element', 'element', 'empty-element'], 756 [], 757 [], 758 [], 759 ]) 760 self.assertEqual([summarize_list(elem.getchildren()) 761 for elem in tree.getiterator()], [ 762 ['element', 'element', 'empty-element'], 763 [], 764 [], 765 [], 766 ]) 767 768 elem = ET.XML(SAMPLE_XML) 769 self.assertEqual(len(elem.getchildren()), 3) 770 self.assertEqual(len(elem[2].getchildren()), 1) 771 self.assertEqual(elem[:], elem.getchildren()) 772 child1 = elem[0] 773 child2 = elem[2] 774 del elem[1:2] 775 self.assertEqual(len(elem.getchildren()), 2) 776 self.assertEqual(child1, elem[0]) 777 self.assertEqual(child2, elem[1]) 778 elem[0:2] = [child2, child1] 779 self.assertEqual(child2, elem[0]) 780 self.assertEqual(child1, elem[1]) 781 self.assertNotEqual(child1, elem[0]) 782 elem.clear() 783 self.assertEqual(elem.getchildren(), []) 784 785 def test_writestring(self): 786 
elem = ET.XML("<html><body>text</body></html>") 787 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>') 788 elem = ET.fromstring("<html><body>text</body></html>") 789 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>') 790 791 def test_tostring_default_namespace(self): 792 elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>') 793 self.assertEqual( 794 ET.tostring(elem, encoding='unicode'), 795 '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>' 796 ) 797 self.assertEqual( 798 ET.tostring(elem, encoding='unicode', default_namespace='http://effbot.org/ns'), 799 '<body xmlns="http://effbot.org/ns"><tag /></body>' 800 ) 801 802 def test_tostring_default_namespace_different_namespace(self): 803 elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>') 804 self.assertEqual( 805 ET.tostring(elem, encoding='unicode', default_namespace='foobar'), 806 '<ns1:body xmlns="foobar" xmlns:ns1="http://effbot.org/ns"><ns1:tag /></ns1:body>' 807 ) 808 809 def test_tostring_default_namespace_original_no_namespace(self): 810 elem = ET.XML('<body><tag/></body>') 811 EXPECTED_MSG = '^cannot use non-qualified names with default_namespace option$' 812 with self.assertRaisesRegex(ValueError, EXPECTED_MSG): 813 ET.tostring(elem, encoding='unicode', default_namespace='foobar') 814 815 def test_tostring_no_xml_declaration(self): 816 elem = ET.XML('<body><tag/></body>') 817 self.assertEqual( 818 ET.tostring(elem, encoding='unicode'), 819 '<body><tag /></body>' 820 ) 821 822 def test_tostring_xml_declaration(self): 823 elem = ET.XML('<body><tag/></body>') 824 self.assertEqual( 825 ET.tostring(elem, encoding='utf8', xml_declaration=True), 826 b"<?xml version='1.0' encoding='utf8'?>\n<body><tag /></body>" 827 ) 828 829 def test_tostring_xml_declaration_unicode_encoding(self): 830 elem = ET.XML('<body><tag/></body>') 831 preferredencoding = locale.getpreferredencoding() 832 self.assertEqual( 833 f"<?xml version='1.0' 
encoding='{preferredencoding}'?>\n<body><tag /></body>", 834 ET.tostring(elem, encoding='unicode', xml_declaration=True) 835 ) 836 837 def test_tostring_xml_declaration_cases(self): 838 elem = ET.XML('<body><tag>ø</tag></body>') 839 preferredencoding = locale.getpreferredencoding() 840 TESTCASES = [ 841 # (expected_retval, encoding, xml_declaration) 842 # ... xml_declaration = None 843 (b'<body><tag>ø</tag></body>', None, None), 844 (b'<body><tag>\xc3\xb8</tag></body>', 'UTF-8', None), 845 (b'<body><tag>ø</tag></body>', 'US-ASCII', None), 846 (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n" 847 b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', None), 848 ('<body><tag>ø</tag></body>', 'unicode', None), 849 850 # ... xml_declaration = False 851 (b"<body><tag>ø</tag></body>", None, False), 852 (b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', False), 853 (b"<body><tag>ø</tag></body>", 'US-ASCII', False), 854 (b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', False), 855 ("<body><tag>ø</tag></body>", 'unicode', False), 856 857 # ... 
xml_declaration = True 858 (b"<?xml version='1.0' encoding='us-ascii'?>\n" 859 b"<body><tag>ø</tag></body>", None, True), 860 (b"<?xml version='1.0' encoding='UTF-8'?>\n" 861 b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', True), 862 (b"<?xml version='1.0' encoding='US-ASCII'?>\n" 863 b"<body><tag>ø</tag></body>", 'US-ASCII', True), 864 (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n" 865 b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', True), 866 (f"<?xml version='1.0' encoding='{preferredencoding}'?>\n" 867 "<body><tag>ø</tag></body>", 'unicode', True), 868 869 ] 870 for expected_retval, encoding, xml_declaration in TESTCASES: 871 with self.subTest(f'encoding={encoding} ' 872 f'xml_declaration={xml_declaration}'): 873 self.assertEqual( 874 ET.tostring( 875 elem, 876 encoding=encoding, 877 xml_declaration=xml_declaration 878 ), 879 expected_retval 880 ) 881 882 def test_tostringlist_default_namespace(self): 883 elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>') 884 self.assertEqual( 885 ''.join(ET.tostringlist(elem, encoding='unicode')), 886 '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>' 887 ) 888 self.assertEqual( 889 ''.join(ET.tostringlist(elem, encoding='unicode', default_namespace='http://effbot.org/ns')), 890 '<body xmlns="http://effbot.org/ns"><tag /></body>' 891 ) 892 893 def test_tostringlist_xml_declaration(self): 894 elem = ET.XML('<body><tag/></body>') 895 self.assertEqual( 896 ''.join(ET.tostringlist(elem, encoding='unicode')), 897 '<body><tag /></body>' 898 ) 899 self.assertEqual( 900 b''.join(ET.tostringlist(elem, xml_declaration=True)), 901 b"<?xml version='1.0' encoding='us-ascii'?>\n<body><tag /></body>" 902 ) 903 904 preferredencoding = locale.getpreferredencoding() 905 stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True) 906 self.assertEqual( 907 ''.join(stringlist), 908 f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>" 909 ) 910 self.assertRegex(stringlist[0], 
r"^<\?xml version='1.0' encoding='.+'?>") 911 self.assertEqual(['<body', '>', '<tag', ' />', '</body>'], stringlist[1:]) 912 913 def test_encoding(self): 914 def check(encoding, body=''): 915 xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" % 916 (encoding, body)) 917 self.assertEqual(ET.XML(xml.encode(encoding)).text, body) 918 self.assertEqual(ET.XML(xml).text, body) 919 check("ascii", 'a') 920 check("us-ascii", 'a') 921 check("iso-8859-1", '\xbd') 922 check("iso-8859-15", '\u20ac') 923 check("cp437", '\u221a') 924 check("mac-roman", '\u02da') 925 926 def xml(encoding): 927 return "<?xml version='1.0' encoding='%s'?><xml />" % encoding 928 def bxml(encoding): 929 return xml(encoding).encode(encoding) 930 supported_encodings = [ 931 'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le', 932 'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5', 933 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10', 934 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', 935 'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 936 'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 937 'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125', 938 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 939 'cp1256', 'cp1257', 'cp1258', 940 'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2', 941 'mac-roman', 'mac-turkish', 942 'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', 943 'iso2022-jp-3', 'iso2022-jp-ext', 944 'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 945 'hz', 'ptcp154', 946 ] 947 for encoding in supported_encodings: 948 self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />') 949 950 unsupported_ascii_compatible_encodings = [ 951 'big5', 'big5hkscs', 952 'cp932', 'cp949', 'cp950', 953 'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr', 954 'gb2312', 'gbk', 'gb18030', 955 'iso2022-kr', 'johab', 956 'shift-jis', 'shift-jis-2004', 'shift-jisx0213', 957 'utf-7', 958 ] 959 for 
encoding in unsupported_ascii_compatible_encodings: 960 self.assertRaises(ValueError, ET.XML, bxml(encoding)) 961 962 unsupported_ascii_incompatible_encodings = [ 963 'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140', 964 'utf_32', 'utf_32_be', 'utf_32_le', 965 ] 966 for encoding in unsupported_ascii_incompatible_encodings: 967 self.assertRaises(ET.ParseError, ET.XML, bxml(encoding)) 968 969 self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii')) 970 self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii')) 971 972 def test_methods(self): 973 # Test serialization methods. 974 975 e = ET.XML("<html><link/><script>1 < 2</script></html>") 976 e.tail = "\n" 977 self.assertEqual(serialize(e), 978 '<html><link /><script>1 < 2</script></html>\n') 979 self.assertEqual(serialize(e, method=None), 980 '<html><link /><script>1 < 2</script></html>\n') 981 self.assertEqual(serialize(e, method="xml"), 982 '<html><link /><script>1 < 2</script></html>\n') 983 self.assertEqual(serialize(e, method="html"), 984 '<html><link><script>1 < 2</script></html>\n') 985 self.assertEqual(serialize(e, method="text"), '1 < 2\n') 986 987 def test_issue18347(self): 988 e = ET.XML('<html><CamelCase>text</CamelCase></html>') 989 self.assertEqual(serialize(e), 990 '<html><CamelCase>text</CamelCase></html>') 991 self.assertEqual(serialize(e, method="html"), 992 '<html><CamelCase>text</CamelCase></html>') 993 994 def test_entity(self): 995 # Test entity handling. 
996 997 # 1) good entities 998 999 e = ET.XML("<document title='舰'>test</document>") 1000 self.assertEqual(serialize(e, encoding="us-ascii"), 1001 b'<document title="舰">test</document>') 1002 self.serialize_check(e, '<document title="\u8230">test</document>') 1003 1004 # 2) bad entities 1005 1006 with self.assertRaises(ET.ParseError) as cm: 1007 ET.XML("<document>&entity;</document>") 1008 self.assertEqual(str(cm.exception), 1009 'undefined entity: line 1, column 10') 1010 1011 with self.assertRaises(ET.ParseError) as cm: 1012 ET.XML(ENTITY_XML) 1013 self.assertEqual(str(cm.exception), 1014 'undefined entity &entity;: line 5, column 10') 1015 1016 # 3) custom entity 1017 1018 parser = ET.XMLParser() 1019 parser.entity["entity"] = "text" 1020 parser.feed(ENTITY_XML) 1021 root = parser.close() 1022 self.serialize_check(root, '<document>text</document>') 1023 1024 # 4) external (SYSTEM) entity 1025 1026 with self.assertRaises(ET.ParseError) as cm: 1027 ET.XML(EXTERNAL_ENTITY_XML) 1028 self.assertEqual(str(cm.exception), 1029 'undefined entity &entity;: line 4, column 10') 1030 1031 def test_namespace(self): 1032 # Test namespace issues. 
1033 1034 # 1) xml namespace 1035 1036 elem = ET.XML("<tag xml:lang='en' />") 1037 self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1 1038 1039 # 2) other "well-known" namespaces 1040 1041 elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />") 1042 self.serialize_check(elem, 1043 '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1 1044 1045 elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />") 1046 self.serialize_check(elem, 1047 '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2 1048 1049 elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />") 1050 self.serialize_check(elem, 1051 '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3 1052 1053 # 3) unknown namespaces 1054 elem = ET.XML(SAMPLE_XML_NS) 1055 self.serialize_check(elem, 1056 '<ns0:body xmlns:ns0="http://effbot.org/ns">\n' 1057 ' <ns0:tag>text</ns0:tag>\n' 1058 ' <ns0:tag />\n' 1059 ' <ns0:section>\n' 1060 ' <ns0:tag>subtext</ns0:tag>\n' 1061 ' </ns0:section>\n' 1062 '</ns0:body>') 1063 1064 def test_qname(self): 1065 # Test QName handling. 
1066 1067 # 1) decorated tags 1068 1069 elem = ET.Element("{uri}tag") 1070 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1 1071 elem = ET.Element(ET.QName("{uri}tag")) 1072 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2 1073 elem = ET.Element(ET.QName("uri", "tag")) 1074 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3 1075 elem = ET.Element(ET.QName("uri", "tag")) 1076 subelem = ET.SubElement(elem, ET.QName("uri", "tag1")) 1077 subelem = ET.SubElement(elem, ET.QName("uri", "tag2")) 1078 self.serialize_check(elem, 1079 '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4 1080 1081 # 2) decorated attributes 1082 1083 elem.clear() 1084 elem.attrib["{uri}key"] = "value" 1085 self.serialize_check(elem, 1086 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1 1087 1088 elem.clear() 1089 elem.attrib[ET.QName("{uri}key")] = "value" 1090 self.serialize_check(elem, 1091 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2 1092 1093 # 3) decorated values are not converted by default, but the 1094 # QName wrapper can be used for values 1095 1096 elem.clear() 1097 elem.attrib["{uri}key"] = "{uri}value" 1098 self.serialize_check(elem, 1099 '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1 1100 1101 elem.clear() 1102 elem.attrib["{uri}key"] = ET.QName("{uri}value") 1103 self.serialize_check(elem, 1104 '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2 1105 1106 elem.clear() 1107 subelem = ET.Element("tag") 1108 subelem.attrib["{uri1}key"] = ET.QName("{uri2}value") 1109 elem.append(subelem) 1110 elem.append(subelem) 1111 self.serialize_check(elem, 1112 '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">' 1113 '<tag ns1:key="ns2:value" />' 1114 '<tag ns1:key="ns2:value" />' 1115 '</ns0:tag>') # 3.3 1116 1117 # 4) Direct QName tests 1118 1119 self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag') 1120 self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag') 1121 q1 = ET.QName('ns', 'tag') 1122 q2 
= ET.QName('ns', 'tag') 1123 self.assertEqual(q1, q2) 1124 q2 = ET.QName('ns', 'other-tag') 1125 self.assertNotEqual(q1, q2) 1126 self.assertNotEqual(q1, 'ns:tag') 1127 self.assertEqual(q1, '{ns}tag') 1128 1129 def test_doctype_public(self): 1130 # Test PUBLIC doctype. 1131 1132 elem = ET.XML('<!DOCTYPE html PUBLIC' 1133 ' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 1134 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 1135 '<html>text</html>') 1136 1137 def test_xpath_tokenizer(self): 1138 # Test the XPath tokenizer. 1139 from xml.etree import ElementPath 1140 def check(p, expected, namespaces=None): 1141 self.assertEqual([op or tag 1142 for op, tag in ElementPath.xpath_tokenizer(p, namespaces)], 1143 expected) 1144 1145 # tests from the xml specification 1146 check("*", ['*']) 1147 check("text()", ['text', '()']) 1148 check("@name", ['@', 'name']) 1149 check("@*", ['@', '*']) 1150 check("para[1]", ['para', '[', '1', ']']) 1151 check("para[last()]", ['para', '[', 'last', '()', ']']) 1152 check("*/para", ['*', '/', 'para']) 1153 check("/doc/chapter[5]/section[2]", 1154 ['/', 'doc', '/', 'chapter', '[', '5', ']', 1155 '/', 'section', '[', '2', ']']) 1156 check("chapter//para", ['chapter', '//', 'para']) 1157 check("//para", ['//', 'para']) 1158 check("//olist/item", ['//', 'olist', '/', 'item']) 1159 check(".", ['.']) 1160 check(".//para", ['.', '//', 'para']) 1161 check("..", ['..']) 1162 check("../@lang", ['..', '/', '@', 'lang']) 1163 check("chapter[title]", ['chapter', '[', 'title', ']']) 1164 check("employee[@secretary and @assistant]", ['employee', 1165 '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']) 1166 1167 # additional tests 1168 check("@{ns}attr", ['@', '{ns}attr']) 1169 check("{http://spam}egg", ['{http://spam}egg']) 1170 check("./spam.egg", ['.', '/', 'spam.egg']) 1171 check(".//{http://spam}egg", ['.', '//', '{http://spam}egg']) 1172 1173 # wildcard tags 1174 check("{ns}*", ['{ns}*']) 1175 check("{}*", ['{}*']) 1176 
check("{*}tag", ['{*}tag']) 1177 check("{*}*", ['{*}*']) 1178 check(".//{*}tag", ['.', '//', '{*}tag']) 1179 1180 # namespace prefix resolution 1181 check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'], 1182 {'xsd': 'http://www.w3.org/2001/XMLSchema'}) 1183 check("type", ['{http://www.w3.org/2001/XMLSchema}type'], 1184 {'': 'http://www.w3.org/2001/XMLSchema'}) 1185 check("@xsd:type", ['@', '{http://www.w3.org/2001/XMLSchema}type'], 1186 {'xsd': 'http://www.w3.org/2001/XMLSchema'}) 1187 check("@type", ['@', 'type'], 1188 {'': 'http://www.w3.org/2001/XMLSchema'}) 1189 check("@{*}type", ['@', '{*}type'], 1190 {'': 'http://www.w3.org/2001/XMLSchema'}) 1191 check("@{ns}attr", ['@', '{ns}attr'], 1192 {'': 'http://www.w3.org/2001/XMLSchema', 1193 'ns': 'http://www.w3.org/2001/XMLSchema'}) 1194 1195 def test_processinginstruction(self): 1196 # Test ProcessingInstruction directly 1197 1198 self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')), 1199 b'<?test instruction?>') 1200 self.assertEqual(ET.tostring(ET.PI('test', 'instruction')), 1201 b'<?test instruction?>') 1202 1203 # Issue #2746 1204 1205 self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')), 1206 b'<?test <testing&>?>') 1207 self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'), 1208 b"<?xml version='1.0' encoding='latin-1'?>\n" 1209 b"<?test <testing&>\xe3?>") 1210 1211 def test_html_empty_elems_serialization(self): 1212 # issue 15970 1213 # from http://www.w3.org/TR/html401/index/elements.html 1214 for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR', 1215 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']: 1216 for elem in [element, element.lower()]: 1217 expected = '<%s>' % elem 1218 serialized = serialize(ET.XML('<%s />' % elem), method='html') 1219 self.assertEqual(serialized, expected) 1220 serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)), 1221 method='html') 1222 self.assertEqual(serialized, expected) 1223 1224 
    def test_dump_attribute_order(self):
        # See BPO 34160
        # ET.dump() writes to stdout; the expected output lists the
        # attributes in the order they were passed as keyword arguments.
        e = ET.Element('cirriculum', status='public', company='example')
        with support.captured_stdout() as stdout:
            ET.dump(e)
        self.assertEqual(stdout.getvalue(),
                         '<cirriculum status="public" company="example" />\n')

    def test_tree_write_attribute_order(self):
        # See BPO 34160
        # Same ordering check as above, but through ElementTree.write()
        # (via the module-level serialize() helper), for both the default
        # method and method='html'.
        root = ET.Element('cirriculum', status='public', company='example')
        self.assertEqual(serialize(root),
                         '<cirriculum status="public" company="example" />')
        self.assertEqual(serialize(root, method='html'),
                '<cirriculum status="public" company="example"></cirriculum>')


class XMLPullParserTest(unittest.TestCase):
    # Tests for ET.XMLPullParser: data is pushed in with feed() and the
    # resulting events are pulled back out with read_events().

    def _feed(self, parser, data, chunk_size=None):
        # Feed `data` to `parser`, either in a single call (chunk_size is
        # None) or in consecutive slices of at most `chunk_size` characters.
        if chunk_size is None:
            parser.feed(data)
        else:
            for i in range(0, len(data), chunk_size):
                parser.feed(data[i:i+chunk_size])

    def assert_events(self, parser, expected, max_events=None):
        # Compare pending events against `expected`, where each event is
        # summarized as (event, (elem.tag, elem.text)).  At most `max_events`
        # events are consumed; None places no limit on islice().
        self.assertEqual(
            [(event, (elem.tag, elem.text))
             for event, elem in islice(parser.read_events(), max_events)],
            expected)

    def assert_event_tuples(self, parser, expected, max_events=None):
        # Compare the raw tuples yielded by read_events() against `expected`
        # without inspecting the payload (used below for the namespace
        # events, whose payload is a tuple or None rather than an element).
        self.assertEqual(
            list(islice(parser.read_events(), max_events)),
            expected)

    def assert_event_tags(self, parser, expected, max_events=None):
        # Compare pending events against `expected`, where each event is
        # summarized as (action, elem.tag).
        events = islice(parser.read_events(), max_events)
        self.assertEqual([(action, elem.tag) for action, elem in events],
                         expected)

    def test_simple_xml(self):
        # Parser created without an `events` argument; every assertion below
        # expects 'end' events only.  The whole scenario is repeated with the
        # input fed whole (None) and in 1- and 5-character chunks.
        for chunk_size in (None, 1, 5):
            with self.subTest(chunk_size=chunk_size):
                parser = ET.XMLPullParser()
                self.assert_event_tags(parser, [])
                self._feed(parser, "<!-- comment -->\n", chunk_size)
                self.assert_event_tags(parser, [])
                # The closing tag is left unterminated here, so no 'end'
                # event is expected until the '>' is fed below.
                self._feed(parser,
                           "<root>\n <element key='value'>text</element",
                           chunk_size)
                self.assert_event_tags(parser, [])
                self._feed(parser, ">\n", chunk_size)
                self.assert_event_tags(parser, [('end', 'element')])
                self._feed(parser, "<element>text</element>tail\n", chunk_size)
                self._feed(parser, "<empty-element/>\n", chunk_size)
                self.assert_event_tags(parser, [
                    ('end', 'element'),
                    ('end', 'empty-element'),
                    ])
                self._feed(parser, "</root>\n", chunk_size)
                self.assert_event_tags(parser, [('end', 'root')])
                self.assertIsNone(parser.close())

    def test_feed_while_iterating(self):
        # A single read_events() iterator is obtained before any data is fed
        # and must keep yielding events produced by later feed() calls.
        parser = ET.XMLPullParser()
        it = parser.read_events()
        self._feed(parser, "<root>\n <element key='value'>text</element>\n")
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'element'))
        self._feed(parser, "</root>\n")
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'root'))
        # Nothing further was fed, so the iterator is exhausted.
        with self.assertRaises(StopIteration):
            next(it)

    def test_simple_xml_with_ns(self):
        # Same scenario as test_simple_xml, but the root declares a default
        # namespace, so the expected tags carry the '{namespace}' prefix.
        parser = ET.XMLPullParser()
        self.assert_event_tags(parser, [])
        self._feed(parser, "<!-- comment -->\n")
        self.assert_event_tags(parser, [])
        self._feed(parser, "<root xmlns='namespace'>\n")
        self.assert_event_tags(parser, [])
        self._feed(parser, "<element key='value'>text</element")
        self.assert_event_tags(parser, [])
        self._feed(parser, ">\n")
        self.assert_event_tags(parser, [('end', '{namespace}element')])
        self._feed(parser, "<element>text</element>tail\n")
        self._feed(parser, "<empty-element/>\n")
        self.assert_event_tags(parser, [
            ('end', '{namespace}element'),
            ('end', '{namespace}empty-element'),
            ])
        self._feed(parser, "</root>\n")
        self.assert_event_tags(parser, [('end', '{namespace}root')])
        self.assertIsNone(parser.close())

    def test_ns_events(self):
        # Only namespace events are requested: 'start-ns' is expected to
        # carry a (prefix, uri) tuple and 'end-ns' to carry None.
        parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
        self._feed(parser, "<!-- comment -->\n")
        self._feed(parser, "<root xmlns='namespace'>\n")
        self.assertEqual(
            list(parser.read_events()),
            [('start-ns', ('', 'namespace'))])
        self._feed(parser, "<element key='value'>text</element")
        self._feed(parser, ">\n")
        self._feed(parser, "<element>text</element>tail\n")
        self._feed(parser, "<empty-element/>\n")
        self._feed(parser, "</root>\n")
        self.assertEqual(list(parser.read_events()), [('end-ns', None)])
        self.assertIsNone(parser.close())

    def test_ns_events_start(self):
        # 'start-ns' combined with element events but without 'end-ns'.
        parser = ET.XMLPullParser(events=('start-ns', 'start', 'end'))
        self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
        # The two namespace declarations are expected first; max_events
        # leaves the following 'start' event queued for the next assertion.
        self.assert_event_tuples(parser, [
            ('start-ns', ('', 'abc')),
            ('start-ns', ('p', 'xyz')),
        ], max_events=2)
        self.assert_event_tags(parser, [
            ('start', '{abc}tag'),
        ], max_events=1)

        self._feed(parser, "<child />\n")
        self.assert_event_tags(parser, [
            ('start', '{abc}child'),
            ('end', '{abc}child'),
        ])

        self._feed(parser, "</tag>\n")
        # Events are read only after close(): they must still be available.
        parser.close()
        self.assert_event_tags(parser, [
            ('end', '{abc}tag'),
        ])

    def test_ns_events_start_end(self):
        # As test_ns_events_start, with 'end-ns' requested as well: one
        # ('end-ns', None) event is expected per declared namespace after
        # the root element's 'end' event.
        parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns'))
        self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
        self.assert_event_tuples(parser, [
            ('start-ns', ('', 'abc')),
            ('start-ns', ('p', 'xyz')),
        ], max_events=2)
        self.assert_event_tags(parser, [
            ('start', '{abc}tag'),
        ], max_events=1)

        self._feed(parser, "<child />\n")
        self.assert_event_tags(parser, [
            ('start', '{abc}child'),
            ('end', '{abc}child'),
        ])

        self._feed(parser, "</tag>\n")
        parser.close()
        # max_events=1 stops before the 'end-ns' events, whose payload is
        # None and therefore has no .tag for assert_event_tags to read.
        self.assert_event_tags(parser, [
            ('end', '{abc}tag'),
        ], max_events=1)
        self.assert_event_tuples(parser, [
            ('end-ns', None),
            ('end-ns', None),
        ])

    def test_events(self):
        # An empty `events` sequence: no events are expected at all.
        parser = ET.XMLPullParser(events=())
        self._feed(parser, "<root/>\n")
        self.assert_event_tags(parser, [])

        # Comments produce no event when only 'start'/'end' are requested.
        parser = ET.XMLPullParser(events=('start', 'end'))
        self._feed(parser, "<!-- text here -->\n")
        self.assert_events(parser, [])

        # 'start' and 'end' together, fed incrementally.
        parser = ET.XMLPullParser(events=('start', 'end'))
        self._feed(parser, "<root>\n")
        self.assert_event_tags(parser, [('start', 'root')])
        self._feed(parser, "<element key='value'>text</element")
        self.assert_event_tags(parser, [('start', 'element')])
        self._feed(parser, ">\n")
        self.assert_event_tags(parser, [('end', 'element')])
        self._feed(parser,
                   "<element xmlns='foo'>text<empty-element/></element>tail\n")
        self.assert_event_tags(parser, [
            ('start', '{foo}element'),
            ('start', '{foo}empty-element'),
            ('end', '{foo}empty-element'),
            ('end', '{foo}element'),
            ])
        self._feed(parser, "</root>")
        self.assertIsNone(parser.close())
        # The root's 'end' event is read after close().
        self.assert_event_tags(parser, [('end', 'root')])

        # 'start' only: completing an element yields no event.
        parser = ET.XMLPullParser(events=('start',))
        self._feed(parser, "<!-- comment -->\n")
        self.assert_event_tags(parser, [])
        self._feed(parser, "<root>\n")
        self.assert_event_tags(parser, [('start', 'root')])
        self._feed(parser, "<element key='value'>text</element")
        self.assert_event_tags(parser, [('start', 'element')])
        self._feed(parser, ">\n")
        self.assert_event_tags(parser, [])
        self._feed(parser,
                   "<element xmlns='foo'>text<empty-element/></element>tail\n")
        self.assert_event_tags(parser, [
            ('start', '{foo}element'),
            ('start', '{foo}empty-element'),
            ])
        self._feed(parser, "</root>")
        self.assertIsNone(parser.close())

    def test_events_comment(self):
        # With 'comment' requested, each comment is expected as an event
        # whose element has tag ET.Comment and the comment body as text --
        # before, inside and after the root element.
        parser = ET.XMLPullParser(events=('start', 'comment', 'end'))
        self._feed(parser, "<!-- text here -->\n")
        self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
        self._feed(parser, "<!-- more text here -->\n")
        self.assert_events(parser, [('comment', (ET.Comment, ' more text here '))])
        self._feed(parser, "<root-tag>text")
        self.assert_event_tags(parser, [('start', 'root-tag')])
        self._feed(parser, "<!-- inner comment-->\n")
        self.assert_events(parser, [('comment', (ET.Comment, ' inner comment'))])
        self._feed(parser, "</root-tag>\n")
        self.assert_event_tags(parser, [('end', 'root-tag')])
        self._feed(parser, "<!-- outer comment -->\n")
        self.assert_events(parser, [('comment', (ET.Comment, ' outer comment '))])

        # 'comment' as the only requested event.
        parser = ET.XMLPullParser(events=('comment',))
        self._feed(parser, "<!-- text here -->\n")
        self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])

    def test_events_pi(self):
        # With 'pi' requested, a processing instruction is expected as an
        # event whose element has tag ET.PI and the target (plus any
        # content) as text.
        parser = ET.XMLPullParser(events=('start', 'pi', 'end'))
        self._feed(parser, "<?pitarget?>\n")
        self.assert_events(parser, [('pi', (ET.PI, 'pitarget'))])
        parser = ET.XMLPullParser(events=('pi',))
        self._feed(parser, "<?pitarget some text ?>\n")
        self.assert_events(parser, [('pi', (ET.PI, 'pitarget some text '))])

    def test_events_sequence(self):
        # Test that events can be some sequence that's not just a tuple or list
        eventset = {'end', 'start'}
        parser = ET.XMLPullParser(events=eventset)
        self._feed(parser, "<foo>bar</foo>")
        self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])

        # A one-shot iterator (not a container) of event names.
        class DummyIter:
            def __init__(self):
                self.events = iter(['start', 'end', 'start-ns'])
            def __iter__(self):
                return self
            def __next__(self):
                return next(self.events)

        parser = ET.XMLPullParser(events=DummyIter())
        self._feed(parser, "<foo>bar</foo>")
        # The fed document declares no namespace, so only the element
        # events are expected even though 'start-ns' was requested.
        self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])

    def test_unknown_event(self):
        # An unrecognized event name must be rejected at construction time.
        with self.assertRaises(ValueError):
            ET.XMLPullParser(events=('start', 'end', 'bogus'))


#
# xinclude tests (samples from appendix C of the xinclude specification)

# Maps an href to the text of the corresponding sample document.
XINCLUDE = {}

1489XINCLUDE["C1.xml"] = """\ 1490<?xml version='1.0'?> 1491<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1492 <p>120 Mz is adequate for an average home user.</p> 1493 <xi:include href="disclaimer.xml"/> 1494</document> 1495""" 1496 1497XINCLUDE["disclaimer.xml"] = """\ 1498<?xml version='1.0'?> 1499<disclaimer> 1500 <p>The opinions represented herein represent those of the individual 1501 and should not be interpreted as official policy endorsed by this 1502 organization.</p> 1503</disclaimer> 1504""" 1505 1506XINCLUDE["C2.xml"] = """\ 1507<?xml version='1.0'?> 1508<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1509 <p>This document has been accessed 1510 <xi:include href="count.txt" parse="text"/> times.</p> 1511</document> 1512""" 1513 1514XINCLUDE["count.txt"] = "324387" 1515 1516XINCLUDE["C2b.xml"] = """\ 1517<?xml version='1.0'?> 1518<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1519 <p>This document has been <em>accessed</em> 1520 <xi:include href="count.txt" parse="text"/> times.</p> 1521</document> 1522""" 1523 1524XINCLUDE["C3.xml"] = """\ 1525<?xml version='1.0'?> 1526<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1527 <p>The following is the source of the "data.xml" resource:</p> 1528 <example><xi:include href="data.xml" parse="text"/></example> 1529</document> 1530""" 1531 1532XINCLUDE["data.xml"] = """\ 1533<?xml version='1.0'?> 1534<data> 1535 <item><![CDATA[Brooks & Shields]]></item> 1536</data> 1537""" 1538 1539XINCLUDE["C5.xml"] = """\ 1540<?xml version='1.0'?> 1541<div xmlns:xi="http://www.w3.org/2001/XInclude"> 1542 <xi:include href="example.txt" parse="text"> 1543 <xi:fallback> 1544 <xi:include href="fallback-example.txt" parse="text"> 1545 <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback> 1546 </xi:include> 1547 </xi:fallback> 1548 </xi:include> 1549</div> 1550""" 1551 1552XINCLUDE["default.xml"] = """\ 1553<?xml version='1.0'?> 1554<document xmlns:xi="http://www.w3.org/2001/XInclude"> 
1555 <p>Example.</p> 1556 <xi:include href="{}"/> 1557</document> 1558""".format(html.escape(SIMPLE_XMLFILE, True)) 1559 1560# 1561# badly formatted xi:include tags 1562 1563XINCLUDE_BAD = {} 1564 1565XINCLUDE_BAD["B1.xml"] = """\ 1566<?xml version='1.0'?> 1567<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1568 <p>120 Mz is adequate for an average home user.</p> 1569 <xi:include href="disclaimer.xml" parse="BAD_TYPE"/> 1570</document> 1571""" 1572 1573XINCLUDE_BAD["B2.xml"] = """\ 1574<?xml version='1.0'?> 1575<div xmlns:xi="http://www.w3.org/2001/XInclude"> 1576 <xi:fallback></xi:fallback> 1577</div> 1578""" 1579 1580class XIncludeTest(unittest.TestCase): 1581 1582 def xinclude_loader(self, href, parse="xml", encoding=None): 1583 try: 1584 data = XINCLUDE[href] 1585 except KeyError: 1586 raise OSError("resource not found") 1587 if parse == "xml": 1588 data = ET.XML(data) 1589 return data 1590 1591 def none_loader(self, href, parser, encoding=None): 1592 return None 1593 1594 def _my_loader(self, href, parse): 1595 # Used to avoid a test-dependency problem where the default loader 1596 # of ElementInclude uses the pyET parser for cET tests. 
1597 if parse == 'xml': 1598 with open(href, 'rb') as f: 1599 return ET.parse(f).getroot() 1600 else: 1601 return None 1602 1603 def test_xinclude_default(self): 1604 from xml.etree import ElementInclude 1605 doc = self.xinclude_loader('default.xml') 1606 ElementInclude.include(doc, self._my_loader) 1607 self.assertEqual(serialize(doc), 1608 '<document>\n' 1609 ' <p>Example.</p>\n' 1610 ' <root>\n' 1611 ' <element key="value">text</element>\n' 1612 ' <element>text</element>tail\n' 1613 ' <empty-element />\n' 1614 '</root>\n' 1615 '</document>') 1616 1617 def test_xinclude(self): 1618 from xml.etree import ElementInclude 1619 1620 # Basic inclusion example (XInclude C.1) 1621 document = self.xinclude_loader("C1.xml") 1622 ElementInclude.include(document, self.xinclude_loader) 1623 self.assertEqual(serialize(document), 1624 '<document>\n' 1625 ' <p>120 Mz is adequate for an average home user.</p>\n' 1626 ' <disclaimer>\n' 1627 ' <p>The opinions represented herein represent those of the individual\n' 1628 ' and should not be interpreted as official policy endorsed by this\n' 1629 ' organization.</p>\n' 1630 '</disclaimer>\n' 1631 '</document>') # C1 1632 1633 # Textual inclusion example (XInclude C.2) 1634 document = self.xinclude_loader("C2.xml") 1635 ElementInclude.include(document, self.xinclude_loader) 1636 self.assertEqual(serialize(document), 1637 '<document>\n' 1638 ' <p>This document has been accessed\n' 1639 ' 324387 times.</p>\n' 1640 '</document>') # C2 1641 1642 # Textual inclusion after sibling element (based on modified XInclude C.2) 1643 document = self.xinclude_loader("C2b.xml") 1644 ElementInclude.include(document, self.xinclude_loader) 1645 self.assertEqual(serialize(document), 1646 '<document>\n' 1647 ' <p>This document has been <em>accessed</em>\n' 1648 ' 324387 times.</p>\n' 1649 '</document>') # C2b 1650 1651 # Textual inclusion of XML example (XInclude C.3) 1652 document = self.xinclude_loader("C3.xml") 1653 ElementInclude.include(document, 
self.xinclude_loader) 1654 self.assertEqual(serialize(document), 1655 '<document>\n' 1656 ' <p>The following is the source of the "data.xml" resource:</p>\n' 1657 " <example><?xml version='1.0'?>\n" 1658 '<data>\n' 1659 ' <item><![CDATA[Brooks & Shields]]></item>\n' 1660 '</data>\n' 1661 '</example>\n' 1662 '</document>') # C3 1663 1664 # Fallback example (XInclude C.5) 1665 # Note! Fallback support is not yet implemented 1666 document = self.xinclude_loader("C5.xml") 1667 with self.assertRaises(OSError) as cm: 1668 ElementInclude.include(document, self.xinclude_loader) 1669 self.assertEqual(str(cm.exception), 'resource not found') 1670 self.assertEqual(serialize(document), 1671 '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n' 1672 ' <ns0:include href="example.txt" parse="text">\n' 1673 ' <ns0:fallback>\n' 1674 ' <ns0:include href="fallback-example.txt" parse="text">\n' 1675 ' <ns0:fallback><a href="mailto:bob@example.org">Report error</a></ns0:fallback>\n' 1676 ' </ns0:include>\n' 1677 ' </ns0:fallback>\n' 1678 ' </ns0:include>\n' 1679 '</div>') # C5 1680 1681 def test_xinclude_failures(self): 1682 from xml.etree import ElementInclude 1683 1684 # Test failure to locate included XML file. 1685 document = ET.XML(XINCLUDE["C1.xml"]) 1686 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1687 ElementInclude.include(document, loader=self.none_loader) 1688 self.assertEqual(str(cm.exception), 1689 "cannot load 'disclaimer.xml' as 'xml'") 1690 1691 # Test failure to locate included text file. 1692 document = ET.XML(XINCLUDE["C2.xml"]) 1693 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1694 ElementInclude.include(document, loader=self.none_loader) 1695 self.assertEqual(str(cm.exception), 1696 "cannot load 'count.txt' as 'text'") 1697 1698 # Test bad parse type. 
1699 document = ET.XML(XINCLUDE_BAD["B1.xml"]) 1700 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1701 ElementInclude.include(document, loader=self.none_loader) 1702 self.assertEqual(str(cm.exception), 1703 "unknown parse type in xi:include tag ('BAD_TYPE')") 1704 1705 # Test xi:fallback outside xi:include. 1706 document = ET.XML(XINCLUDE_BAD["B2.xml"]) 1707 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1708 ElementInclude.include(document, loader=self.none_loader) 1709 self.assertEqual(str(cm.exception), 1710 "xi:fallback tag must be child of xi:include " 1711 "('{http://www.w3.org/2001/XInclude}fallback')") 1712 1713# -------------------------------------------------------------------- 1714# reported bugs 1715 1716class BugsTest(unittest.TestCase): 1717 1718 def test_bug_xmltoolkit21(self): 1719 # marshaller gives obscure errors for non-string values 1720 1721 def check(elem): 1722 with self.assertRaises(TypeError) as cm: 1723 serialize(elem) 1724 self.assertEqual(str(cm.exception), 1725 'cannot serialize 123 (type int)') 1726 1727 elem = ET.Element(123) 1728 check(elem) # tag 1729 1730 elem = ET.Element("elem") 1731 elem.text = 123 1732 check(elem) # text 1733 1734 elem = ET.Element("elem") 1735 elem.tail = 123 1736 check(elem) # tail 1737 1738 elem = ET.Element("elem") 1739 elem.set(123, "123") 1740 check(elem) # attribute key 1741 1742 elem = ET.Element("elem") 1743 elem.set("123", 123) 1744 check(elem) # attribute value 1745 1746 def test_bug_xmltoolkit25(self): 1747 # typo in ElementTree.findtext 1748 1749 elem = ET.XML(SAMPLE_XML) 1750 tree = ET.ElementTree(elem) 1751 self.assertEqual(tree.findtext("tag"), 'text') 1752 self.assertEqual(tree.findtext("section/tag"), 'subtext') 1753 1754 def test_bug_xmltoolkit28(self): 1755 # .//tag causes exceptions 1756 1757 tree = ET.XML("<doc><table><tbody/></table></doc>") 1758 self.assertEqual(summarize_list(tree.findall(".//thead")), []) 1759 
self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody']) 1760 1761 def test_bug_xmltoolkitX1(self): 1762 # dump() doesn't flush the output buffer 1763 1764 tree = ET.XML("<doc><table><tbody/></table></doc>") 1765 with support.captured_stdout() as stdout: 1766 ET.dump(tree) 1767 self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n') 1768 1769 def test_bug_xmltoolkit39(self): 1770 # non-ascii element and attribute names doesn't work 1771 1772 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />") 1773 self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />') 1774 1775 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1776 b"<tag \xe4ttr='välue' />") 1777 self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'}) 1778 self.assertEqual(ET.tostring(tree, "utf-8"), 1779 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />') 1780 1781 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1782 b'<t\xe4g>text</t\xe4g>') 1783 self.assertEqual(ET.tostring(tree, "utf-8"), 1784 b'<t\xc3\xa4g>text</t\xc3\xa4g>') 1785 1786 tree = ET.Element("t\u00e4g") 1787 self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />') 1788 1789 tree = ET.Element("tag") 1790 tree.set("\u00e4ttr", "v\u00e4lue") 1791 self.assertEqual(ET.tostring(tree, "utf-8"), 1792 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />') 1793 1794 def test_bug_xmltoolkit54(self): 1795 # problems handling internally defined entities 1796 1797 e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '舰'>]>" 1798 '<doc>&ldots;</doc>') 1799 self.assertEqual(serialize(e, encoding="us-ascii"), 1800 b'<doc>舰</doc>') 1801 self.assertEqual(serialize(e), '<doc>\u8230</doc>') 1802 1803 def test_bug_xmltoolkit55(self): 1804 # make sure we're reporting the first error, not the last 1805 1806 with self.assertRaises(ET.ParseError) as cm: 1807 ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>" 1808 b'<doc>&ldots;&ndots;&rdots;</doc>') 1809 self.assertEqual(str(cm.exception), 1810 'undefined entity &ldots;: line 1, 
column 36') 1811 1812 def test_bug_xmltoolkit60(self): 1813 # Handle crash in stream source. 1814 1815 class ExceptionFile: 1816 def read(self, x): 1817 raise OSError 1818 1819 self.assertRaises(OSError, ET.parse, ExceptionFile()) 1820 1821 def test_bug_xmltoolkit62(self): 1822 # Don't crash when using custom entities. 1823 1824 ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'} 1825 parser = ET.XMLParser() 1826 parser.entity.update(ENTITIES) 1827 parser.feed("""<?xml version="1.0" encoding="UTF-8"?> 1828<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []> 1829<patent-application-publication> 1830<subdoc-abstract> 1831<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named ‘BCT9801BEG’.</paragraph> 1832</subdoc-abstract> 1833</patent-application-publication>""") 1834 t = parser.close() 1835 self.assertEqual(t.find('.//paragraph').text, 1836 'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.') 1837 1838 @unittest.skipIf(sys.gettrace(), "Skips under coverage.") 1839 def test_bug_xmltoolkit63(self): 1840 # Check reference leak. 1841 def xmltoolkit63(): 1842 tree = ET.TreeBuilder() 1843 tree.start("tag", {}) 1844 tree.data("text") 1845 tree.end("tag") 1846 1847 xmltoolkit63() 1848 count = sys.getrefcount(None) 1849 for i in range(1000): 1850 xmltoolkit63() 1851 self.assertEqual(sys.getrefcount(None), count) 1852 1853 def test_bug_200708_newline(self): 1854 # Preserve newlines in attributes. 1855 1856 e = ET.Element('SomeTag', text="def _f():\n return 3\n") 1857 self.assertEqual(ET.tostring(e), 1858 b'<SomeTag text="def _f(): return 3 " />') 1859 self.assertEqual(ET.XML(ET.tostring(e)).get("text"), 1860 'def _f():\n return 3\n') 1861 self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))), 1862 b'<SomeTag text="def _f(): return 3 " />') 1863 1864 def test_bug_200708_close(self): 1865 # Test default builder. 
1866 parser = ET.XMLParser() # default 1867 parser.feed("<element>some text</element>") 1868 self.assertEqual(parser.close().tag, 'element') 1869 1870 # Test custom builder. 1871 class EchoTarget: 1872 def close(self): 1873 return ET.Element("element") # simulate root 1874 parser = ET.XMLParser(target=EchoTarget()) 1875 parser.feed("<element>some text</element>") 1876 self.assertEqual(parser.close().tag, 'element') 1877 1878 def test_bug_200709_default_namespace(self): 1879 e = ET.Element("{default}elem") 1880 s = ET.SubElement(e, "{default}elem") 1881 self.assertEqual(serialize(e, default_namespace="default"), # 1 1882 '<elem xmlns="default"><elem /></elem>') 1883 1884 e = ET.Element("{default}elem") 1885 s = ET.SubElement(e, "{default}elem") 1886 s = ET.SubElement(e, "{not-default}elem") 1887 self.assertEqual(serialize(e, default_namespace="default"), # 2 1888 '<elem xmlns="default" xmlns:ns1="not-default">' 1889 '<elem />' 1890 '<ns1:elem />' 1891 '</elem>') 1892 1893 e = ET.Element("{default}elem") 1894 s = ET.SubElement(e, "{default}elem") 1895 s = ET.SubElement(e, "elem") # unprefixed name 1896 with self.assertRaises(ValueError) as cm: 1897 serialize(e, default_namespace="default") # 3 1898 self.assertEqual(str(cm.exception), 1899 'cannot use non-qualified names with default_namespace option') 1900 1901 def test_bug_200709_register_namespace(self): 1902 e = ET.Element("{http://namespace.invalid/does/not/exist/}title") 1903 self.assertEqual(ET.tostring(e), 1904 b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />') 1905 ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/") 1906 e = ET.Element("{http://namespace.invalid/does/not/exist/}title") 1907 self.assertEqual(ET.tostring(e), 1908 b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />') 1909 1910 # And the Dublin Core namespace is in the default list: 1911 1912 e = ET.Element("{http://purl.org/dc/elements/1.1/}title") 1913 self.assertEqual(ET.tostring(e), 
1914 b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />') 1915 1916 def test_bug_200709_element_comment(self): 1917 # Not sure if this can be fixed, really (since the serializer needs 1918 # ET.Comment, not cET.comment). 1919 1920 a = ET.Element('a') 1921 a.append(ET.Comment('foo')) 1922 self.assertEqual(a[0].tag, ET.Comment) 1923 1924 a = ET.Element('a') 1925 a.append(ET.PI('foo')) 1926 self.assertEqual(a[0].tag, ET.PI) 1927 1928 def test_bug_200709_element_insert(self): 1929 a = ET.Element('a') 1930 b = ET.SubElement(a, 'b') 1931 c = ET.SubElement(a, 'c') 1932 d = ET.Element('d') 1933 a.insert(0, d) 1934 self.assertEqual(summarize_list(a), ['d', 'b', 'c']) 1935 a.insert(-1, d) 1936 self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c']) 1937 1938 def test_bug_200709_iter_comment(self): 1939 a = ET.Element('a') 1940 b = ET.SubElement(a, 'b') 1941 comment_b = ET.Comment("TEST-b") 1942 b.append(comment_b) 1943 self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment]) 1944 1945 # -------------------------------------------------------------------- 1946 # reported on bugs.python.org 1947 1948 def test_bug_1534630(self): 1949 bob = ET.TreeBuilder() 1950 e = bob.data("data") 1951 e = bob.start("tag", {}) 1952 e = bob.end("tag") 1953 e = bob.close() 1954 self.assertEqual(serialize(e), '<tag />') 1955 1956 def test_issue6233(self): 1957 e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>" 1958 b'<body>t\xc3\xa3g</body>') 1959 self.assertEqual(ET.tostring(e, 'ascii'), 1960 b"<?xml version='1.0' encoding='ascii'?>\n" 1961 b'<body>tãg</body>') 1962 e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1963 b'<body>t\xe3g</body>') 1964 self.assertEqual(ET.tostring(e, 'ascii'), 1965 b"<?xml version='1.0' encoding='ascii'?>\n" 1966 b'<body>tãg</body>') 1967 1968 def test_issue3151(self): 1969 e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>') 1970 self.assertEqual(e.tag, '{${stuff}}localname') 1971 t = ET.ElementTree(e) 1972 
def test_lost_elem(self):
    # Issue #25902: Borrowed element can disappear
    #
    # Tag is used as the tag of the first child.  Its __eq__ replaces that
    # very child in the parent and advances the iterator again, all while
    # the outer next(i) is still running.
    class Tag:
        def __eq__(self, other):
            # Swap out e[0] during the comparison, then re-enter the
            # iterator that is (presumably) performing this comparison.
            e[0] = ET.Element('changed')
            next(i)
            return True

    e = ET.Element('root')
    e.append(ET.Element(Tag()))
    e.append(ET.Element('tag'))
    i = e.iter('tag')
    try:
        t = next(i)
    except ValueError:
        # The nested next(i) raised: this iterator cannot be re-entered,
        # so the scenario cannot be exercised here.
        self.skipTest('generators are not reentrant')
    # The element handed back is the original child (its tag is the Tag
    # instance), while the parent now holds the replacement.
    self.assertIsInstance(t.tag, Tag)
    self.assertIsInstance(e[0].tag, str)
    self.assertEqual(e[0].tag, 'changed')
xml = b'<a b="%s"/>' % text 2041 root = ET.XML(xml) 2042 self.assertEqual(root.get('b'), text.decode('utf-8')) 2043 2044 def test_expat224_utf8_bug(self): 2045 # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder. 2046 # Check that Expat 2.2.4 fixed the bug. 2047 # 2048 # Test buffer bounds at odd and even positions. 2049 2050 text = b'\xc3\xa0' * 1024 2051 self.check_expat224_utf8_bug(text) 2052 2053 text = b'x' + b'\xc3\xa0' * 1024 2054 self.check_expat224_utf8_bug(text) 2055 2056 def test_expat224_utf8_bug_file(self): 2057 with open(UTF8_BUG_XMLFILE, 'rb') as fp: 2058 raw = fp.read() 2059 root = ET.fromstring(raw) 2060 xmlattr = root.get('b') 2061 2062 # "Parse" manually the XML file to extract the value of the 'b' 2063 # attribute of the <a b='xxx' /> XML element 2064 text = raw.decode('utf-8').strip() 2065 text = text.replace('\r\n', ' ') 2066 text = text[6:-4] 2067 self.assertEqual(root.get('b'), text) 2068 2069 2070 2071# -------------------------------------------------------------------- 2072 2073 2074class BasicElementTest(ElementTestCase, unittest.TestCase): 2075 2076 def test___init__(self): 2077 tag = "foo" 2078 attrib = { "zix": "wyp" } 2079 2080 element_foo = ET.Element(tag, attrib) 2081 2082 # traits of an element 2083 self.assertIsInstance(element_foo, ET.Element) 2084 self.assertIn("tag", dir(element_foo)) 2085 self.assertIn("attrib", dir(element_foo)) 2086 self.assertIn("text", dir(element_foo)) 2087 self.assertIn("tail", dir(element_foo)) 2088 2089 # string attributes have expected values 2090 self.assertEqual(element_foo.tag, tag) 2091 self.assertIsNone(element_foo.text) 2092 self.assertIsNone(element_foo.tail) 2093 2094 # attrib is a copy 2095 self.assertIsNot(element_foo.attrib, attrib) 2096 self.assertEqual(element_foo.attrib, attrib) 2097 2098 # attrib isn't linked 2099 attrib["bar"] = "baz" 2100 self.assertIsNot(element_foo.attrib, attrib) 2101 self.assertNotEqual(element_foo.attrib, attrib) 2102 2103 def test___copy__(self): 2104 
element_foo = ET.Element("foo", { "zix": "wyp" }) 2105 element_foo.append(ET.Element("bar", { "baz": "qix" })) 2106 2107 element_foo2 = copy.copy(element_foo) 2108 2109 # elements are not the same 2110 self.assertIsNot(element_foo2, element_foo) 2111 2112 # string attributes are equal 2113 self.assertEqual(element_foo2.tag, element_foo.tag) 2114 self.assertEqual(element_foo2.text, element_foo.text) 2115 self.assertEqual(element_foo2.tail, element_foo.tail) 2116 2117 # number of children is the same 2118 self.assertEqual(len(element_foo2), len(element_foo)) 2119 2120 # children are the same 2121 for (child1, child2) in itertools.zip_longest(element_foo, element_foo2): 2122 self.assertIs(child1, child2) 2123 2124 # attrib is a copy 2125 self.assertEqual(element_foo2.attrib, element_foo.attrib) 2126 2127 def test___deepcopy__(self): 2128 element_foo = ET.Element("foo", { "zix": "wyp" }) 2129 element_foo.append(ET.Element("bar", { "baz": "qix" })) 2130 2131 element_foo2 = copy.deepcopy(element_foo) 2132 2133 # elements are not the same 2134 self.assertIsNot(element_foo2, element_foo) 2135 2136 # string attributes are equal 2137 self.assertEqual(element_foo2.tag, element_foo.tag) 2138 self.assertEqual(element_foo2.text, element_foo.text) 2139 self.assertEqual(element_foo2.tail, element_foo.tail) 2140 2141 # number of children is the same 2142 self.assertEqual(len(element_foo2), len(element_foo)) 2143 2144 # children are not the same 2145 for (child1, child2) in itertools.zip_longest(element_foo, element_foo2): 2146 self.assertIsNot(child1, child2) 2147 2148 # attrib is a copy 2149 self.assertIsNot(element_foo2.attrib, element_foo.attrib) 2150 self.assertEqual(element_foo2.attrib, element_foo.attrib) 2151 2152 # attrib isn't linked 2153 element_foo.attrib["bar"] = "baz" 2154 self.assertIsNot(element_foo2.attrib, element_foo.attrib) 2155 self.assertNotEqual(element_foo2.attrib, element_foo.attrib) 2156 2157 def test_augmentation_type_errors(self): 2158 e = 
ET.Element('joe') 2159 self.assertRaises(TypeError, e.append, 'b') 2160 self.assertRaises(TypeError, e.extend, [ET.Element('bar'), 'foo']) 2161 self.assertRaises(TypeError, e.insert, 0, 'foo') 2162 e[:] = [ET.Element('bar')] 2163 with self.assertRaises(TypeError): 2164 e[0] = 'foo' 2165 with self.assertRaises(TypeError): 2166 e[:] = [ET.Element('bar'), 'foo'] 2167 2168 if hasattr(e, '__setstate__'): 2169 state = { 2170 'tag': 'tag', 2171 '_children': [None], # non-Element 2172 'attrib': 'attr', 2173 'tail': 'tail', 2174 'text': 'text', 2175 } 2176 self.assertRaises(TypeError, e.__setstate__, state) 2177 2178 if hasattr(e, '__deepcopy__'): 2179 class E(ET.Element): 2180 def __deepcopy__(self, memo): 2181 return None # non-Element 2182 e[:] = [E('bar')] 2183 self.assertRaises(TypeError, copy.deepcopy, e) 2184 2185 def test_cyclic_gc(self): 2186 class Dummy: 2187 pass 2188 2189 # Test the shortest cycle: d->element->d 2190 d = Dummy() 2191 d.dummyref = ET.Element('joe', attr=d) 2192 wref = weakref.ref(d) 2193 del d 2194 gc_collect() 2195 self.assertIsNone(wref()) 2196 2197 # A longer cycle: d->e->e2->d 2198 e = ET.Element('joe') 2199 d = Dummy() 2200 d.dummyref = e 2201 wref = weakref.ref(d) 2202 e2 = ET.SubElement(e, 'foo', attr=d) 2203 del d, e, e2 2204 gc_collect() 2205 self.assertIsNone(wref()) 2206 2207 # A cycle between Element objects as children of one another 2208 # e1->e2->e3->e1 2209 e1 = ET.Element('e1') 2210 e2 = ET.Element('e2') 2211 e3 = ET.Element('e3') 2212 e3.append(e1) 2213 e2.append(e3) 2214 e1.append(e2) 2215 wref = weakref.ref(e1) 2216 del e1, e2, e3 2217 gc_collect() 2218 self.assertIsNone(wref()) 2219 2220 def test_weakref(self): 2221 flag = False 2222 def wref_cb(w): 2223 nonlocal flag 2224 flag = True 2225 e = ET.Element('e') 2226 wref = weakref.ref(e, wref_cb) 2227 self.assertEqual(wref().tag, 'e') 2228 del e 2229 self.assertEqual(flag, True) 2230 self.assertEqual(wref(), None) 2231 2232 def test_get_keyword_args(self): 2233 e1 = 
def test_extend_mutable_list(self):
    # Element.extend() is handed a list that rewrites itself as soon as
    # the class of one of its items is looked up.  There are no
    # assertions; the test only requires that the calls below finish.
    class X:
        @property
        def __class__(self):
            # Reading obj.__class__ replaces the contents of L in place
            # and then claims to be an Element.
            L[:] = [ET.Element('baz')]
            return ET.Element
    L = [X()]
    e = ET.Element('foo')
    try:
        e.extend(L)
    except TypeError:
        # Rejecting the impostor is acceptable.
        pass

    # Same trick, but on a genuine Element subclass: here extend() is
    # called outside any try block, so it must not raise.
    class Y(X, ET.Element):
        pass
    L = [Y('x')]
    e = ET.Element('foo')
    e.extend(L)
ET.Element('baz')] 2298 e = ET.Element('foo') 2299 try: 2300 e.extend(L) 2301 except TypeError: 2302 pass 2303 2304 class Y(X, ET.Element): 2305 pass 2306 L = [Y('bar'), ET.Element('baz')] 2307 e = ET.Element('foo') 2308 e.extend(L) 2309 2310 def test_remove_with_mutating(self): 2311 class X(ET.Element): 2312 def __eq__(self, o): 2313 del e[:] 2314 return False 2315 e = ET.Element('foo') 2316 e.extend([X('bar')]) 2317 self.assertRaises(ValueError, e.remove, ET.Element('baz')) 2318 2319 e = ET.Element('foo') 2320 e.extend([ET.Element('bar')]) 2321 self.assertRaises(ValueError, e.remove, X('baz')) 2322 2323 def test_recursive_repr(self): 2324 # Issue #25455 2325 e = ET.Element('foo') 2326 with swap_attr(e, 'tag', e): 2327 with self.assertRaises(RuntimeError): 2328 repr(e) # Should not crash 2329 2330 def test_element_get_text(self): 2331 # Issue #27863 2332 class X(str): 2333 def __del__(self): 2334 try: 2335 elem.text 2336 except NameError: 2337 pass 2338 2339 b = ET.TreeBuilder() 2340 b.start('tag', {}) 2341 b.data('ABCD') 2342 b.data(X('EFGH')) 2343 b.data('IJKL') 2344 b.end('tag') 2345 2346 elem = b.close() 2347 self.assertEqual(elem.text, 'ABCDEFGHIJKL') 2348 2349 def test_element_get_tail(self): 2350 # Issue #27863 2351 class X(str): 2352 def __del__(self): 2353 try: 2354 elem[0].tail 2355 except NameError: 2356 pass 2357 2358 b = ET.TreeBuilder() 2359 b.start('root', {}) 2360 b.start('tag', {}) 2361 b.end('tag') 2362 b.data('ABCD') 2363 b.data(X('EFGH')) 2364 b.data('IJKL') 2365 b.end('root') 2366 2367 elem = b.close() 2368 self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL') 2369 2370 def test_subscr(self): 2371 # Issue #27863 2372 class X: 2373 def __index__(self): 2374 del e[:] 2375 return 1 2376 2377 e = ET.Element('elem') 2378 e.append(ET.Element('child')) 2379 e[:X()] # shouldn't crash 2380 2381 e.append(ET.Element('child')) 2382 e[0:10:X()] # shouldn't crash 2383 2384 def test_ass_subscr(self): 2385 # Issue #27863 2386 class X: 2387 def __index__(self): 2388 
class MutatingElementPath(str):
    # A str subclass that remembers an element-like container and deletes
    # all of that container's items whenever the path is compared for
    # equality; the comparison itself always reports "equal".

    # Defining __eq__ would otherwise make the class unhashable, so the
    # plain str hash is kept explicitly.
    __hash__ = str.__hash__

    def __new__(cls, elem, *args):
        # The remaining positional arguments are forwarded to str.
        path = super().__new__(cls, *args)
        path.elem = elem
        return path

    def __eq__(self, o):
        del self.elem[:]
        return True
e.findtext(MutatingElementPath(e, 'x')) 2466 2467 def test_findtext_with_error(self): 2468 e = ET.Element('foo') 2469 e.extend([ET.Element('bar')]) 2470 try: 2471 e.findtext(BadElementPath('x')) 2472 except ZeroDivisionError: 2473 pass 2474 2475 def test_findall_with_mutating(self): 2476 e = ET.Element('foo') 2477 e.extend([ET.Element('bar')]) 2478 e.findall(MutatingElementPath(e, 'x')) 2479 2480 def test_findall_with_error(self): 2481 e = ET.Element('foo') 2482 e.extend([ET.Element('bar')]) 2483 try: 2484 e.findall(BadElementPath('x')) 2485 except ZeroDivisionError: 2486 pass 2487 2488 2489class ElementTreeTypeTest(unittest.TestCase): 2490 def test_istype(self): 2491 self.assertIsInstance(ET.ParseError, type) 2492 self.assertIsInstance(ET.QName, type) 2493 self.assertIsInstance(ET.ElementTree, type) 2494 self.assertIsInstance(ET.Element, type) 2495 self.assertIsInstance(ET.TreeBuilder, type) 2496 self.assertIsInstance(ET.XMLParser, type) 2497 2498 def test_Element_subclass_trivial(self): 2499 class MyElement(ET.Element): 2500 pass 2501 2502 mye = MyElement('foo') 2503 self.assertIsInstance(mye, ET.Element) 2504 self.assertIsInstance(mye, MyElement) 2505 self.assertEqual(mye.tag, 'foo') 2506 2507 # test that attribute assignment works (issue 14849) 2508 mye.text = "joe" 2509 self.assertEqual(mye.text, "joe") 2510 2511 def test_Element_subclass_constructor(self): 2512 class MyElement(ET.Element): 2513 def __init__(self, tag, attrib={}, **extra): 2514 super(MyElement, self).__init__(tag + '__', attrib, **extra) 2515 2516 mye = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4) 2517 self.assertEqual(mye.tag, 'foo__') 2518 self.assertEqual(sorted(mye.items()), 2519 [('a', 1), ('b', 2), ('c', 3), ('d', 4)]) 2520 2521 def test_Element_subclass_new_method(self): 2522 class MyElement(ET.Element): 2523 def newmethod(self): 2524 return self.tag 2525 2526 mye = MyElement('joe') 2527 self.assertEqual(mye.newmethod(), 'joe') 2528 2529 def test_Element_subclass_find(self): 2530 class 
def test_find_simple(self):
    root = ET.XML(SAMPLE_XML)
    # Each of these paths resolves to an element named 'tag'.
    for path in ('tag', 'section/tag', './tag'):
        self.assertEqual(root.find(path).tag, 'tag')

    # Replace the third child with the larger sample section.
    root[2] = ET.XML(SAMPLE_SECTION)
    self.assertEqual(root.find('section/nexttag').tag, 'nexttag')

    for path, text in (('./tag', 'text'), ('section/tag', 'subtext')):
        self.assertEqual(root.findtext(path), text)

    # section/nexttag is found but has no text
    self.assertEqual(root.findtext('section/nexttag'), '')
    self.assertEqual(root.findtext('section/nexttag', 'default'), '')

    # tog doesn't exist and 'default' kicks in
    self.assertIsNone(root.findtext('tog'))
    self.assertEqual(root.findtext('tog', 'default'), 'default')

    # Issue #16922
    parent = ET.XML('<tag><empty /></tag>')
    self.assertEqual(parent.findtext('empty'), '')
def test_findall(self):
    e = ET.XML(SAMPLE_XML)
    e[2] = ET.XML(SAMPLE_SECTION)

    def check(cases):
        # Each case pairs a path with the tags findall() must return for
        # it, in order.
        for path, expected_tags in cases:
            self.assertEqual(summarize_list(e.findall(path)), expected_tags)

    section_children = ['tag', 'nexttag', 'nextsection']
    section_descendants = ['tag', 'nexttag', 'nextsection', 'tag']

    # Plain steps, wildcards, '.' and descendant ('//') steps.
    check([
        ('.', ['body']),
        ('tag', ['tag', 'tag']),
        ('tog', []),
        ('tog/foo', []),
        ('*', ['tag', 'tag', 'section']),
        ('.//tag', ['tag'] * 4),
        ('section/tag', ['tag']),
        ('section//tag', ['tag'] * 2),
        ('section/*', section_children),
        ('section//*', section_descendants),
        ('section/.//*', section_descendants),
        ('*/*', section_children),
        ('*//*', section_descendants),
        ('*/tag', ['tag']),
        ('*/./tag', ['tag']),
        ('./tag', ['tag'] * 2),
        ('././tag', ['tag'] * 2),
    ])

    # Attribute and child-element predicates, and parent ('..') steps.
    check([
        ('.//tag[@class]', ['tag'] * 3),
        ('.//tag[@class="a"]', ['tag']),
        ('.//tag[@class="b"]', ['tag'] * 2),
        ('.//tag[@id]', ['tag']),
        ('.//section[tag]', ['section']),
        ('.//section[element]', []),
        ('../tag', []),
        ('section/../tag', ['tag'] * 2),
    ])
    # A trailing '//' gives the same elements as '//*'.
    self.assertEqual(e.findall('section//'), e.findall('section//*'))

    # Child-text predicates, with whitespace around the operator.
    check([
        (".//section[tag='subtext']", ['section']),
        (".//section[tag ='subtext']", ['section']),
        (".//section[tag= 'subtext']", ['section']),
        (".//section[tag = 'subtext']", ['section']),
        (".//section[ tag = 'subtext' ]", ['section']),
    ])

    # Own-text predicates; whitespace inside the quotes is significant.
    check([
        (".//tag[.='subtext']", ['tag']),
        (".//tag[. ='subtext']", ['tag']),
        ('.//tag[.= "subtext"]', ['tag']),
        ('.//tag[ . = "subtext" ]', ['tag']),
        (".//tag[. = 'subtext']", ['tag']),
        (".//tag[. = 'subtext ']", []),
        (".//tag[.= ' subtext']", []),
    ])

    # duplicate section => 2x tag matches
    e[1] = e[2]
    check([
        (".//section[tag = 'subtext']", ['section', 'section']),
        (".//tag[. = 'subtext']", ['tag', 'tag']),
    ])
def test_bad_find(self):
    # A path with a leading '/' must be rejected by Element.findall()
    # with a SyntaxError mentioning the absolute path.
    root = ET.XML(SAMPLE_XML)
    self.assertRaisesRegex(SyntaxError, 'cannot use absolute path',
                           root.findall, '/tag')
def test_basic(self):
    doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
    # iter() walks the element itself and then its descendants.
    self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
    self.assertEqual(self._ilist(doc.find('body')), ['body', 'i'])
    self.assertEqual(next(doc.iter()).tag, 'html')
    # itertext() yields text and tail strings in document order.
    self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
    self.assertEqual(''.join(doc.find('body').itertext()),
                     'this is a paragraph.')
    self.assertEqual(next(doc.itertext()), 'this is a ')

    # iterparse should return an iterator
    sourcefile = serialize(doc, to_string=False)
    self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end')

    # With an explicit parser too (issue #9708)
    sourcefile = serialize(doc, to_string=False)
    parser = ET.XMLParser(target=ET.TreeBuilder())
    self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0],
                     'end')

    # A tree without a root element has nothing to iterate over.
    tree = ET.ElementTree(None)
    self.assertRaises(AttributeError, tree.iter)

    # Issue #16913
    # The ampersands have to be written as "&amp;" in the document: a
    # bare "&" is not well-formed XML.  itertext() returns them decoded.
    doc = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
    self.assertEqual(''.join(doc.itertext()), 'a&b&c&')
# Element.getiterator() is deprecated.
# The filter below is a regular expression matched against the warning
# text.  The literal dots are escaped and the gap between the two
# sentences is matched with \s+, so the test does not depend on how many
# spaces the library puts between them.
@checkwarnings((r"This method will be removed in future versions\.\s+"
                r"Use .+ instead\.", DeprecationWarning))
def test_getiterator(self):
    doc = ET.XML('''
        <document>
            <house>
                <room>bedroom1</room>
                <room>bedroom2</room>
            </house>
            <shed>nothing here
            </shed>
            <house>
                <room>bedroom8</room>
            </house>
        </document>''')

    self.assertEqual(summarize_list(doc.getiterator('room')),
                     ['room'] * 3)
    self.assertEqual(summarize_list(doc.getiterator('house')),
                     ['house'] * 2)

    # test that getiterator also accepts 'tag' as a keyword arg
    self.assertEqual(
        summarize_list(doc.getiterator(tag='room')),
        ['room'] * 3)

    # make sure both tag=None and tag='*' return all tags
    all_tags = ['document', 'house', 'room', 'room',
                'shed', 'house', 'room']
    self.assertEqual(summarize_list(doc.getiterator()), all_tags)
    self.assertEqual(summarize_list(doc.getiterator(None)), all_tags)
    self.assertEqual(summarize_list(doc.getiterator('*')), all_tags)
2906 self.assertEqual(child.text, 'subtext') 2907 self.assertEqual(child.tail, 'tail') 2908 self.assertEqual(child.attrib, {}) 2909 2910 def test_dummy_builder(self): 2911 class BaseDummyBuilder: 2912 def close(self): 2913 return 42 2914 2915 class DummyBuilder(BaseDummyBuilder): 2916 data = start = end = lambda *a: None 2917 2918 parser = ET.XMLParser(target=DummyBuilder()) 2919 parser.feed(self.sample1) 2920 self.assertEqual(parser.close(), 42) 2921 2922 parser = ET.XMLParser(target=BaseDummyBuilder()) 2923 parser.feed(self.sample1) 2924 self.assertEqual(parser.close(), 42) 2925 2926 parser = ET.XMLParser(target=object()) 2927 parser.feed(self.sample1) 2928 self.assertIsNone(parser.close()) 2929 2930 def test_treebuilder_comment(self): 2931 b = ET.TreeBuilder() 2932 self.assertEqual(b.comment('ctext').tag, ET.Comment) 2933 self.assertEqual(b.comment('ctext').text, 'ctext') 2934 2935 b = ET.TreeBuilder(comment_factory=ET.Comment) 2936 self.assertEqual(b.comment('ctext').tag, ET.Comment) 2937 self.assertEqual(b.comment('ctext').text, 'ctext') 2938 2939 b = ET.TreeBuilder(comment_factory=len) 2940 self.assertEqual(b.comment('ctext'), len('ctext')) 2941 2942 def test_treebuilder_pi(self): 2943 b = ET.TreeBuilder() 2944 self.assertEqual(b.pi('target', None).tag, ET.PI) 2945 self.assertEqual(b.pi('target', None).text, 'target') 2946 2947 b = ET.TreeBuilder(pi_factory=ET.PI) 2948 self.assertEqual(b.pi('target').tag, ET.PI) 2949 self.assertEqual(b.pi('target').text, "target") 2950 self.assertEqual(b.pi('pitarget', ' text ').tag, ET.PI) 2951 self.assertEqual(b.pi('pitarget', ' text ').text, "pitarget text ") 2952 2953 b = ET.TreeBuilder(pi_factory=lambda target, text: (len(target), text)) 2954 self.assertEqual(b.pi('target'), (len('target'), None)) 2955 self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text ')) 2956 2957 def test_late_tail(self): 2958 # Issue #37399: The tail of an ignored comment could overwrite the text before it. 
2959 class TreeBuilderSubclass(ET.TreeBuilder): 2960 pass 2961 2962 xml = "<a>text<!-- comment -->tail</a>" 2963 a = ET.fromstring(xml) 2964 self.assertEqual(a.text, "texttail") 2965 2966 parser = ET.XMLParser(target=TreeBuilderSubclass()) 2967 parser.feed(xml) 2968 a = parser.close() 2969 self.assertEqual(a.text, "texttail") 2970 2971 xml = "<a>text<?pi data?>tail</a>" 2972 a = ET.fromstring(xml) 2973 self.assertEqual(a.text, "texttail") 2974 2975 xml = "<a>text<?pi data?>tail</a>" 2976 parser = ET.XMLParser(target=TreeBuilderSubclass()) 2977 parser.feed(xml) 2978 a = parser.close() 2979 self.assertEqual(a.text, "texttail") 2980 2981 def test_late_tail_mix_pi_comments(self): 2982 # Issue #37399: The tail of an ignored comment could overwrite the text before it. 2983 # Test appending tails to comments/pis. 2984 class TreeBuilderSubclass(ET.TreeBuilder): 2985 pass 2986 2987 xml = "<a>text<?pi1?> <!-- comment -->\n<?pi2?>tail</a>" 2988 parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True)) 2989 parser.feed(xml) 2990 a = parser.close() 2991 self.assertEqual(a[0].text, ' comment ') 2992 self.assertEqual(a[0].tail, '\ntail') 2993 self.assertEqual(a.text, "text ") 2994 2995 parser = ET.XMLParser(target=TreeBuilderSubclass(insert_comments=True)) 2996 parser.feed(xml) 2997 a = parser.close() 2998 self.assertEqual(a[0].text, ' comment ') 2999 self.assertEqual(a[0].tail, '\ntail') 3000 self.assertEqual(a.text, "text ") 3001 3002 xml = "<a>text<!-- comment -->\n<?pi data?>tail</a>" 3003 parser = ET.XMLParser(target=ET.TreeBuilder(insert_pis=True)) 3004 parser.feed(xml) 3005 a = parser.close() 3006 self.assertEqual(a[0].text, 'pi data') 3007 self.assertEqual(a[0].tail, 'tail') 3008 self.assertEqual(a.text, "text\n") 3009 3010 parser = ET.XMLParser(target=TreeBuilderSubclass(insert_pis=True)) 3011 parser.feed(xml) 3012 a = parser.close() 3013 self.assertEqual(a[0].text, 'pi data') 3014 self.assertEqual(a[0].tail, 'tail') 3015 self.assertEqual(a.text, "text\n") 3016 
3017 def test_treebuilder_elementfactory_none(self): 3018 parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None)) 3019 parser.feed(self.sample1) 3020 e = parser.close() 3021 self._check_sample1_element(e) 3022 3023 def test_subclass(self): 3024 class MyTreeBuilder(ET.TreeBuilder): 3025 def foobar(self, x): 3026 return x * 2 3027 3028 tb = MyTreeBuilder() 3029 self.assertEqual(tb.foobar(10), 20) 3030 3031 parser = ET.XMLParser(target=tb) 3032 parser.feed(self.sample1) 3033 3034 e = parser.close() 3035 self._check_sample1_element(e) 3036 3037 def test_subclass_comment_pi(self): 3038 class MyTreeBuilder(ET.TreeBuilder): 3039 def foobar(self, x): 3040 return x * 2 3041 3042 tb = MyTreeBuilder(comment_factory=ET.Comment, pi_factory=ET.PI) 3043 self.assertEqual(tb.foobar(10), 20) 3044 3045 parser = ET.XMLParser(target=tb) 3046 parser.feed(self.sample1) 3047 parser.feed('<!-- a comment--><?and a pi?>') 3048 3049 e = parser.close() 3050 self._check_sample1_element(e) 3051 3052 def test_element_factory(self): 3053 lst = [] 3054 def myfactory(tag, attrib): 3055 nonlocal lst 3056 lst.append(tag) 3057 return ET.Element(tag, attrib) 3058 3059 tb = ET.TreeBuilder(element_factory=myfactory) 3060 parser = ET.XMLParser(target=tb) 3061 parser.feed(self.sample2) 3062 parser.close() 3063 3064 self.assertEqual(lst, ['toplevel']) 3065 3066 def _check_element_factory_class(self, cls): 3067 tb = ET.TreeBuilder(element_factory=cls) 3068 3069 parser = ET.XMLParser(target=tb) 3070 parser.feed(self.sample1) 3071 e = parser.close() 3072 self.assertIsInstance(e, cls) 3073 self._check_sample1_element(e) 3074 3075 def test_element_factory_subclass(self): 3076 class MyElement(ET.Element): 3077 pass 3078 self._check_element_factory_class(MyElement) 3079 3080 def test_element_factory_pure_python_subclass(self): 3081 # Mimick SimpleTAL's behaviour (issue #16089): both versions of 3082 # TreeBuilder should be able to cope with a subclass of the 3083 # pure Python Element class. 
3084 base = ET._Element_Py 3085 # Not from a C extension 3086 self.assertEqual(base.__module__, 'xml.etree.ElementTree') 3087 # Force some multiple inheritance with a C class to make things 3088 # more interesting. 3089 class MyElement(base, ValueError): 3090 pass 3091 self._check_element_factory_class(MyElement) 3092 3093 def test_doctype(self): 3094 class DoctypeParser: 3095 _doctype = None 3096 3097 def doctype(self, name, pubid, system): 3098 self._doctype = (name, pubid, system) 3099 3100 def close(self): 3101 return self._doctype 3102 3103 parser = ET.XMLParser(target=DoctypeParser()) 3104 parser.feed(self.sample1) 3105 3106 self.assertEqual(parser.close(), 3107 ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 3108 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) 3109 3110 def test_builder_lookup_errors(self): 3111 class RaisingBuilder: 3112 def __init__(self, raise_in=None, what=ValueError): 3113 self.raise_in = raise_in 3114 self.what = what 3115 3116 def __getattr__(self, name): 3117 if name == self.raise_in: 3118 raise self.what(self.raise_in) 3119 def handle(*args): 3120 pass 3121 return handle 3122 3123 ET.XMLParser(target=RaisingBuilder()) 3124 # cET also checks for 'close' and 'doctype', PyET does it only at need 3125 for event in ('start', 'data', 'end', 'comment', 'pi'): 3126 with self.assertRaisesRegex(ValueError, event): 3127 ET.XMLParser(target=RaisingBuilder(event)) 3128 3129 ET.XMLParser(target=RaisingBuilder(what=AttributeError)) 3130 for event in ('start', 'data', 'end', 'comment', 'pi'): 3131 parser = ET.XMLParser(target=RaisingBuilder(event, what=AttributeError)) 3132 parser.feed(self.sample1) 3133 self.assertIsNone(parser.close()) 3134 3135 3136class XMLParserTest(unittest.TestCase): 3137 sample1 = b'<file><line>22</line></file>' 3138 sample2 = (b'<!DOCTYPE html PUBLIC' 3139 b' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 3140 b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 3141 b'<html>text</html>') 3142 sample3 = 
('<?xml version="1.0" encoding="iso-8859-1"?>\n' 3143 '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>') 3144 3145 def _check_sample_element(self, e): 3146 self.assertEqual(e.tag, 'file') 3147 self.assertEqual(e[0].tag, 'line') 3148 self.assertEqual(e[0].text, '22') 3149 3150 def test_constructor_args(self): 3151 parser2 = ET.XMLParser(encoding='utf-8', 3152 target=ET.TreeBuilder()) 3153 parser2.feed(self.sample1) 3154 self._check_sample_element(parser2.close()) 3155 3156 def test_subclass(self): 3157 class MyParser(ET.XMLParser): 3158 pass 3159 parser = MyParser() 3160 parser.feed(self.sample1) 3161 self._check_sample_element(parser.close()) 3162 3163 def test_doctype_warning(self): 3164 with warnings.catch_warnings(): 3165 warnings.simplefilter('error', DeprecationWarning) 3166 parser = ET.XMLParser() 3167 parser.feed(self.sample2) 3168 parser.close() 3169 3170 def test_subclass_doctype(self): 3171 _doctype = None 3172 class MyParserWithDoctype(ET.XMLParser): 3173 def doctype(self, *args, **kwargs): 3174 nonlocal _doctype 3175 _doctype = (args, kwargs) 3176 3177 parser = MyParserWithDoctype() 3178 with self.assertWarnsRegex(RuntimeWarning, 'doctype'): 3179 parser.feed(self.sample2) 3180 parser.close() 3181 self.assertIsNone(_doctype) 3182 3183 _doctype = _doctype2 = None 3184 with warnings.catch_warnings(): 3185 warnings.simplefilter('error', DeprecationWarning) 3186 warnings.simplefilter('error', RuntimeWarning) 3187 class DoctypeParser: 3188 def doctype(self, name, pubid, system): 3189 nonlocal _doctype2 3190 _doctype2 = (name, pubid, system) 3191 3192 parser = MyParserWithDoctype(target=DoctypeParser()) 3193 parser.feed(self.sample2) 3194 parser.close() 3195 self.assertIsNone(_doctype) 3196 self.assertEqual(_doctype2, 3197 ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 3198 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) 3199 3200 def test_inherited_doctype(self): 3201 '''Ensure that ordinary usage is not deprecated (Issue 
19176)''' 3202 with warnings.catch_warnings(): 3203 warnings.simplefilter('error', DeprecationWarning) 3204 warnings.simplefilter('error', RuntimeWarning) 3205 class MyParserWithoutDoctype(ET.XMLParser): 3206 pass 3207 parser = MyParserWithoutDoctype() 3208 parser.feed(self.sample2) 3209 parser.close() 3210 3211 def test_parse_string(self): 3212 parser = ET.XMLParser(target=ET.TreeBuilder()) 3213 parser.feed(self.sample3) 3214 e = parser.close() 3215 self.assertEqual(e.tag, 'money') 3216 self.assertEqual(e.attrib['value'], '$\xa3\u20ac\U0001017b') 3217 self.assertEqual(e.text, '$\xa3\u20ac\U0001017b') 3218 3219 3220class NamespaceParseTest(unittest.TestCase): 3221 def test_find_with_namespace(self): 3222 nsmap = {'h': 'hello', 'f': 'foo'} 3223 doc = ET.fromstring(SAMPLE_XML_NS_ELEMS) 3224 3225 self.assertEqual(len(doc.findall('{hello}table', nsmap)), 1) 3226 self.assertEqual(len(doc.findall('.//{hello}td', nsmap)), 2) 3227 self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1) 3228 3229 3230class ElementSlicingTest(unittest.TestCase): 3231 def _elem_tags(self, elemlist): 3232 return [e.tag for e in elemlist] 3233 3234 def _subelem_tags(self, elem): 3235 return self._elem_tags(list(elem)) 3236 3237 def _make_elem_with_children(self, numchildren): 3238 """Create an Element with a tag 'a', with the given amount of children 3239 named 'a0', 'a1' ... and so on. 
3240 3241 """ 3242 e = ET.Element('a') 3243 for i in range(numchildren): 3244 ET.SubElement(e, 'a%s' % i) 3245 return e 3246 3247 def test_getslice_single_index(self): 3248 e = self._make_elem_with_children(10) 3249 3250 self.assertEqual(e[1].tag, 'a1') 3251 self.assertEqual(e[-2].tag, 'a8') 3252 3253 self.assertRaises(IndexError, lambda: e[12]) 3254 self.assertRaises(IndexError, lambda: e[-12]) 3255 3256 def test_getslice_range(self): 3257 e = self._make_elem_with_children(6) 3258 3259 self.assertEqual(self._elem_tags(e[3:]), ['a3', 'a4', 'a5']) 3260 self.assertEqual(self._elem_tags(e[3:6]), ['a3', 'a4', 'a5']) 3261 self.assertEqual(self._elem_tags(e[3:16]), ['a3', 'a4', 'a5']) 3262 self.assertEqual(self._elem_tags(e[3:5]), ['a3', 'a4']) 3263 self.assertEqual(self._elem_tags(e[3:-1]), ['a3', 'a4']) 3264 self.assertEqual(self._elem_tags(e[:2]), ['a0', 'a1']) 3265 3266 def test_getslice_steps(self): 3267 e = self._make_elem_with_children(10) 3268 3269 self.assertEqual(self._elem_tags(e[8:10:1]), ['a8', 'a9']) 3270 self.assertEqual(self._elem_tags(e[::3]), ['a0', 'a3', 'a6', 'a9']) 3271 self.assertEqual(self._elem_tags(e[::8]), ['a0', 'a8']) 3272 self.assertEqual(self._elem_tags(e[1::8]), ['a1', 'a9']) 3273 self.assertEqual(self._elem_tags(e[3::sys.maxsize]), ['a3']) 3274 self.assertEqual(self._elem_tags(e[3::sys.maxsize<<64]), ['a3']) 3275 3276 def test_getslice_negative_steps(self): 3277 e = self._make_elem_with_children(4) 3278 3279 self.assertEqual(self._elem_tags(e[::-1]), ['a3', 'a2', 'a1', 'a0']) 3280 self.assertEqual(self._elem_tags(e[::-2]), ['a3', 'a1']) 3281 self.assertEqual(self._elem_tags(e[3::-sys.maxsize]), ['a3']) 3282 self.assertEqual(self._elem_tags(e[3::-sys.maxsize-1]), ['a3']) 3283 self.assertEqual(self._elem_tags(e[3::-sys.maxsize<<64]), ['a3']) 3284 3285 def test_delslice(self): 3286 e = self._make_elem_with_children(4) 3287 del e[0:2] 3288 self.assertEqual(self._subelem_tags(e), ['a2', 'a3']) 3289 3290 e = self._make_elem_with_children(4) 3291 
del e[0:] 3292 self.assertEqual(self._subelem_tags(e), []) 3293 3294 e = self._make_elem_with_children(4) 3295 del e[::-1] 3296 self.assertEqual(self._subelem_tags(e), []) 3297 3298 e = self._make_elem_with_children(4) 3299 del e[::-2] 3300 self.assertEqual(self._subelem_tags(e), ['a0', 'a2']) 3301 3302 e = self._make_elem_with_children(4) 3303 del e[1::2] 3304 self.assertEqual(self._subelem_tags(e), ['a0', 'a2']) 3305 3306 e = self._make_elem_with_children(2) 3307 del e[::2] 3308 self.assertEqual(self._subelem_tags(e), ['a1']) 3309 3310 def test_setslice_single_index(self): 3311 e = self._make_elem_with_children(4) 3312 e[1] = ET.Element('b') 3313 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 3314 3315 e[-2] = ET.Element('c') 3316 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3']) 3317 3318 with self.assertRaises(IndexError): 3319 e[5] = ET.Element('d') 3320 with self.assertRaises(IndexError): 3321 e[-5] = ET.Element('d') 3322 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3']) 3323 3324 def test_setslice_range(self): 3325 e = self._make_elem_with_children(4) 3326 e[1:3] = [ET.Element('b%s' % i) for i in range(2)] 3327 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'a3']) 3328 3329 e = self._make_elem_with_children(4) 3330 e[1:3] = [ET.Element('b')] 3331 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a3']) 3332 3333 e = self._make_elem_with_children(4) 3334 e[1:3] = [ET.Element('b%s' % i) for i in range(3)] 3335 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'b2', 'a3']) 3336 3337 def test_setslice_steps(self): 3338 e = self._make_elem_with_children(6) 3339 e[1:5:2] = [ET.Element('b%s' % i) for i in range(2)] 3340 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'a2', 'b1', 'a4', 'a5']) 3341 3342 e = self._make_elem_with_children(6) 3343 with self.assertRaises(ValueError): 3344 e[1:5:2] = [ET.Element('b')] 3345 with self.assertRaises(ValueError): 3346 e[1:5:2] = [ET.Element('b%s' % i) 
for i in range(3)] 3347 with self.assertRaises(ValueError): 3348 e[1:5:2] = [] 3349 self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3', 'a4', 'a5']) 3350 3351 e = self._make_elem_with_children(4) 3352 e[1::sys.maxsize] = [ET.Element('b')] 3353 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 3354 e[1::sys.maxsize<<64] = [ET.Element('c')] 3355 self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3']) 3356 3357 def test_setslice_negative_steps(self): 3358 e = self._make_elem_with_children(4) 3359 e[2:0:-1] = [ET.Element('b%s' % i) for i in range(2)] 3360 self.assertEqual(self._subelem_tags(e), ['a0', 'b1', 'b0', 'a3']) 3361 3362 e = self._make_elem_with_children(4) 3363 with self.assertRaises(ValueError): 3364 e[2:0:-1] = [ET.Element('b')] 3365 with self.assertRaises(ValueError): 3366 e[2:0:-1] = [ET.Element('b%s' % i) for i in range(3)] 3367 with self.assertRaises(ValueError): 3368 e[2:0:-1] = [] 3369 self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3']) 3370 3371 e = self._make_elem_with_children(4) 3372 e[1::-sys.maxsize] = [ET.Element('b')] 3373 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 3374 e[1::-sys.maxsize-1] = [ET.Element('c')] 3375 self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3']) 3376 e[1::-sys.maxsize<<64] = [ET.Element('d')] 3377 self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3']) 3378 3379 3380class IOTest(unittest.TestCase): 3381 def test_encoding(self): 3382 # Test encoding issues. 
3383 elem = ET.Element("tag") 3384 elem.text = "abc" 3385 self.assertEqual(serialize(elem), '<tag>abc</tag>') 3386 for enc in ("utf-8", "us-ascii"): 3387 with self.subTest(enc): 3388 self.assertEqual(serialize(elem, encoding=enc), 3389 b'<tag>abc</tag>') 3390 self.assertEqual(serialize(elem, encoding=enc.upper()), 3391 b'<tag>abc</tag>') 3392 for enc in ("iso-8859-1", "utf-16", "utf-32"): 3393 with self.subTest(enc): 3394 self.assertEqual(serialize(elem, encoding=enc), 3395 ("<?xml version='1.0' encoding='%s'?>\n" 3396 "<tag>abc</tag>" % enc).encode(enc)) 3397 upper = enc.upper() 3398 self.assertEqual(serialize(elem, encoding=upper), 3399 ("<?xml version='1.0' encoding='%s'?>\n" 3400 "<tag>abc</tag>" % upper).encode(enc)) 3401 3402 elem = ET.Element("tag") 3403 elem.text = "<&\"\'>" 3404 self.assertEqual(serialize(elem), '<tag><&"\'></tag>') 3405 self.assertEqual(serialize(elem, encoding="utf-8"), 3406 b'<tag><&"\'></tag>') 3407 self.assertEqual(serialize(elem, encoding="us-ascii"), 3408 b'<tag><&"\'></tag>') 3409 for enc in ("iso-8859-1", "utf-16", "utf-32"): 3410 self.assertEqual(serialize(elem, encoding=enc), 3411 ("<?xml version='1.0' encoding='%s'?>\n" 3412 "<tag><&\"'></tag>" % enc).encode(enc)) 3413 3414 elem = ET.Element("tag") 3415 elem.attrib["key"] = "<&\"\'>" 3416 self.assertEqual(serialize(elem), '<tag key="<&"\'>" />') 3417 self.assertEqual(serialize(elem, encoding="utf-8"), 3418 b'<tag key="<&"\'>" />') 3419 self.assertEqual(serialize(elem, encoding="us-ascii"), 3420 b'<tag key="<&"\'>" />') 3421 for enc in ("iso-8859-1", "utf-16", "utf-32"): 3422 self.assertEqual(serialize(elem, encoding=enc), 3423 ("<?xml version='1.0' encoding='%s'?>\n" 3424 "<tag key=\"<&"'>\" />" % enc).encode(enc)) 3425 3426 elem = ET.Element("tag") 3427 elem.text = '\xe5\xf6\xf6<>' 3428 self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6<></tag>') 3429 self.assertEqual(serialize(elem, encoding="utf-8"), 3430 b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6<></tag>') 3431 
self.assertEqual(serialize(elem, encoding="us-ascii"), 3432 b'<tag>åöö<></tag>') 3433 for enc in ("iso-8859-1", "utf-16", "utf-32"): 3434 self.assertEqual(serialize(elem, encoding=enc), 3435 ("<?xml version='1.0' encoding='%s'?>\n" 3436 "<tag>åöö<></tag>" % enc).encode(enc)) 3437 3438 elem = ET.Element("tag") 3439 elem.attrib["key"] = '\xe5\xf6\xf6<>' 3440 self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6<>" />') 3441 self.assertEqual(serialize(elem, encoding="utf-8"), 3442 b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6<>" />') 3443 self.assertEqual(serialize(elem, encoding="us-ascii"), 3444 b'<tag key="åöö<>" />') 3445 for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"): 3446 self.assertEqual(serialize(elem, encoding=enc), 3447 ("<?xml version='1.0' encoding='%s'?>\n" 3448 "<tag key=\"åöö<>\" />" % enc).encode(enc)) 3449 3450 def test_write_to_filename(self): 3451 self.addCleanup(support.unlink, TESTFN) 3452 tree = ET.ElementTree(ET.XML('''<site />''')) 3453 tree.write(TESTFN) 3454 with open(TESTFN, 'rb') as f: 3455 self.assertEqual(f.read(), b'''<site />''') 3456 3457 def test_write_to_text_file(self): 3458 self.addCleanup(support.unlink, TESTFN) 3459 tree = ET.ElementTree(ET.XML('''<site />''')) 3460 with open(TESTFN, 'w', encoding='utf-8') as f: 3461 tree.write(f, encoding='unicode') 3462 self.assertFalse(f.closed) 3463 with open(TESTFN, 'rb') as f: 3464 self.assertEqual(f.read(), b'''<site />''') 3465 3466 def test_write_to_binary_file(self): 3467 self.addCleanup(support.unlink, TESTFN) 3468 tree = ET.ElementTree(ET.XML('''<site />''')) 3469 with open(TESTFN, 'wb') as f: 3470 tree.write(f) 3471 self.assertFalse(f.closed) 3472 with open(TESTFN, 'rb') as f: 3473 self.assertEqual(f.read(), b'''<site />''') 3474 3475 def test_write_to_binary_file_with_bom(self): 3476 self.addCleanup(support.unlink, TESTFN) 3477 tree = ET.ElementTree(ET.XML('''<site />''')) 3478 # test BOM writing to buffered file 3479 with open(TESTFN, 'wb') as f: 3480 
tree.write(f, encoding='utf-16') 3481 self.assertFalse(f.closed) 3482 with open(TESTFN, 'rb') as f: 3483 self.assertEqual(f.read(), 3484 '''<?xml version='1.0' encoding='utf-16'?>\n''' 3485 '''<site />'''.encode("utf-16")) 3486 # test BOM writing to non-buffered file 3487 with open(TESTFN, 'wb', buffering=0) as f: 3488 tree.write(f, encoding='utf-16') 3489 self.assertFalse(f.closed) 3490 with open(TESTFN, 'rb') as f: 3491 self.assertEqual(f.read(), 3492 '''<?xml version='1.0' encoding='utf-16'?>\n''' 3493 '''<site />'''.encode("utf-16")) 3494 3495 def test_read_from_stringio(self): 3496 tree = ET.ElementTree() 3497 stream = io.StringIO('''<?xml version="1.0"?><site></site>''') 3498 tree.parse(stream) 3499 self.assertEqual(tree.getroot().tag, 'site') 3500 3501 def test_write_to_stringio(self): 3502 tree = ET.ElementTree(ET.XML('''<site />''')) 3503 stream = io.StringIO() 3504 tree.write(stream, encoding='unicode') 3505 self.assertEqual(stream.getvalue(), '''<site />''') 3506 3507 def test_read_from_bytesio(self): 3508 tree = ET.ElementTree() 3509 raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''') 3510 tree.parse(raw) 3511 self.assertEqual(tree.getroot().tag, 'site') 3512 3513 def test_write_to_bytesio(self): 3514 tree = ET.ElementTree(ET.XML('''<site />''')) 3515 raw = io.BytesIO() 3516 tree.write(raw) 3517 self.assertEqual(raw.getvalue(), b'''<site />''') 3518 3519 class dummy: 3520 pass 3521 3522 def test_read_from_user_text_reader(self): 3523 stream = io.StringIO('''<?xml version="1.0"?><site></site>''') 3524 reader = self.dummy() 3525 reader.read = stream.read 3526 tree = ET.ElementTree() 3527 tree.parse(reader) 3528 self.assertEqual(tree.getroot().tag, 'site') 3529 3530 def test_write_to_user_text_writer(self): 3531 tree = ET.ElementTree(ET.XML('''<site />''')) 3532 stream = io.StringIO() 3533 writer = self.dummy() 3534 writer.write = stream.write 3535 tree.write(writer, encoding='unicode') 3536 self.assertEqual(stream.getvalue(), '''<site />''') 3537 
3538 def test_read_from_user_binary_reader(self): 3539 raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''') 3540 reader = self.dummy() 3541 reader.read = raw.read 3542 tree = ET.ElementTree() 3543 tree.parse(reader) 3544 self.assertEqual(tree.getroot().tag, 'site') 3545 tree = ET.ElementTree() 3546 3547 def test_write_to_user_binary_writer(self): 3548 tree = ET.ElementTree(ET.XML('''<site />''')) 3549 raw = io.BytesIO() 3550 writer = self.dummy() 3551 writer.write = raw.write 3552 tree.write(writer) 3553 self.assertEqual(raw.getvalue(), b'''<site />''') 3554 3555 def test_write_to_user_binary_writer_with_bom(self): 3556 tree = ET.ElementTree(ET.XML('''<site />''')) 3557 raw = io.BytesIO() 3558 writer = self.dummy() 3559 writer.write = raw.write 3560 writer.seekable = lambda: True 3561 writer.tell = raw.tell 3562 tree.write(writer, encoding="utf-16") 3563 self.assertEqual(raw.getvalue(), 3564 '''<?xml version='1.0' encoding='utf-16'?>\n''' 3565 '''<site />'''.encode("utf-16")) 3566 3567 def test_tostringlist_invariant(self): 3568 root = ET.fromstring('<tag>foo</tag>') 3569 self.assertEqual( 3570 ET.tostring(root, 'unicode'), 3571 ''.join(ET.tostringlist(root, 'unicode'))) 3572 self.assertEqual( 3573 ET.tostring(root, 'utf-16'), 3574 b''.join(ET.tostringlist(root, 'utf-16'))) 3575 3576 def test_short_empty_elements(self): 3577 root = ET.fromstring('<tag>a<x />b<y></y>c</tag>') 3578 self.assertEqual( 3579 ET.tostring(root, 'unicode'), 3580 '<tag>a<x />b<y />c</tag>') 3581 self.assertEqual( 3582 ET.tostring(root, 'unicode', short_empty_elements=True), 3583 '<tag>a<x />b<y />c</tag>') 3584 self.assertEqual( 3585 ET.tostring(root, 'unicode', short_empty_elements=False), 3586 '<tag>a<x></x>b<y></y>c</tag>') 3587 3588 3589class ParseErrorTest(unittest.TestCase): 3590 def test_subclass(self): 3591 self.assertIsInstance(ET.ParseError(), SyntaxError) 3592 3593 def _get_error(self, s): 3594 try: 3595 ET.fromstring(s) 3596 except ET.ParseError as e: 3597 return e 3598 
3599 def test_error_position(self): 3600 self.assertEqual(self._get_error('foo').position, (1, 0)) 3601 self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5)) 3602 self.assertEqual(self._get_error('foobar<').position, (1, 6)) 3603 3604 def test_error_code(self): 3605 import xml.parsers.expat.errors as ERRORS 3606 self.assertEqual(self._get_error('foo').code, 3607 ERRORS.codes[ERRORS.XML_ERROR_SYNTAX]) 3608 3609 3610class KeywordArgsTest(unittest.TestCase): 3611 # Test various issues with keyword arguments passed to ET.Element 3612 # constructor and methods 3613 def test_issue14818(self): 3614 x = ET.XML("<a>foo</a>") 3615 self.assertEqual(x.find('a', None), 3616 x.find(path='a', namespaces=None)) 3617 self.assertEqual(x.findtext('a', None, None), 3618 x.findtext(path='a', default=None, namespaces=None)) 3619 self.assertEqual(x.findall('a', None), 3620 x.findall(path='a', namespaces=None)) 3621 self.assertEqual(list(x.iterfind('a', None)), 3622 list(x.iterfind(path='a', namespaces=None))) 3623 3624 self.assertEqual(ET.Element('a').attrib, {}) 3625 elements = [ 3626 ET.Element('a', dict(href="#", id="foo")), 3627 ET.Element('a', attrib=dict(href="#", id="foo")), 3628 ET.Element('a', dict(href="#"), id="foo"), 3629 ET.Element('a', href="#", id="foo"), 3630 ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"), 3631 ] 3632 for e in elements: 3633 self.assertEqual(e.tag, 'a') 3634 self.assertEqual(e.attrib, dict(href="#", id="foo")) 3635 3636 e2 = ET.SubElement(elements[0], 'foobar', attrib={'key1': 'value1'}) 3637 self.assertEqual(e2.attrib['key1'], 'value1') 3638 3639 with self.assertRaisesRegex(TypeError, 'must be dict, not str'): 3640 ET.Element('a', "I'm not a dict") 3641 with self.assertRaisesRegex(TypeError, 'must be dict, not str'): 3642 ET.Element('a', attrib="I'm not a dict") 3643 3644# -------------------------------------------------------------------- 3645 3646class NoAcceleratorTest(unittest.TestCase): 3647 def setUp(self): 3648 if 
not pyET: 3649 raise unittest.SkipTest('only for the Python version') 3650 3651 # Test that the C accelerator was not imported for pyET 3652 def test_correct_import_pyET(self): 3653 # The type of methods defined in Python code is types.FunctionType, 3654 # while the type of methods defined inside _elementtree is 3655 # <class 'wrapper_descriptor'> 3656 self.assertIsInstance(pyET.Element.__init__, types.FunctionType) 3657 self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType) 3658 3659 3660# -------------------------------------------------------------------- 3661 3662def c14n_roundtrip(xml, **options): 3663 return pyET.canonicalize(xml, **options) 3664 3665 3666class C14NTest(unittest.TestCase): 3667 maxDiff = None 3668 3669 # 3670 # simple roundtrip tests (from c14n.py) 3671 3672 def test_simple_roundtrip(self): 3673 # Basics 3674 self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>') 3675 self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"), # FIXME 3676 '<doc xmlns="uri"></doc>') 3677 self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"), 3678 '<prefix:doc xmlns:prefix="uri"></prefix:doc>') 3679 self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"), 3680 '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>') 3681 self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"), 3682 '<elem></elem>') 3683 3684 # C14N spec 3685 self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"), 3686 '<doc>Hello, world!</doc>') 3687 self.assertEqual(c14n_roundtrip("<value>2</value>"), 3688 '<value>2</value>') 3689 self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'), 3690 '<compute>value>"0" && value<"10" ?"valid":"error"</compute>') 3691 self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" && value<"10" 
?"valid":"error"'>valid</compute>'''), 3692 '<compute expr="value>"0" && value<"10" ?"valid":"error"">valid</compute>') 3693 self.assertEqual(c14n_roundtrip("<norm attr=' '   
	 ' '/>"), 3694 '<norm attr=" \' 
	 \' "></norm>') 3695 self.assertEqual(c14n_roundtrip("<normNames attr=' A   
	 B '/>"), 3696 '<normNames attr=" A 
	 B "></normNames>') 3697 self.assertEqual(c14n_roundtrip("<normId id=' '   
	 ' '/>"), 3698 '<normId id=" \' 
	 \' "></normId>') 3699 3700 # fragments from PJ's tests 3701 #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"), 3702 #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>') 3703 3704 def test_c14n_exclusion(self): 3705 xml = textwrap.dedent("""\ 3706 <root xmlns:x="http://example.com/x"> 3707 <a x:attr="attrx"> 3708 <b>abtext</b> 3709 </a> 3710 <b>btext</b> 3711 <c> 3712 <x:d>dtext</x:d> 3713 </c> 3714 </root> 3715 """) 3716 self.assertEqual( 3717 c14n_roundtrip(xml, strip_text=True), 3718 '<root>' 3719 '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>' 3720 '<b>btext</b>' 3721 '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>' 3722 '</root>') 3723 self.assertEqual( 3724 c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']), 3725 '<root>' 3726 '<a><b>abtext</b></a>' 3727 '<b>btext</b>' 3728 '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>' 3729 '</root>') 3730 self.assertEqual( 3731 c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']), 3732 '<root>' 3733 '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>' 3734 '<b>btext</b>' 3735 '<c></c>' 3736 '</root>') 3737 self.assertEqual( 3738 c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'], 3739 exclude_tags=['{http://example.com/x}d']), 3740 '<root>' 3741 '<a><b>abtext</b></a>' 3742 '<b>btext</b>' 3743 '<c></c>' 3744 '</root>') 3745 self.assertEqual( 3746 c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']), 3747 '<root>' 3748 '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>' 3749 '</root>') 3750 self.assertEqual( 3751 c14n_roundtrip(xml, exclude_tags=['a', 'b']), 3752 '<root>\n' 3753 ' \n' 3754 ' \n' 3755 ' <c>\n' 3756 ' <x:d 
xmlns:x="http://example.com/x">dtext</x:d>\n' 3757 ' </c>\n' 3758 '</root>') 3759 self.assertEqual( 3760 c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']), 3761 '<root>' 3762 '<a xmlns:x="http://example.com/x" x:attr="attrx"></a>' 3763 '<c></c>' 3764 '</root>') 3765 self.assertEqual( 3766 c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']), 3767 '<root>\n' 3768 ' <a xmlns:x="http://example.com/x" x:attr="attrx">\n' 3769 ' \n' 3770 ' </a>\n' 3771 ' \n' 3772 ' <c>\n' 3773 ' \n' 3774 ' </c>\n' 3775 '</root>') 3776 3777 # 3778 # basic method=c14n tests from the c14n 2.0 specification. uses 3779 # test files under xmltestdata/c14n-20. 3780 3781 # note that this uses generated C14N versions of the standard ET.write 3782 # output, not roundtripped C14N (see above). 3783 3784 def test_xml_c14n2(self): 3785 datadir = findfile("c14n-20", subdir="xmltestdata") 3786 full_path = partial(os.path.join, datadir) 3787 3788 files = [filename[:-4] for filename in sorted(os.listdir(datadir)) 3789 if filename.endswith('.xml')] 3790 input_files = [ 3791 filename for filename in files 3792 if filename.startswith('in') 3793 ] 3794 configs = { 3795 filename: { 3796 # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite> 3797 option.tag.split('}')[-1]: ((option.text or '').strip(), option) 3798 for option in ET.parse(full_path(filename) + ".xml").getroot() 3799 } 3800 for filename in files 3801 if filename.startswith('c14n') 3802 } 3803 3804 tests = { 3805 input_file: [ 3806 (filename, configs[filename.rsplit('_', 1)[-1]]) 3807 for filename in files 3808 if filename.startswith(f'out_{input_file}_') 3809 and filename.rsplit('_', 1)[-1] in configs 3810 ] 3811 for input_file in input_files 3812 } 3813 3814 # Make sure we found all test cases. 
3815 self.assertEqual(30, len([ 3816 output_file for output_files in tests.values() 3817 for output_file in output_files])) 3818 3819 def get_option(config, option_name, default=None): 3820 return config.get(option_name, (default, ()))[0] 3821 3822 for input_file, output_files in tests.items(): 3823 for output_file, config in output_files: 3824 keep_comments = get_option( 3825 config, 'IgnoreComments') == 'true' # no, it's right :) 3826 strip_text = get_option( 3827 config, 'TrimTextNodes') == 'true' 3828 rewrite_prefixes = get_option( 3829 config, 'PrefixRewrite') == 'sequential' 3830 if 'QNameAware' in config: 3831 qattrs = [ 3832 f"{{{el.get('NS')}}}{el.get('Name')}" 3833 for el in config['QNameAware'][1].findall( 3834 '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr') 3835 ] 3836 qtags = [ 3837 f"{{{el.get('NS')}}}{el.get('Name')}" 3838 for el in config['QNameAware'][1].findall( 3839 '{http://www.w3.org/2010/xml-c14n2}Element') 3840 ] 3841 else: 3842 qtags = qattrs = None 3843 3844 # Build subtest description from config. 3845 config_descr = ','.join( 3846 f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}" 3847 for name, (value, children) in sorted(config.items()) 3848 ) 3849 3850 with self.subTest(f"{output_file}({config_descr})"): 3851 if input_file == 'inNsRedecl' and not rewrite_prefixes: 3852 self.skipTest( 3853 f"Redeclared namespace handling is not supported in {output_file}") 3854 if input_file == 'inNsSuperfluous' and not rewrite_prefixes: 3855 self.skipTest( 3856 f"Redeclared namespace handling is not supported in {output_file}") 3857 if 'QNameAware' in config and config['QNameAware'][1].find( 3858 '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None: 3859 self.skipTest( 3860 f"QName rewriting in XPath text is not supported in {output_file}") 3861 3862 f = full_path(input_file + ".xml") 3863 if input_file == 'inC14N5': 3864 # Hack: avoid setting up external entity resolution in the parser. 
3865 with open(full_path('world.txt'), 'rb') as entity_file: 3866 with open(f, 'rb') as f: 3867 f = io.BytesIO(f.read().replace(b'&ent2;', entity_file.read())) 3868 3869 text = ET.canonicalize( 3870 from_file=f, 3871 with_comments=keep_comments, 3872 strip_text=strip_text, 3873 rewrite_prefixes=rewrite_prefixes, 3874 qname_aware_tags=qtags, qname_aware_attrs=qattrs) 3875 3876 with open(full_path(output_file + ".xml"), 'r', encoding='utf8') as f: 3877 expected = f.read() 3878 if input_file == 'inC14N3': 3879 # FIXME: cET resolves default attributes but ET does not! 3880 expected = expected.replace(' attr="default"', '') 3881 text = text.replace(' attr="default"', '') 3882 self.assertEqual(expected, text) 3883 3884# -------------------------------------------------------------------- 3885 3886 3887def test_main(module=None): 3888 # When invoked without a module, runs the Python ET tests by loading pyET. 3889 # Otherwise, uses the given module as the ET. 3890 global pyET 3891 pyET = import_fresh_module('xml.etree.ElementTree', 3892 blocked=['_elementtree']) 3893 if module is None: 3894 module = pyET 3895 3896 global ET 3897 ET = module 3898 3899 test_classes = [ 3900 ModuleTest, 3901 ElementSlicingTest, 3902 BasicElementTest, 3903 BadElementTest, 3904 BadElementPathTest, 3905 ElementTreeTest, 3906 IOTest, 3907 ParseErrorTest, 3908 XIncludeTest, 3909 ElementTreeTypeTest, 3910 ElementFindTest, 3911 ElementIterTest, 3912 TreeBuilderTest, 3913 XMLParserTest, 3914 XMLPullParserTest, 3915 BugsTest, 3916 KeywordArgsTest, 3917 C14NTest, 3918 ] 3919 3920 # These tests will only run for the pure-Python version that doesn't import 3921 # _elementtree. We can't use skipUnless here, because pyET is filled in only 3922 # after the module is loaded. 3923 if pyET is not ET: 3924 test_classes.extend([ 3925 NoAcceleratorTest, 3926 ]) 3927 3928 # Provide default namespace mapping and path cache. 
3929 from xml.etree import ElementPath 3930 nsmap = ET.register_namespace._namespace_map 3931 # Copy the default namespace mapping 3932 nsmap_copy = nsmap.copy() 3933 # Copy the path cache (should be empty) 3934 path_cache = ElementPath._cache 3935 ElementPath._cache = path_cache.copy() 3936 # Align the Comment/PI factories. 3937 if hasattr(ET, '_set_factories'): 3938 old_factories = ET._set_factories(ET.Comment, ET.PI) 3939 else: 3940 old_factories = None 3941 3942 try: 3943 support.run_unittest(*test_classes) 3944 finally: 3945 from xml.etree import ElementPath 3946 # Restore mapping and path cache 3947 nsmap.clear() 3948 nsmap.update(nsmap_copy) 3949 ElementPath._cache = path_cache 3950 if old_factories is not None: 3951 ET._set_factories(*old_factories) 3952 # don't interfere with subsequent tests 3953 ET = pyET = None 3954 3955 3956if __name__ == '__main__': 3957 test_main() 3958