1# IMPORTANT: the same tests are run from "test_xml_etree_c" in order 2# to ensure consistency between the C implementation and the Python 3# implementation. 4# 5# For this purpose, the module-level "ET" symbol is temporarily 6# monkey-patched when running the "test_xml_etree_c" test suite. 7 8import copy 9import functools 10import html 11import io 12import itertools 13import locale 14import operator 15import os 16import pickle 17import sys 18import textwrap 19import types 20import unittest 21import warnings 22import weakref 23 24from functools import partial 25from itertools import product, islice 26from test import support 27from test.support import os_helper 28from test.support import warnings_helper 29from test.support import findfile, gc_collect, swap_attr, swap_item 30from test.support.import_helper import import_fresh_module 31from test.support.os_helper import TESTFN 32 33 34# pyET is the pure-Python implementation. 35# 36# ET is pyET in test_xml_etree and is the C accelerated version in 37# test_xml_etree_c. 
38pyET = None 39ET = None 40 41SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") 42try: 43 SIMPLE_XMLFILE.encode("utf-8") 44except UnicodeEncodeError: 45 raise unittest.SkipTest("filename is not encodable to utf8") 46SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") 47UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata") 48 49SAMPLE_XML = """\ 50<body> 51 <tag class='a'>text</tag> 52 <tag class='b' /> 53 <section> 54 <tag class='b' id='inner'>subtext</tag> 55 </section> 56</body> 57""" 58 59SAMPLE_SECTION = """\ 60<section> 61 <tag class='b' id='inner'>subtext</tag> 62 <nexttag /> 63 <nextsection> 64 <tag /> 65 </nextsection> 66</section> 67""" 68 69SAMPLE_XML_NS = """ 70<body xmlns="http://effbot.org/ns"> 71 <tag>text</tag> 72 <tag /> 73 <section> 74 <tag>subtext</tag> 75 </section> 76</body> 77""" 78 79SAMPLE_XML_NS_ELEMS = """ 80<root> 81<h:table xmlns:h="hello"> 82 <h:tr> 83 <h:td>Apples</h:td> 84 <h:td>Bananas</h:td> 85 </h:tr> 86</h:table> 87 88<f:table xmlns:f="foo"> 89 <f:name>African Coffee Table</f:name> 90 <f:width>80</f:width> 91 <f:length>120</f:length> 92</f:table> 93</root> 94""" 95 96ENTITY_XML = """\ 97<!DOCTYPE points [ 98<!ENTITY % user-entities SYSTEM 'user-entities.xml'> 99%user-entities; 100]> 101<document>&entity;</document> 102""" 103 104EXTERNAL_ENTITY_XML = """\ 105<!DOCTYPE points [ 106<!ENTITY entity SYSTEM "file:///non-existing-file.xml"> 107]> 108<document>&entity;</document> 109""" 110 111ATTLIST_XML = """\ 112<?xml version="1.0" encoding="UTF-8"?> 113<!DOCTYPE Foo [ 114<!ELEMENT foo (bar*)> 115<!ELEMENT bar (#PCDATA)*> 116<!ATTLIST bar xml:lang CDATA "eng"> 117<!ENTITY qux "quux"> 118]> 119<foo> 120<bar>&qux;</bar> 121</foo> 122""" 123 124def checkwarnings(*filters, quiet=False): 125 def decorator(test): 126 def newtest(*args, **kwargs): 127 with warnings_helper.check_warnings(*filters, quiet=quiet): 128 test(*args, **kwargs) 129 functools.update_wrapper(newtest, test) 130 return 
newtest 131 return decorator 132 133 134class ModuleTest(unittest.TestCase): 135 def test_sanity(self): 136 # Import sanity. 137 138 from xml.etree import ElementTree 139 from xml.etree import ElementInclude 140 from xml.etree import ElementPath 141 142 def test_all(self): 143 names = ("xml.etree.ElementTree", "_elementtree") 144 support.check__all__(self, ET, names, not_exported=("HTML_EMPTY",)) 145 146 147def serialize(elem, to_string=True, encoding='unicode', **options): 148 if encoding != 'unicode': 149 file = io.BytesIO() 150 else: 151 file = io.StringIO() 152 tree = ET.ElementTree(elem) 153 tree.write(file, encoding=encoding, **options) 154 if to_string: 155 return file.getvalue() 156 else: 157 file.seek(0) 158 return file 159 160def summarize_list(seq): 161 return [elem.tag for elem in seq] 162 163 164class ElementTestCase: 165 @classmethod 166 def setUpClass(cls): 167 cls.modules = {pyET, ET} 168 169 def pickleRoundTrip(self, obj, name, dumper, loader, proto): 170 try: 171 with swap_item(sys.modules, name, dumper): 172 temp = pickle.dumps(obj, proto) 173 with swap_item(sys.modules, name, loader): 174 result = pickle.loads(temp) 175 except pickle.PicklingError as pe: 176 # pyET must be second, because pyET may be (equal to) ET. 
177 human = dict([(ET, "cET"), (pyET, "pyET")]) 178 raise support.TestFailed("Failed to round-trip %r from %r to %r" 179 % (obj, 180 human.get(dumper, dumper), 181 human.get(loader, loader))) from pe 182 return result 183 184 def assertEqualElements(self, alice, bob): 185 self.assertIsInstance(alice, (ET.Element, pyET.Element)) 186 self.assertIsInstance(bob, (ET.Element, pyET.Element)) 187 self.assertEqual(len(list(alice)), len(list(bob))) 188 for x, y in zip(alice, bob): 189 self.assertEqualElements(x, y) 190 properties = operator.attrgetter('tag', 'tail', 'text', 'attrib') 191 self.assertEqual(properties(alice), properties(bob)) 192 193# -------------------------------------------------------------------- 194# element tree tests 195 196class ElementTreeTest(unittest.TestCase): 197 198 def serialize_check(self, elem, expected): 199 self.assertEqual(serialize(elem), expected) 200 201 def test_interface(self): 202 # Test element tree interface. 203 204 def check_string(string): 205 len(string) 206 for char in string: 207 self.assertEqual(len(char), 1, 208 msg="expected one-character string, got %r" % char) 209 new_string = string + "" 210 new_string = string + " " 211 string[:0] 212 213 def check_mapping(mapping): 214 len(mapping) 215 keys = mapping.keys() 216 items = mapping.items() 217 for key in keys: 218 item = mapping[key] 219 mapping["key"] = "value" 220 self.assertEqual(mapping["key"], "value", 221 msg="expected value string, got %r" % mapping["key"]) 222 223 def check_element(element): 224 self.assertTrue(ET.iselement(element), msg="not an element") 225 direlem = dir(element) 226 for attr in 'tag', 'attrib', 'text', 'tail': 227 self.assertTrue(hasattr(element, attr), 228 msg='no %s member' % attr) 229 self.assertIn(attr, direlem, 230 msg='no %s visible by dir' % attr) 231 232 check_string(element.tag) 233 check_mapping(element.attrib) 234 if element.text is not None: 235 check_string(element.text) 236 if element.tail is not None: 237 
check_string(element.tail) 238 for elem in element: 239 check_element(elem) 240 241 element = ET.Element("tag") 242 check_element(element) 243 tree = ET.ElementTree(element) 244 check_element(tree.getroot()) 245 element = ET.Element("t\xe4g", key="value") 246 tree = ET.ElementTree(element) 247 self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$") 248 element = ET.Element("tag", key="value") 249 250 # Make sure all standard element methods exist. 251 252 def check_method(method): 253 self.assertTrue(hasattr(method, '__call__'), 254 msg="%s not callable" % method) 255 256 check_method(element.append) 257 check_method(element.extend) 258 check_method(element.insert) 259 check_method(element.remove) 260 check_method(element.find) 261 check_method(element.iterfind) 262 check_method(element.findall) 263 check_method(element.findtext) 264 check_method(element.clear) 265 check_method(element.get) 266 check_method(element.set) 267 check_method(element.keys) 268 check_method(element.items) 269 check_method(element.iter) 270 check_method(element.itertext) 271 272 # These methods return an iterable. See bug 6472. 
273 274 def check_iter(it): 275 check_method(it.__next__) 276 277 check_iter(element.iterfind("tag")) 278 check_iter(element.iterfind("*")) 279 check_iter(tree.iterfind("tag")) 280 check_iter(tree.iterfind("*")) 281 282 # These aliases are provided: 283 284 self.assertEqual(ET.XML, ET.fromstring) 285 self.assertEqual(ET.PI, ET.ProcessingInstruction) 286 287 def test_set_attribute(self): 288 element = ET.Element('tag') 289 290 self.assertEqual(element.tag, 'tag') 291 element.tag = 'Tag' 292 self.assertEqual(element.tag, 'Tag') 293 element.tag = 'TAG' 294 self.assertEqual(element.tag, 'TAG') 295 296 self.assertIsNone(element.text) 297 element.text = 'Text' 298 self.assertEqual(element.text, 'Text') 299 element.text = 'TEXT' 300 self.assertEqual(element.text, 'TEXT') 301 302 self.assertIsNone(element.tail) 303 element.tail = 'Tail' 304 self.assertEqual(element.tail, 'Tail') 305 element.tail = 'TAIL' 306 self.assertEqual(element.tail, 'TAIL') 307 308 self.assertEqual(element.attrib, {}) 309 element.attrib = {'a': 'b', 'c': 'd'} 310 self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'}) 311 element.attrib = {'A': 'B', 'C': 'D'} 312 self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'}) 313 314 def test_simpleops(self): 315 # Basic method sanity checks. 
316 317 elem = ET.XML("<body><tag/></body>") 318 self.serialize_check(elem, '<body><tag /></body>') 319 e = ET.Element("tag2") 320 elem.append(e) 321 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 322 elem.remove(e) 323 self.serialize_check(elem, '<body><tag /></body>') 324 elem.insert(0, e) 325 self.serialize_check(elem, '<body><tag2 /><tag /></body>') 326 elem.remove(e) 327 elem.extend([e]) 328 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 329 elem.remove(e) 330 elem.extend(iter([e])) 331 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 332 elem.remove(e) 333 334 element = ET.Element("tag", key="value") 335 self.serialize_check(element, '<tag key="value" />') # 1 336 subelement = ET.Element("subtag") 337 element.append(subelement) 338 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2 339 element.insert(0, subelement) 340 self.serialize_check(element, 341 '<tag key="value"><subtag /><subtag /></tag>') # 3 342 element.remove(subelement) 343 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4 344 element.remove(subelement) 345 self.serialize_check(element, '<tag key="value" />') # 5 346 with self.assertRaises(ValueError) as cm: 347 element.remove(subelement) 348 self.assertEqual(str(cm.exception), 'list.remove(x): x not in list') 349 self.serialize_check(element, '<tag key="value" />') # 6 350 element[0:0] = [subelement, subelement, subelement] 351 self.serialize_check(element[1], '<subtag />') 352 self.assertEqual(element[1:9], [element[1], element[2]]) 353 self.assertEqual(element[:9:2], [element[0], element[2]]) 354 del element[1:2] 355 self.serialize_check(element, 356 '<tag key="value"><subtag /><subtag /></tag>') 357 358 def test_cdata(self): 359 # Test CDATA handling (etc). 
360 361 self.serialize_check(ET.XML("<tag>hello</tag>"), 362 '<tag>hello</tag>') 363 self.serialize_check(ET.XML("<tag>hello</tag>"), 364 '<tag>hello</tag>') 365 self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"), 366 '<tag>hello</tag>') 367 368 def test_file_init(self): 369 stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8")) 370 tree = ET.ElementTree(file=stringfile) 371 self.assertEqual(tree.find("tag").tag, 'tag') 372 self.assertEqual(tree.find("section/tag").tag, 'tag') 373 374 tree = ET.ElementTree(file=SIMPLE_XMLFILE) 375 self.assertEqual(tree.find("element").tag, 'element') 376 self.assertEqual(tree.find("element/../empty-element").tag, 377 'empty-element') 378 379 def test_path_cache(self): 380 # Check that the path cache behaves sanely. 381 382 from xml.etree import ElementPath 383 384 elem = ET.XML(SAMPLE_XML) 385 for i in range(10): ET.ElementTree(elem).find('./'+str(i)) 386 cache_len_10 = len(ElementPath._cache) 387 for i in range(10): ET.ElementTree(elem).find('./'+str(i)) 388 self.assertEqual(len(ElementPath._cache), cache_len_10) 389 for i in range(20): ET.ElementTree(elem).find('./'+str(i)) 390 self.assertGreater(len(ElementPath._cache), cache_len_10) 391 for i in range(600): ET.ElementTree(elem).find('./'+str(i)) 392 self.assertLess(len(ElementPath._cache), 500) 393 394 def test_copy(self): 395 # Test copy handling (etc). 396 397 import copy 398 e1 = ET.XML("<tag>hello<foo/></tag>") 399 e2 = copy.copy(e1) 400 e3 = copy.deepcopy(e1) 401 e1.find("foo").tag = "bar" 402 self.serialize_check(e1, '<tag>hello<bar /></tag>') 403 self.serialize_check(e2, '<tag>hello<bar /></tag>') 404 self.serialize_check(e3, '<tag>hello<foo /></tag>') 405 406 def test_attrib(self): 407 # Test attribute handling. 
408 409 elem = ET.Element("tag") 410 elem.get("key") # 1.1 411 self.assertEqual(elem.get("key", "default"), 'default') # 1.2 412 413 elem.set("key", "value") 414 self.assertEqual(elem.get("key"), 'value') # 1.3 415 416 elem = ET.Element("tag", key="value") 417 self.assertEqual(elem.get("key"), 'value') # 2.1 418 self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2 419 420 attrib = {"key": "value"} 421 elem = ET.Element("tag", attrib) 422 attrib.clear() # check for aliasing issues 423 self.assertEqual(elem.get("key"), 'value') # 3.1 424 self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2 425 426 attrib = {"key": "value"} 427 elem = ET.Element("tag", **attrib) 428 attrib.clear() # check for aliasing issues 429 self.assertEqual(elem.get("key"), 'value') # 4.1 430 self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2 431 432 elem = ET.Element("tag", {"key": "other"}, key="value") 433 self.assertEqual(elem.get("key"), 'value') # 5.1 434 self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2 435 436 elem = ET.Element('test') 437 elem.text = "aa" 438 elem.set('testa', 'testval') 439 elem.set('testb', 'test2') 440 self.assertEqual(ET.tostring(elem), 441 b'<test testa="testval" testb="test2">aa</test>') 442 self.assertEqual(sorted(elem.keys()), ['testa', 'testb']) 443 self.assertEqual(sorted(elem.items()), 444 [('testa', 'testval'), ('testb', 'test2')]) 445 self.assertEqual(elem.attrib['testb'], 'test2') 446 elem.attrib['testb'] = 'test1' 447 elem.attrib['testc'] = 'test2' 448 self.assertEqual(ET.tostring(elem), 449 b'<test testa="testval" testb="test1" testc="test2">aa</test>') 450 451 # Test preserving white space chars in attributes 452 elem = ET.Element('test') 453 elem.set('a', '\r') 454 elem.set('b', '\r\n') 455 elem.set('c', '\t\n\r ') 456 elem.set('d', '\n\n\r\r\t\t ') 457 self.assertEqual(ET.tostring(elem), 458 b'<test a=" " b=" " c="	 " d=" 		 " />') 459 460 def test_makeelement(self): 461 # Test makeelement handling. 
462 463 elem = ET.Element("tag") 464 attrib = {"key": "value"} 465 subelem = elem.makeelement("subtag", attrib) 466 self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing") 467 elem.append(subelem) 468 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 469 470 elem.clear() 471 self.serialize_check(elem, '<tag />') 472 elem.append(subelem) 473 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 474 elem.extend([subelem, subelem]) 475 self.serialize_check(elem, 476 '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>') 477 elem[:] = [subelem] 478 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 479 elem[:] = tuple([subelem]) 480 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 481 482 def test_parsefile(self): 483 # Test parsing from file. 484 485 tree = ET.parse(SIMPLE_XMLFILE) 486 stream = io.StringIO() 487 tree.write(stream, encoding='unicode') 488 self.assertEqual(stream.getvalue(), 489 '<root>\n' 490 ' <element key="value">text</element>\n' 491 ' <element>text</element>tail\n' 492 ' <empty-element />\n' 493 '</root>') 494 tree = ET.parse(SIMPLE_NS_XMLFILE) 495 stream = io.StringIO() 496 tree.write(stream, encoding='unicode') 497 self.assertEqual(stream.getvalue(), 498 '<ns0:root xmlns:ns0="namespace">\n' 499 ' <ns0:element key="value">text</ns0:element>\n' 500 ' <ns0:element>text</ns0:element>tail\n' 501 ' <ns0:empty-element />\n' 502 '</ns0:root>') 503 504 with open(SIMPLE_XMLFILE) as f: 505 data = f.read() 506 507 parser = ET.XMLParser() 508 self.assertRegex(parser.version, r'^Expat ') 509 parser.feed(data) 510 self.serialize_check(parser.close(), 511 '<root>\n' 512 ' <element key="value">text</element>\n' 513 ' <element>text</element>tail\n' 514 ' <empty-element />\n' 515 '</root>') 516 517 target = ET.TreeBuilder() 518 parser = ET.XMLParser(target=target) 519 parser.feed(data) 520 self.serialize_check(parser.close(), 521 '<root>\n' 522 ' <element 
key="value">text</element>\n' 523 ' <element>text</element>tail\n' 524 ' <empty-element />\n' 525 '</root>') 526 527 def test_parseliteral(self): 528 element = ET.XML("<html><body>text</body></html>") 529 self.assertEqual(ET.tostring(element, encoding='unicode'), 530 '<html><body>text</body></html>') 531 element = ET.fromstring("<html><body>text</body></html>") 532 self.assertEqual(ET.tostring(element, encoding='unicode'), 533 '<html><body>text</body></html>') 534 sequence = ["<html><body>", "text</bo", "dy></html>"] 535 element = ET.fromstringlist(sequence) 536 self.assertEqual(ET.tostring(element), 537 b'<html><body>text</body></html>') 538 self.assertEqual(b"".join(ET.tostringlist(element)), 539 b'<html><body>text</body></html>') 540 self.assertEqual(ET.tostring(element, "ascii"), 541 b"<?xml version='1.0' encoding='ascii'?>\n" 542 b"<html><body>text</body></html>") 543 _, ids = ET.XMLID("<html><body>text</body></html>") 544 self.assertEqual(len(ids), 0) 545 _, ids = ET.XMLID("<html><body id='body'>text</body></html>") 546 self.assertEqual(len(ids), 1) 547 self.assertEqual(ids["body"].tag, 'body') 548 549 def test_iterparse(self): 550 # Test iterparse interface. 
551 552 iterparse = ET.iterparse 553 554 context = iterparse(SIMPLE_XMLFILE) 555 action, elem = next(context) 556 self.assertEqual((action, elem.tag), ('end', 'element')) 557 self.assertEqual([(action, elem.tag) for action, elem in context], [ 558 ('end', 'element'), 559 ('end', 'empty-element'), 560 ('end', 'root'), 561 ]) 562 self.assertEqual(context.root.tag, 'root') 563 564 context = iterparse(SIMPLE_NS_XMLFILE) 565 self.assertEqual([(action, elem.tag) for action, elem in context], [ 566 ('end', '{namespace}element'), 567 ('end', '{namespace}element'), 568 ('end', '{namespace}empty-element'), 569 ('end', '{namespace}root'), 570 ]) 571 572 events = () 573 context = iterparse(SIMPLE_XMLFILE, events) 574 self.assertEqual([(action, elem.tag) for action, elem in context], []) 575 576 events = () 577 context = iterparse(SIMPLE_XMLFILE, events=events) 578 self.assertEqual([(action, elem.tag) for action, elem in context], []) 579 580 events = ("start", "end") 581 context = iterparse(SIMPLE_XMLFILE, events) 582 self.assertEqual([(action, elem.tag) for action, elem in context], [ 583 ('start', 'root'), 584 ('start', 'element'), 585 ('end', 'element'), 586 ('start', 'element'), 587 ('end', 'element'), 588 ('start', 'empty-element'), 589 ('end', 'empty-element'), 590 ('end', 'root'), 591 ]) 592 593 events = ("start", "end", "start-ns", "end-ns") 594 context = iterparse(SIMPLE_NS_XMLFILE, events) 595 self.assertEqual([(action, elem.tag) if action in ("start", "end") 596 else (action, elem) 597 for action, elem in context], [ 598 ('start-ns', ('', 'namespace')), 599 ('start', '{namespace}root'), 600 ('start', '{namespace}element'), 601 ('end', '{namespace}element'), 602 ('start', '{namespace}element'), 603 ('end', '{namespace}element'), 604 ('start', '{namespace}empty-element'), 605 ('end', '{namespace}empty-element'), 606 ('end', '{namespace}root'), 607 ('end-ns', None), 608 ]) 609 610 events = ('start-ns', 'end-ns') 611 context = iterparse(io.StringIO(r"<root xmlns=''/>"), 
events) 612 res = [action for action, elem in context] 613 self.assertEqual(res, ['start-ns', 'end-ns']) 614 615 events = ("start", "end", "bogus") 616 with open(SIMPLE_XMLFILE, "rb") as f: 617 with self.assertRaises(ValueError) as cm: 618 iterparse(f, events) 619 self.assertFalse(f.closed) 620 self.assertEqual(str(cm.exception), "unknown event 'bogus'") 621 622 with warnings_helper.check_no_resource_warning(self): 623 with self.assertRaises(ValueError) as cm: 624 iterparse(SIMPLE_XMLFILE, events) 625 self.assertEqual(str(cm.exception), "unknown event 'bogus'") 626 del cm 627 628 source = io.BytesIO( 629 b"<?xml version='1.0' encoding='iso-8859-1'?>\n" 630 b"<body xmlns='http://éffbot.org/ns'\n" 631 b" xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n") 632 events = ("start-ns",) 633 context = iterparse(source, events) 634 self.assertEqual([(action, elem) for action, elem in context], [ 635 ('start-ns', ('', 'http://\xe9ffbot.org/ns')), 636 ('start-ns', ('cl\xe9', 'http://effbot.org/ns')), 637 ]) 638 639 source = io.StringIO("<document />junk") 640 it = iterparse(source) 641 action, elem = next(it) 642 self.assertEqual((action, elem.tag), ('end', 'document')) 643 with self.assertRaises(ET.ParseError) as cm: 644 next(it) 645 self.assertEqual(str(cm.exception), 646 'junk after document element: line 1, column 12') 647 648 self.addCleanup(os_helper.unlink, TESTFN) 649 with open(TESTFN, "wb") as f: 650 f.write(b"<document />junk") 651 it = iterparse(TESTFN) 652 action, elem = next(it) 653 self.assertEqual((action, elem.tag), ('end', 'document')) 654 with warnings_helper.check_no_resource_warning(self): 655 with self.assertRaises(ET.ParseError) as cm: 656 next(it) 657 self.assertEqual(str(cm.exception), 658 'junk after document element: line 1, column 12') 659 del cm, it 660 661 def test_writefile(self): 662 elem = ET.Element("tag") 663 elem.text = "text" 664 self.serialize_check(elem, '<tag>text</tag>') 665 ET.SubElement(elem, "subtag").text = "subtext" 666 
self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>') 667 668 # Test tag suppression 669 elem.tag = None 670 self.serialize_check(elem, 'text<subtag>subtext</subtag>') 671 elem.insert(0, ET.Comment("comment")) 672 self.serialize_check(elem, 673 'text<!--comment--><subtag>subtext</subtag>') # assumes 1.3 674 675 elem[0] = ET.PI("key", "value") 676 self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>') 677 678 def test_custom_builder(self): 679 # Test parser w. custom builder. 680 681 with open(SIMPLE_XMLFILE) as f: 682 data = f.read() 683 class Builder(list): 684 def start(self, tag, attrib): 685 self.append(("start", tag)) 686 def end(self, tag): 687 self.append(("end", tag)) 688 def data(self, text): 689 pass 690 builder = Builder() 691 parser = ET.XMLParser(target=builder) 692 parser.feed(data) 693 self.assertEqual(builder, [ 694 ('start', 'root'), 695 ('start', 'element'), 696 ('end', 'element'), 697 ('start', 'element'), 698 ('end', 'element'), 699 ('start', 'empty-element'), 700 ('end', 'empty-element'), 701 ('end', 'root'), 702 ]) 703 704 with open(SIMPLE_NS_XMLFILE) as f: 705 data = f.read() 706 class Builder(list): 707 def start(self, tag, attrib): 708 self.append(("start", tag)) 709 def end(self, tag): 710 self.append(("end", tag)) 711 def data(self, text): 712 pass 713 def pi(self, target, data): 714 self.append(("pi", target, data)) 715 def comment(self, data): 716 self.append(("comment", data)) 717 def start_ns(self, prefix, uri): 718 self.append(("start-ns", prefix, uri)) 719 def end_ns(self, prefix): 720 self.append(("end-ns", prefix)) 721 builder = Builder() 722 parser = ET.XMLParser(target=builder) 723 parser.feed(data) 724 self.assertEqual(builder, [ 725 ('pi', 'pi', 'data'), 726 ('comment', ' comment '), 727 ('start-ns', '', 'namespace'), 728 ('start', '{namespace}root'), 729 ('start', '{namespace}element'), 730 ('end', '{namespace}element'), 731 ('start', '{namespace}element'), 732 ('end', '{namespace}element'), 
733 ('start', '{namespace}empty-element'), 734 ('end', '{namespace}empty-element'), 735 ('end', '{namespace}root'), 736 ('end-ns', ''), 737 ]) 738 739 def test_custom_builder_only_end_ns(self): 740 class Builder(list): 741 def end_ns(self, prefix): 742 self.append(("end-ns", prefix)) 743 744 builder = Builder() 745 parser = ET.XMLParser(target=builder) 746 parser.feed(textwrap.dedent("""\ 747 <?pi data?> 748 <!-- comment --> 749 <root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'> 750 <a:element key='value'>text</a:element> 751 <p:element>text</p:element>tail 752 <empty-element/> 753 </root> 754 """)) 755 self.assertEqual(builder, [ 756 ('end-ns', 'a'), 757 ('end-ns', 'p'), 758 ('end-ns', ''), 759 ]) 760 761 def test_children(self): 762 # Test Element children iteration 763 764 with open(SIMPLE_XMLFILE, "rb") as f: 765 tree = ET.parse(f) 766 self.assertEqual([summarize_list(elem) 767 for elem in tree.getroot().iter()], [ 768 ['element', 'element', 'empty-element'], 769 [], 770 [], 771 [], 772 ]) 773 self.assertEqual([summarize_list(elem) 774 for elem in tree.iter()], [ 775 ['element', 'element', 'empty-element'], 776 [], 777 [], 778 [], 779 ]) 780 781 elem = ET.XML(SAMPLE_XML) 782 self.assertEqual(len(list(elem)), 3) 783 self.assertEqual(len(list(elem[2])), 1) 784 self.assertEqual(elem[:], list(elem)) 785 child1 = elem[0] 786 child2 = elem[2] 787 del elem[1:2] 788 self.assertEqual(len(list(elem)), 2) 789 self.assertEqual(child1, elem[0]) 790 self.assertEqual(child2, elem[1]) 791 elem[0:2] = [child2, child1] 792 self.assertEqual(child2, elem[0]) 793 self.assertEqual(child1, elem[1]) 794 self.assertNotEqual(child1, elem[0]) 795 elem.clear() 796 self.assertEqual(list(elem), []) 797 798 def test_writestring(self): 799 elem = ET.XML("<html><body>text</body></html>") 800 self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>') 801 elem = ET.fromstring("<html><body>text</body></html>") 802 self.assertEqual(ET.tostring(elem), 
b'<html><body>text</body></html>') 803 804 def test_indent(self): 805 elem = ET.XML("<root></root>") 806 ET.indent(elem) 807 self.assertEqual(ET.tostring(elem), b'<root />') 808 809 elem = ET.XML("<html><body>text</body></html>") 810 ET.indent(elem) 811 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>') 812 813 elem = ET.XML("<html> <body>text</body> </html>") 814 ET.indent(elem) 815 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>\n</html>') 816 817 elem = ET.XML("<html><body>text</body>tail</html>") 818 ET.indent(elem) 819 self.assertEqual(ET.tostring(elem), b'<html>\n <body>text</body>tail</html>') 820 821 elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>") 822 ET.indent(elem) 823 self.assertEqual( 824 ET.tostring(elem), 825 b'<html>\n' 826 b' <body>\n' 827 b' <p>par</p>\n' 828 b' <p>text</p>\n' 829 b' <p>\n' 830 b' <br />\n' 831 b' </p>\n' 832 b' </body>\n' 833 b'</html>' 834 ) 835 836 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") 837 ET.indent(elem) 838 self.assertEqual( 839 ET.tostring(elem), 840 b'<html>\n' 841 b' <body>\n' 842 b' <p>pre<br />post</p>\n' 843 b' <p>text</p>\n' 844 b' </body>\n' 845 b'</html>' 846 ) 847 848 def test_indent_space(self): 849 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") 850 ET.indent(elem, space='\t') 851 self.assertEqual( 852 ET.tostring(elem), 853 b'<html>\n' 854 b'\t<body>\n' 855 b'\t\t<p>pre<br />post</p>\n' 856 b'\t\t<p>text</p>\n' 857 b'\t</body>\n' 858 b'</html>' 859 ) 860 861 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") 862 ET.indent(elem, space='') 863 self.assertEqual( 864 ET.tostring(elem), 865 b'<html>\n' 866 b'<body>\n' 867 b'<p>pre<br />post</p>\n' 868 b'<p>text</p>\n' 869 b'</body>\n' 870 b'</html>' 871 ) 872 873 def test_indent_space_caching(self): 874 elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>") 875 ET.indent(elem) 876 
self.assertEqual( 877 {el.tail for el in elem.iter()}, 878 {None, "\n", "\n ", "\n "} 879 ) 880 self.assertEqual( 881 {el.text for el in elem.iter()}, 882 {None, "\n ", "\n ", "\n ", "par", "text"} 883 ) 884 self.assertEqual( 885 len({el.tail for el in elem.iter()}), 886 len({id(el.tail) for el in elem.iter()}), 887 ) 888 889 def test_indent_level(self): 890 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") 891 with self.assertRaises(ValueError): 892 ET.indent(elem, level=-1) 893 self.assertEqual( 894 ET.tostring(elem), 895 b"<html><body><p>pre<br />post</p><p>text</p></body></html>" 896 ) 897 898 ET.indent(elem, level=2) 899 self.assertEqual( 900 ET.tostring(elem), 901 b'<html>\n' 902 b' <body>\n' 903 b' <p>pre<br />post</p>\n' 904 b' <p>text</p>\n' 905 b' </body>\n' 906 b' </html>' 907 ) 908 909 elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>") 910 ET.indent(elem, level=1, space=' ') 911 self.assertEqual( 912 ET.tostring(elem), 913 b'<html>\n' 914 b' <body>\n' 915 b' <p>pre<br />post</p>\n' 916 b' <p>text</p>\n' 917 b' </body>\n' 918 b' </html>' 919 ) 920 921 def test_tostring_default_namespace(self): 922 elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>') 923 self.assertEqual( 924 ET.tostring(elem, encoding='unicode'), 925 '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>' 926 ) 927 self.assertEqual( 928 ET.tostring(elem, encoding='unicode', default_namespace='http://effbot.org/ns'), 929 '<body xmlns="http://effbot.org/ns"><tag /></body>' 930 ) 931 932 def test_tostring_default_namespace_different_namespace(self): 933 elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>') 934 self.assertEqual( 935 ET.tostring(elem, encoding='unicode', default_namespace='foobar'), 936 '<ns1:body xmlns="foobar" xmlns:ns1="http://effbot.org/ns"><ns1:tag /></ns1:body>' 937 ) 938 939 def test_tostring_default_namespace_original_no_namespace(self): 940 elem = ET.XML('<body><tag/></body>') 941 
EXPECTED_MSG = '^cannot use non-qualified names with default_namespace option$' 942 with self.assertRaisesRegex(ValueError, EXPECTED_MSG): 943 ET.tostring(elem, encoding='unicode', default_namespace='foobar') 944 945 def test_tostring_no_xml_declaration(self): 946 elem = ET.XML('<body><tag/></body>') 947 self.assertEqual( 948 ET.tostring(elem, encoding='unicode'), 949 '<body><tag /></body>' 950 ) 951 952 def test_tostring_xml_declaration(self): 953 elem = ET.XML('<body><tag/></body>') 954 self.assertEqual( 955 ET.tostring(elem, encoding='utf8', xml_declaration=True), 956 b"<?xml version='1.0' encoding='utf8'?>\n<body><tag /></body>" 957 ) 958 959 def test_tostring_xml_declaration_unicode_encoding(self): 960 elem = ET.XML('<body><tag/></body>') 961 preferredencoding = locale.getpreferredencoding() 962 self.assertEqual( 963 f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>", 964 ET.tostring(elem, encoding='unicode', xml_declaration=True) 965 ) 966 967 def test_tostring_xml_declaration_cases(self): 968 elem = ET.XML('<body><tag>ø</tag></body>') 969 preferredencoding = locale.getpreferredencoding() 970 TESTCASES = [ 971 # (expected_retval, encoding, xml_declaration) 972 # ... xml_declaration = None 973 (b'<body><tag>ø</tag></body>', None, None), 974 (b'<body><tag>\xc3\xb8</tag></body>', 'UTF-8', None), 975 (b'<body><tag>ø</tag></body>', 'US-ASCII', None), 976 (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n" 977 b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', None), 978 ('<body><tag>ø</tag></body>', 'unicode', None), 979 980 # ... xml_declaration = False 981 (b"<body><tag>ø</tag></body>", None, False), 982 (b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', False), 983 (b"<body><tag>ø</tag></body>", 'US-ASCII', False), 984 (b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', False), 985 ("<body><tag>ø</tag></body>", 'unicode', False), 986 987 # ... 
xml_declaration = True 988 (b"<?xml version='1.0' encoding='us-ascii'?>\n" 989 b"<body><tag>ø</tag></body>", None, True), 990 (b"<?xml version='1.0' encoding='UTF-8'?>\n" 991 b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', True), 992 (b"<?xml version='1.0' encoding='US-ASCII'?>\n" 993 b"<body><tag>ø</tag></body>", 'US-ASCII', True), 994 (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n" 995 b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', True), 996 (f"<?xml version='1.0' encoding='{preferredencoding}'?>\n" 997 "<body><tag>ø</tag></body>", 'unicode', True), 998 999 ] 1000 for expected_retval, encoding, xml_declaration in TESTCASES: 1001 with self.subTest(f'encoding={encoding} ' 1002 f'xml_declaration={xml_declaration}'): 1003 self.assertEqual( 1004 ET.tostring( 1005 elem, 1006 encoding=encoding, 1007 xml_declaration=xml_declaration 1008 ), 1009 expected_retval 1010 ) 1011 1012 def test_tostringlist_default_namespace(self): 1013 elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>') 1014 self.assertEqual( 1015 ''.join(ET.tostringlist(elem, encoding='unicode')), 1016 '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>' 1017 ) 1018 self.assertEqual( 1019 ''.join(ET.tostringlist(elem, encoding='unicode', default_namespace='http://effbot.org/ns')), 1020 '<body xmlns="http://effbot.org/ns"><tag /></body>' 1021 ) 1022 1023 def test_tostringlist_xml_declaration(self): 1024 elem = ET.XML('<body><tag/></body>') 1025 self.assertEqual( 1026 ''.join(ET.tostringlist(elem, encoding='unicode')), 1027 '<body><tag /></body>' 1028 ) 1029 self.assertEqual( 1030 b''.join(ET.tostringlist(elem, xml_declaration=True)), 1031 b"<?xml version='1.0' encoding='us-ascii'?>\n<body><tag /></body>" 1032 ) 1033 1034 preferredencoding = locale.getpreferredencoding() 1035 stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True) 1036 self.assertEqual( 1037 ''.join(stringlist), 1038 f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>" 
1039 ) 1040 self.assertRegex(stringlist[0], r"^<\?xml version='1.0' encoding='.+'?>") 1041 self.assertEqual(['<body', '>', '<tag', ' />', '</body>'], stringlist[1:]) 1042 1043 def test_encoding(self): 1044 def check(encoding, body=''): 1045 xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" % 1046 (encoding, body)) 1047 self.assertEqual(ET.XML(xml.encode(encoding)).text, body) 1048 self.assertEqual(ET.XML(xml).text, body) 1049 check("ascii", 'a') 1050 check("us-ascii", 'a') 1051 check("iso-8859-1", '\xbd') 1052 check("iso-8859-15", '\u20ac') 1053 check("cp437", '\u221a') 1054 check("mac-roman", '\u02da') 1055 1056 def xml(encoding): 1057 return "<?xml version='1.0' encoding='%s'?><xml />" % encoding 1058 def bxml(encoding): 1059 return xml(encoding).encode(encoding) 1060 supported_encodings = [ 1061 'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le', 1062 'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5', 1063 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10', 1064 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', 1065 'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 1066 'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 1067 'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125', 1068 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 1069 'cp1256', 'cp1257', 'cp1258', 1070 'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2', 1071 'mac-roman', 'mac-turkish', 1072 'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', 1073 'iso2022-jp-3', 'iso2022-jp-ext', 1074 'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 1075 'hz', 'ptcp154', 1076 ] 1077 for encoding in supported_encodings: 1078 self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />') 1079 1080 unsupported_ascii_compatible_encodings = [ 1081 'big5', 'big5hkscs', 1082 'cp932', 'cp949', 'cp950', 1083 'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr', 1084 'gb2312', 'gbk', 'gb18030', 1085 'iso2022-kr', 
'johab', 1086 'shift-jis', 'shift-jis-2004', 'shift-jisx0213', 1087 'utf-7', 1088 ] 1089 for encoding in unsupported_ascii_compatible_encodings: 1090 self.assertRaises(ValueError, ET.XML, bxml(encoding)) 1091 1092 unsupported_ascii_incompatible_encodings = [ 1093 'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140', 1094 'utf_32', 'utf_32_be', 'utf_32_le', 1095 ] 1096 for encoding in unsupported_ascii_incompatible_encodings: 1097 self.assertRaises(ET.ParseError, ET.XML, bxml(encoding)) 1098 1099 self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii')) 1100 self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii')) 1101 1102 def test_methods(self): 1103 # Test serialization methods. 1104 1105 e = ET.XML("<html><link/><script>1 < 2</script></html>") 1106 e.tail = "\n" 1107 self.assertEqual(serialize(e), 1108 '<html><link /><script>1 < 2</script></html>\n') 1109 self.assertEqual(serialize(e, method=None), 1110 '<html><link /><script>1 < 2</script></html>\n') 1111 self.assertEqual(serialize(e, method="xml"), 1112 '<html><link /><script>1 < 2</script></html>\n') 1113 self.assertEqual(serialize(e, method="html"), 1114 '<html><link><script>1 < 2</script></html>\n') 1115 self.assertEqual(serialize(e, method="text"), '1 < 2\n') 1116 1117 def test_issue18347(self): 1118 e = ET.XML('<html><CamelCase>text</CamelCase></html>') 1119 self.assertEqual(serialize(e), 1120 '<html><CamelCase>text</CamelCase></html>') 1121 self.assertEqual(serialize(e, method="html"), 1122 '<html><CamelCase>text</CamelCase></html>') 1123 1124 def test_entity(self): 1125 # Test entity handling. 
1126 1127 # 1) good entities 1128 1129 e = ET.XML("<document title='舰'>test</document>") 1130 self.assertEqual(serialize(e, encoding="us-ascii"), 1131 b'<document title="舰">test</document>') 1132 self.serialize_check(e, '<document title="\u8230">test</document>') 1133 1134 # 2) bad entities 1135 1136 with self.assertRaises(ET.ParseError) as cm: 1137 ET.XML("<document>&entity;</document>") 1138 self.assertEqual(str(cm.exception), 1139 'undefined entity: line 1, column 10') 1140 1141 with self.assertRaises(ET.ParseError) as cm: 1142 ET.XML(ENTITY_XML) 1143 self.assertEqual(str(cm.exception), 1144 'undefined entity &entity;: line 5, column 10') 1145 1146 # 3) custom entity 1147 1148 parser = ET.XMLParser() 1149 parser.entity["entity"] = "text" 1150 parser.feed(ENTITY_XML) 1151 root = parser.close() 1152 self.serialize_check(root, '<document>text</document>') 1153 1154 # 4) external (SYSTEM) entity 1155 1156 with self.assertRaises(ET.ParseError) as cm: 1157 ET.XML(EXTERNAL_ENTITY_XML) 1158 self.assertEqual(str(cm.exception), 1159 'undefined entity &entity;: line 4, column 10') 1160 1161 def test_namespace(self): 1162 # Test namespace issues. 

        # 1) xml namespace

        # The reserved "xml" prefix is written back without a declaration.
        elem = ET.XML("<tag xml:lang='en' />")
        self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1

        # 2) other "well-known" namespaces

        # The rdf and html URIs below are serialized with their conventional
        # prefixes.
        elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
        self.serialize_check(elem,
            '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1

        elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
        self.serialize_check(elem,
            '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2

        # This soap URI is expected to get a generated "ns0" prefix instead
        # of the prefix used in the input.
        elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
        self.serialize_check(elem,
            '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3

        # 3) unknown namespaces
        # A default namespace in the input is serialized with a generated
        # prefix on every element.
        elem = ET.XML(SAMPLE_XML_NS)
        self.serialize_check(elem,
            '<ns0:body xmlns:ns0="http://effbot.org/ns">\n'
            ' <ns0:tag>text</ns0:tag>\n'
            ' <ns0:tag />\n'
            ' <ns0:section>\n'
            ' <ns0:tag>subtext</ns0:tag>\n'
            ' </ns0:section>\n'
            '</ns0:body>')

    def test_qname(self):
        # Test QName handling.

        # 1) decorated tags

        # "{uri}tag" strings and QName objects (one- or two-argument form)
        # all serialize to the same prefixed tag.
        elem = ET.Element("{uri}tag")
        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1
        elem = ET.Element(ET.QName("{uri}tag"))
        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2
        elem = ET.Element(ET.QName("uri", "tag"))
        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3
        elem = ET.Element(ET.QName("uri", "tag"))
        subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
        subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
        self.serialize_check(elem,
            '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4

        # 2) decorated attributes

        elem.clear()
        elem.attrib["{uri}key"] = "value"
        self.serialize_check(elem,
            '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1

        elem.clear()
        elem.attrib[ET.QName("{uri}key")] = "value"
        self.serialize_check(elem,
            '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2

        # 3) decorated values are not converted by default, but the
        # QName wrapper can be used for values

        elem.clear()
        elem.attrib["{uri}key"] = "{uri}value"
        self.serialize_check(elem,
            '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1

        elem.clear()
        elem.attrib["{uri}key"] = ET.QName("{uri}value")
        self.serialize_check(elem,
            '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2

        # The same subelement is appended twice, so it is serialized twice;
        # all namespace declarations end up on the root element.
        elem.clear()
        subelem = ET.Element("tag")
        subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
        elem.append(subelem)
        elem.append(subelem)
        self.serialize_check(elem,
            '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">'
            '<tag ns1:key="ns2:value" />'
            '<tag ns1:key="ns2:value" />'
            '</ns0:tag>') # 3.3

        # 4) Direct QName tests

        # str() gives the "{ns}tag" form; QNames compare equal to each other
        # and to that string form, but not to a "prefix:tag" string.
        self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
        self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
        q1 = ET.QName('ns', 'tag')
        q2 = ET.QName('ns', 'tag')
        self.assertEqual(q1, q2)
        q2 = ET.QName('ns', 'other-tag')
        self.assertNotEqual(q1, q2)
        self.assertNotEqual(q1, 'ns:tag')
        self.assertEqual(q1, '{ns}tag')

    def test_doctype_public(self):
        # Test PUBLIC doctype.

        # Only checks that parsing succeeds; the result is not inspected.
        elem = ET.XML('<!DOCTYPE html PUBLIC'
            ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
            ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
            '<html>text</html>')

    def test_xpath_tokenizer(self):
        # Test the XPath tokenizer.
        from xml.etree import ElementPath
        def check(p, expected, namespaces=None):
            # The tokenizer yields (op, tag) pairs; compare whichever half
            # of each pair is non-empty against the expected token list.
            self.assertEqual([op or tag
                              for op, tag in ElementPath.xpath_tokenizer(p, namespaces)],
                             expected)

        # tests from the xml specification
        check("*", ['*'])
        check("text()", ['text', '()'])
        check("@name", ['@', 'name'])
        check("@*", ['@', '*'])
        check("para[1]", ['para', '[', '1', ']'])
        check("para[last()]", ['para', '[', 'last', '()', ']'])
        check("*/para", ['*', '/', 'para'])
        check("/doc/chapter[5]/section[2]",
              ['/', 'doc', '/', 'chapter', '[', '5', ']',
               '/', 'section', '[', '2', ']'])
        check("chapter//para", ['chapter', '//', 'para'])
        check("//para", ['//', 'para'])
        check("//olist/item", ['//', 'olist', '/', 'item'])
        check(".", ['.'])
        check(".//para", ['.', '//', 'para'])
        check("..", ['..'])
        check("../@lang", ['..', '/', '@', 'lang'])
        check("chapter[title]", ['chapter', '[', 'title', ']'])
        check("employee[@secretary and @assistant]", ['employee',
              '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])

        # additional tests
        check("@{ns}attr", ['@', '{ns}attr'])
        check("{http://spam}egg", ['{http://spam}egg'])
        check("./spam.egg", ['.', '/', 'spam.egg'])
        check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])

        # wildcard tags
        check("{ns}*", ['{ns}*'])
        check("{}*", ['{}*'])
        check("{*}tag", ['{*}tag'])
        check("{*}*", ['{*}*'])
        check(".//{*}tag", ['.', '//', '{*}tag'])

        # namespace prefix resolution
        # Prefixed names are expanded through the namespaces mapping.  The
        # '' (default) entry is applied to a bare element name but not to an
        # attribute name, and names already in "{...}" form are left alone.
        check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'],
              {'xsd': 'http://www.w3.org/2001/XMLSchema'})
        check("type", ['{http://www.w3.org/2001/XMLSchema}type'],
              {'': 'http://www.w3.org/2001/XMLSchema'})
        check("@xsd:type", ['@', '{http://www.w3.org/2001/XMLSchema}type'],
              {'xsd': 'http://www.w3.org/2001/XMLSchema'})
        check("@type", ['@', 'type'],
              {'': 'http://www.w3.org/2001/XMLSchema'})
        check("@{*}type", ['@', '{*}type'],
              {'': 'http://www.w3.org/2001/XMLSchema'})
        check("@{ns}attr", ['@', '{ns}attr'],
              {'': 'http://www.w3.org/2001/XMLSchema',
               'ns': 'http://www.w3.org/2001/XMLSchema'})

    def test_processinginstruction(self):
        # Test ProcessingInstruction directly

        # ET.PI is used interchangeably with ET.ProcessingInstruction here.
        self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
                b'<?test instruction?>')
        self.assertEqual(ET.tostring(ET.PI('test', 'instruction')),
                b'<?test instruction?>')

        # Issue #2746

        # Markup characters in the instruction text are written unescaped.
        self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
                b'<?test <testing&>?>')
        self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
                b"<?xml version='1.0' encoding='latin-1'?>\n"
                b"<?test <testing&>\xe3?>")

    def test_html_empty_elems_serialization(self):
        # issue 15970
        # from http://www.w3.org/TR/html401/index/elements.html
        for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
                        'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']:
            # Both the upper- and lower-case spelling must serialize to a
            # lone start tag, whether the input was self-closed or an
            # explicit empty open/close pair.
            for elem in [element, element.lower()]:
                expected = '<%s>' % elem
                serialized = serialize(ET.XML('<%s />' % elem), method='html')
                self.assertEqual(serialized, expected)
                serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)),
                                       method='html')
                self.assertEqual(serialized, expected)

def test_dump_attribute_order(self): 1355 # See BPO 34160 1356 e = ET.Element('cirriculum', status='public', company='example') 1357 with support.captured_stdout() as stdout: 1358 ET.dump(e) 1359 self.assertEqual(stdout.getvalue(), 1360 '<cirriculum status="public" company="example" />\n') 1361 1362 def test_tree_write_attribute_order(self): 1363 # See BPO 34160 1364 root = ET.Element('cirriculum', status='public', company='example') 1365 self.assertEqual(serialize(root), 1366 '<cirriculum status="public" company="example" />') 1367 self.assertEqual(serialize(root, method='html'), 1368 '<cirriculum status="public" company="example"></cirriculum>') 1369 1370 def test_attlist_default(self): 1371 # Test default attribute values; See BPO 42151. 1372 root = ET.fromstring(ATTLIST_XML) 1373 self.assertEqual(root[0].attrib, 1374 {'{http://www.w3.org/XML/1998/namespace}lang': 'eng'}) 1375 1376 1377class XMLPullParserTest(unittest.TestCase): 1378 1379 def _feed(self, parser, data, chunk_size=None): 1380 if chunk_size is None: 1381 parser.feed(data) 1382 else: 1383 for i in range(0, len(data), chunk_size): 1384 parser.feed(data[i:i+chunk_size]) 1385 1386 def assert_events(self, parser, expected, max_events=None): 1387 self.assertEqual( 1388 [(event, (elem.tag, elem.text)) 1389 for event, elem in islice(parser.read_events(), max_events)], 1390 expected) 1391 1392 def assert_event_tuples(self, parser, expected, max_events=None): 1393 self.assertEqual( 1394 list(islice(parser.read_events(), max_events)), 1395 expected) 1396 1397 def assert_event_tags(self, parser, expected, max_events=None): 1398 events = islice(parser.read_events(), max_events) 1399 self.assertEqual([(action, elem.tag) for action, elem in events], 1400 expected) 1401 1402 def test_simple_xml(self): 1403 for chunk_size in (None, 1, 5): 1404 with self.subTest(chunk_size=chunk_size): 1405 parser = ET.XMLPullParser() 1406 self.assert_event_tags(parser, []) 1407 self._feed(parser, "<!-- comment -->\n", chunk_size) 
                self.assert_event_tags(parser, [])
                self._feed(parser,
                           "<root>\n <element key='value'>text</element",
                           chunk_size)
                # The end tag is still incomplete, so no event yet.
                self.assert_event_tags(parser, [])
                self._feed(parser, ">\n", chunk_size)
                self.assert_event_tags(parser, [('end', 'element')])
                self._feed(parser, "<element>text</element>tail\n", chunk_size)
                self._feed(parser, "<empty-element/>\n", chunk_size)
                self.assert_event_tags(parser, [
                    ('end', 'element'),
                    ('end', 'empty-element'),
                    ])
                self._feed(parser, "</root>\n", chunk_size)
                self.assert_event_tags(parser, [('end', 'root')])
                self.assertIsNone(parser.close())

    def test_feed_while_iterating(self):
        # An iterator obtained before any data is fed still delivers the
        # events produced by later feed() calls.
        parser = ET.XMLPullParser()
        it = parser.read_events()
        self._feed(parser, "<root>\n <element key='value'>text</element>\n")
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'element'))
        self._feed(parser, "</root>\n")
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'root'))
        with self.assertRaises(StopIteration):
            next(it)

    def test_simple_xml_with_ns(self):
        # Same scenario as test_simple_xml, with a default namespace: the
        # reported tags carry the "{namespace}" qualifier.
        parser = ET.XMLPullParser()
        self.assert_event_tags(parser, [])
        self._feed(parser, "<!-- comment -->\n")
        self.assert_event_tags(parser, [])
        self._feed(parser, "<root xmlns='namespace'>\n")
        self.assert_event_tags(parser, [])
        self._feed(parser, "<element key='value'>text</element")
        self.assert_event_tags(parser, [])
        self._feed(parser, ">\n")
        self.assert_event_tags(parser, [('end', '{namespace}element')])
        self._feed(parser, "<element>text</element>tail\n")
        self._feed(parser, "<empty-element/>\n")
        self.assert_event_tags(parser, [
            ('end', '{namespace}element'),
            ('end', '{namespace}empty-element'),
            ])
        self._feed(parser, "</root>\n")
        self.assert_event_tags(parser, [('end', '{namespace}root')])
        self.assertIsNone(parser.close())

    def test_ns_events(self):
        # With only namespace events requested, element starts and ends
        # produce nothing.
        parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
        self._feed(parser, "<!-- comment -->\n")
        self._feed(parser, "<root xmlns='namespace'>\n")
        self.assertEqual(
            list(parser.read_events()),
            [('start-ns', ('', 'namespace'))])
        self._feed(parser, "<element key='value'>text</element")
        self._feed(parser, ">\n")
        self._feed(parser, "<element>text</element>tail\n")
        self._feed(parser, "<empty-element/>\n")
        self._feed(parser, "</root>\n")
        self.assertEqual(list(parser.read_events()), [('end-ns', None)])
        self.assertIsNone(parser.close())

    def test_ns_events_start(self):
        # 'start-ns' events for an element are reported before its 'start'.
        parser = ET.XMLPullParser(events=('start-ns', 'start', 'end'))
        self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
        self.assert_event_tuples(parser, [
            ('start-ns', ('', 'abc')),
            ('start-ns', ('p', 'xyz')),
        ], max_events=2)
        self.assert_event_tags(parser, [
            ('start', '{abc}tag'),
        ], max_events=1)

        self._feed(parser, "<child />\n")
        self.assert_event_tags(parser, [
            ('start', '{abc}child'),
            ('end', '{abc}child'),
        ])

        self._feed(parser, "</tag>\n")
        parser.close()
        self.assert_event_tags(parser, [
            ('end', '{abc}tag'),
        ])

    def test_ns_events_start_end(self):
        # As above, with 'end-ns' requested too: one 'end-ns' per declared
        # namespace follows the element's 'end'.
        parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns'))
        self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
        self.assert_event_tuples(parser, [
            ('start-ns', ('', 'abc')),
            ('start-ns', ('p', 'xyz')),
        ], max_events=2)
        self.assert_event_tags(parser, [
            ('start', '{abc}tag'),
        ], max_events=1)

        self._feed(parser, "<child />\n")
        self.assert_event_tags(parser, [
            ('start', '{abc}child'),
            ('end', '{abc}child'),
        ])

        self._feed(parser, "</tag>\n")
        parser.close()
        self.assert_event_tags(parser, [
            ('end', '{abc}tag'),
        ], max_events=1)
        self.assert_event_tuples(parser, [
            ('end-ns', None),
            ('end-ns', None),
        ])

    def test_events(self):
        # An empty events tuple reports nothing at all.
        parser = ET.XMLPullParser(events=())
        self._feed(parser, "<root/>\n")
        self.assert_event_tags(parser, [])

        # Comments produce no event unless 'comment' is requested.
        parser = ET.XMLPullParser(events=('start', 'end'))
        self._feed(parser, "<!-- text here -->\n")
        self.assert_events(parser, [])

        parser = ET.XMLPullParser(events=('start', 'end'))
        self._feed(parser, "<root>\n")
        self.assert_event_tags(parser, [('start', 'root')])
        self._feed(parser, "<element key='value'>text</element")
        self.assert_event_tags(parser, [('start', 'element')])
        self._feed(parser, ">\n")
        self.assert_event_tags(parser, [('end', 'element')])
        self._feed(parser,
                   "<element xmlns='foo'>text<empty-element/></element>tail\n")
        self.assert_event_tags(parser, [
            ('start', '{foo}element'),
            ('start', '{foo}empty-element'),
            ('end', '{foo}empty-element'),
            ('end', '{foo}element'),
            ])
        self._feed(parser, "</root>")
        self.assertIsNone(parser.close())
        self.assert_event_tags(parser, [('end', 'root')])

        # Only 'start' requested: end tags produce nothing.
        parser = ET.XMLPullParser(events=('start',))
        self._feed(parser, "<!-- comment -->\n")
        self.assert_event_tags(parser, [])
        self._feed(parser, "<root>\n")
        self.assert_event_tags(parser, [('start', 'root')])
        self._feed(parser, "<element key='value'>text</element")
        self.assert_event_tags(parser, [('start', 'element')])
        self._feed(parser, ">\n")
        self.assert_event_tags(parser, [])
        self._feed(parser,
                   "<element xmlns='foo'>text<empty-element/></element>tail\n")
        self.assert_event_tags(parser, [
            ('start', '{foo}element'),
            ('start', '{foo}empty-element'),
            ])
        self._feed(parser, "</root>")
        self.assertIsNone(parser.close())

    def test_events_comment(self):
        # Comment events carry an element whose tag is ET.Comment and whose
        # text is the comment body; they are reported before, inside and
        # after the root element.
        parser = ET.XMLPullParser(events=('start', 'comment', 'end'))
        self._feed(parser, "<!-- text here -->\n")
        self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
        self._feed(parser, "<!-- more text here -->\n")
        self.assert_events(parser, [('comment', (ET.Comment, ' more text here '))])
        self._feed(parser, "<root-tag>text")
        self.assert_event_tags(parser, [('start', 'root-tag')])
        self._feed(parser, "<!-- inner comment-->\n")
        self.assert_events(parser, [('comment', (ET.Comment, ' inner comment'))])
        self._feed(parser, "</root-tag>\n")
        self.assert_event_tags(parser, [('end', 'root-tag')])
        self._feed(parser, "<!-- outer comment -->\n")
        self.assert_events(parser, [('comment', (ET.Comment, ' outer comment '))])

        parser = ET.XMLPullParser(events=('comment',))
        self._feed(parser, "<!-- text here -->\n")
        self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])

    def test_events_pi(self):
        # Processing-instruction events carry an element whose tag is ET.PI
        # and whose text is the target followed by any instruction text.
        parser = ET.XMLPullParser(events=('start', 'pi', 'end'))
        self._feed(parser, "<?pitarget?>\n")
        self.assert_events(parser, [('pi', (ET.PI, 'pitarget'))])
        parser = ET.XMLPullParser(events=('pi',))
        self._feed(parser, "<?pitarget some text ?>\n")
        self.assert_events(parser, [('pi', (ET.PI, 'pitarget some text '))])

    def test_events_sequence(self):
        # Test that events can be some sequence that's not just a tuple or list
        eventset = {'end', 'start'}
        parser = ET.XMLPullParser(events=eventset)
        self._feed(parser, "<foo>bar</foo>")
        self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])

        class DummyIter:
            # Minimal one-shot iterator over three event names.
            def __init__(self):
                self.events = iter(['start', 'end', 'start-ns'])
            def __iter__(self):
                return self
            def __next__(self):
                return next(self.events)

        parser = ET.XMLPullParser(events=DummyIter())
        self._feed(parser, "<foo>bar</foo>")
        self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])

    def test_unknown_event(self):
        # An unrecognized event name is rejected when the parser is created.
        with self.assertRaises(ValueError):
            ET.XMLPullParser(events=('start', 'end', 'bogus'))


#
# xinclude tests (samples from appendix C of the xinclude specification)

# Maps a resource name (as used in href attributes) to its content.
XINCLUDE = {}

XINCLUDE["C1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>120 Mz is adequate for an average home user.</p>
 <xi:include href="disclaimer.xml"/>
</document>
"""

XINCLUDE["disclaimer.xml"] = """\
<?xml version='1.0'?>
<disclaimer>
 <p>The opinions represented herein represent those of the individual
 and should not be interpreted as official policy endorsed by this
 organization.</p>
</disclaimer>
"""

XINCLUDE["C2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>This document has been accessed
 <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

XINCLUDE["count.txt"] = "324387"

XINCLUDE["C2b.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>This document has been <em>accessed</em>
 <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

XINCLUDE["C3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>The following is the source of the "data.xml" resource:</p>
 <example><xi:include href="data.xml" parse="text"/></example>
</document>
"""

XINCLUDE["data.xml"] = """\
<?xml version='1.0'?>
<data>
 <item><![CDATA[Brooks & Shields]]></item>
</data>
"""

XINCLUDE["C5.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
 <xi:include href="example.txt" parse="text">
 <xi:fallback>
 <xi:include href="fallback-example.txt" parse="text">
 <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
 </xi:include>
 </xi:fallback>
 </xi:include>
</div>
"""

# Includes the on-disk sample file; the path is escaped because it is
# substituted into an attribute value.
XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>Example.</p>
 <xi:include href="{}"/>
</document>
""".format(html.escape(SIMPLE_XMLFILE, True))

XINCLUDE["include_c1_repeated.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>The following is the source code of Recursive1.xml:</p>
 <xi:include href="C1.xml"/>
 <xi:include href="C1.xml"/>
 <xi:include href="C1.xml"/>
 <xi:include href="C1.xml"/>
</document>
"""

#
# badly formatted xi:include tags

XINCLUDE_BAD = {}

XINCLUDE_BAD["B1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>120 Mz is adequate for an average home user.</p>
 <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
"""

XINCLUDE_BAD["B2.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
 <xi:fallback></xi:fallback>
</div>
"""

# Recursive1 -> Recursive2 -> Recursive3 -> Recursive1 form an include cycle.
XINCLUDE["Recursive1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>The following is the source code of Recursive2.xml:</p>
 <xi:include href="Recursive2.xml"/>
</document>
"""

XINCLUDE["Recursive2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>The following is the source code of Recursive3.xml:</p>
 <xi:include href="Recursive3.xml"/>
</document>
"""

XINCLUDE["Recursive3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
 <p>The following is the source code of Recursive1.xml:</p>
 <xi:include href="Recursive1.xml"/>
</document>
"""


class XIncludeTest(unittest.TestCase):

    def xinclude_loader(self, href, parse="xml", encoding=None):
        # Loader backed by the XINCLUDE dict: returns a parsed element for
        # parse="xml" and the raw string otherwise.  Unknown resources raise
        # OSError.  *encoding* is accepted but not used.
        try:
            data = XINCLUDE[href]
        except KeyError:
            raise OSError("resource not found")
        if parse == "xml":
            data = ET.XML(data)
        return data

    def none_loader(self, href, parser, encoding=None):
        # Loader that never finds anything.
        return None

    def _my_loader(self, href, parse):
        # Used to avoid a test-dependency problem where the default loader
        # of ElementInclude uses the pyET parser for cET tests.
        if parse == 'xml':
            with open(href, 'rb') as f:
                return ET.parse(f).getroot()
        else:
            return None

    def test_xinclude_default(self):
        # Include a real file from disk through _my_loader.
        # NOTE(review): the expected indentation below has to match the
        # whitespace inside simple.xml, which is not visible here - confirm.
        from xml.etree import ElementInclude
        doc = self.xinclude_loader('default.xml')
        ElementInclude.include(doc, self._my_loader)
        self.assertEqual(serialize(doc),
            '<document>\n'
            ' <p>Example.</p>\n'
            ' <root>\n'
            ' <element key="value">text</element>\n'
            ' <element>text</element>tail\n'
            ' <empty-element />\n'
            '</root>\n'
            '</document>')

    def test_xinclude(self):
        from xml.etree import ElementInclude

        # Basic inclusion example (XInclude C.1)
        document = self.xinclude_loader("C1.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            ' <p>120 Mz is adequate for an average home user.</p>\n'
            ' <disclaimer>\n'
            ' <p>The opinions represented herein represent those of the individual\n'
            ' and should not be interpreted as official policy endorsed by this\n'
            ' organization.</p>\n'
            '</disclaimer>\n'
            '</document>') # C1

        # Textual inclusion example (XInclude C.2)
        document = self.xinclude_loader("C2.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            ' <p>This document has been accessed\n'
            ' 324387 times.</p>\n'
'</document>') # C2 1813 1814 # Textual inclusion after sibling element (based on modified XInclude C.2) 1815 document = self.xinclude_loader("C2b.xml") 1816 ElementInclude.include(document, self.xinclude_loader) 1817 self.assertEqual(serialize(document), 1818 '<document>\n' 1819 ' <p>This document has been <em>accessed</em>\n' 1820 ' 324387 times.</p>\n' 1821 '</document>') # C2b 1822 1823 # Textual inclusion of XML example (XInclude C.3) 1824 document = self.xinclude_loader("C3.xml") 1825 ElementInclude.include(document, self.xinclude_loader) 1826 self.assertEqual(serialize(document), 1827 '<document>\n' 1828 ' <p>The following is the source of the "data.xml" resource:</p>\n' 1829 " <example><?xml version='1.0'?>\n" 1830 '<data>\n' 1831 ' <item><![CDATA[Brooks & Shields]]></item>\n' 1832 '</data>\n' 1833 '</example>\n' 1834 '</document>') # C3 1835 1836 # Fallback example (XInclude C.5) 1837 # Note! Fallback support is not yet implemented 1838 document = self.xinclude_loader("C5.xml") 1839 with self.assertRaises(OSError) as cm: 1840 ElementInclude.include(document, self.xinclude_loader) 1841 self.assertEqual(str(cm.exception), 'resource not found') 1842 self.assertEqual(serialize(document), 1843 '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n' 1844 ' <ns0:include href="example.txt" parse="text">\n' 1845 ' <ns0:fallback>\n' 1846 ' <ns0:include href="fallback-example.txt" parse="text">\n' 1847 ' <ns0:fallback><a href="mailto:bob@example.org">Report error</a></ns0:fallback>\n' 1848 ' </ns0:include>\n' 1849 ' </ns0:fallback>\n' 1850 ' </ns0:include>\n' 1851 '</div>') # C5 1852 1853 def test_xinclude_repeated(self): 1854 from xml.etree import ElementInclude 1855 1856 document = self.xinclude_loader("include_c1_repeated.xml") 1857 ElementInclude.include(document, self.xinclude_loader) 1858 self.assertEqual(1+4*2, len(document.findall(".//p"))) 1859 1860 def test_xinclude_failures(self): 1861 from xml.etree import ElementInclude 1862 1863 # Test failure to locate 
included XML file. 1864 document = ET.XML(XINCLUDE["C1.xml"]) 1865 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1866 ElementInclude.include(document, loader=self.none_loader) 1867 self.assertEqual(str(cm.exception), 1868 "cannot load 'disclaimer.xml' as 'xml'") 1869 1870 # Test failure to locate included text file. 1871 document = ET.XML(XINCLUDE["C2.xml"]) 1872 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1873 ElementInclude.include(document, loader=self.none_loader) 1874 self.assertEqual(str(cm.exception), 1875 "cannot load 'count.txt' as 'text'") 1876 1877 # Test bad parse type. 1878 document = ET.XML(XINCLUDE_BAD["B1.xml"]) 1879 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1880 ElementInclude.include(document, loader=self.none_loader) 1881 self.assertEqual(str(cm.exception), 1882 "unknown parse type in xi:include tag ('BAD_TYPE')") 1883 1884 # Test xi:fallback outside xi:include. 1885 document = ET.XML(XINCLUDE_BAD["B2.xml"]) 1886 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1887 ElementInclude.include(document, loader=self.none_loader) 1888 self.assertEqual(str(cm.exception), 1889 "xi:fallback tag must be child of xi:include " 1890 "('{http://www.w3.org/2001/XInclude}fallback')") 1891 1892 # Test infinitely recursive includes. 1893 document = self.xinclude_loader("Recursive1.xml") 1894 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1895 ElementInclude.include(document, self.xinclude_loader) 1896 self.assertEqual(str(cm.exception), 1897 "recursive include of Recursive2.xml") 1898 1899 # Test 'max_depth' limitation. 
1900 document = self.xinclude_loader("Recursive1.xml") 1901 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1902 ElementInclude.include(document, self.xinclude_loader, max_depth=None) 1903 self.assertEqual(str(cm.exception), 1904 "recursive include of Recursive2.xml") 1905 1906 document = self.xinclude_loader("Recursive1.xml") 1907 with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm: 1908 ElementInclude.include(document, self.xinclude_loader, max_depth=0) 1909 self.assertEqual(str(cm.exception), 1910 "maximum xinclude depth reached when including file Recursive2.xml") 1911 1912 document = self.xinclude_loader("Recursive1.xml") 1913 with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm: 1914 ElementInclude.include(document, self.xinclude_loader, max_depth=1) 1915 self.assertEqual(str(cm.exception), 1916 "maximum xinclude depth reached when including file Recursive3.xml") 1917 1918 document = self.xinclude_loader("Recursive1.xml") 1919 with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm: 1920 ElementInclude.include(document, self.xinclude_loader, max_depth=2) 1921 self.assertEqual(str(cm.exception), 1922 "maximum xinclude depth reached when including file Recursive1.xml") 1923 1924 document = self.xinclude_loader("Recursive1.xml") 1925 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 1926 ElementInclude.include(document, self.xinclude_loader, max_depth=3) 1927 self.assertEqual(str(cm.exception), 1928 "recursive include of Recursive2.xml") 1929 1930 1931# -------------------------------------------------------------------- 1932# reported bugs 1933 1934class BugsTest(unittest.TestCase): 1935 1936 def test_bug_xmltoolkit21(self): 1937 # marshaller gives obscure errors for non-string values 1938 1939 def check(elem): 1940 with self.assertRaises(TypeError) as cm: 1941 serialize(elem) 1942 self.assertEqual(str(cm.exception), 1943 'cannot serialize 123 (type int)') 1944 1945 elem 
= ET.Element(123) 1946 check(elem) # tag 1947 1948 elem = ET.Element("elem") 1949 elem.text = 123 1950 check(elem) # text 1951 1952 elem = ET.Element("elem") 1953 elem.tail = 123 1954 check(elem) # tail 1955 1956 elem = ET.Element("elem") 1957 elem.set(123, "123") 1958 check(elem) # attribute key 1959 1960 elem = ET.Element("elem") 1961 elem.set("123", 123) 1962 check(elem) # attribute value 1963 1964 def test_bug_xmltoolkit25(self): 1965 # typo in ElementTree.findtext 1966 1967 elem = ET.XML(SAMPLE_XML) 1968 tree = ET.ElementTree(elem) 1969 self.assertEqual(tree.findtext("tag"), 'text') 1970 self.assertEqual(tree.findtext("section/tag"), 'subtext') 1971 1972 def test_bug_xmltoolkit28(self): 1973 # .//tag causes exceptions 1974 1975 tree = ET.XML("<doc><table><tbody/></table></doc>") 1976 self.assertEqual(summarize_list(tree.findall(".//thead")), []) 1977 self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody']) 1978 1979 def test_bug_xmltoolkitX1(self): 1980 # dump() doesn't flush the output buffer 1981 1982 tree = ET.XML("<doc><table><tbody/></table></doc>") 1983 with support.captured_stdout() as stdout: 1984 ET.dump(tree) 1985 self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n') 1986 1987 def test_bug_xmltoolkit39(self): 1988 # non-ascii element and attribute names doesn't work 1989 1990 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />") 1991 self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />') 1992 1993 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 1994 b"<tag \xe4ttr='v&#228;lue' />") 1995 self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'}) 1996 self.assertEqual(ET.tostring(tree, "utf-8"), 1997 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />') 1998 1999 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 2000 b'<t\xe4g>text</t\xe4g>') 2001 self.assertEqual(ET.tostring(tree, "utf-8"), 2002 b'<t\xc3\xa4g>text</t\xc3\xa4g>') 2003 2004 tree = ET.Element("t\u00e4g") 2005
self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />') 2006 2007 tree = ET.Element("tag") 2008 tree.set("\u00e4ttr", "v\u00e4lue") 2009 self.assertEqual(ET.tostring(tree, "utf-8"), 2010 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />') 2011 2012 def test_bug_xmltoolkit54(self): 2013 # problems handling internally defined entities 2014 2015 e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#33328;'>]>" 2016 '<doc>&ldots;</doc>') 2017 self.assertEqual(serialize(e, encoding="us-ascii"), 2018 b'<doc>&#33328;</doc>') 2019 self.assertEqual(serialize(e), '<doc>\u8230</doc>') 2020 2021 def test_bug_xmltoolkit55(self): 2022 # make sure we're reporting the first error, not the last 2023 2024 with self.assertRaises(ET.ParseError) as cm: 2025 ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>" 2026 b'<doc>&ldots;&ndots;&rdots;</doc>') 2027 self.assertEqual(str(cm.exception), 2028 'undefined entity &ldots;: line 1, column 36') 2029 2030 def test_bug_xmltoolkit60(self): 2031 # Handle crash in stream source. 2032 2033 class ExceptionFile: 2034 def read(self, x): 2035 raise OSError 2036 2037 self.assertRaises(OSError, ET.parse, ExceptionFile()) 2038 2039 def test_bug_xmltoolkit62(self): 2040 # Don't crash when using custom entities. 2041 2042 ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'} 2043 parser = ET.XMLParser() 2044 parser.entity.update(ENTITIES) 2045 parser.feed("""<?xml version="1.0" encoding="UTF-8"?> 2046<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []> 2047<patent-application-publication> 2048<subdoc-abstract> 2049<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph> 2050</subdoc-abstract> 2051</patent-application-publication>""") 2052 t = parser.close() 2053 self.assertEqual(t.find('.//paragraph').text, 2054 'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.') 2055 2056 @unittest.skipIf(sys.gettrace(), "Skips under coverage.") 2057 def test_bug_xmltoolkit63(self): 2058 # Check reference leak.
2059 def xmltoolkit63(): 2060 tree = ET.TreeBuilder() 2061 tree.start("tag", {}) 2062 tree.data("text") 2063 tree.end("tag") 2064 2065 xmltoolkit63() 2066 count = sys.getrefcount(None) 2067 for i in range(1000): 2068 xmltoolkit63() 2069 self.assertEqual(sys.getrefcount(None), count) 2070 2071 def test_bug_200708_newline(self): 2072 # Preserve newlines in attributes. 2073 2074 e = ET.Element('SomeTag', text="def _f():\n return 3\n") 2075 self.assertEqual(ET.tostring(e), 2076 b'<SomeTag text="def _f():&#10; return 3&#10;" />') 2077 self.assertEqual(ET.XML(ET.tostring(e)).get("text"), 2078 'def _f():\n return 3\n') 2079 self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))), 2080 b'<SomeTag text="def _f():&#10; return 3&#10;" />') 2081 2082 def test_bug_200708_close(self): 2083 # Test default builder. 2084 parser = ET.XMLParser() # default 2085 parser.feed("<element>some text</element>") 2086 self.assertEqual(parser.close().tag, 'element') 2087 2088 # Test custom builder. 2089 class EchoTarget: 2090 def close(self): 2091 return ET.Element("element") # simulate root 2092 parser = ET.XMLParser(target=EchoTarget()) 2093 parser.feed("<element>some text</element>") 2094 self.assertEqual(parser.close().tag, 'element') 2095 2096 def test_bug_200709_default_namespace(self): 2097 e = ET.Element("{default}elem") 2098 s = ET.SubElement(e, "{default}elem") 2099 self.assertEqual(serialize(e, default_namespace="default"), # 1 2100 '<elem xmlns="default"><elem /></elem>') 2101 2102 e = ET.Element("{default}elem") 2103 s = ET.SubElement(e, "{default}elem") 2104 s = ET.SubElement(e, "{not-default}elem") 2105 self.assertEqual(serialize(e, default_namespace="default"), # 2 2106 '<elem xmlns="default" xmlns:ns1="not-default">' 2107 '<elem />' 2108 '<ns1:elem />' 2109 '</elem>') 2110 2111 e = ET.Element("{default}elem") 2112 s = ET.SubElement(e, "{default}elem") 2113 s = ET.SubElement(e, "elem") # unprefixed name 2114 with self.assertRaises(ValueError) as cm: 2115 serialize(e, default_namespace="default") #
3 2116 self.assertEqual(str(cm.exception), 2117 'cannot use non-qualified names with default_namespace option') 2118 2119 def test_bug_200709_register_namespace(self): 2120 e = ET.Element("{http://namespace.invalid/does/not/exist/}title") 2121 self.assertEqual(ET.tostring(e), 2122 b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />') 2123 ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/") 2124 e = ET.Element("{http://namespace.invalid/does/not/exist/}title") 2125 self.assertEqual(ET.tostring(e), 2126 b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />') 2127 2128 # And the Dublin Core namespace is in the default list: 2129 2130 e = ET.Element("{http://purl.org/dc/elements/1.1/}title") 2131 self.assertEqual(ET.tostring(e), 2132 b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />') 2133 2134 def test_bug_200709_element_comment(self): 2135 # Not sure if this can be fixed, really (since the serializer needs 2136 # ET.Comment, not cET.comment). 
2137 2138 a = ET.Element('a') 2139 a.append(ET.Comment('foo')) 2140 self.assertEqual(a[0].tag, ET.Comment) 2141 2142 a = ET.Element('a') 2143 a.append(ET.PI('foo')) 2144 self.assertEqual(a[0].tag, ET.PI) 2145 2146 def test_bug_200709_element_insert(self): 2147 a = ET.Element('a') 2148 b = ET.SubElement(a, 'b') 2149 c = ET.SubElement(a, 'c') 2150 d = ET.Element('d') 2151 a.insert(0, d) 2152 self.assertEqual(summarize_list(a), ['d', 'b', 'c']) 2153 a.insert(-1, d) 2154 self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c']) 2155 2156 def test_bug_200709_iter_comment(self): 2157 a = ET.Element('a') 2158 b = ET.SubElement(a, 'b') 2159 comment_b = ET.Comment("TEST-b") 2160 b.append(comment_b) 2161 self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment]) 2162 2163 # -------------------------------------------------------------------- 2164 # reported on bugs.python.org 2165 2166 def test_bug_1534630(self): 2167 bob = ET.TreeBuilder() 2168 e = bob.data("data") 2169 e = bob.start("tag", {}) 2170 e = bob.end("tag") 2171 e = bob.close() 2172 self.assertEqual(serialize(e), '<tag />') 2173 2174 def test_issue6233(self): 2175 e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>" 2176 b'<body>t\xc3\xa3g</body>') 2177 self.assertEqual(ET.tostring(e, 'ascii'), 2178 b"<?xml version='1.0' encoding='ascii'?>\n" 2179 b'<body>t&#227;g</body>') 2180 e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 2181 b'<body>t\xe3g</body>') 2182 self.assertEqual(ET.tostring(e, 'ascii'), 2183 b"<?xml version='1.0' encoding='ascii'?>\n" 2184 b'<body>t&#227;g</body>') 2185 2186 def test_issue3151(self): 2187 e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>') 2188 self.assertEqual(e.tag, '{${stuff}}localname') 2189 t = ET.ElementTree(e) 2190 self.assertEqual(ET.tostring(e), b'<ns0:localname xmlns:ns0="${stuff}" />') 2191 2192 def test_issue6565(self): 2193 elem = ET.XML("<body><tag/></body>") 2194 self.assertEqual(summarize_list(elem), ['tag']) 2195 newelem = ET.XML(SAMPLE_XML) 2196
elem[:] = newelem[:] 2197 self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section']) 2198 2199 def test_issue10777(self): 2200 # Registering a namespace twice caused a "dictionary changed size during 2201 # iteration" bug. 2202 2203 ET.register_namespace('test10777', 'http://myuri/') 2204 ET.register_namespace('test10777', 'http://myuri/') 2205 2206 def test_lost_text(self): 2207 # Issue #25902: Borrowed text can disappear 2208 class Text: 2209 def __bool__(self): 2210 e.text = 'changed' 2211 return True 2212 2213 e = ET.Element('tag') 2214 e.text = Text() 2215 i = e.itertext() 2216 t = next(i) 2217 self.assertIsInstance(t, Text) 2218 self.assertIsInstance(e.text, str) 2219 self.assertEqual(e.text, 'changed') 2220 2221 def test_lost_tail(self): 2222 # Issue #25902: Borrowed tail can disappear 2223 class Text: 2224 def __bool__(self): 2225 e[0].tail = 'changed' 2226 return True 2227 2228 e = ET.Element('root') 2229 e.append(ET.Element('tag')) 2230 e[0].tail = Text() 2231 i = e.itertext() 2232 t = next(i) 2233 self.assertIsInstance(t, Text) 2234 self.assertIsInstance(e[0].tail, str) 2235 self.assertEqual(e[0].tail, 'changed') 2236 2237 def test_lost_elem(self): 2238 # Issue #25902: Borrowed element can disappear 2239 class Tag: 2240 def __eq__(self, other): 2241 e[0] = ET.Element('changed') 2242 next(i) 2243 return True 2244 2245 e = ET.Element('root') 2246 e.append(ET.Element(Tag())) 2247 e.append(ET.Element('tag')) 2248 i = e.iter('tag') 2249 try: 2250 t = next(i) 2251 except ValueError: 2252 self.skipTest('generators are not reentrant') 2253 self.assertIsInstance(t.tag, Tag) 2254 self.assertIsInstance(e[0].tag, str) 2255 self.assertEqual(e[0].tag, 'changed') 2256 2257 def check_expat224_utf8_bug(self, text): 2258 xml = b'<a b="%s"/>' % text 2259 root = ET.XML(xml) 2260 self.assertEqual(root.get('b'), text.decode('utf-8')) 2261 2262 def test_expat224_utf8_bug(self): 2263 # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder. 
2264 # Check that Expat 2.2.4 fixed the bug. 2265 # 2266 # Test buffer bounds at odd and even positions. 2267 2268 text = b'\xc3\xa0' * 1024 2269 self.check_expat224_utf8_bug(text) 2270 2271 text = b'x' + b'\xc3\xa0' * 1024 2272 self.check_expat224_utf8_bug(text) 2273 2274 def test_expat224_utf8_bug_file(self): 2275 with open(UTF8_BUG_XMLFILE, 'rb') as fp: 2276 raw = fp.read() 2277 root = ET.fromstring(raw) 2278 xmlattr = root.get('b') 2279 2280 # "Parse" manually the XML file to extract the value of the 'b' 2281 # attribute of the <a b='xxx' /> XML element 2282 text = raw.decode('utf-8').strip() 2283 text = text.replace('\r\n', ' ') 2284 text = text[6:-4] 2285 self.assertEqual(root.get('b'), text) 2286 2287 def test_39495_treebuilder_start(self): 2288 self.assertRaises(TypeError, ET.TreeBuilder().start, "tag") 2289 self.assertRaises(TypeError, ET.TreeBuilder().start, "tag", None) 2290 2291 2292 2293# -------------------------------------------------------------------- 2294 2295 2296class BasicElementTest(ElementTestCase, unittest.TestCase): 2297 2298 def test___init__(self): 2299 tag = "foo" 2300 attrib = { "zix": "wyp" } 2301 2302 element_foo = ET.Element(tag, attrib) 2303 2304 # traits of an element 2305 self.assertIsInstance(element_foo, ET.Element) 2306 self.assertIn("tag", dir(element_foo)) 2307 self.assertIn("attrib", dir(element_foo)) 2308 self.assertIn("text", dir(element_foo)) 2309 self.assertIn("tail", dir(element_foo)) 2310 2311 # string attributes have expected values 2312 self.assertEqual(element_foo.tag, tag) 2313 self.assertIsNone(element_foo.text) 2314 self.assertIsNone(element_foo.tail) 2315 2316 # attrib is a copy 2317 self.assertIsNot(element_foo.attrib, attrib) 2318 self.assertEqual(element_foo.attrib, attrib) 2319 2320 # attrib isn't linked 2321 attrib["bar"] = "baz" 2322 self.assertIsNot(element_foo.attrib, attrib) 2323 self.assertNotEqual(element_foo.attrib, attrib) 2324 2325 def test_copy(self): 2326 # Only run this test if Element.copy() 
is defined. 2327 if "copy" not in dir(ET.Element): 2328 raise unittest.SkipTest("Element.copy() not present") 2329 2330 element_foo = ET.Element("foo", { "zix": "wyp" }) 2331 element_foo.append(ET.Element("bar", { "baz": "qix" })) 2332 2333 with self.assertWarns(DeprecationWarning): 2334 element_foo2 = element_foo.copy() 2335 2336 # elements are not the same 2337 self.assertIsNot(element_foo2, element_foo) 2338 2339 # string attributes are equal 2340 self.assertEqual(element_foo2.tag, element_foo.tag) 2341 self.assertEqual(element_foo2.text, element_foo.text) 2342 self.assertEqual(element_foo2.tail, element_foo.tail) 2343 2344 # number of children is the same 2345 self.assertEqual(len(element_foo2), len(element_foo)) 2346 2347 # children are the same 2348 for (child1, child2) in itertools.zip_longest(element_foo, element_foo2): 2349 self.assertIs(child1, child2) 2350 2351 # attrib is a copy 2352 self.assertEqual(element_foo2.attrib, element_foo.attrib) 2353 2354 def test___copy__(self): 2355 element_foo = ET.Element("foo", { "zix": "wyp" }) 2356 element_foo.append(ET.Element("bar", { "baz": "qix" })) 2357 2358 element_foo2 = copy.copy(element_foo) 2359 2360 # elements are not the same 2361 self.assertIsNot(element_foo2, element_foo) 2362 2363 # string attributes are equal 2364 self.assertEqual(element_foo2.tag, element_foo.tag) 2365 self.assertEqual(element_foo2.text, element_foo.text) 2366 self.assertEqual(element_foo2.tail, element_foo.tail) 2367 2368 # number of children is the same 2369 self.assertEqual(len(element_foo2), len(element_foo)) 2370 2371 # children are the same 2372 for (child1, child2) in itertools.zip_longest(element_foo, element_foo2): 2373 self.assertIs(child1, child2) 2374 2375 # attrib is a copy 2376 self.assertEqual(element_foo2.attrib, element_foo.attrib) 2377 2378 def test___deepcopy__(self): 2379 element_foo = ET.Element("foo", { "zix": "wyp" }) 2380 element_foo.append(ET.Element("bar", { "baz": "qix" })) 2381 2382 element_foo2 = 
copy.deepcopy(element_foo) 2383 2384 # elements are not the same 2385 self.assertIsNot(element_foo2, element_foo) 2386 2387 # string attributes are equal 2388 self.assertEqual(element_foo2.tag, element_foo.tag) 2389 self.assertEqual(element_foo2.text, element_foo.text) 2390 self.assertEqual(element_foo2.tail, element_foo.tail) 2391 2392 # number of children is the same 2393 self.assertEqual(len(element_foo2), len(element_foo)) 2394 2395 # children are not the same 2396 for (child1, child2) in itertools.zip_longest(element_foo, element_foo2): 2397 self.assertIsNot(child1, child2) 2398 2399 # attrib is a copy 2400 self.assertIsNot(element_foo2.attrib, element_foo.attrib) 2401 self.assertEqual(element_foo2.attrib, element_foo.attrib) 2402 2403 # attrib isn't linked 2404 element_foo.attrib["bar"] = "baz" 2405 self.assertIsNot(element_foo2.attrib, element_foo.attrib) 2406 self.assertNotEqual(element_foo2.attrib, element_foo.attrib) 2407 2408 def test_augmentation_type_errors(self): 2409 e = ET.Element('joe') 2410 self.assertRaises(TypeError, e.append, 'b') 2411 self.assertRaises(TypeError, e.extend, [ET.Element('bar'), 'foo']) 2412 self.assertRaises(TypeError, e.insert, 0, 'foo') 2413 e[:] = [ET.Element('bar')] 2414 with self.assertRaises(TypeError): 2415 e[0] = 'foo' 2416 with self.assertRaises(TypeError): 2417 e[:] = [ET.Element('bar'), 'foo'] 2418 2419 if hasattr(e, '__setstate__'): 2420 state = { 2421 'tag': 'tag', 2422 '_children': [None], # non-Element 2423 'attrib': 'attr', 2424 'tail': 'tail', 2425 'text': 'text', 2426 } 2427 self.assertRaises(TypeError, e.__setstate__, state) 2428 2429 if hasattr(e, '__deepcopy__'): 2430 class E(ET.Element): 2431 def __deepcopy__(self, memo): 2432 return None # non-Element 2433 e[:] = [E('bar')] 2434 self.assertRaises(TypeError, copy.deepcopy, e) 2435 2436 def test_cyclic_gc(self): 2437 class Dummy: 2438 pass 2439 2440 # Test the shortest cycle: d->element->d 2441 d = Dummy() 2442 d.dummyref = ET.Element('joe', attr=d) 2443 
wref = weakref.ref(d) 2444 del d 2445 gc_collect() 2446 self.assertIsNone(wref()) 2447 2448 # A longer cycle: d->e->e2->d 2449 e = ET.Element('joe') 2450 d = Dummy() 2451 d.dummyref = e 2452 wref = weakref.ref(d) 2453 e2 = ET.SubElement(e, 'foo', attr=d) 2454 del d, e, e2 2455 gc_collect() 2456 self.assertIsNone(wref()) 2457 2458 # A cycle between Element objects as children of one another 2459 # e1->e2->e3->e1 2460 e1 = ET.Element('e1') 2461 e2 = ET.Element('e2') 2462 e3 = ET.Element('e3') 2463 e3.append(e1) 2464 e2.append(e3) 2465 e1.append(e2) 2466 wref = weakref.ref(e1) 2467 del e1, e2, e3 2468 gc_collect() 2469 self.assertIsNone(wref()) 2470 2471 def test_weakref(self): 2472 flag = False 2473 def wref_cb(w): 2474 nonlocal flag 2475 flag = True 2476 e = ET.Element('e') 2477 wref = weakref.ref(e, wref_cb) 2478 self.assertEqual(wref().tag, 'e') 2479 del e 2480 gc_collect() # For PyPy or other GCs. 2481 self.assertEqual(flag, True) 2482 self.assertEqual(wref(), None) 2483 2484 def test_get_keyword_args(self): 2485 e1 = ET.Element('foo' , x=1, y=2, z=3) 2486 self.assertEqual(e1.get('x', default=7), 1) 2487 self.assertEqual(e1.get('w', default=7), 7) 2488 2489 def test_pickle(self): 2490 # issue #16076: the C implementation wasn't pickleable. 
2491 for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): 2492 for dumper, loader in product(self.modules, repeat=2): 2493 e = dumper.Element('foo', bar=42) 2494 e.text = "text goes here" 2495 e.tail = "opposite of head" 2496 dumper.SubElement(e, 'child').append(dumper.Element('grandchild')) 2497 e.append(dumper.Element('child')) 2498 e.findall('.//grandchild')[0].set('attr', 'other value') 2499 2500 e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree', 2501 dumper, loader, proto) 2502 2503 self.assertEqual(e2.tag, 'foo') 2504 self.assertEqual(e2.attrib['bar'], 42) 2505 self.assertEqual(len(e2), 2) 2506 self.assertEqualElements(e, e2) 2507 2508 def test_pickle_issue18997(self): 2509 for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): 2510 for dumper, loader in product(self.modules, repeat=2): 2511 XMLTEXT = """<?xml version="1.0"?> 2512 <group><dogs>4</dogs> 2513 </group>""" 2514 e1 = dumper.fromstring(XMLTEXT) 2515 if hasattr(e1, '__getstate__'): 2516 self.assertEqual(e1.__getstate__()['tag'], 'group') 2517 e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree', 2518 dumper, loader, proto) 2519 self.assertEqual(e2.tag, 'group') 2520 self.assertEqual(e2[0].tag, 'dogs') 2521 2522 2523class BadElementTest(ElementTestCase, unittest.TestCase): 2524 def test_extend_mutable_list(self): 2525 class X: 2526 @property 2527 def __class__(self): 2528 L[:] = [ET.Element('baz')] 2529 return ET.Element 2530 L = [X()] 2531 e = ET.Element('foo') 2532 try: 2533 e.extend(L) 2534 except TypeError: 2535 pass 2536 2537 class Y(X, ET.Element): 2538 pass 2539 L = [Y('x')] 2540 e = ET.Element('foo') 2541 e.extend(L) 2542 2543 def test_extend_mutable_list2(self): 2544 class X: 2545 @property 2546 def __class__(self): 2547 del L[:] 2548 return ET.Element 2549 L = [X(), ET.Element('baz')] 2550 e = ET.Element('foo') 2551 try: 2552 e.extend(L) 2553 except TypeError: 2554 pass 2555 2556 class Y(X, ET.Element): 2557 pass 2558 L = [Y('bar'), ET.Element('baz')] 2559 e = ET.Element('foo') 2560 
e.extend(L) 2561 2562 def test_remove_with_mutating(self): 2563 class X(ET.Element): 2564 def __eq__(self, o): 2565 del e[:] 2566 return False 2567 e = ET.Element('foo') 2568 e.extend([X('bar')]) 2569 self.assertRaises(ValueError, e.remove, ET.Element('baz')) 2570 2571 e = ET.Element('foo') 2572 e.extend([ET.Element('bar')]) 2573 self.assertRaises(ValueError, e.remove, X('baz')) 2574 2575 def test_recursive_repr(self): 2576 # Issue #25455 2577 e = ET.Element('foo') 2578 with swap_attr(e, 'tag', e): 2579 with self.assertRaises(RuntimeError): 2580 repr(e) # Should not crash 2581 2582 def test_element_get_text(self): 2583 # Issue #27863 2584 class X(str): 2585 def __del__(self): 2586 try: 2587 elem.text 2588 except NameError: 2589 pass 2590 2591 b = ET.TreeBuilder() 2592 b.start('tag', {}) 2593 b.data('ABCD') 2594 b.data(X('EFGH')) 2595 b.data('IJKL') 2596 b.end('tag') 2597 2598 elem = b.close() 2599 self.assertEqual(elem.text, 'ABCDEFGHIJKL') 2600 2601 def test_element_get_tail(self): 2602 # Issue #27863 2603 class X(str): 2604 def __del__(self): 2605 try: 2606 elem[0].tail 2607 except NameError: 2608 pass 2609 2610 b = ET.TreeBuilder() 2611 b.start('root', {}) 2612 b.start('tag', {}) 2613 b.end('tag') 2614 b.data('ABCD') 2615 b.data(X('EFGH')) 2616 b.data('IJKL') 2617 b.end('root') 2618 2619 elem = b.close() 2620 self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL') 2621 2622 def test_subscr(self): 2623 # Issue #27863 2624 class X: 2625 def __index__(self): 2626 del e[:] 2627 return 1 2628 2629 e = ET.Element('elem') 2630 e.append(ET.Element('child')) 2631 e[:X()] # shouldn't crash 2632 2633 e.append(ET.Element('child')) 2634 e[0:10:X()] # shouldn't crash 2635 2636 def test_ass_subscr(self): 2637 # Issue #27863 2638 class X: 2639 def __index__(self): 2640 e[:] = [] 2641 return 1 2642 2643 e = ET.Element('elem') 2644 for _ in range(10): 2645 e.insert(0, ET.Element('child')) 2646 2647 e[0:10:X()] = [] # shouldn't crash 2648 2649 def test_treebuilder_start(self): 2650 # Issue 
#27863 2651 def element_factory(x, y): 2652 return [] 2653 b = ET.TreeBuilder(element_factory=element_factory) 2654 2655 b.start('tag', {}) 2656 b.data('ABCD') 2657 self.assertRaises(AttributeError, b.start, 'tag2', {}) 2658 del b 2659 gc_collect() 2660 2661 def test_treebuilder_end(self): 2662 # Issue #27863 2663 def element_factory(x, y): 2664 return [] 2665 b = ET.TreeBuilder(element_factory=element_factory) 2666 2667 b.start('tag', {}) 2668 b.data('ABCD') 2669 self.assertRaises(AttributeError, b.end, 'tag') 2670 del b 2671 gc_collect() 2672 2673 2674class MutatingElementPath(str): 2675 def __new__(cls, elem, *args): 2676 self = str.__new__(cls, *args) 2677 self.elem = elem 2678 return self 2679 def __eq__(self, o): 2680 del self.elem[:] 2681 return True 2682MutatingElementPath.__hash__ = str.__hash__ 2683 2684class BadElementPath(str): 2685 def __eq__(self, o): 2686 raise 1/0 2687BadElementPath.__hash__ = str.__hash__ 2688 2689class BadElementPathTest(ElementTestCase, unittest.TestCase): 2690 def setUp(self): 2691 super().setUp() 2692 from xml.etree import ElementPath 2693 self.path_cache = ElementPath._cache 2694 ElementPath._cache = {} 2695 2696 def tearDown(self): 2697 from xml.etree import ElementPath 2698 ElementPath._cache = self.path_cache 2699 super().tearDown() 2700 2701 def test_find_with_mutating(self): 2702 e = ET.Element('foo') 2703 e.extend([ET.Element('bar')]) 2704 e.find(MutatingElementPath(e, 'x')) 2705 2706 def test_find_with_error(self): 2707 e = ET.Element('foo') 2708 e.extend([ET.Element('bar')]) 2709 try: 2710 e.find(BadElementPath('x')) 2711 except ZeroDivisionError: 2712 pass 2713 2714 def test_findtext_with_mutating(self): 2715 e = ET.Element('foo') 2716 e.extend([ET.Element('bar')]) 2717 e.findtext(MutatingElementPath(e, 'x')) 2718 2719 def test_findtext_with_error(self): 2720 e = ET.Element('foo') 2721 e.extend([ET.Element('bar')]) 2722 try: 2723 e.findtext(BadElementPath('x')) 2724 except ZeroDivisionError: 2725 pass 2726 2727 def 
test_findall_with_mutating(self): 2728 e = ET.Element('foo') 2729 e.extend([ET.Element('bar')]) 2730 e.findall(MutatingElementPath(e, 'x')) 2731 2732 def test_findall_with_error(self): 2733 e = ET.Element('foo') 2734 e.extend([ET.Element('bar')]) 2735 try: 2736 e.findall(BadElementPath('x')) 2737 except ZeroDivisionError: 2738 pass 2739 2740 2741class ElementTreeTypeTest(unittest.TestCase): 2742 def test_istype(self): 2743 self.assertIsInstance(ET.ParseError, type) 2744 self.assertIsInstance(ET.QName, type) 2745 self.assertIsInstance(ET.ElementTree, type) 2746 self.assertIsInstance(ET.Element, type) 2747 self.assertIsInstance(ET.TreeBuilder, type) 2748 self.assertIsInstance(ET.XMLParser, type) 2749 2750 def test_Element_subclass_trivial(self): 2751 class MyElement(ET.Element): 2752 pass 2753 2754 mye = MyElement('foo') 2755 self.assertIsInstance(mye, ET.Element) 2756 self.assertIsInstance(mye, MyElement) 2757 self.assertEqual(mye.tag, 'foo') 2758 2759 # test that attribute assignment works (issue 14849) 2760 mye.text = "joe" 2761 self.assertEqual(mye.text, "joe") 2762 2763 def test_Element_subclass_constructor(self): 2764 class MyElement(ET.Element): 2765 def __init__(self, tag, attrib={}, **extra): 2766 super(MyElement, self).__init__(tag + '__', attrib, **extra) 2767 2768 mye = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4) 2769 self.assertEqual(mye.tag, 'foo__') 2770 self.assertEqual(sorted(mye.items()), 2771 [('a', 1), ('b', 2), ('c', 3), ('d', 4)]) 2772 2773 def test_Element_subclass_new_method(self): 2774 class MyElement(ET.Element): 2775 def newmethod(self): 2776 return self.tag 2777 2778 mye = MyElement('joe') 2779 self.assertEqual(mye.newmethod(), 'joe') 2780 2781 def test_Element_subclass_find(self): 2782 class MyElement(ET.Element): 2783 pass 2784 2785 e = ET.Element('foo') 2786 e.text = 'text' 2787 sub = MyElement('bar') 2788 sub.text = 'subtext' 2789 e.append(sub) 2790 self.assertEqual(e.findtext('bar'), 'subtext') 2791 
self.assertEqual(e.find('bar').tag, 'bar') 2792 found = list(e.findall('bar')) 2793 self.assertEqual(len(found), 1, found) 2794 self.assertEqual(found[0].tag, 'bar') 2795 2796 2797class ElementFindTest(unittest.TestCase): 2798 def test_find_simple(self): 2799 e = ET.XML(SAMPLE_XML) 2800 self.assertEqual(e.find('tag').tag, 'tag') 2801 self.assertEqual(e.find('section/tag').tag, 'tag') 2802 self.assertEqual(e.find('./tag').tag, 'tag') 2803 2804 e[2] = ET.XML(SAMPLE_SECTION) 2805 self.assertEqual(e.find('section/nexttag').tag, 'nexttag') 2806 2807 self.assertEqual(e.findtext('./tag'), 'text') 2808 self.assertEqual(e.findtext('section/tag'), 'subtext') 2809 2810 # section/nexttag is found but has no text 2811 self.assertEqual(e.findtext('section/nexttag'), '') 2812 self.assertEqual(e.findtext('section/nexttag', 'default'), '') 2813 2814 # tog doesn't exist and 'default' kicks in 2815 self.assertIsNone(e.findtext('tog')) 2816 self.assertEqual(e.findtext('tog', 'default'), 'default') 2817 2818 # Issue #16922 2819 self.assertEqual(ET.XML('<tag><empty /></tag>').findtext('empty'), '') 2820 2821 def test_find_xpath(self): 2822 LINEAR_XML = ''' 2823 <body> 2824 <tag class='a'/> 2825 <tag class='b'/> 2826 <tag class='c'/> 2827 <tag class='d'/> 2828 </body>''' 2829 e = ET.XML(LINEAR_XML) 2830 2831 # Test for numeric indexing and last() 2832 self.assertEqual(e.find('./tag[1]').attrib['class'], 'a') 2833 self.assertEqual(e.find('./tag[2]').attrib['class'], 'b') 2834 self.assertEqual(e.find('./tag[last()]').attrib['class'], 'd') 2835 self.assertEqual(e.find('./tag[last()-1]').attrib['class'], 'c') 2836 self.assertEqual(e.find('./tag[last()-2]').attrib['class'], 'b') 2837 2838 self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[0]') 2839 self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[-1]') 2840 self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]') 2841 self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]') 2842 2843 def 
test_findall(self): 2844 e = ET.XML(SAMPLE_XML) 2845 e[2] = ET.XML(SAMPLE_SECTION) 2846 self.assertEqual(summarize_list(e.findall('.')), ['body']) 2847 self.assertEqual(summarize_list(e.findall('tag')), ['tag', 'tag']) 2848 self.assertEqual(summarize_list(e.findall('tog')), []) 2849 self.assertEqual(summarize_list(e.findall('tog/foo')), []) 2850 self.assertEqual(summarize_list(e.findall('*')), 2851 ['tag', 'tag', 'section']) 2852 self.assertEqual(summarize_list(e.findall('.//tag')), 2853 ['tag'] * 4) 2854 self.assertEqual(summarize_list(e.findall('section/tag')), ['tag']) 2855 self.assertEqual(summarize_list(e.findall('section//tag')), ['tag'] * 2) 2856 self.assertEqual(summarize_list(e.findall('section/*')), 2857 ['tag', 'nexttag', 'nextsection']) 2858 self.assertEqual(summarize_list(e.findall('section//*')), 2859 ['tag', 'nexttag', 'nextsection', 'tag']) 2860 self.assertEqual(summarize_list(e.findall('section/.//*')), 2861 ['tag', 'nexttag', 'nextsection', 'tag']) 2862 self.assertEqual(summarize_list(e.findall('*/*')), 2863 ['tag', 'nexttag', 'nextsection']) 2864 self.assertEqual(summarize_list(e.findall('*//*')), 2865 ['tag', 'nexttag', 'nextsection', 'tag']) 2866 self.assertEqual(summarize_list(e.findall('*/tag')), ['tag']) 2867 self.assertEqual(summarize_list(e.findall('*/./tag')), ['tag']) 2868 self.assertEqual(summarize_list(e.findall('./tag')), ['tag'] * 2) 2869 self.assertEqual(summarize_list(e.findall('././tag')), ['tag'] * 2) 2870 2871 self.assertEqual(summarize_list(e.findall('.//tag[@class]')), 2872 ['tag'] * 3) 2873 self.assertEqual(summarize_list(e.findall('.//tag[@class="a"]')), 2874 ['tag']) 2875 self.assertEqual(summarize_list(e.findall('.//tag[@class!="a"]')), 2876 ['tag'] * 2) 2877 self.assertEqual(summarize_list(e.findall('.//tag[@class="b"]')), 2878 ['tag'] * 2) 2879 self.assertEqual(summarize_list(e.findall('.//tag[@class!="b"]')), 2880 ['tag']) 2881 self.assertEqual(summarize_list(e.findall('.//tag[@id]')), 2882 ['tag']) 2883 
self.assertEqual(summarize_list(e.findall('.//section[tag]')), 2884 ['section']) 2885 self.assertEqual(summarize_list(e.findall('.//section[element]')), []) 2886 self.assertEqual(summarize_list(e.findall('../tag')), []) 2887 self.assertEqual(summarize_list(e.findall('section/../tag')), 2888 ['tag'] * 2) 2889 self.assertEqual(e.findall('section//'), e.findall('section//*')) 2890 2891 self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")), 2892 ['section']) 2893 self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")), 2894 ['section']) 2895 self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")), 2896 ['section']) 2897 self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")), 2898 ['section']) 2899 self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")), 2900 ['section']) 2901 2902 # Negations of above tests. They match nothing because the sole section 2903 # tag has subtext. 2904 self.assertEqual(summarize_list(e.findall(".//section[tag!='subtext']")), 2905 []) 2906 self.assertEqual(summarize_list(e.findall(".//section[tag !='subtext']")), 2907 []) 2908 self.assertEqual(summarize_list(e.findall(".//section[tag!= 'subtext']")), 2909 []) 2910 self.assertEqual(summarize_list(e.findall(".//section[tag != 'subtext']")), 2911 []) 2912 self.assertEqual(summarize_list(e.findall(".//section[ tag != 'subtext' ]")), 2913 []) 2914 2915 self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")), 2916 ['tag']) 2917 self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")), 2918 ['tag']) 2919 self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')), 2920 ['tag']) 2921 self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')), 2922 ['tag']) 2923 self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")), 2924 ['tag']) 2925 self.assertEqual(summarize_list(e.findall(".//tag[. 
= 'subtext ']")), 2926 []) 2927 self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")), 2928 []) 2929 2930 # Negations of above tests. 2931 # Matches everything but the tag containing subtext 2932 self.assertEqual(summarize_list(e.findall(".//tag[.!='subtext']")), 2933 ['tag'] * 3) 2934 self.assertEqual(summarize_list(e.findall(".//tag[. !='subtext']")), 2935 ['tag'] * 3) 2936 self.assertEqual(summarize_list(e.findall('.//tag[.!= "subtext"]')), 2937 ['tag'] * 3) 2938 self.assertEqual(summarize_list(e.findall('.//tag[ . != "subtext" ]')), 2939 ['tag'] * 3) 2940 self.assertEqual(summarize_list(e.findall(".//tag[. != 'subtext']")), 2941 ['tag'] * 3) 2942 # Matches all tags. 2943 self.assertEqual(summarize_list(e.findall(".//tag[. != 'subtext ']")), 2944 ['tag'] * 4) 2945 self.assertEqual(summarize_list(e.findall(".//tag[.!= ' subtext']")), 2946 ['tag'] * 4) 2947 2948 # duplicate section => 2x tag matches 2949 e[1] = e[2] 2950 self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")), 2951 ['section', 'section']) 2952 self.assertEqual(summarize_list(e.findall(".//tag[. 
= 'subtext']")), 2953 ['tag', 'tag']) 2954 2955 def test_test_find_with_ns(self): 2956 e = ET.XML(SAMPLE_XML_NS) 2957 self.assertEqual(summarize_list(e.findall('tag')), []) 2958 self.assertEqual( 2959 summarize_list(e.findall("{http://effbot.org/ns}tag")), 2960 ['{http://effbot.org/ns}tag'] * 2) 2961 self.assertEqual( 2962 summarize_list(e.findall(".//{http://effbot.org/ns}tag")), 2963 ['{http://effbot.org/ns}tag'] * 3) 2964 2965 def test_findall_different_nsmaps(self): 2966 root = ET.XML(''' 2967 <a xmlns:x="X" xmlns:y="Y"> 2968 <x:b><c/></x:b> 2969 <b/> 2970 <c><x:b/><b/></c><y:b/> 2971 </a>''') 2972 nsmap = {'xx': 'X'} 2973 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2) 2974 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2) 2975 nsmap = {'xx': 'Y'} 2976 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1) 2977 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2) 2978 nsmap = {'xx': 'X', '': 'Y'} 2979 self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2) 2980 self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1) 2981 2982 def test_findall_wildcard(self): 2983 root = ET.XML(''' 2984 <a xmlns:x="X" xmlns:y="Y"> 2985 <x:b><c/></x:b> 2986 <b/> 2987 <c><x:b/><b/></c><y:b/> 2988 </a>''') 2989 root.append(ET.Comment('test')) 2990 2991 self.assertEqual(summarize_list(root.findall("{*}b")), 2992 ['{X}b', 'b', '{Y}b']) 2993 self.assertEqual(summarize_list(root.findall("{*}c")), 2994 ['c']) 2995 self.assertEqual(summarize_list(root.findall("{X}*")), 2996 ['{X}b']) 2997 self.assertEqual(summarize_list(root.findall("{Y}*")), 2998 ['{Y}b']) 2999 self.assertEqual(summarize_list(root.findall("{}*")), 3000 ['b', 'c']) 3001 self.assertEqual(summarize_list(root.findall("{}b")), # only for consistency 3002 ['b']) 3003 self.assertEqual(summarize_list(root.findall("{}b")), 3004 summarize_list(root.findall("b"))) 3005 self.assertEqual(summarize_list(root.findall("{*}*")), 3006 ['{X}b', 'b', 'c', 
'{Y}b']) 3007 # This is an unfortunate difference, but that's how find('*') works. 3008 self.assertEqual(summarize_list(root.findall("{*}*") + [root[-1]]), 3009 summarize_list(root.findall("*"))) 3010 3011 self.assertEqual(summarize_list(root.findall(".//{*}b")), 3012 ['{X}b', 'b', '{X}b', 'b', '{Y}b']) 3013 self.assertEqual(summarize_list(root.findall(".//{*}c")), 3014 ['c', 'c']) 3015 self.assertEqual(summarize_list(root.findall(".//{X}*")), 3016 ['{X}b', '{X}b']) 3017 self.assertEqual(summarize_list(root.findall(".//{Y}*")), 3018 ['{Y}b']) 3019 self.assertEqual(summarize_list(root.findall(".//{}*")), 3020 ['c', 'b', 'c', 'b']) 3021 self.assertEqual(summarize_list(root.findall(".//{}b")), # only for consistency 3022 ['b', 'b']) 3023 self.assertEqual(summarize_list(root.findall(".//{}b")), 3024 summarize_list(root.findall(".//b"))) 3025 3026 def test_bad_find(self): 3027 e = ET.XML(SAMPLE_XML) 3028 with self.assertRaisesRegex(SyntaxError, 'cannot use absolute path'): 3029 e.findall('/tag') 3030 3031 def test_find_through_ElementTree(self): 3032 e = ET.XML(SAMPLE_XML) 3033 self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag') 3034 self.assertEqual(ET.ElementTree(e).findtext('tag'), 'text') 3035 self.assertEqual(summarize_list(ET.ElementTree(e).findall('tag')), 3036 ['tag'] * 2) 3037 # this produces a warning 3038 msg = ("This search is broken in 1.3 and earlier, and will be fixed " 3039 "in a future version. 
If you rely on the current behaviour, " 3040 "change it to '.+'") 3041 with self.assertWarnsRegex(FutureWarning, msg): 3042 it = ET.ElementTree(e).findall('//tag') 3043 self.assertEqual(summarize_list(it), ['tag'] * 3) 3044 3045 3046class ElementIterTest(unittest.TestCase): 3047 def _ilist(self, elem, tag=None): 3048 return summarize_list(elem.iter(tag)) 3049 3050 def test_basic(self): 3051 doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>") 3052 self.assertEqual(self._ilist(doc), ['html', 'body', 'i']) 3053 self.assertEqual(self._ilist(doc.find('body')), ['body', 'i']) 3054 self.assertEqual(next(doc.iter()).tag, 'html') 3055 self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...') 3056 self.assertEqual(''.join(doc.find('body').itertext()), 3057 'this is a paragraph.') 3058 self.assertEqual(next(doc.itertext()), 'this is a ') 3059 3060 # iterparse should return an iterator 3061 sourcefile = serialize(doc, to_string=False) 3062 self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end') 3063 3064 # With an explicit parser too (issue #9708) 3065 sourcefile = serialize(doc, to_string=False) 3066 parser = ET.XMLParser(target=ET.TreeBuilder()) 3067 self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0], 3068 'end') 3069 3070 tree = ET.ElementTree(None) 3071 self.assertRaises(AttributeError, tree.iter) 3072 3073 # Issue #16913 3074 doc = ET.XML("<root>a&<sub>b&</sub>c&</root>") 3075 self.assertEqual(''.join(doc.itertext()), 'a&b&c&') 3076 3077 def test_corners(self): 3078 # single root, no subelements 3079 a = ET.Element('a') 3080 self.assertEqual(self._ilist(a), ['a']) 3081 3082 # one child 3083 b = ET.SubElement(a, 'b') 3084 self.assertEqual(self._ilist(a), ['a', 'b']) 3085 3086 # one child and one grandchild 3087 c = ET.SubElement(b, 'c') 3088 self.assertEqual(self._ilist(a), ['a', 'b', 'c']) 3089 3090 # two children, only first with grandchild 3091 d = ET.SubElement(a, 'd') 3092 self.assertEqual(self._ilist(a), ['a', 'b', 'c', 
'd']) 3093 3094 # replace first child by second 3095 a[0] = a[1] 3096 del a[1] 3097 self.assertEqual(self._ilist(a), ['a', 'd']) 3098 3099 def test_iter_by_tag(self): 3100 doc = ET.XML(''' 3101 <document> 3102 <house> 3103 <room>bedroom1</room> 3104 <room>bedroom2</room> 3105 </house> 3106 <shed>nothing here 3107 </shed> 3108 <house> 3109 <room>bedroom8</room> 3110 </house> 3111 </document>''') 3112 3113 self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3) 3114 self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2) 3115 3116 # test that iter also accepts 'tag' as a keyword arg 3117 self.assertEqual( 3118 summarize_list(doc.iter(tag='room')), 3119 ['room'] * 3) 3120 3121 # make sure both tag=None and tag='*' return all tags 3122 all_tags = ['document', 'house', 'room', 'room', 3123 'shed', 'house', 'room'] 3124 self.assertEqual(summarize_list(doc.iter()), all_tags) 3125 self.assertEqual(self._ilist(doc), all_tags) 3126 self.assertEqual(self._ilist(doc, '*'), all_tags) 3127 3128 def test_copy(self): 3129 a = ET.Element('a') 3130 it = a.iter() 3131 with self.assertRaises(TypeError): 3132 copy.copy(it) 3133 3134 def test_pickle(self): 3135 a = ET.Element('a') 3136 it = a.iter() 3137 for proto in range(pickle.HIGHEST_PROTOCOL + 1): 3138 with self.assertRaises((TypeError, pickle.PicklingError)): 3139 pickle.dumps(it, proto) 3140 3141 3142class TreeBuilderTest(unittest.TestCase): 3143 sample1 = ('<!DOCTYPE html PUBLIC' 3144 ' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 3145 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 3146 '<html>text<div>subtext</div>tail</html>') 3147 3148 sample2 = '''<toplevel>sometext</toplevel>''' 3149 3150 def _check_sample1_element(self, e): 3151 self.assertEqual(e.tag, 'html') 3152 self.assertEqual(e.text, 'text') 3153 self.assertEqual(e.tail, None) 3154 self.assertEqual(e.attrib, {}) 3155 children = list(e) 3156 self.assertEqual(len(children), 1) 3157 child = children[0] 3158 self.assertEqual(child.tag, 'div') 3159 
self.assertEqual(child.text, 'subtext') 3160 self.assertEqual(child.tail, 'tail') 3161 self.assertEqual(child.attrib, {}) 3162 3163 def test_dummy_builder(self): 3164 class BaseDummyBuilder: 3165 def close(self): 3166 return 42 3167 3168 class DummyBuilder(BaseDummyBuilder): 3169 data = start = end = lambda *a: None 3170 3171 parser = ET.XMLParser(target=DummyBuilder()) 3172 parser.feed(self.sample1) 3173 self.assertEqual(parser.close(), 42) 3174 3175 parser = ET.XMLParser(target=BaseDummyBuilder()) 3176 parser.feed(self.sample1) 3177 self.assertEqual(parser.close(), 42) 3178 3179 parser = ET.XMLParser(target=object()) 3180 parser.feed(self.sample1) 3181 self.assertIsNone(parser.close()) 3182 3183 def test_treebuilder_comment(self): 3184 b = ET.TreeBuilder() 3185 self.assertEqual(b.comment('ctext').tag, ET.Comment) 3186 self.assertEqual(b.comment('ctext').text, 'ctext') 3187 3188 b = ET.TreeBuilder(comment_factory=ET.Comment) 3189 self.assertEqual(b.comment('ctext').tag, ET.Comment) 3190 self.assertEqual(b.comment('ctext').text, 'ctext') 3191 3192 b = ET.TreeBuilder(comment_factory=len) 3193 self.assertEqual(b.comment('ctext'), len('ctext')) 3194 3195 def test_treebuilder_pi(self): 3196 b = ET.TreeBuilder() 3197 self.assertEqual(b.pi('target', None).tag, ET.PI) 3198 self.assertEqual(b.pi('target', None).text, 'target') 3199 3200 b = ET.TreeBuilder(pi_factory=ET.PI) 3201 self.assertEqual(b.pi('target').tag, ET.PI) 3202 self.assertEqual(b.pi('target').text, "target") 3203 self.assertEqual(b.pi('pitarget', ' text ').tag, ET.PI) 3204 self.assertEqual(b.pi('pitarget', ' text ').text, "pitarget text ") 3205 3206 b = ET.TreeBuilder(pi_factory=lambda target, text: (len(target), text)) 3207 self.assertEqual(b.pi('target'), (len('target'), None)) 3208 self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text ')) 3209 3210 def test_late_tail(self): 3211 # Issue #37399: The tail of an ignored comment could overwrite the text before it. 
3212 class TreeBuilderSubclass(ET.TreeBuilder): 3213 pass 3214 3215 xml = "<a>text<!-- comment -->tail</a>" 3216 a = ET.fromstring(xml) 3217 self.assertEqual(a.text, "texttail") 3218 3219 parser = ET.XMLParser(target=TreeBuilderSubclass()) 3220 parser.feed(xml) 3221 a = parser.close() 3222 self.assertEqual(a.text, "texttail") 3223 3224 xml = "<a>text<?pi data?>tail</a>" 3225 a = ET.fromstring(xml) 3226 self.assertEqual(a.text, "texttail") 3227 3228 xml = "<a>text<?pi data?>tail</a>" 3229 parser = ET.XMLParser(target=TreeBuilderSubclass()) 3230 parser.feed(xml) 3231 a = parser.close() 3232 self.assertEqual(a.text, "texttail") 3233 3234 def test_late_tail_mix_pi_comments(self): 3235 # Issue #37399: The tail of an ignored comment could overwrite the text before it. 3236 # Test appending tails to comments/pis. 3237 class TreeBuilderSubclass(ET.TreeBuilder): 3238 pass 3239 3240 xml = "<a>text<?pi1?> <!-- comment -->\n<?pi2?>tail</a>" 3241 parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True)) 3242 parser.feed(xml) 3243 a = parser.close() 3244 self.assertEqual(a[0].text, ' comment ') 3245 self.assertEqual(a[0].tail, '\ntail') 3246 self.assertEqual(a.text, "text ") 3247 3248 parser = ET.XMLParser(target=TreeBuilderSubclass(insert_comments=True)) 3249 parser.feed(xml) 3250 a = parser.close() 3251 self.assertEqual(a[0].text, ' comment ') 3252 self.assertEqual(a[0].tail, '\ntail') 3253 self.assertEqual(a.text, "text ") 3254 3255 xml = "<a>text<!-- comment -->\n<?pi data?>tail</a>" 3256 parser = ET.XMLParser(target=ET.TreeBuilder(insert_pis=True)) 3257 parser.feed(xml) 3258 a = parser.close() 3259 self.assertEqual(a[0].text, 'pi data') 3260 self.assertEqual(a[0].tail, 'tail') 3261 self.assertEqual(a.text, "text\n") 3262 3263 parser = ET.XMLParser(target=TreeBuilderSubclass(insert_pis=True)) 3264 parser.feed(xml) 3265 a = parser.close() 3266 self.assertEqual(a[0].text, 'pi data') 3267 self.assertEqual(a[0].tail, 'tail') 3268 self.assertEqual(a.text, "text\n") 3269 
3270 def test_treebuilder_elementfactory_none(self): 3271 parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None)) 3272 parser.feed(self.sample1) 3273 e = parser.close() 3274 self._check_sample1_element(e) 3275 3276 def test_subclass(self): 3277 class MyTreeBuilder(ET.TreeBuilder): 3278 def foobar(self, x): 3279 return x * 2 3280 3281 tb = MyTreeBuilder() 3282 self.assertEqual(tb.foobar(10), 20) 3283 3284 parser = ET.XMLParser(target=tb) 3285 parser.feed(self.sample1) 3286 3287 e = parser.close() 3288 self._check_sample1_element(e) 3289 3290 def test_subclass_comment_pi(self): 3291 class MyTreeBuilder(ET.TreeBuilder): 3292 def foobar(self, x): 3293 return x * 2 3294 3295 tb = MyTreeBuilder(comment_factory=ET.Comment, pi_factory=ET.PI) 3296 self.assertEqual(tb.foobar(10), 20) 3297 3298 parser = ET.XMLParser(target=tb) 3299 parser.feed(self.sample1) 3300 parser.feed('<!-- a comment--><?and a pi?>') 3301 3302 e = parser.close() 3303 self._check_sample1_element(e) 3304 3305 def test_element_factory(self): 3306 lst = [] 3307 def myfactory(tag, attrib): 3308 nonlocal lst 3309 lst.append(tag) 3310 return ET.Element(tag, attrib) 3311 3312 tb = ET.TreeBuilder(element_factory=myfactory) 3313 parser = ET.XMLParser(target=tb) 3314 parser.feed(self.sample2) 3315 parser.close() 3316 3317 self.assertEqual(lst, ['toplevel']) 3318 3319 def _check_element_factory_class(self, cls): 3320 tb = ET.TreeBuilder(element_factory=cls) 3321 3322 parser = ET.XMLParser(target=tb) 3323 parser.feed(self.sample1) 3324 e = parser.close() 3325 self.assertIsInstance(e, cls) 3326 self._check_sample1_element(e) 3327 3328 def test_element_factory_subclass(self): 3329 class MyElement(ET.Element): 3330 pass 3331 self._check_element_factory_class(MyElement) 3332 3333 def test_element_factory_pure_python_subclass(self): 3334 # Mimic SimpleTAL's behaviour (issue #16089): both versions of 3335 # TreeBuilder should be able to cope with a subclass of the 3336 # pure Python Element class. 
3337 base = ET._Element_Py 3338 # Not from a C extension 3339 self.assertEqual(base.__module__, 'xml.etree.ElementTree') 3340 # Force some multiple inheritance with a C class to make things 3341 # more interesting. 3342 class MyElement(base, ValueError): 3343 pass 3344 self._check_element_factory_class(MyElement) 3345 3346 def test_doctype(self): 3347 class DoctypeParser: 3348 _doctype = None 3349 3350 def doctype(self, name, pubid, system): 3351 self._doctype = (name, pubid, system) 3352 3353 def close(self): 3354 return self._doctype 3355 3356 parser = ET.XMLParser(target=DoctypeParser()) 3357 parser.feed(self.sample1) 3358 3359 self.assertEqual(parser.close(), 3360 ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 3361 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) 3362 3363 def test_builder_lookup_errors(self): 3364 class RaisingBuilder: 3365 def __init__(self, raise_in=None, what=ValueError): 3366 self.raise_in = raise_in 3367 self.what = what 3368 3369 def __getattr__(self, name): 3370 if name == self.raise_in: 3371 raise self.what(self.raise_in) 3372 def handle(*args): 3373 pass 3374 return handle 3375 3376 ET.XMLParser(target=RaisingBuilder()) 3377 # cET also checks for 'close' and 'doctype', PyET does it only at need 3378 for event in ('start', 'data', 'end', 'comment', 'pi'): 3379 with self.assertRaisesRegex(ValueError, event): 3380 ET.XMLParser(target=RaisingBuilder(event)) 3381 3382 ET.XMLParser(target=RaisingBuilder(what=AttributeError)) 3383 for event in ('start', 'data', 'end', 'comment', 'pi'): 3384 parser = ET.XMLParser(target=RaisingBuilder(event, what=AttributeError)) 3385 parser.feed(self.sample1) 3386 self.assertIsNone(parser.close()) 3387 3388 3389class XMLParserTest(unittest.TestCase): 3390 sample1 = b'<file><line>22</line></file>' 3391 sample2 = (b'<!DOCTYPE html PUBLIC' 3392 b' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 3393 b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 3394 b'<html>text</html>') 3395 sample3 = 
('<?xml version="1.0" encoding="iso-8859-1"?>\n' 3396 '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>') 3397 3398 def _check_sample_element(self, e): 3399 self.assertEqual(e.tag, 'file') 3400 self.assertEqual(e[0].tag, 'line') 3401 self.assertEqual(e[0].text, '22') 3402 3403 def test_constructor_args(self): 3404 parser2 = ET.XMLParser(encoding='utf-8', 3405 target=ET.TreeBuilder()) 3406 parser2.feed(self.sample1) 3407 self._check_sample_element(parser2.close()) 3408 3409 def test_subclass(self): 3410 class MyParser(ET.XMLParser): 3411 pass 3412 parser = MyParser() 3413 parser.feed(self.sample1) 3414 self._check_sample_element(parser.close()) 3415 3416 def test_doctype_warning(self): 3417 with warnings.catch_warnings(): 3418 warnings.simplefilter('error', DeprecationWarning) 3419 parser = ET.XMLParser() 3420 parser.feed(self.sample2) 3421 parser.close() 3422 3423 def test_subclass_doctype(self): 3424 _doctype = None 3425 class MyParserWithDoctype(ET.XMLParser): 3426 def doctype(self, *args, **kwargs): 3427 nonlocal _doctype 3428 _doctype = (args, kwargs) 3429 3430 parser = MyParserWithDoctype() 3431 with self.assertWarnsRegex(RuntimeWarning, 'doctype'): 3432 parser.feed(self.sample2) 3433 parser.close() 3434 self.assertIsNone(_doctype) 3435 3436 _doctype = _doctype2 = None 3437 with warnings.catch_warnings(): 3438 warnings.simplefilter('error', DeprecationWarning) 3439 warnings.simplefilter('error', RuntimeWarning) 3440 class DoctypeParser: 3441 def doctype(self, name, pubid, system): 3442 nonlocal _doctype2 3443 _doctype2 = (name, pubid, system) 3444 3445 parser = MyParserWithDoctype(target=DoctypeParser()) 3446 parser.feed(self.sample2) 3447 parser.close() 3448 self.assertIsNone(_doctype) 3449 self.assertEqual(_doctype2, 3450 ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 3451 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) 3452 3453 def test_inherited_doctype(self): 3454 '''Ensure that ordinary usage is not deprecated (Issue 
19176)''' 3455 with warnings.catch_warnings(): 3456 warnings.simplefilter('error', DeprecationWarning) 3457 warnings.simplefilter('error', RuntimeWarning) 3458 class MyParserWithoutDoctype(ET.XMLParser): 3459 pass 3460 parser = MyParserWithoutDoctype() 3461 parser.feed(self.sample2) 3462 parser.close() 3463 3464 def test_parse_string(self): 3465 parser = ET.XMLParser(target=ET.TreeBuilder()) 3466 parser.feed(self.sample3) 3467 e = parser.close() 3468 self.assertEqual(e.tag, 'money') 3469 self.assertEqual(e.attrib['value'], '$\xa3\u20ac\U0001017b') 3470 self.assertEqual(e.text, '$\xa3\u20ac\U0001017b') 3471 3472 3473class NamespaceParseTest(unittest.TestCase): 3474 def test_find_with_namespace(self): 3475 nsmap = {'h': 'hello', 'f': 'foo'} 3476 doc = ET.fromstring(SAMPLE_XML_NS_ELEMS) 3477 3478 self.assertEqual(len(doc.findall('{hello}table', nsmap)), 1) 3479 self.assertEqual(len(doc.findall('.//{hello}td', nsmap)), 2) 3480 self.assertEqual(len(doc.findall('.//{foo}name', nsmap)), 1) 3481 3482 3483class ElementSlicingTest(unittest.TestCase): 3484 def _elem_tags(self, elemlist): 3485 return [e.tag for e in elemlist] 3486 3487 def _subelem_tags(self, elem): 3488 return self._elem_tags(list(elem)) 3489 3490 def _make_elem_with_children(self, numchildren): 3491 """Create an Element with a tag 'a', with the given amount of children 3492 named 'a0', 'a1' ... and so on. 
3493 3494 """ 3495 e = ET.Element('a') 3496 for i in range(numchildren): 3497 ET.SubElement(e, 'a%s' % i) 3498 return e 3499 3500 def test_getslice_single_index(self): 3501 e = self._make_elem_with_children(10) 3502 3503 self.assertEqual(e[1].tag, 'a1') 3504 self.assertEqual(e[-2].tag, 'a8') 3505 3506 self.assertRaises(IndexError, lambda: e[12]) 3507 self.assertRaises(IndexError, lambda: e[-12]) 3508 3509 def test_getslice_range(self): 3510 e = self._make_elem_with_children(6) 3511 3512 self.assertEqual(self._elem_tags(e[3:]), ['a3', 'a4', 'a5']) 3513 self.assertEqual(self._elem_tags(e[3:6]), ['a3', 'a4', 'a5']) 3514 self.assertEqual(self._elem_tags(e[3:16]), ['a3', 'a4', 'a5']) 3515 self.assertEqual(self._elem_tags(e[3:5]), ['a3', 'a4']) 3516 self.assertEqual(self._elem_tags(e[3:-1]), ['a3', 'a4']) 3517 self.assertEqual(self._elem_tags(e[:2]), ['a0', 'a1']) 3518 3519 def test_getslice_steps(self): 3520 e = self._make_elem_with_children(10) 3521 3522 self.assertEqual(self._elem_tags(e[8:10:1]), ['a8', 'a9']) 3523 self.assertEqual(self._elem_tags(e[::3]), ['a0', 'a3', 'a6', 'a9']) 3524 self.assertEqual(self._elem_tags(e[::8]), ['a0', 'a8']) 3525 self.assertEqual(self._elem_tags(e[1::8]), ['a1', 'a9']) 3526 self.assertEqual(self._elem_tags(e[3::sys.maxsize]), ['a3']) 3527 self.assertEqual(self._elem_tags(e[3::sys.maxsize<<64]), ['a3']) 3528 3529 def test_getslice_negative_steps(self): 3530 e = self._make_elem_with_children(4) 3531 3532 self.assertEqual(self._elem_tags(e[::-1]), ['a3', 'a2', 'a1', 'a0']) 3533 self.assertEqual(self._elem_tags(e[::-2]), ['a3', 'a1']) 3534 self.assertEqual(self._elem_tags(e[3::-sys.maxsize]), ['a3']) 3535 self.assertEqual(self._elem_tags(e[3::-sys.maxsize-1]), ['a3']) 3536 self.assertEqual(self._elem_tags(e[3::-sys.maxsize<<64]), ['a3']) 3537 3538 def test_delslice(self): 3539 e = self._make_elem_with_children(4) 3540 del e[0:2] 3541 self.assertEqual(self._subelem_tags(e), ['a2', 'a3']) 3542 3543 e = self._make_elem_with_children(4) 3544 
del e[0:] 3545 self.assertEqual(self._subelem_tags(e), []) 3546 3547 e = self._make_elem_with_children(4) 3548 del e[::-1] 3549 self.assertEqual(self._subelem_tags(e), []) 3550 3551 e = self._make_elem_with_children(4) 3552 del e[::-2] 3553 self.assertEqual(self._subelem_tags(e), ['a0', 'a2']) 3554 3555 e = self._make_elem_with_children(4) 3556 del e[1::2] 3557 self.assertEqual(self._subelem_tags(e), ['a0', 'a2']) 3558 3559 e = self._make_elem_with_children(2) 3560 del e[::2] 3561 self.assertEqual(self._subelem_tags(e), ['a1']) 3562 3563 def test_setslice_single_index(self): 3564 e = self._make_elem_with_children(4) 3565 e[1] = ET.Element('b') 3566 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 3567 3568 e[-2] = ET.Element('c') 3569 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3']) 3570 3571 with self.assertRaises(IndexError): 3572 e[5] = ET.Element('d') 3573 with self.assertRaises(IndexError): 3574 e[-5] = ET.Element('d') 3575 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3']) 3576 3577 def test_setslice_range(self): 3578 e = self._make_elem_with_children(4) 3579 e[1:3] = [ET.Element('b%s' % i) for i in range(2)] 3580 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'a3']) 3581 3582 e = self._make_elem_with_children(4) 3583 e[1:3] = [ET.Element('b')] 3584 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a3']) 3585 3586 e = self._make_elem_with_children(4) 3587 e[1:3] = [ET.Element('b%s' % i) for i in range(3)] 3588 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'b2', 'a3']) 3589 3590 def test_setslice_steps(self): 3591 e = self._make_elem_with_children(6) 3592 e[1:5:2] = [ET.Element('b%s' % i) for i in range(2)] 3593 self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'a2', 'b1', 'a4', 'a5']) 3594 3595 e = self._make_elem_with_children(6) 3596 with self.assertRaises(ValueError): 3597 e[1:5:2] = [ET.Element('b')] 3598 with self.assertRaises(ValueError): 3599 e[1:5:2] = [ET.Element('b%s' % i) 
for i in range(3)] 3600 with self.assertRaises(ValueError): 3601 e[1:5:2] = [] 3602 self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3', 'a4', 'a5']) 3603 3604 e = self._make_elem_with_children(4) 3605 e[1::sys.maxsize] = [ET.Element('b')] 3606 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 3607 e[1::sys.maxsize<<64] = [ET.Element('c')] 3608 self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3']) 3609 3610 def test_setslice_negative_steps(self): 3611 e = self._make_elem_with_children(4) 3612 e[2:0:-1] = [ET.Element('b%s' % i) for i in range(2)] 3613 self.assertEqual(self._subelem_tags(e), ['a0', 'b1', 'b0', 'a3']) 3614 3615 e = self._make_elem_with_children(4) 3616 with self.assertRaises(ValueError): 3617 e[2:0:-1] = [ET.Element('b')] 3618 with self.assertRaises(ValueError): 3619 e[2:0:-1] = [ET.Element('b%s' % i) for i in range(3)] 3620 with self.assertRaises(ValueError): 3621 e[2:0:-1] = [] 3622 self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3']) 3623 3624 e = self._make_elem_with_children(4) 3625 e[1::-sys.maxsize] = [ET.Element('b')] 3626 self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3']) 3627 e[1::-sys.maxsize-1] = [ET.Element('c')] 3628 self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3']) 3629 e[1::-sys.maxsize<<64] = [ET.Element('d')] 3630 self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3']) 3631 3632 3633class IOTest(unittest.TestCase): 3634 def test_encoding(self): 3635 # Test encoding issues. 
3636 elem = ET.Element("tag") 3637 elem.text = "abc" 3638 self.assertEqual(serialize(elem), '<tag>abc</tag>') 3639 for enc in ("utf-8", "us-ascii"): 3640 with self.subTest(enc): 3641 self.assertEqual(serialize(elem, encoding=enc), 3642 b'<tag>abc</tag>') 3643 self.assertEqual(serialize(elem, encoding=enc.upper()), 3644 b'<tag>abc</tag>') 3645 for enc in ("iso-8859-1", "utf-16", "utf-32"): 3646 with self.subTest(enc): 3647 self.assertEqual(serialize(elem, encoding=enc), 3648 ("<?xml version='1.0' encoding='%s'?>\n" 3649 "<tag>abc</tag>" % enc).encode(enc)) 3650 upper = enc.upper() 3651 self.assertEqual(serialize(elem, encoding=upper), 3652 ("<?xml version='1.0' encoding='%s'?>\n" 3653 "<tag>abc</tag>" % upper).encode(enc)) 3654 3655 elem = ET.Element("tag") 3656 elem.text = "<&\"\'>" 3657 self.assertEqual(serialize(elem), '<tag><&"\'></tag>') 3658 self.assertEqual(serialize(elem, encoding="utf-8"), 3659 b'<tag><&"\'></tag>') 3660 self.assertEqual(serialize(elem, encoding="us-ascii"), 3661 b'<tag><&"\'></tag>') 3662 for enc in ("iso-8859-1", "utf-16", "utf-32"): 3663 self.assertEqual(serialize(elem, encoding=enc), 3664 ("<?xml version='1.0' encoding='%s'?>\n" 3665 "<tag><&\"'></tag>" % enc).encode(enc)) 3666 3667 elem = ET.Element("tag") 3668 elem.attrib["key"] = "<&\"\'>" 3669 self.assertEqual(serialize(elem), '<tag key="<&"\'>" />') 3670 self.assertEqual(serialize(elem, encoding="utf-8"), 3671 b'<tag key="<&"\'>" />') 3672 self.assertEqual(serialize(elem, encoding="us-ascii"), 3673 b'<tag key="<&"\'>" />') 3674 for enc in ("iso-8859-1", "utf-16", "utf-32"): 3675 self.assertEqual(serialize(elem, encoding=enc), 3676 ("<?xml version='1.0' encoding='%s'?>\n" 3677 "<tag key=\"<&"'>\" />" % enc).encode(enc)) 3678 3679 elem = ET.Element("tag") 3680 elem.text = '\xe5\xf6\xf6<>' 3681 self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6<></tag>') 3682 self.assertEqual(serialize(elem, encoding="utf-8"), 3683 b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6<></tag>') 3684 
self.assertEqual(serialize(elem, encoding="us-ascii"), 3685 b'<tag>åöö<></tag>') 3686 for enc in ("iso-8859-1", "utf-16", "utf-32"): 3687 self.assertEqual(serialize(elem, encoding=enc), 3688 ("<?xml version='1.0' encoding='%s'?>\n" 3689 "<tag>åöö<></tag>" % enc).encode(enc)) 3690 3691 elem = ET.Element("tag") 3692 elem.attrib["key"] = '\xe5\xf6\xf6<>' 3693 self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6<>" />') 3694 self.assertEqual(serialize(elem, encoding="utf-8"), 3695 b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6<>" />') 3696 self.assertEqual(serialize(elem, encoding="us-ascii"), 3697 b'<tag key="åöö<>" />') 3698 for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"): 3699 self.assertEqual(serialize(elem, encoding=enc), 3700 ("<?xml version='1.0' encoding='%s'?>\n" 3701 "<tag key=\"åöö<>\" />" % enc).encode(enc)) 3702 3703 def test_write_to_filename(self): 3704 self.addCleanup(os_helper.unlink, TESTFN) 3705 tree = ET.ElementTree(ET.XML('''<site />''')) 3706 tree.write(TESTFN) 3707 with open(TESTFN, 'rb') as f: 3708 self.assertEqual(f.read(), b'''<site />''') 3709 3710 def test_write_to_text_file(self): 3711 self.addCleanup(os_helper.unlink, TESTFN) 3712 tree = ET.ElementTree(ET.XML('''<site />''')) 3713 with open(TESTFN, 'w', encoding='utf-8') as f: 3714 tree.write(f, encoding='unicode') 3715 self.assertFalse(f.closed) 3716 with open(TESTFN, 'rb') as f: 3717 self.assertEqual(f.read(), b'''<site />''') 3718 3719 def test_write_to_binary_file(self): 3720 self.addCleanup(os_helper.unlink, TESTFN) 3721 tree = ET.ElementTree(ET.XML('''<site />''')) 3722 with open(TESTFN, 'wb') as f: 3723 tree.write(f) 3724 self.assertFalse(f.closed) 3725 with open(TESTFN, 'rb') as f: 3726 self.assertEqual(f.read(), b'''<site />''') 3727 3728 def test_write_to_binary_file_with_bom(self): 3729 self.addCleanup(os_helper.unlink, TESTFN) 3730 tree = ET.ElementTree(ET.XML('''<site />''')) 3731 # test BOM writing to buffered file 3732 with open(TESTFN, 'wb') as f: 3733 
tree.write(f, encoding='utf-16') 3734 self.assertFalse(f.closed) 3735 with open(TESTFN, 'rb') as f: 3736 self.assertEqual(f.read(), 3737 '''<?xml version='1.0' encoding='utf-16'?>\n''' 3738 '''<site />'''.encode("utf-16")) 3739 # test BOM writing to non-buffered file 3740 with open(TESTFN, 'wb', buffering=0) as f: 3741 tree.write(f, encoding='utf-16') 3742 self.assertFalse(f.closed) 3743 with open(TESTFN, 'rb') as f: 3744 self.assertEqual(f.read(), 3745 '''<?xml version='1.0' encoding='utf-16'?>\n''' 3746 '''<site />'''.encode("utf-16")) 3747 3748 def test_read_from_stringio(self): 3749 tree = ET.ElementTree() 3750 stream = io.StringIO('''<?xml version="1.0"?><site></site>''') 3751 tree.parse(stream) 3752 self.assertEqual(tree.getroot().tag, 'site') 3753 3754 def test_write_to_stringio(self): 3755 tree = ET.ElementTree(ET.XML('''<site />''')) 3756 stream = io.StringIO() 3757 tree.write(stream, encoding='unicode') 3758 self.assertEqual(stream.getvalue(), '''<site />''') 3759 3760 def test_read_from_bytesio(self): 3761 tree = ET.ElementTree() 3762 raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''') 3763 tree.parse(raw) 3764 self.assertEqual(tree.getroot().tag, 'site') 3765 3766 def test_write_to_bytesio(self): 3767 tree = ET.ElementTree(ET.XML('''<site />''')) 3768 raw = io.BytesIO() 3769 tree.write(raw) 3770 self.assertEqual(raw.getvalue(), b'''<site />''') 3771 3772 class dummy: 3773 pass 3774 3775 def test_read_from_user_text_reader(self): 3776 stream = io.StringIO('''<?xml version="1.0"?><site></site>''') 3777 reader = self.dummy() 3778 reader.read = stream.read 3779 tree = ET.ElementTree() 3780 tree.parse(reader) 3781 self.assertEqual(tree.getroot().tag, 'site') 3782 3783 def test_write_to_user_text_writer(self): 3784 tree = ET.ElementTree(ET.XML('''<site />''')) 3785 stream = io.StringIO() 3786 writer = self.dummy() 3787 writer.write = stream.write 3788 tree.write(writer, encoding='unicode') 3789 self.assertEqual(stream.getvalue(), '''<site />''') 3790 
3791 def test_read_from_user_binary_reader(self): 3792 raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''') 3793 reader = self.dummy() 3794 reader.read = raw.read 3795 tree = ET.ElementTree() 3796 tree.parse(reader) 3797 self.assertEqual(tree.getroot().tag, 'site') 3798 tree = ET.ElementTree() 3799 3800 def test_write_to_user_binary_writer(self): 3801 tree = ET.ElementTree(ET.XML('''<site />''')) 3802 raw = io.BytesIO() 3803 writer = self.dummy() 3804 writer.write = raw.write 3805 tree.write(writer) 3806 self.assertEqual(raw.getvalue(), b'''<site />''') 3807 3808 def test_write_to_user_binary_writer_with_bom(self): 3809 tree = ET.ElementTree(ET.XML('''<site />''')) 3810 raw = io.BytesIO() 3811 writer = self.dummy() 3812 writer.write = raw.write 3813 writer.seekable = lambda: True 3814 writer.tell = raw.tell 3815 tree.write(writer, encoding="utf-16") 3816 self.assertEqual(raw.getvalue(), 3817 '''<?xml version='1.0' encoding='utf-16'?>\n''' 3818 '''<site />'''.encode("utf-16")) 3819 3820 def test_tostringlist_invariant(self): 3821 root = ET.fromstring('<tag>foo</tag>') 3822 self.assertEqual( 3823 ET.tostring(root, 'unicode'), 3824 ''.join(ET.tostringlist(root, 'unicode'))) 3825 self.assertEqual( 3826 ET.tostring(root, 'utf-16'), 3827 b''.join(ET.tostringlist(root, 'utf-16'))) 3828 3829 def test_short_empty_elements(self): 3830 root = ET.fromstring('<tag>a<x />b<y></y>c</tag>') 3831 self.assertEqual( 3832 ET.tostring(root, 'unicode'), 3833 '<tag>a<x />b<y />c</tag>') 3834 self.assertEqual( 3835 ET.tostring(root, 'unicode', short_empty_elements=True), 3836 '<tag>a<x />b<y />c</tag>') 3837 self.assertEqual( 3838 ET.tostring(root, 'unicode', short_empty_elements=False), 3839 '<tag>a<x></x>b<y></y>c</tag>') 3840 3841 3842class ParseErrorTest(unittest.TestCase): 3843 def test_subclass(self): 3844 self.assertIsInstance(ET.ParseError(), SyntaxError) 3845 3846 def _get_error(self, s): 3847 try: 3848 ET.fromstring(s) 3849 except ET.ParseError as e: 3850 return e 3851 
3852 def test_error_position(self): 3853 self.assertEqual(self._get_error('foo').position, (1, 0)) 3854 self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5)) 3855 self.assertEqual(self._get_error('foobar<').position, (1, 6)) 3856 3857 def test_error_code(self): 3858 import xml.parsers.expat.errors as ERRORS 3859 self.assertEqual(self._get_error('foo').code, 3860 ERRORS.codes[ERRORS.XML_ERROR_SYNTAX]) 3861 3862 3863class KeywordArgsTest(unittest.TestCase): 3864 # Test various issues with keyword arguments passed to ET.Element 3865 # constructor and methods 3866 def test_issue14818(self): 3867 x = ET.XML("<a>foo</a>") 3868 self.assertEqual(x.find('a', None), 3869 x.find(path='a', namespaces=None)) 3870 self.assertEqual(x.findtext('a', None, None), 3871 x.findtext(path='a', default=None, namespaces=None)) 3872 self.assertEqual(x.findall('a', None), 3873 x.findall(path='a', namespaces=None)) 3874 self.assertEqual(list(x.iterfind('a', None)), 3875 list(x.iterfind(path='a', namespaces=None))) 3876 3877 self.assertEqual(ET.Element('a').attrib, {}) 3878 elements = [ 3879 ET.Element('a', dict(href="#", id="foo")), 3880 ET.Element('a', attrib=dict(href="#", id="foo")), 3881 ET.Element('a', dict(href="#"), id="foo"), 3882 ET.Element('a', href="#", id="foo"), 3883 ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"), 3884 ] 3885 for e in elements: 3886 self.assertEqual(e.tag, 'a') 3887 self.assertEqual(e.attrib, dict(href="#", id="foo")) 3888 3889 e2 = ET.SubElement(elements[0], 'foobar', attrib={'key1': 'value1'}) 3890 self.assertEqual(e2.attrib['key1'], 'value1') 3891 3892 with self.assertRaisesRegex(TypeError, 'must be dict, not str'): 3893 ET.Element('a', "I'm not a dict") 3894 with self.assertRaisesRegex(TypeError, 'must be dict, not str'): 3895 ET.Element('a', attrib="I'm not a dict") 3896 3897# -------------------------------------------------------------------- 3898 3899class NoAcceleratorTest(unittest.TestCase): 3900 def setUp(self): 3901 if 
not pyET: 3902 raise unittest.SkipTest('only for the Python version') 3903 3904 # Test that the C accelerator was not imported for pyET 3905 def test_correct_import_pyET(self): 3906 # The type of methods defined in Python code is types.FunctionType, 3907 # while the type of methods defined inside _elementtree is 3908 # <class 'wrapper_descriptor'> 3909 self.assertIsInstance(pyET.Element.__init__, types.FunctionType) 3910 self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType) 3911 3912 3913# -------------------------------------------------------------------- 3914 3915def c14n_roundtrip(xml, **options): 3916 return pyET.canonicalize(xml, **options) 3917 3918 3919class C14NTest(unittest.TestCase): 3920 maxDiff = None 3921 3922 # 3923 # simple roundtrip tests (from c14n.py) 3924 3925 def test_simple_roundtrip(self): 3926 # Basics 3927 self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>') 3928 self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"), # FIXME 3929 '<doc xmlns="uri"></doc>') 3930 self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"), 3931 '<prefix:doc xmlns:prefix="uri"></prefix:doc>') 3932 self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"), 3933 '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>') 3934 self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"), 3935 '<elem></elem>') 3936 3937 # C14N spec 3938 self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"), 3939 '<doc>Hello, world!</doc>') 3940 self.assertEqual(c14n_roundtrip("<value>2</value>"), 3941 '<value>2</value>') 3942 self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'), 3943 '<compute>value>"0" && value<"10" ?"valid":"error"</compute>') 3944 self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" && value<"10" 
?"valid":"error"'>valid</compute>'''), 3945 '<compute expr="value>"0" && value<"10" ?"valid":"error"">valid</compute>') 3946 self.assertEqual(c14n_roundtrip("<norm attr=' '   
	 ' '/>"), 3947 '<norm attr=" \' 
	 \' "></norm>') 3948 self.assertEqual(c14n_roundtrip("<normNames attr=' A   
	 B '/>"), 3949 '<normNames attr=" A 
	 B "></normNames>') 3950 self.assertEqual(c14n_roundtrip("<normId id=' '   
	 ' '/>"), 3951 '<normId id=" \' 
	 \' "></normId>') 3952 3953 # fragments from PJ's tests 3954 #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"), 3955 #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>') 3956 3957 # Namespace issues 3958 xml = '<X xmlns="http://nps/a"><Y targets="abc,xyz"></Y></X>' 3959 self.assertEqual(c14n_roundtrip(xml), xml) 3960 xml = '<X xmlns="http://nps/a"><Y xmlns="http://nsp/b" targets="abc,xyz"></Y></X>' 3961 self.assertEqual(c14n_roundtrip(xml), xml) 3962 xml = '<X xmlns="http://nps/a"><Y xmlns:b="http://nsp/b" b:targets="abc,xyz"></Y></X>' 3963 self.assertEqual(c14n_roundtrip(xml), xml) 3964 3965 def test_c14n_exclusion(self): 3966 xml = textwrap.dedent("""\ 3967 <root xmlns:x="http://example.com/x"> 3968 <a x:attr="attrx"> 3969 <b>abtext</b> 3970 </a> 3971 <b>btext</b> 3972 <c> 3973 <x:d>dtext</x:d> 3974 </c> 3975 </root> 3976 """) 3977 self.assertEqual( 3978 c14n_roundtrip(xml, strip_text=True), 3979 '<root>' 3980 '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>' 3981 '<b>btext</b>' 3982 '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>' 3983 '</root>') 3984 self.assertEqual( 3985 c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']), 3986 '<root>' 3987 '<a><b>abtext</b></a>' 3988 '<b>btext</b>' 3989 '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>' 3990 '</root>') 3991 self.assertEqual( 3992 c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']), 3993 '<root>' 3994 '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>' 3995 '<b>btext</b>' 3996 '<c></c>' 3997 '</root>') 3998 self.assertEqual( 3999 c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'], 4000 exclude_tags=['{http://example.com/x}d']), 4001 '<root>' 4002 
'<a><b>abtext</b></a>' 4003 '<b>btext</b>' 4004 '<c></c>' 4005 '</root>') 4006 self.assertEqual( 4007 c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']), 4008 '<root>' 4009 '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>' 4010 '</root>') 4011 self.assertEqual( 4012 c14n_roundtrip(xml, exclude_tags=['a', 'b']), 4013 '<root>\n' 4014 ' \n' 4015 ' \n' 4016 ' <c>\n' 4017 ' <x:d xmlns:x="http://example.com/x">dtext</x:d>\n' 4018 ' </c>\n' 4019 '</root>') 4020 self.assertEqual( 4021 c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']), 4022 '<root>' 4023 '<a xmlns:x="http://example.com/x" x:attr="attrx"></a>' 4024 '<c></c>' 4025 '</root>') 4026 self.assertEqual( 4027 c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']), 4028 '<root>\n' 4029 ' <a xmlns:x="http://example.com/x" x:attr="attrx">\n' 4030 ' \n' 4031 ' </a>\n' 4032 ' \n' 4033 ' <c>\n' 4034 ' \n' 4035 ' </c>\n' 4036 '</root>') 4037 4038 # 4039 # basic method=c14n tests from the c14n 2.0 specification. uses 4040 # test files under xmltestdata/c14n-20. 4041 4042 # note that this uses generated C14N versions of the standard ET.write 4043 # output, not roundtripped C14N (see above). 
4044 4045 def test_xml_c14n2(self): 4046 datadir = findfile("c14n-20", subdir="xmltestdata") 4047 full_path = partial(os.path.join, datadir) 4048 4049 files = [filename[:-4] for filename in sorted(os.listdir(datadir)) 4050 if filename.endswith('.xml')] 4051 input_files = [ 4052 filename for filename in files 4053 if filename.startswith('in') 4054 ] 4055 configs = { 4056 filename: { 4057 # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite> 4058 option.tag.split('}')[-1]: ((option.text or '').strip(), option) 4059 for option in ET.parse(full_path(filename) + ".xml").getroot() 4060 } 4061 for filename in files 4062 if filename.startswith('c14n') 4063 } 4064 4065 tests = { 4066 input_file: [ 4067 (filename, configs[filename.rsplit('_', 1)[-1]]) 4068 for filename in files 4069 if filename.startswith(f'out_{input_file}_') 4070 and filename.rsplit('_', 1)[-1] in configs 4071 ] 4072 for input_file in input_files 4073 } 4074 4075 # Make sure we found all test cases. 4076 self.assertEqual(30, len([ 4077 output_file for output_files in tests.values() 4078 for output_file in output_files])) 4079 4080 def get_option(config, option_name, default=None): 4081 return config.get(option_name, (default, ()))[0] 4082 4083 for input_file, output_files in tests.items(): 4084 for output_file, config in output_files: 4085 keep_comments = get_option( 4086 config, 'IgnoreComments') == 'true' # no, it's right :) 4087 strip_text = get_option( 4088 config, 'TrimTextNodes') == 'true' 4089 rewrite_prefixes = get_option( 4090 config, 'PrefixRewrite') == 'sequential' 4091 if 'QNameAware' in config: 4092 qattrs = [ 4093 f"{{{el.get('NS')}}}{el.get('Name')}" 4094 for el in config['QNameAware'][1].findall( 4095 '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr') 4096 ] 4097 qtags = [ 4098 f"{{{el.get('NS')}}}{el.get('Name')}" 4099 for el in config['QNameAware'][1].findall( 4100 '{http://www.w3.org/2010/xml-c14n2}Element') 4101 ] 4102 else: 4103 qtags = qattrs = None 4104 4105 # Build subtest 
description from config. 4106 config_descr = ','.join( 4107 f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}" 4108 for name, (value, children) in sorted(config.items()) 4109 ) 4110 4111 with self.subTest(f"{output_file}({config_descr})"): 4112 if input_file == 'inNsRedecl' and not rewrite_prefixes: 4113 self.skipTest( 4114 f"Redeclared namespace handling is not supported in {output_file}") 4115 if input_file == 'inNsSuperfluous' and not rewrite_prefixes: 4116 self.skipTest( 4117 f"Redeclared namespace handling is not supported in {output_file}") 4118 if 'QNameAware' in config and config['QNameAware'][1].find( 4119 '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None: 4120 self.skipTest( 4121 f"QName rewriting in XPath text is not supported in {output_file}") 4122 4123 f = full_path(input_file + ".xml") 4124 if input_file == 'inC14N5': 4125 # Hack: avoid setting up external entity resolution in the parser. 4126 with open(full_path('world.txt'), 'rb') as entity_file: 4127 with open(f, 'rb') as f: 4128 f = io.BytesIO(f.read().replace(b'&ent2;', entity_file.read())) 4129 4130 text = ET.canonicalize( 4131 from_file=f, 4132 with_comments=keep_comments, 4133 strip_text=strip_text, 4134 rewrite_prefixes=rewrite_prefixes, 4135 qname_aware_tags=qtags, qname_aware_attrs=qattrs) 4136 4137 with open(full_path(output_file + ".xml"), 'r', encoding='utf8') as f: 4138 expected = f.read() 4139 if input_file == 'inC14N3': 4140 # FIXME: cET resolves default attributes but ET does not! 4141 expected = expected.replace(' attr="default"', '') 4142 text = text.replace(' attr="default"', '') 4143 self.assertEqual(expected, text) 4144 4145# -------------------------------------------------------------------- 4146 4147 4148def test_main(module=None): 4149 # When invoked without a module, runs the Python ET tests by loading pyET. 4150 # Otherwise, uses the given module as the ET. 
4151 global pyET 4152 pyET = import_fresh_module('xml.etree.ElementTree', 4153 blocked=['_elementtree']) 4154 if module is None: 4155 module = pyET 4156 4157 global ET 4158 ET = module 4159 4160 test_classes = [ 4161 ModuleTest, 4162 ElementSlicingTest, 4163 BasicElementTest, 4164 BadElementTest, 4165 BadElementPathTest, 4166 ElementTreeTest, 4167 IOTest, 4168 ParseErrorTest, 4169 XIncludeTest, 4170 ElementTreeTypeTest, 4171 ElementFindTest, 4172 ElementIterTest, 4173 TreeBuilderTest, 4174 XMLParserTest, 4175 XMLPullParserTest, 4176 BugsTest, 4177 KeywordArgsTest, 4178 C14NTest, 4179 ] 4180 4181 # These tests will only run for the pure-Python version that doesn't import 4182 # _elementtree. We can't use skipUnless here, because pyET is filled in only 4183 # after the module is loaded. 4184 if pyET is not ET: 4185 test_classes.extend([ 4186 NoAcceleratorTest, 4187 ]) 4188 4189 # Provide default namespace mapping and path cache. 4190 from xml.etree import ElementPath 4191 nsmap = ET.register_namespace._namespace_map 4192 # Copy the default namespace mapping 4193 nsmap_copy = nsmap.copy() 4194 # Copy the path cache (should be empty) 4195 path_cache = ElementPath._cache 4196 ElementPath._cache = path_cache.copy() 4197 # Align the Comment/PI factories. 4198 if hasattr(ET, '_set_factories'): 4199 old_factories = ET._set_factories(ET.Comment, ET.PI) 4200 else: 4201 old_factories = None 4202 4203 try: 4204 support.run_unittest(*test_classes) 4205 finally: 4206 from xml.etree import ElementPath 4207 # Restore mapping and path cache 4208 nsmap.clear() 4209 nsmap.update(nsmap_copy) 4210 ElementPath._cache = path_cache 4211 if old_factories is not None: 4212 ET._set_factories(*old_factories) 4213 # don't interfere with subsequent tests 4214 ET = pyET = None 4215 4216 4217if __name__ == '__main__': 4218 test_main() 4219