1# IMPORTANT: the same tests are run from "test_xml_etree_c" in order 2# to ensure consistency between the C implementation and the Python 3# implementation. 4# 5# For this purpose, the module-level "ET" symbol is temporarily 6# monkey-patched when running the "test_xml_etree_c" test suite. 7 8import copy 9import functools 10import html 11import io 12import itertools 13import operator 14import os 15import pickle 16import pyexpat 17import sys 18import textwrap 19import types 20import unittest 21import warnings 22import weakref 23 24from functools import partial 25from itertools import product, islice 26from test import support 27from test.support import os_helper 28from test.support import warnings_helper 29from test.support import findfile, gc_collect, swap_attr, swap_item 30from test.support.import_helper import import_fresh_module 31from test.support.os_helper import TESTFN 32 33 34# pyET is the pure-Python implementation. 35# 36# ET is pyET in test_xml_etree and is the C accelerated version in 37# test_xml_etree_c. 
38pyET = None 39ET = None 40 41SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata") 42try: 43 SIMPLE_XMLFILE.encode("utf-8") 44except UnicodeEncodeError: 45 raise unittest.SkipTest("filename is not encodable to utf8") 46SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata") 47UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata") 48 49SAMPLE_XML = """\ 50<body> 51 <tag class='a'>text</tag> 52 <tag class='b' /> 53 <section> 54 <tag class='b' id='inner'>subtext</tag> 55 </section> 56</body> 57""" 58 59SAMPLE_SECTION = """\ 60<section> 61 <tag class='b' id='inner'>subtext</tag> 62 <nexttag /> 63 <nextsection> 64 <tag /> 65 </nextsection> 66</section> 67""" 68 69SAMPLE_XML_NS = """ 70<body xmlns="http://effbot.org/ns"> 71 <tag>text</tag> 72 <tag /> 73 <section> 74 <tag>subtext</tag> 75 </section> 76</body> 77""" 78 79SAMPLE_XML_NS_ELEMS = """ 80<root> 81<h:table xmlns:h="hello"> 82 <h:tr> 83 <h:td>Apples</h:td> 84 <h:td>Bananas</h:td> 85 </h:tr> 86</h:table> 87 88<f:table xmlns:f="foo"> 89 <f:name>African Coffee Table</f:name> 90 <f:width>80</f:width> 91 <f:length>120</f:length> 92</f:table> 93</root> 94""" 95 96ENTITY_XML = """\ 97<!DOCTYPE points [ 98<!ENTITY % user-entities SYSTEM 'user-entities.xml'> 99%user-entities; 100]> 101<document>&entity;</document> 102""" 103 104EXTERNAL_ENTITY_XML = """\ 105<!DOCTYPE points [ 106<!ENTITY entity SYSTEM "file:///non-existing-file.xml"> 107]> 108<document>&entity;</document> 109""" 110 111ATTLIST_XML = """\ 112<?xml version="1.0" encoding="UTF-8"?> 113<!DOCTYPE Foo [ 114<!ELEMENT foo (bar*)> 115<!ELEMENT bar (#PCDATA)*> 116<!ATTLIST bar xml:lang CDATA "eng"> 117<!ENTITY qux "quux"> 118]> 119<foo> 120<bar>&qux;</bar> 121</foo> 122""" 123 124def checkwarnings(*filters, quiet=False): 125 def decorator(test): 126 def newtest(*args, **kwargs): 127 with warnings_helper.check_warnings(*filters, quiet=quiet): 128 test(*args, **kwargs) 129 functools.update_wrapper(newtest, test) 130 return 
def serialize(elem, to_string=True, encoding='unicode', **options):
    """Write *elem* into an in-memory buffer via ``ET.ElementTree.write``.

    A text buffer is used when *encoding* is ``'unicode'``, a bytes buffer
    otherwise.  Extra keyword *options* are forwarded to ``write()``.

    Returns the buffer contents when *to_string* is true; otherwise returns
    the buffer itself, rewound to its start.
    """
    buffer = io.StringIO() if encoding == 'unicode' else io.BytesIO()
    ET.ElementTree(elem).write(buffer, encoding=encoding, **options)
    if not to_string:
        # Hand back a readable stream positioned at the first character.
        buffer.seek(0)
        return buffer
    return buffer.getvalue()
def test_interface(self):
    """Test the element tree interface.

    Checks the standard attributes (``tag``, ``attrib``, ``text``,
    ``tail``) and their types, that the standard element methods exist and
    are callable, that ``iterfind()`` returns an iterator, and that the
    ``XML`` / ``PI`` aliases are provided.
    """

    def check_element(element):
        # Validate *element* and, recursively, each of its children.
        self.assertTrue(ET.iselement(element), msg="not an element")
        direlem = dir(element)
        for attr in 'tag', 'attrib', 'text', 'tail':
            self.assertTrue(hasattr(element, attr),
                            msg='no %s member' % attr)
            self.assertIn(attr, direlem,
                          msg='no %s visible by dir' % attr)

        self.assertIsInstance(element.tag, str)
        self.assertIsInstance(element.attrib, dict)
        if element.text is not None:
            self.assertIsInstance(element.text, str)
        if element.tail is not None:
            self.assertIsInstance(element.tail, str)
        for elem in element:
            check_element(elem)

    element = ET.Element("tag")
    check_element(element)
    tree = ET.ElementTree(element)
    check_element(tree.getroot())
    element = ET.Element("t\xe4g", key="value")
    tree = ET.ElementTree(element)
    self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$")
    element = ET.Element("tag", key="value")

    # Make sure all standard element methods exist.

    def check_method(method):
        # callable() is the idiomatic spelling of the __call__ probe; the
        # 1-tuple keeps the message formatting safe for any argument.
        self.assertTrue(callable(method),
                        msg="%s not callable" % (method,))

    check_method(element.append)
    check_method(element.extend)
    check_method(element.insert)
    check_method(element.remove)
    check_method(element.find)
    check_method(element.iterfind)
    check_method(element.findall)
    check_method(element.findtext)
    check_method(element.clear)
    check_method(element.get)
    check_method(element.set)
    check_method(element.keys)
    check_method(element.items)
    check_method(element.iter)
    check_method(element.itertext)

    # These methods return an iterable. See bug 6472.

    def check_iter(it):
        check_method(it.__next__)

    check_iter(element.iterfind("tag"))
    check_iter(element.iterfind("*"))
    check_iter(tree.iterfind("tag"))
    check_iter(tree.iterfind("*"))

    # These aliases are provided:

    self.assertEqual(ET.XML, ET.fromstring)
    self.assertEqual(ET.PI, ET.ProcessingInstruction)
300 301 elem = ET.XML("<body><tag/></body>") 302 self.serialize_check(elem, '<body><tag /></body>') 303 e = ET.Element("tag2") 304 elem.append(e) 305 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 306 elem.remove(e) 307 self.serialize_check(elem, '<body><tag /></body>') 308 elem.insert(0, e) 309 self.serialize_check(elem, '<body><tag2 /><tag /></body>') 310 elem.remove(e) 311 elem.extend([e]) 312 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 313 elem.remove(e) 314 elem.extend(iter([e])) 315 self.serialize_check(elem, '<body><tag /><tag2 /></body>') 316 elem.remove(e) 317 318 element = ET.Element("tag", key="value") 319 self.serialize_check(element, '<tag key="value" />') # 1 320 subelement = ET.Element("subtag") 321 element.append(subelement) 322 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2 323 element.insert(0, subelement) 324 self.serialize_check(element, 325 '<tag key="value"><subtag /><subtag /></tag>') # 3 326 element.remove(subelement) 327 self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4 328 element.remove(subelement) 329 self.serialize_check(element, '<tag key="value" />') # 5 330 with self.assertRaises(ValueError) as cm: 331 element.remove(subelement) 332 self.assertEqual(str(cm.exception), 'list.remove(x): x not in list') 333 self.serialize_check(element, '<tag key="value" />') # 6 334 element[0:0] = [subelement, subelement, subelement] 335 self.serialize_check(element[1], '<subtag />') 336 self.assertEqual(element[1:9], [element[1], element[2]]) 337 self.assertEqual(element[:9:2], [element[0], element[2]]) 338 del element[1:2] 339 self.serialize_check(element, 340 '<tag key="value"><subtag /><subtag /></tag>') 341 342 def test_cdata(self): 343 # Test CDATA handling (etc). 
def test_attrib(self):
    """Test attribute handling.

    Covers ``get``/``set``, the different ways of passing attributes to the
    ``Element`` constructor (checking that the caller's dict is not
    aliased), serialization order, and the escaping of whitespace
    characters in attribute values.
    """

    elem = ET.Element("tag")
    # A missing attribute yields None when no default is given.
    self.assertIsNone(elem.get("key"))  # 1.1
    self.assertEqual(elem.get("key", "default"), 'default')  # 1.2

    elem.set("key", "value")
    self.assertEqual(elem.get("key"), 'value')  # 1.3

    elem = ET.Element("tag", key="value")
    self.assertEqual(elem.get("key"), 'value')  # 2.1
    self.assertEqual(elem.attrib, {'key': 'value'})  # 2.2

    attrib = {"key": "value"}
    elem = ET.Element("tag", attrib)
    attrib.clear()  # check for aliasing issues
    self.assertEqual(elem.get("key"), 'value')  # 3.1
    self.assertEqual(elem.attrib, {'key': 'value'})  # 3.2

    attrib = {"key": "value"}
    elem = ET.Element("tag", **attrib)
    attrib.clear()  # check for aliasing issues
    self.assertEqual(elem.get("key"), 'value')  # 4.1
    self.assertEqual(elem.attrib, {'key': 'value'})  # 4.2

    elem = ET.Element("tag", {"key": "other"}, key="value")
    self.assertEqual(elem.get("key"), 'value')  # 5.1
    self.assertEqual(elem.attrib, {'key': 'value'})  # 5.2

    elem = ET.Element('test')
    elem.text = "aa"
    elem.set('testa', 'testval')
    elem.set('testb', 'test2')
    self.assertEqual(ET.tostring(elem),
                     b'<test testa="testval" testb="test2">aa</test>')
    self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
    self.assertEqual(sorted(elem.items()),
                     [('testa', 'testval'), ('testb', 'test2')])
    self.assertEqual(elem.attrib['testb'], 'test2')
    elem.attrib['testb'] = 'test1'
    elem.attrib['testc'] = 'test2'
    self.assertEqual(
        ET.tostring(elem),
        b'<test testa="testval" testb="test1" testc="test2">aa</test>')

    # Test preserving white space chars in attributes.  The serializer
    # writes CR, LF and TAB as numeric character references so that they
    # survive attribute-value normalization when parsed back.
    elem = ET.Element('test')
    elem.set('a', '\r')
    elem.set('b', '\r\n')
    elem.set('c', '\t\n\r ')
    elem.set('d', '\n\n\r\r\t\t ')
    self.assertEqual(
        ET.tostring(elem),
        b'<test a="&#13;" b="&#13;&#10;" c="&#09;&#10;&#13; " '
        b'd="&#10;&#10;&#13;&#13;&#09;&#09; " />')
447 448 elem = ET.Element("tag") 449 attrib = {"key": "value"} 450 subelem = elem.makeelement("subtag", attrib) 451 self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing") 452 elem.append(subelem) 453 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 454 455 elem.clear() 456 self.serialize_check(elem, '<tag />') 457 elem.append(subelem) 458 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 459 elem.extend([subelem, subelem]) 460 self.serialize_check(elem, 461 '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>') 462 elem[:] = [subelem] 463 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 464 elem[:] = tuple([subelem]) 465 self.serialize_check(elem, '<tag><subtag key="value" /></tag>') 466 467 def test_parsefile(self): 468 # Test parsing from file. 469 470 tree = ET.parse(SIMPLE_XMLFILE) 471 stream = io.StringIO() 472 tree.write(stream, encoding='unicode') 473 self.assertEqual(stream.getvalue(), 474 '<root>\n' 475 ' <element key="value">text</element>\n' 476 ' <element>text</element>tail\n' 477 ' <empty-element />\n' 478 '</root>') 479 tree = ET.parse(SIMPLE_NS_XMLFILE) 480 stream = io.StringIO() 481 tree.write(stream, encoding='unicode') 482 self.assertEqual(stream.getvalue(), 483 '<ns0:root xmlns:ns0="namespace">\n' 484 ' <ns0:element key="value">text</ns0:element>\n' 485 ' <ns0:element>text</ns0:element>tail\n' 486 ' <ns0:empty-element />\n' 487 '</ns0:root>') 488 489 with open(SIMPLE_XMLFILE) as f: 490 data = f.read() 491 492 parser = ET.XMLParser() 493 self.assertRegex(parser.version, r'^Expat ') 494 parser.feed(data) 495 self.serialize_check(parser.close(), 496 '<root>\n' 497 ' <element key="value">text</element>\n' 498 ' <element>text</element>tail\n' 499 ' <empty-element />\n' 500 '</root>') 501 502 target = ET.TreeBuilder() 503 parser = ET.XMLParser(target=target) 504 parser.feed(data) 505 self.serialize_check(parser.close(), 506 '<root>\n' 507 ' <element 
def test_iterparse(self):
    """Test the iterparse interface.

    Exercises default and explicit event selection, namespace events,
    rejection of unknown event names, non-ASCII namespace declarations,
    parse errors raised mid-iteration, and release of the underlying file
    when the iterator is closed or dropped.
    """

    iterparse = ET.iterparse

    context = iterparse(SIMPLE_XMLFILE)
    self.assertIsNone(context.root)
    action, elem = next(context)
    self.assertIsNone(context.root)
    self.assertEqual((action, elem.tag), ('end', 'element'))
    self.assertEqual([(action, elem.tag) for action, elem in context], [
            ('end', 'element'),
            ('end', 'empty-element'),
            ('end', 'root'),
        ])
    self.assertEqual(context.root.tag, 'root')

    context = iterparse(SIMPLE_NS_XMLFILE)
    self.assertEqual([(action, elem.tag) for action, elem in context], [
            ('end', '{namespace}element'),
            ('end', '{namespace}element'),
            ('end', '{namespace}empty-element'),
            ('end', '{namespace}root'),
        ])

    with open(SIMPLE_XMLFILE, 'rb') as source:
        context = iterparse(source)
        action, elem = next(context)
        self.assertEqual((action, elem.tag), ('end', 'element'))
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('end', 'element'),
                ('end', 'empty-element'),
                ('end', 'root'),
            ])
        self.assertEqual(context.root.tag, 'root')

    events = ()
    context = iterparse(SIMPLE_XMLFILE, events)
    self.assertEqual([(action, elem.tag) for action, elem in context], [])

    events = ()
    context = iterparse(SIMPLE_XMLFILE, events=events)
    self.assertEqual([(action, elem.tag) for action, elem in context], [])

    events = ("start", "end")
    context = iterparse(SIMPLE_XMLFILE, events)
    self.assertEqual([(action, elem.tag) for action, elem in context], [
            ('start', 'root'),
            ('start', 'element'),
            ('end', 'element'),
            ('start', 'element'),
            ('end', 'element'),
            ('start', 'empty-element'),
            ('end', 'empty-element'),
            ('end', 'root'),
        ])

    events = ("start", "end", "start-ns", "end-ns")
    context = iterparse(SIMPLE_NS_XMLFILE, events)
    self.assertEqual([(action, elem.tag) if action in ("start", "end")
                                         else (action, elem)
                      for action, elem in context], [
            ('start-ns', ('', 'namespace')),
            ('start', '{namespace}root'),
            ('start', '{namespace}element'),
            ('end', '{namespace}element'),
            ('start', '{namespace}element'),
            ('end', '{namespace}element'),
            ('start', '{namespace}empty-element'),
            ('end', '{namespace}empty-element'),
            ('end', '{namespace}root'),
            ('end-ns', None),
        ])

    events = ('start-ns', 'end-ns')
    context = iterparse(io.StringIO(r"<root xmlns=''/>"), events)
    res = [action for action, elem in context]
    self.assertEqual(res, ['start-ns', 'end-ns'])

    events = ("start", "end", "bogus")
    with open(SIMPLE_XMLFILE, "rb") as f:
        with self.assertRaises(ValueError) as cm:
            iterparse(f, events)
        # A file object supplied by the caller must be left open.
        self.assertFalse(f.closed)
    self.assertEqual(str(cm.exception), "unknown event 'bogus'")

    with warnings_helper.check_no_resource_warning(self):
        with self.assertRaises(ValueError) as cm:
            iterparse(SIMPLE_XMLFILE, events)
        self.assertEqual(str(cm.exception), "unknown event 'bogus'")
        del cm

    # Bytes literals may only contain ASCII characters, so the non-ASCII
    # character in the default namespace URI is written as a numeric
    # character reference; the prefix uses a raw ISO-8859-1 byte.
    source = io.BytesIO(
        b"<?xml version='1.0' encoding='iso-8859-1'?>\n"
        b"<body xmlns='http://&#233;ffbot.org/ns'\n"
        b" xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
    events = ("start-ns",)
    context = iterparse(source, events)
    self.assertEqual([(action, elem) for action, elem in context], [
            ('start-ns', ('', 'http://\xe9ffbot.org/ns')),
            ('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
        ])

    source = io.StringIO("<document />junk")
    it = iterparse(source)
    action, elem = next(it)
    self.assertEqual((action, elem.tag), ('end', 'document'))
    with self.assertRaises(ET.ParseError) as cm:
        next(it)
    self.assertEqual(str(cm.exception),
            'junk after document element: line 1, column 12')

    self.addCleanup(os_helper.unlink, TESTFN)
    with open(TESTFN, "wb") as f:
        f.write(b"<document />junk")
    it = iterparse(TESTFN)
    action, elem = next(it)
    self.assertEqual((action, elem.tag), ('end', 'document'))
    with warnings_helper.check_no_resource_warning(self):
        with self.assertRaises(ET.ParseError) as cm:
            next(it)
        self.assertEqual(str(cm.exception),
                'junk after document element: line 1, column 12')
        del cm, it

    # Not exhausting the iterator still closes the resource (bpo-43292)
    with warnings_helper.check_no_resource_warning(self):
        it = iterparse(SIMPLE_XMLFILE)
        del it

    with warnings_helper.check_no_resource_warning(self):
        it = iterparse(SIMPLE_XMLFILE)
        it.close()
        del it

    with warnings_helper.check_no_resource_warning(self):
        it = iterparse(SIMPLE_XMLFILE)
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'element'))
        del it, elem

    with warnings_helper.check_no_resource_warning(self):
        it = iterparse(SIMPLE_XMLFILE)
        action, elem = next(it)
        it.close()
        self.assertEqual((action, elem.tag), ('end', 'element'))
        del it, elem

    with self.assertRaises(FileNotFoundError):
        iterparse("nonexistent")
def test_writefile(self):
    """Serialize a small tree, including the tag-less (``tag=None``) form."""
    root = ET.Element("tag")
    root.text = "text"
    self.serialize_check(root, '<tag>text</tag>')

    child = ET.SubElement(root, "subtag")
    child.text = "subtext"
    self.serialize_check(root, '<tag>text<subtag>subtext</subtag></tag>')

    # Test tag suppression
    root.tag = None
    self.serialize_check(root, 'text<subtag>subtext</subtag>')

    comment = ET.Comment("comment")
    root.insert(0, comment)
    expected = 'text<!--comment--><subtag>subtext</subtag>'  # assumes 1.3
    self.serialize_check(root, expected)

    # Replace the comment with a processing instruction.
    root[0] = ET.PI("key", "value")
    expected = 'text<?key value?><subtag>subtext</subtag>'
    self.serialize_check(root, expected)
755 756 with open(SIMPLE_XMLFILE) as f: 757 data = f.read() 758 class Builder(list): 759 def start(self, tag, attrib): 760 self.append(("start", tag)) 761 def end(self, tag): 762 self.append(("end", tag)) 763 def data(self, text): 764 pass 765 builder = Builder() 766 parser = ET.XMLParser(target=builder) 767 parser.feed(data) 768 self.assertEqual(builder, [ 769 ('start', 'root'), 770 ('start', 'element'), 771 ('end', 'element'), 772 ('start', 'element'), 773 ('end', 'element'), 774 ('start', 'empty-element'), 775 ('end', 'empty-element'), 776 ('end', 'root'), 777 ]) 778 779 with open(SIMPLE_NS_XMLFILE) as f: 780 data = f.read() 781 class Builder(list): 782 def start(self, tag, attrib): 783 self.append(("start", tag)) 784 def end(self, tag): 785 self.append(("end", tag)) 786 def data(self, text): 787 pass 788 def pi(self, target, data): 789 self.append(("pi", target, data)) 790 def comment(self, data): 791 self.append(("comment", data)) 792 def start_ns(self, prefix, uri): 793 self.append(("start-ns", prefix, uri)) 794 def end_ns(self, prefix): 795 self.append(("end-ns", prefix)) 796 builder = Builder() 797 parser = ET.XMLParser(target=builder) 798 parser.feed(data) 799 self.assertEqual(builder, [ 800 ('pi', 'pi', 'data'), 801 ('comment', ' comment '), 802 ('start-ns', '', 'namespace'), 803 ('start', '{namespace}root'), 804 ('start', '{namespace}element'), 805 ('end', '{namespace}element'), 806 ('start', '{namespace}element'), 807 ('end', '{namespace}element'), 808 ('start', '{namespace}empty-element'), 809 ('end', '{namespace}empty-element'), 810 ('end', '{namespace}root'), 811 ('end-ns', ''), 812 ]) 813 814 def test_custom_builder_only_end_ns(self): 815 class Builder(list): 816 def end_ns(self, prefix): 817 self.append(("end-ns", prefix)) 818 819 builder = Builder() 820 parser = ET.XMLParser(target=builder) 821 parser.feed(textwrap.dedent("""\ 822 <?pi data?> 823 <!-- comment --> 824 <root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'> 825 <a:element 
def test_writestring(self):
    """``tostring`` output is the same for both string-parsing aliases."""
    markup = "<html><body>text</body></html>"
    serialized = b'<html><body>text</body></html>'
    for parse in (ET.XML, ET.fromstring):
        self.assertEqual(ET.tostring(parse(markup)), serialized)
def test_indent_space(self):
    """``ET.indent`` uses the given *space* string for each nesting level."""
    markup = "<html><body><p>pre<br/>post</p><p>text</p></body></html>"
    cases = (
        # One tab per level.
        ('\t',
         b'<html>\n'
         b'\t<body>\n'
         b'\t\t<p>pre<br />post</p>\n'
         b'\t\t<p>text</p>\n'
         b'\t</body>\n'
         b'</html>'),
        # Empty indentation: line breaks only.
        ('',
         b'<html>\n'
         b'<body>\n'
         b'<p>pre<br />post</p>\n'
         b'<p>text</p>\n'
         b'</body>\n'
         b'</html>'),
    )
    for space, expected in cases:
        # Parse afresh each time because indent() modifies the tree in place.
        root = ET.XML(markup)
        ET.indent(root, space=space)
        self.assertEqual(ET.tostring(root), expected)
def test_tostring_default_namespace(self):
    """``tostring`` with and without the *default_namespace* option."""
    namespace = 'http://effbot.org/ns'
    root = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')

    # Without the option, a generated prefix is used.
    prefixed = ET.tostring(root, encoding='unicode')
    self.assertEqual(
        prefixed,
        '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
    )

    # With the option, the namespace is declared as the default one.
    unprefixed = ET.tostring(root, encoding='unicode',
                             default_namespace=namespace)
    self.assertEqual(
        unprefixed,
        '<body xmlns="http://effbot.org/ns"><tag /></body>'
    )
test_tostring_no_xml_declaration(self): 1030 elem = ET.XML('<body><tag/></body>') 1031 self.assertEqual( 1032 ET.tostring(elem, encoding='unicode'), 1033 '<body><tag /></body>' 1034 ) 1035 1036 def test_tostring_xml_declaration(self): 1037 elem = ET.XML('<body><tag/></body>') 1038 self.assertEqual( 1039 ET.tostring(elem, encoding='utf8', xml_declaration=True), 1040 b"<?xml version='1.0' encoding='utf8'?>\n<body><tag /></body>" 1041 ) 1042 1043 def test_tostring_xml_declaration_unicode_encoding(self): 1044 elem = ET.XML('<body><tag/></body>') 1045 self.assertEqual( 1046 ET.tostring(elem, encoding='unicode', xml_declaration=True), 1047 "<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>" 1048 ) 1049 1050 def test_tostring_xml_declaration_cases(self): 1051 elem = ET.XML('<body><tag>ø</tag></body>') 1052 TESTCASES = [ 1053 # (expected_retval, encoding, xml_declaration) 1054 # ... xml_declaration = None 1055 (b'<body><tag>ø</tag></body>', None, None), 1056 (b'<body><tag>\xc3\xb8</tag></body>', 'UTF-8', None), 1057 (b'<body><tag>ø</tag></body>', 'US-ASCII', None), 1058 (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n" 1059 b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', None), 1060 ('<body><tag>ø</tag></body>', 'unicode', None), 1061 1062 # ... xml_declaration = False 1063 (b"<body><tag>ø</tag></body>", None, False), 1064 (b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', False), 1065 (b"<body><tag>ø</tag></body>", 'US-ASCII', False), 1066 (b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', False), 1067 ("<body><tag>ø</tag></body>", 'unicode', False), 1068 1069 # ... 
xml_declaration = True 1070 (b"<?xml version='1.0' encoding='us-ascii'?>\n" 1071 b"<body><tag>ø</tag></body>", None, True), 1072 (b"<?xml version='1.0' encoding='UTF-8'?>\n" 1073 b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', True), 1074 (b"<?xml version='1.0' encoding='US-ASCII'?>\n" 1075 b"<body><tag>ø</tag></body>", 'US-ASCII', True), 1076 (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n" 1077 b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', True), 1078 ("<?xml version='1.0' encoding='utf-8'?>\n" 1079 "<body><tag>ø</tag></body>", 'unicode', True), 1080 1081 ] 1082 for expected_retval, encoding, xml_declaration in TESTCASES: 1083 with self.subTest(f'encoding={encoding} ' 1084 f'xml_declaration={xml_declaration}'): 1085 self.assertEqual( 1086 ET.tostring( 1087 elem, 1088 encoding=encoding, 1089 xml_declaration=xml_declaration 1090 ), 1091 expected_retval 1092 ) 1093 1094 def test_tostringlist_default_namespace(self): 1095 elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>') 1096 self.assertEqual( 1097 ''.join(ET.tostringlist(elem, encoding='unicode')), 1098 '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>' 1099 ) 1100 self.assertEqual( 1101 ''.join(ET.tostringlist(elem, encoding='unicode', default_namespace='http://effbot.org/ns')), 1102 '<body xmlns="http://effbot.org/ns"><tag /></body>' 1103 ) 1104 1105 def test_tostringlist_xml_declaration(self): 1106 elem = ET.XML('<body><tag/></body>') 1107 self.assertEqual( 1108 ''.join(ET.tostringlist(elem, encoding='unicode')), 1109 '<body><tag /></body>' 1110 ) 1111 self.assertEqual( 1112 b''.join(ET.tostringlist(elem, xml_declaration=True)), 1113 b"<?xml version='1.0' encoding='us-ascii'?>\n<body><tag /></body>" 1114 ) 1115 1116 stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True) 1117 self.assertEqual( 1118 ''.join(stringlist), 1119 "<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>" 1120 ) 1121 self.assertRegex(stringlist[0], r"^<\?xml version='1.0' 
encoding='.+'?>") 1122 self.assertEqual(['<body', '>', '<tag', ' />', '</body>'], stringlist[1:]) 1123 1124 def test_encoding(self): 1125 def check(encoding, body=''): 1126 xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" % 1127 (encoding, body)) 1128 self.assertEqual(ET.XML(xml.encode(encoding)).text, body) 1129 self.assertEqual(ET.XML(xml).text, body) 1130 check("ascii", 'a') 1131 check("us-ascii", 'a') 1132 check("iso-8859-1", '\xbd') 1133 check("iso-8859-15", '\u20ac') 1134 check("cp437", '\u221a') 1135 check("mac-roman", '\u02da') 1136 1137 def xml(encoding): 1138 return "<?xml version='1.0' encoding='%s'?><xml />" % encoding 1139 def bxml(encoding): 1140 return xml(encoding).encode(encoding) 1141 supported_encodings = [ 1142 'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le', 1143 'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5', 1144 'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10', 1145 'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16', 1146 'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 1147 'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 1148 'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125', 1149 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 1150 'cp1256', 'cp1257', 'cp1258', 1151 'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2', 1152 'mac-roman', 'mac-turkish', 1153 'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004', 1154 'iso2022-jp-3', 'iso2022-jp-ext', 1155 'koi8-r', 'koi8-t', 'koi8-u', 'kz1048', 1156 'hz', 'ptcp154', 1157 ] 1158 for encoding in supported_encodings: 1159 self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />') 1160 1161 unsupported_ascii_compatible_encodings = [ 1162 'big5', 'big5hkscs', 1163 'cp932', 'cp949', 'cp950', 1164 'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr', 1165 'gb2312', 'gbk', 'gb18030', 1166 'iso2022-kr', 'johab', 1167 'shift-jis', 'shift-jis-2004', 'shift-jisx0213', 1168 
'utf-7', 1169 ] 1170 for encoding in unsupported_ascii_compatible_encodings: 1171 self.assertRaises(ValueError, ET.XML, bxml(encoding)) 1172 1173 unsupported_ascii_incompatible_encodings = [ 1174 'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140', 1175 'utf_32', 'utf_32_be', 'utf_32_le', 1176 ] 1177 for encoding in unsupported_ascii_incompatible_encodings: 1178 self.assertRaises(ET.ParseError, ET.XML, bxml(encoding)) 1179 1180 self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii')) 1181 self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii')) 1182 1183 def test_methods(self): 1184 # Test serialization methods. 1185 1186 e = ET.XML("<html><link/><script>1 < 2</script></html>") 1187 e.tail = "\n" 1188 self.assertEqual(serialize(e), 1189 '<html><link /><script>1 < 2</script></html>\n') 1190 self.assertEqual(serialize(e, method=None), 1191 '<html><link /><script>1 < 2</script></html>\n') 1192 self.assertEqual(serialize(e, method="xml"), 1193 '<html><link /><script>1 < 2</script></html>\n') 1194 self.assertEqual(serialize(e, method="html"), 1195 '<html><link><script>1 < 2</script></html>\n') 1196 self.assertEqual(serialize(e, method="text"), '1 < 2\n') 1197 1198 def test_issue18347(self): 1199 e = ET.XML('<html><CamelCase>text</CamelCase></html>') 1200 self.assertEqual(serialize(e), 1201 '<html><CamelCase>text</CamelCase></html>') 1202 self.assertEqual(serialize(e, method="html"), 1203 '<html><CamelCase>text</CamelCase></html>') 1204 1205 def test_entity(self): 1206 # Test entity handling. 
1207 1208 # 1) good entities 1209 1210 e = ET.XML("<document title='舰'>test</document>") 1211 self.assertEqual(serialize(e, encoding="us-ascii"), 1212 b'<document title="舰">test</document>') 1213 self.serialize_check(e, '<document title="\u8230">test</document>') 1214 1215 # 2) bad entities 1216 1217 with self.assertRaises(ET.ParseError) as cm: 1218 ET.XML("<document>&entity;</document>") 1219 self.assertEqual(str(cm.exception), 1220 'undefined entity: line 1, column 10') 1221 1222 with self.assertRaises(ET.ParseError) as cm: 1223 ET.XML(ENTITY_XML) 1224 self.assertEqual(str(cm.exception), 1225 'undefined entity &entity;: line 5, column 10') 1226 1227 # 3) custom entity 1228 1229 parser = ET.XMLParser() 1230 parser.entity["entity"] = "text" 1231 parser.feed(ENTITY_XML) 1232 root = parser.close() 1233 self.serialize_check(root, '<document>text</document>') 1234 1235 # 4) external (SYSTEM) entity 1236 1237 with self.assertRaises(ET.ParseError) as cm: 1238 ET.XML(EXTERNAL_ENTITY_XML) 1239 self.assertEqual(str(cm.exception), 1240 'undefined entity &entity;: line 4, column 10') 1241 1242 def test_namespace(self): 1243 # Test namespace issues. 
1244 1245 # 1) xml namespace 1246 1247 elem = ET.XML("<tag xml:lang='en' />") 1248 self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1 1249 1250 # 2) other "well-known" namespaces 1251 1252 elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />") 1253 self.serialize_check(elem, 1254 '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1 1255 1256 elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />") 1257 self.serialize_check(elem, 1258 '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2 1259 1260 elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />") 1261 self.serialize_check(elem, 1262 '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3 1263 1264 # 3) unknown namespaces 1265 elem = ET.XML(SAMPLE_XML_NS) 1266 self.serialize_check(elem, 1267 '<ns0:body xmlns:ns0="http://effbot.org/ns">\n' 1268 ' <ns0:tag>text</ns0:tag>\n' 1269 ' <ns0:tag />\n' 1270 ' <ns0:section>\n' 1271 ' <ns0:tag>subtext</ns0:tag>\n' 1272 ' </ns0:section>\n' 1273 '</ns0:body>') 1274 1275 def test_qname(self): 1276 # Test QName handling. 
1277 1278 # 1) decorated tags 1279 1280 elem = ET.Element("{uri}tag") 1281 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1 1282 elem = ET.Element(ET.QName("{uri}tag")) 1283 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2 1284 elem = ET.Element(ET.QName("uri", "tag")) 1285 self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3 1286 elem = ET.Element(ET.QName("uri", "tag")) 1287 subelem = ET.SubElement(elem, ET.QName("uri", "tag1")) 1288 subelem = ET.SubElement(elem, ET.QName("uri", "tag2")) 1289 self.serialize_check(elem, 1290 '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4 1291 1292 # 2) decorated attributes 1293 1294 elem.clear() 1295 elem.attrib["{uri}key"] = "value" 1296 self.serialize_check(elem, 1297 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1 1298 1299 elem.clear() 1300 elem.attrib[ET.QName("{uri}key")] = "value" 1301 self.serialize_check(elem, 1302 '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2 1303 1304 # 3) decorated values are not converted by default, but the 1305 # QName wrapper can be used for values 1306 1307 elem.clear() 1308 elem.attrib["{uri}key"] = "{uri}value" 1309 self.serialize_check(elem, 1310 '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1 1311 1312 elem.clear() 1313 elem.attrib["{uri}key"] = ET.QName("{uri}value") 1314 self.serialize_check(elem, 1315 '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2 1316 1317 elem.clear() 1318 subelem = ET.Element("tag") 1319 subelem.attrib["{uri1}key"] = ET.QName("{uri2}value") 1320 elem.append(subelem) 1321 elem.append(subelem) 1322 self.serialize_check(elem, 1323 '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">' 1324 '<tag ns1:key="ns2:value" />' 1325 '<tag ns1:key="ns2:value" />' 1326 '</ns0:tag>') # 3.3 1327 1328 # 4) Direct QName tests 1329 1330 self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag') 1331 self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag') 1332 q1 = ET.QName('ns', 'tag') 1333 q2 
= ET.QName('ns', 'tag') 1334 self.assertEqual(q1, q2) 1335 q2 = ET.QName('ns', 'other-tag') 1336 self.assertNotEqual(q1, q2) 1337 self.assertNotEqual(q1, 'ns:tag') 1338 self.assertEqual(q1, '{ns}tag') 1339 1340 def test_doctype_public(self): 1341 # Test PUBLIC doctype. 1342 1343 elem = ET.XML('<!DOCTYPE html PUBLIC' 1344 ' "-//W3C//DTD XHTML 1.0 Transitional//EN"' 1345 ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">' 1346 '<html>text</html>') 1347 1348 def test_xpath_tokenizer(self): 1349 # Test the XPath tokenizer. 1350 from xml.etree import ElementPath 1351 def check(p, expected, namespaces=None): 1352 self.assertEqual([op or tag 1353 for op, tag in ElementPath.xpath_tokenizer(p, namespaces)], 1354 expected) 1355 1356 # tests from the xml specification 1357 check("*", ['*']) 1358 check("text()", ['text', '()']) 1359 check("@name", ['@', 'name']) 1360 check("@*", ['@', '*']) 1361 check("para[1]", ['para', '[', '1', ']']) 1362 check("para[last()]", ['para', '[', 'last', '()', ']']) 1363 check("*/para", ['*', '/', 'para']) 1364 check("/doc/chapter[5]/section[2]", 1365 ['/', 'doc', '/', 'chapter', '[', '5', ']', 1366 '/', 'section', '[', '2', ']']) 1367 check("chapter//para", ['chapter', '//', 'para']) 1368 check("//para", ['//', 'para']) 1369 check("//olist/item", ['//', 'olist', '/', 'item']) 1370 check(".", ['.']) 1371 check(".//para", ['.', '//', 'para']) 1372 check("..", ['..']) 1373 check("../@lang", ['..', '/', '@', 'lang']) 1374 check("chapter[title]", ['chapter', '[', 'title', ']']) 1375 check("employee[@secretary and @assistant]", ['employee', 1376 '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']) 1377 1378 # additional tests 1379 check("@{ns}attr", ['@', '{ns}attr']) 1380 check("{http://spam}egg", ['{http://spam}egg']) 1381 check("./spam.egg", ['.', '/', 'spam.egg']) 1382 check(".//{http://spam}egg", ['.', '//', '{http://spam}egg']) 1383 1384 # wildcard tags 1385 check("{ns}*", ['{ns}*']) 1386 check("{}*", ['{}*']) 1387 
check("{*}tag", ['{*}tag']) 1388 check("{*}*", ['{*}*']) 1389 check(".//{*}tag", ['.', '//', '{*}tag']) 1390 1391 # namespace prefix resolution 1392 check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'], 1393 {'xsd': 'http://www.w3.org/2001/XMLSchema'}) 1394 check("type", ['{http://www.w3.org/2001/XMLSchema}type'], 1395 {'': 'http://www.w3.org/2001/XMLSchema'}) 1396 check("@xsd:type", ['@', '{http://www.w3.org/2001/XMLSchema}type'], 1397 {'xsd': 'http://www.w3.org/2001/XMLSchema'}) 1398 check("@type", ['@', 'type'], 1399 {'': 'http://www.w3.org/2001/XMLSchema'}) 1400 check("@{*}type", ['@', '{*}type'], 1401 {'': 'http://www.w3.org/2001/XMLSchema'}) 1402 check("@{ns}attr", ['@', '{ns}attr'], 1403 {'': 'http://www.w3.org/2001/XMLSchema', 1404 'ns': 'http://www.w3.org/2001/XMLSchema'}) 1405 1406 def test_processinginstruction(self): 1407 # Test ProcessingInstruction directly 1408 1409 self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')), 1410 b'<?test instruction?>') 1411 self.assertEqual(ET.tostring(ET.PI('test', 'instruction')), 1412 b'<?test instruction?>') 1413 1414 # Issue #2746 1415 1416 self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')), 1417 b'<?test <testing&>?>') 1418 self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'), 1419 b"<?xml version='1.0' encoding='latin-1'?>\n" 1420 b"<?test <testing&>\xe3?>") 1421 1422 def test_html_empty_elems_serialization(self): 1423 # issue 15970 1424 # from http://www.w3.org/TR/html401/index/elements.html 1425 for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'EMBED', 'FRAME', 1426 'HR', 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM', 1427 'SOURCE', 'TRACK', 'WBR']: 1428 for elem in [element, element.lower()]: 1429 expected = '<%s>' % elem 1430 serialized = serialize(ET.XML('<%s />' % elem), method='html') 1431 self.assertEqual(serialized, expected) 1432 serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)), 1433 method='html') 1434 
self.assertEqual(serialized, expected) 1435 1436 def test_dump_attribute_order(self): 1437 # See BPO 34160 1438 e = ET.Element('cirriculum', status='public', company='example') 1439 with support.captured_stdout() as stdout: 1440 ET.dump(e) 1441 self.assertEqual(stdout.getvalue(), 1442 '<cirriculum status="public" company="example" />\n') 1443 1444 def test_tree_write_attribute_order(self): 1445 # See BPO 34160 1446 root = ET.Element('cirriculum', status='public', company='example') 1447 self.assertEqual(serialize(root), 1448 '<cirriculum status="public" company="example" />') 1449 self.assertEqual(serialize(root, method='html'), 1450 '<cirriculum status="public" company="example"></cirriculum>') 1451 1452 def test_attlist_default(self): 1453 # Test default attribute values; See BPO 42151. 1454 root = ET.fromstring(ATTLIST_XML) 1455 self.assertEqual(root[0].attrib, 1456 {'{http://www.w3.org/XML/1998/namespace}lang': 'eng'}) 1457 1458 1459class XMLPullParserTest(unittest.TestCase): 1460 1461 def _feed(self, parser, data, chunk_size=None, flush=False): 1462 if chunk_size is None: 1463 parser.feed(data) 1464 else: 1465 for i in range(0, len(data), chunk_size): 1466 parser.feed(data[i:i+chunk_size]) 1467 if flush: 1468 parser.flush() 1469 1470 def assert_events(self, parser, expected, max_events=None): 1471 self.assertEqual( 1472 [(event, (elem.tag, elem.text)) 1473 for event, elem in islice(parser.read_events(), max_events)], 1474 expected) 1475 1476 def assert_event_tuples(self, parser, expected, max_events=None): 1477 self.assertEqual( 1478 list(islice(parser.read_events(), max_events)), 1479 expected) 1480 1481 def assert_event_tags(self, parser, expected, max_events=None): 1482 events = islice(parser.read_events(), max_events) 1483 self.assertEqual([(action, elem.tag) for action, elem in events], 1484 expected) 1485 1486 def test_simple_xml(self, chunk_size=None, flush=False): 1487 parser = ET.XMLPullParser() 1488 self.assert_event_tags(parser, []) 1489 
self._feed(parser, "<!-- comment -->\n", chunk_size, flush) 1490 self.assert_event_tags(parser, []) 1491 self._feed(parser, 1492 "<root>\n <element key='value'>text</element", 1493 chunk_size, flush) 1494 self.assert_event_tags(parser, []) 1495 self._feed(parser, ">\n", chunk_size, flush) 1496 self.assert_event_tags(parser, [('end', 'element')]) 1497 self._feed(parser, "<element>text</element>tail\n", chunk_size, flush) 1498 self._feed(parser, "<empty-element/>\n", chunk_size, flush) 1499 self.assert_event_tags(parser, [ 1500 ('end', 'element'), 1501 ('end', 'empty-element'), 1502 ]) 1503 self._feed(parser, "</root>\n", chunk_size, flush) 1504 self.assert_event_tags(parser, [('end', 'root')]) 1505 self.assertIsNone(parser.close()) 1506 1507 def test_simple_xml_chunk_1(self): 1508 self.test_simple_xml(chunk_size=1, flush=True) 1509 1510 def test_simple_xml_chunk_5(self): 1511 self.test_simple_xml(chunk_size=5, flush=True) 1512 1513 def test_simple_xml_chunk_22(self): 1514 self.test_simple_xml(chunk_size=22) 1515 1516 def test_feed_while_iterating(self): 1517 parser = ET.XMLPullParser() 1518 it = parser.read_events() 1519 self._feed(parser, "<root>\n <element key='value'>text</element>\n") 1520 action, elem = next(it) 1521 self.assertEqual((action, elem.tag), ('end', 'element')) 1522 self._feed(parser, "</root>\n") 1523 action, elem = next(it) 1524 self.assertEqual((action, elem.tag), ('end', 'root')) 1525 with self.assertRaises(StopIteration): 1526 next(it) 1527 1528 def test_simple_xml_with_ns(self): 1529 parser = ET.XMLPullParser() 1530 self.assert_event_tags(parser, []) 1531 self._feed(parser, "<!-- comment -->\n") 1532 self.assert_event_tags(parser, []) 1533 self._feed(parser, "<root xmlns='namespace'>\n") 1534 self.assert_event_tags(parser, []) 1535 self._feed(parser, "<element key='value'>text</element") 1536 self.assert_event_tags(parser, []) 1537 self._feed(parser, ">\n") 1538 self.assert_event_tags(parser, [('end', '{namespace}element')]) 1539 
self._feed(parser, "<element>text</element>tail\n") 1540 self._feed(parser, "<empty-element/>\n") 1541 self.assert_event_tags(parser, [ 1542 ('end', '{namespace}element'), 1543 ('end', '{namespace}empty-element'), 1544 ]) 1545 self._feed(parser, "</root>\n") 1546 self.assert_event_tags(parser, [('end', '{namespace}root')]) 1547 self.assertIsNone(parser.close()) 1548 1549 def test_ns_events(self): 1550 parser = ET.XMLPullParser(events=('start-ns', 'end-ns')) 1551 self._feed(parser, "<!-- comment -->\n") 1552 self._feed(parser, "<root xmlns='namespace'>\n") 1553 self.assertEqual( 1554 list(parser.read_events()), 1555 [('start-ns', ('', 'namespace'))]) 1556 self._feed(parser, "<element key='value'>text</element") 1557 self._feed(parser, ">\n") 1558 self._feed(parser, "<element>text</element>tail\n") 1559 self._feed(parser, "<empty-element/>\n") 1560 self._feed(parser, "</root>\n") 1561 self.assertEqual(list(parser.read_events()), [('end-ns', None)]) 1562 self.assertIsNone(parser.close()) 1563 1564 def test_ns_events_start(self): 1565 parser = ET.XMLPullParser(events=('start-ns', 'start', 'end')) 1566 self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n") 1567 self.assert_event_tuples(parser, [ 1568 ('start-ns', ('', 'abc')), 1569 ('start-ns', ('p', 'xyz')), 1570 ], max_events=2) 1571 self.assert_event_tags(parser, [ 1572 ('start', '{abc}tag'), 1573 ], max_events=1) 1574 1575 self._feed(parser, "<child />\n") 1576 self.assert_event_tags(parser, [ 1577 ('start', '{abc}child'), 1578 ('end', '{abc}child'), 1579 ]) 1580 1581 self._feed(parser, "</tag>\n") 1582 parser.close() 1583 self.assert_event_tags(parser, [ 1584 ('end', '{abc}tag'), 1585 ]) 1586 1587 def test_ns_events_start_end(self): 1588 parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns')) 1589 self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n") 1590 self.assert_event_tuples(parser, [ 1591 ('start-ns', ('', 'abc')), 1592 ('start-ns', ('p', 'xyz')), 1593 ], max_events=2) 1594 
self.assert_event_tags(parser, [ 1595 ('start', '{abc}tag'), 1596 ], max_events=1) 1597 1598 self._feed(parser, "<child />\n") 1599 self.assert_event_tags(parser, [ 1600 ('start', '{abc}child'), 1601 ('end', '{abc}child'), 1602 ]) 1603 1604 self._feed(parser, "</tag>\n") 1605 parser.close() 1606 self.assert_event_tags(parser, [ 1607 ('end', '{abc}tag'), 1608 ], max_events=1) 1609 self.assert_event_tuples(parser, [ 1610 ('end-ns', None), 1611 ('end-ns', None), 1612 ]) 1613 1614 def test_events(self): 1615 parser = ET.XMLPullParser(events=()) 1616 self._feed(parser, "<root/>\n") 1617 self.assert_event_tags(parser, []) 1618 1619 parser = ET.XMLPullParser(events=('start', 'end')) 1620 self._feed(parser, "<!-- text here -->\n") 1621 self.assert_events(parser, []) 1622 1623 parser = ET.XMLPullParser(events=('start', 'end')) 1624 self._feed(parser, "<root>\n") 1625 self.assert_event_tags(parser, [('start', 'root')]) 1626 self._feed(parser, "<element key='value'>text</element") 1627 self.assert_event_tags(parser, [('start', 'element')]) 1628 self._feed(parser, ">\n") 1629 self.assert_event_tags(parser, [('end', 'element')]) 1630 self._feed(parser, 1631 "<element xmlns='foo'>text<empty-element/></element>tail\n") 1632 self.assert_event_tags(parser, [ 1633 ('start', '{foo}element'), 1634 ('start', '{foo}empty-element'), 1635 ('end', '{foo}empty-element'), 1636 ('end', '{foo}element'), 1637 ]) 1638 self._feed(parser, "</root>") 1639 self.assertIsNone(parser.close()) 1640 self.assert_event_tags(parser, [('end', 'root')]) 1641 1642 parser = ET.XMLPullParser(events=('start',)) 1643 self._feed(parser, "<!-- comment -->\n") 1644 self.assert_event_tags(parser, []) 1645 self._feed(parser, "<root>\n") 1646 self.assert_event_tags(parser, [('start', 'root')]) 1647 self._feed(parser, "<element key='value'>text</element") 1648 self.assert_event_tags(parser, [('start', 'element')]) 1649 self._feed(parser, ">\n") 1650 self.assert_event_tags(parser, []) 1651 self._feed(parser, 1652 
"<element xmlns='foo'>text<empty-element/></element>tail\n") 1653 self.assert_event_tags(parser, [ 1654 ('start', '{foo}element'), 1655 ('start', '{foo}empty-element'), 1656 ]) 1657 self._feed(parser, "</root>") 1658 self.assertIsNone(parser.close()) 1659 1660 def test_events_comment(self): 1661 parser = ET.XMLPullParser(events=('start', 'comment', 'end')) 1662 self._feed(parser, "<!-- text here -->\n") 1663 self.assert_events(parser, [('comment', (ET.Comment, ' text here '))]) 1664 self._feed(parser, "<!-- more text here -->\n") 1665 self.assert_events(parser, [('comment', (ET.Comment, ' more text here '))]) 1666 self._feed(parser, "<root-tag>text") 1667 self.assert_event_tags(parser, [('start', 'root-tag')]) 1668 self._feed(parser, "<!-- inner comment-->\n") 1669 self.assert_events(parser, [('comment', (ET.Comment, ' inner comment'))]) 1670 self._feed(parser, "</root-tag>\n") 1671 self.assert_event_tags(parser, [('end', 'root-tag')]) 1672 self._feed(parser, "<!-- outer comment -->\n") 1673 self.assert_events(parser, [('comment', (ET.Comment, ' outer comment '))]) 1674 1675 parser = ET.XMLPullParser(events=('comment',)) 1676 self._feed(parser, "<!-- text here -->\n") 1677 self.assert_events(parser, [('comment', (ET.Comment, ' text here '))]) 1678 1679 def test_events_pi(self): 1680 parser = ET.XMLPullParser(events=('start', 'pi', 'end')) 1681 self._feed(parser, "<?pitarget?>\n") 1682 self.assert_events(parser, [('pi', (ET.PI, 'pitarget'))]) 1683 parser = ET.XMLPullParser(events=('pi',)) 1684 self._feed(parser, "<?pitarget some text ?>\n") 1685 self.assert_events(parser, [('pi', (ET.PI, 'pitarget some text '))]) 1686 1687 def test_events_sequence(self): 1688 # Test that events can be some sequence that's not just a tuple or list 1689 eventset = {'end', 'start'} 1690 parser = ET.XMLPullParser(events=eventset) 1691 self._feed(parser, "<foo>bar</foo>") 1692 self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')]) 1693 1694 class DummyIter: 1695 def 
__init__(self): 1696 self.events = iter(['start', 'end', 'start-ns']) 1697 def __iter__(self): 1698 return self 1699 def __next__(self): 1700 return next(self.events) 1701 1702 parser = ET.XMLPullParser(events=DummyIter()) 1703 self._feed(parser, "<foo>bar</foo>") 1704 self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')]) 1705 1706 def test_unknown_event(self): 1707 with self.assertRaises(ValueError): 1708 ET.XMLPullParser(events=('start', 'end', 'bogus')) 1709 1710 @unittest.skipIf(pyexpat.version_info < (2, 6, 0), 1711 f'Expat {pyexpat.version_info} does not ' 1712 'support reparse deferral') 1713 def test_flush_reparse_deferral_enabled(self): 1714 parser = ET.XMLPullParser(events=('start', 'end')) 1715 1716 for chunk in ("<doc", ">"): 1717 parser.feed(chunk) 1718 1719 self.assert_event_tags(parser, []) # i.e. no elements started 1720 if ET is pyET: 1721 self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) 1722 1723 parser.flush() 1724 1725 self.assert_event_tags(parser, [('start', 'doc')]) 1726 if ET is pyET: 1727 self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled()) 1728 1729 parser.feed("</doc>") 1730 parser.close() 1731 1732 self.assert_event_tags(parser, [('end', 'doc')]) 1733 1734 def test_flush_reparse_deferral_disabled(self): 1735 parser = ET.XMLPullParser(events=('start', 'end')) 1736 1737 for chunk in ("<doc", ">"): 1738 parser.feed(chunk) 1739 1740 if pyexpat.version_info >= (2, 6, 0): 1741 if not ET is pyET: 1742 self.skipTest(f'XMLParser.(Get|Set)ReparseDeferralEnabled ' 1743 'methods not available in C') 1744 parser._parser._parser.SetReparseDeferralEnabled(False) 1745 self.assert_event_tags(parser, []) # i.e. 
no elements started 1746 1747 if ET is pyET: 1748 self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) 1749 1750 parser.flush() 1751 1752 self.assert_event_tags(parser, [('start', 'doc')]) 1753 if ET is pyET: 1754 self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled()) 1755 1756 parser.feed("</doc>") 1757 parser.close() 1758 1759 self.assert_event_tags(parser, [('end', 'doc')]) 1760 1761# 1762# xinclude tests (samples from appendix C of the xinclude specification) 1763 1764XINCLUDE = {} 1765 1766XINCLUDE["C1.xml"] = """\ 1767<?xml version='1.0'?> 1768<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1769 <p>120 Mz is adequate for an average home user.</p> 1770 <xi:include href="disclaimer.xml"/> 1771</document> 1772""" 1773 1774XINCLUDE["disclaimer.xml"] = """\ 1775<?xml version='1.0'?> 1776<disclaimer> 1777 <p>The opinions represented herein represent those of the individual 1778 and should not be interpreted as official policy endorsed by this 1779 organization.</p> 1780</disclaimer> 1781""" 1782 1783XINCLUDE["C2.xml"] = """\ 1784<?xml version='1.0'?> 1785<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1786 <p>This document has been accessed 1787 <xi:include href="count.txt" parse="text"/> times.</p> 1788</document> 1789""" 1790 1791XINCLUDE["count.txt"] = "324387" 1792 1793XINCLUDE["C2b.xml"] = """\ 1794<?xml version='1.0'?> 1795<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1796 <p>This document has been <em>accessed</em> 1797 <xi:include href="count.txt" parse="text"/> times.</p> 1798</document> 1799""" 1800 1801XINCLUDE["C3.xml"] = """\ 1802<?xml version='1.0'?> 1803<document xmlns:xi="http://www.w3.org/2001/XInclude"> 1804 <p>The following is the source of the "data.xml" resource:</p> 1805 <example><xi:include href="data.xml" parse="text"/></example> 1806</document> 1807""" 1808 1809XINCLUDE["data.xml"] = """\ 1810<?xml version='1.0'?> 1811<data> 1812 <item><![CDATA[Brooks & Shields]]></item> 1813</data> 1814""" 

XINCLUDE["C5.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
  <xi:include href="example.txt" parse="text">
    <xi:fallback>
      <xi:include href="fallback-example.txt" parse="text">
        <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
      </xi:include>
    </xi:fallback>
  </xi:include>
</div>
"""

XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>Example.</p>
  <xi:include href="{}"/>
</document>
""".format(html.escape(SIMPLE_XMLFILE, True))

XINCLUDE["include_c1_repeated.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive1.xml:</p>
  <xi:include href="C1.xml"/>
  <xi:include href="C1.xml"/>
  <xi:include href="C1.xml"/>
  <xi:include href="C1.xml"/>
</document>
"""

#
# badly formatted xi:include tags

XINCLUDE_BAD = {}

XINCLUDE_BAD["B1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
"""

XINCLUDE_BAD["B2.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
    <xi:fallback></xi:fallback>
</div>
"""

XINCLUDE["Recursive1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive2.xml:</p>
  <xi:include href="Recursive2.xml"/>
</document>
"""

XINCLUDE["Recursive2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive3.xml:</p>
  <xi:include href="Recursive3.xml"/>
</document>
"""

XINCLUDE["Recursive3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive1.xml:</p>
  <xi:include href="Recursive1.xml"/>
</document>
"""


class XIncludeTest(unittest.TestCase):

    def xinclude_loader(self, href, parse="xml", encoding=None):
        # Loader backed by the in-memory XINCLUDE table.  Unknown hrefs raise
        # OSError; "xml" resources are parsed with the ET under test, anything
        # else is returned as the raw string.
        try:
            data = XINCLUDE[href]
        except KeyError:
            raise OSError("resource not found")
        if parse == "xml":
            data = ET.XML(data)
        return data

    def none_loader(self, href, parser, encoding=None):
        # Loader that never finds anything.
        return None

    def _my_loader(self, href, parse):
        # Used to avoid a test-dependency problem where the default loader
        # of ElementInclude uses the pyET parser for cET tests.
        if parse == 'xml':
            with open(href, 'rb') as f:
                return ET.parse(f).getroot()
        else:
            return None

    def test_xinclude_default(self):
        # Include a real file from disk (SIMPLE_XMLFILE) through _my_loader.
        from xml.etree import ElementInclude
        doc = self.xinclude_loader('default.xml')
        ElementInclude.include(doc, self._my_loader)
        # NOTE(review): the whitespace inside <root> comes from the external
        # simple.xml file, which is not visible here; three spaces of
        # indentation are assumed -- confirm against that file.
        self.assertEqual(serialize(doc),
            '<document>\n'
            '  <p>Example.</p>\n'
            '  <root>\n'
            '   <element key="value">text</element>\n'
            '   <element>text</element>tail\n'
            '   <empty-element />\n'
            '</root>\n'
            '</document>')

    def test_xinclude(self):
        from xml.etree import ElementInclude

        # Basic inclusion example (XInclude C.1)
        document = self.xinclude_loader("C1.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            '  <p>120 Mz is adequate for an average home user.</p>\n'
            '  <disclaimer>\n'
            '  <p>The opinions represented herein represent those of the individual\n'
            '  and should not be interpreted as official policy endorsed by this\n'
            '  organization.</p>\n'
            '</disclaimer>\n'
            '</document>') # C1

        # Textual inclusion example (XInclude C.2)
        document = self.xinclude_loader("C2.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            '  <p>This document has been accessed\n'
            '  324387 times.</p>\n'
            '</document>') # C2

        # Textual inclusion after sibling element (based on modified XInclude C.2)
        document = self.xinclude_loader("C2b.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            '  <p>This document has been <em>accessed</em>\n'
            '  324387 times.</p>\n'
            '</document>') # C2b

        # Textual inclusion of XML example (XInclude C.3)
        # The included markup becomes plain text, so the serializer writes
        # its "<", ">" and "&" as references.
        document = self.xinclude_loader("C3.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            '  <p>The following is the source of the "data.xml" resource:</p>\n'
            "  <example>&lt;?xml version='1.0'?&gt;\n"
            '&lt;data&gt;\n'
            '  &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;\n'
            '&lt;/data&gt;\n'
            '</example>\n'
            '</document>') # C3

        # Fallback example (XInclude C.5)
        # Note! Fallback support is not yet implemented
        document = self.xinclude_loader("C5.xml")
        with self.assertRaises(OSError) as cm:
            ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(str(cm.exception), 'resource not found')
        self.assertEqual(serialize(document),
            '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n'
            '  <ns0:include href="example.txt" parse="text">\n'
            '    <ns0:fallback>\n'
            '      <ns0:include href="fallback-example.txt" parse="text">\n'
            '        <ns0:fallback><a href="mailto:bob@example.org">Report error</a></ns0:fallback>\n'
            '      </ns0:include>\n'
            '    </ns0:fallback>\n'
            '  </ns0:include>\n'
            '</div>') # C5

    def test_xinclude_repeated(self):
        # The same resource may be included several times: one <p> in the
        # outer document plus two per expanded C1.xml inclusion.
        from xml.etree import ElementInclude

        document = self.xinclude_loader("include_c1_repeated.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(1+4*2, len(document.findall(".//p")))

    def test_xinclude_failures(self):
        from xml.etree import ElementInclude

        # Test failure to locate included XML file.
        document = ET.XML(XINCLUDE["C1.xml"])
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception),
                "cannot load 'disclaimer.xml' as 'xml'")

        # Test failure to locate included text file.
        document = ET.XML(XINCLUDE["C2.xml"])
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception),
                "cannot load 'count.txt' as 'text'")

        # Test bad parse type.
2019 document = ET.XML(XINCLUDE_BAD["B1.xml"]) 2020 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 2021 ElementInclude.include(document, loader=self.none_loader) 2022 self.assertEqual(str(cm.exception), 2023 "unknown parse type in xi:include tag ('BAD_TYPE')") 2024 2025 # Test xi:fallback outside xi:include. 2026 document = ET.XML(XINCLUDE_BAD["B2.xml"]) 2027 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 2028 ElementInclude.include(document, loader=self.none_loader) 2029 self.assertEqual(str(cm.exception), 2030 "xi:fallback tag must be child of xi:include " 2031 "('{http://www.w3.org/2001/XInclude}fallback')") 2032 2033 # Test infinitely recursive includes. 2034 document = self.xinclude_loader("Recursive1.xml") 2035 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 2036 ElementInclude.include(document, self.xinclude_loader) 2037 self.assertEqual(str(cm.exception), 2038 "recursive include of Recursive2.xml") 2039 2040 # Test 'max_depth' limitation. 
2041 document = self.xinclude_loader("Recursive1.xml") 2042 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 2043 ElementInclude.include(document, self.xinclude_loader, max_depth=None) 2044 self.assertEqual(str(cm.exception), 2045 "recursive include of Recursive2.xml") 2046 2047 document = self.xinclude_loader("Recursive1.xml") 2048 with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm: 2049 ElementInclude.include(document, self.xinclude_loader, max_depth=0) 2050 self.assertEqual(str(cm.exception), 2051 "maximum xinclude depth reached when including file Recursive2.xml") 2052 2053 document = self.xinclude_loader("Recursive1.xml") 2054 with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm: 2055 ElementInclude.include(document, self.xinclude_loader, max_depth=1) 2056 self.assertEqual(str(cm.exception), 2057 "maximum xinclude depth reached when including file Recursive3.xml") 2058 2059 document = self.xinclude_loader("Recursive1.xml") 2060 with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm: 2061 ElementInclude.include(document, self.xinclude_loader, max_depth=2) 2062 self.assertEqual(str(cm.exception), 2063 "maximum xinclude depth reached when including file Recursive1.xml") 2064 2065 document = self.xinclude_loader("Recursive1.xml") 2066 with self.assertRaises(ElementInclude.FatalIncludeError) as cm: 2067 ElementInclude.include(document, self.xinclude_loader, max_depth=3) 2068 self.assertEqual(str(cm.exception), 2069 "recursive include of Recursive2.xml") 2070 2071 2072# -------------------------------------------------------------------- 2073# reported bugs 2074 2075class BugsTest(unittest.TestCase): 2076 2077 def test_bug_xmltoolkit21(self): 2078 # marshaller gives obscure errors for non-string values 2079 2080 def check(elem): 2081 with self.assertRaises(TypeError) as cm: 2082 serialize(elem) 2083 self.assertEqual(str(cm.exception), 2084 'cannot serialize 123 (type int)') 2085 2086 elem 
= ET.Element(123) 2087 check(elem) # tag 2088 2089 elem = ET.Element("elem") 2090 elem.text = 123 2091 check(elem) # text 2092 2093 elem = ET.Element("elem") 2094 elem.tail = 123 2095 check(elem) # tail 2096 2097 elem = ET.Element("elem") 2098 elem.set(123, "123") 2099 check(elem) # attribute key 2100 2101 elem = ET.Element("elem") 2102 elem.set("123", 123) 2103 check(elem) # attribute value 2104 2105 def test_bug_xmltoolkit25(self): 2106 # typo in ElementTree.findtext 2107 2108 elem = ET.XML(SAMPLE_XML) 2109 tree = ET.ElementTree(elem) 2110 self.assertEqual(tree.findtext("tag"), 'text') 2111 self.assertEqual(tree.findtext("section/tag"), 'subtext') 2112 2113 def test_bug_xmltoolkit28(self): 2114 # .//tag causes exceptions 2115 2116 tree = ET.XML("<doc><table><tbody/></table></doc>") 2117 self.assertEqual(summarize_list(tree.findall(".//thead")), []) 2118 self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody']) 2119 2120 def test_bug_xmltoolkitX1(self): 2121 # dump() doesn't flush the output buffer 2122 2123 tree = ET.XML("<doc><table><tbody/></table></doc>") 2124 with support.captured_stdout() as stdout: 2125 ET.dump(tree) 2126 self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n') 2127 2128 def test_bug_xmltoolkit39(self): 2129 # non-ascii element and attribute names doesn't work 2130 2131 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />") 2132 self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />') 2133 2134 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 2135 b"<tag \xe4ttr='välue' />") 2136 self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'}) 2137 self.assertEqual(ET.tostring(tree, "utf-8"), 2138 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />') 2139 2140 tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 2141 b'<t\xe4g>text</t\xe4g>') 2142 self.assertEqual(ET.tostring(tree, "utf-8"), 2143 b'<t\xc3\xa4g>text</t\xc3\xa4g>') 2144 2145 tree = ET.Element("t\u00e4g") 2146 
self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />') 2147 2148 tree = ET.Element("tag") 2149 tree.set("\u00e4ttr", "v\u00e4lue") 2150 self.assertEqual(ET.tostring(tree, "utf-8"), 2151 b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />') 2152 2153 def test_bug_xmltoolkit54(self): 2154 # problems handling internally defined entities 2155 2156 e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '舰'>]>" 2157 '<doc>&ldots;</doc>') 2158 self.assertEqual(serialize(e, encoding="us-ascii"), 2159 b'<doc>舰</doc>') 2160 self.assertEqual(serialize(e), '<doc>\u8230</doc>') 2161 2162 def test_bug_xmltoolkit55(self): 2163 # make sure we're reporting the first error, not the last 2164 2165 with self.assertRaises(ET.ParseError) as cm: 2166 ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>" 2167 b'<doc>&ldots;&ndots;&rdots;</doc>') 2168 self.assertEqual(str(cm.exception), 2169 'undefined entity &ldots;: line 1, column 36') 2170 2171 def test_bug_xmltoolkit60(self): 2172 # Handle crash in stream source. 2173 2174 class ExceptionFile: 2175 def read(self, x): 2176 raise OSError 2177 2178 self.assertRaises(OSError, ET.parse, ExceptionFile()) 2179 2180 def test_bug_xmltoolkit62(self): 2181 # Don't crash when using custom entities. 2182 2183 ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'} 2184 parser = ET.XMLParser() 2185 parser.entity.update(ENTITIES) 2186 parser.feed("""<?xml version="1.0" encoding="UTF-8"?> 2187<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []> 2188<patent-application-publication> 2189<subdoc-abstract> 2190<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named ‘BCT9801BEG’.</paragraph> 2191</subdoc-abstract> 2192</patent-application-publication>""") 2193 t = parser.close() 2194 self.assertEqual(t.find('.//paragraph').text, 2195 'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.') 2196 2197 @unittest.skipIf(sys.gettrace(), "Skips under coverage.") 2198 def test_bug_xmltoolkit63(self): 2199 # Check reference leak. 
2200 def xmltoolkit63(): 2201 tree = ET.TreeBuilder() 2202 tree.start("tag", {}) 2203 tree.data("text") 2204 tree.end("tag") 2205 2206 xmltoolkit63() 2207 count = sys.getrefcount(None) 2208 for i in range(1000): 2209 xmltoolkit63() 2210 self.assertEqual(sys.getrefcount(None), count) 2211 2212 def test_bug_200708_newline(self): 2213 # Preserve newlines in attributes. 2214 2215 e = ET.Element('SomeTag', text="def _f():\n return 3\n") 2216 self.assertEqual(ET.tostring(e), 2217 b'<SomeTag text="def _f(): return 3 " />') 2218 self.assertEqual(ET.XML(ET.tostring(e)).get("text"), 2219 'def _f():\n return 3\n') 2220 self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))), 2221 b'<SomeTag text="def _f(): return 3 " />') 2222 2223 def test_bug_200708_close(self): 2224 # Test default builder. 2225 parser = ET.XMLParser() # default 2226 parser.feed("<element>some text</element>") 2227 self.assertEqual(parser.close().tag, 'element') 2228 2229 # Test custom builder. 2230 class EchoTarget: 2231 def close(self): 2232 return ET.Element("element") # simulate root 2233 parser = ET.XMLParser(target=EchoTarget()) 2234 parser.feed("<element>some text</element>") 2235 self.assertEqual(parser.close().tag, 'element') 2236 2237 def test_bug_200709_default_namespace(self): 2238 e = ET.Element("{default}elem") 2239 s = ET.SubElement(e, "{default}elem") 2240 self.assertEqual(serialize(e, default_namespace="default"), # 1 2241 '<elem xmlns="default"><elem /></elem>') 2242 2243 e = ET.Element("{default}elem") 2244 s = ET.SubElement(e, "{default}elem") 2245 s = ET.SubElement(e, "{not-default}elem") 2246 self.assertEqual(serialize(e, default_namespace="default"), # 2 2247 '<elem xmlns="default" xmlns:ns1="not-default">' 2248 '<elem />' 2249 '<ns1:elem />' 2250 '</elem>') 2251 2252 e = ET.Element("{default}elem") 2253 s = ET.SubElement(e, "{default}elem") 2254 s = ET.SubElement(e, "elem") # unprefixed name 2255 with self.assertRaises(ValueError) as cm: 2256 serialize(e, default_namespace="default") # 
3 2257 self.assertEqual(str(cm.exception), 2258 'cannot use non-qualified names with default_namespace option') 2259 2260 def test_bug_200709_register_namespace(self): 2261 e = ET.Element("{http://namespace.invalid/does/not/exist/}title") 2262 self.assertEqual(ET.tostring(e), 2263 b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />') 2264 ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/") 2265 e = ET.Element("{http://namespace.invalid/does/not/exist/}title") 2266 self.assertEqual(ET.tostring(e), 2267 b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />') 2268 2269 # And the Dublin Core namespace is in the default list: 2270 2271 e = ET.Element("{http://purl.org/dc/elements/1.1/}title") 2272 self.assertEqual(ET.tostring(e), 2273 b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />') 2274 2275 def test_bug_200709_element_comment(self): 2276 # Not sure if this can be fixed, really (since the serializer needs 2277 # ET.Comment, not cET.comment). 
2278 2279 a = ET.Element('a') 2280 a.append(ET.Comment('foo')) 2281 self.assertEqual(a[0].tag, ET.Comment) 2282 2283 a = ET.Element('a') 2284 a.append(ET.PI('foo')) 2285 self.assertEqual(a[0].tag, ET.PI) 2286 2287 def test_bug_200709_element_insert(self): 2288 a = ET.Element('a') 2289 b = ET.SubElement(a, 'b') 2290 c = ET.SubElement(a, 'c') 2291 d = ET.Element('d') 2292 a.insert(0, d) 2293 self.assertEqual(summarize_list(a), ['d', 'b', 'c']) 2294 a.insert(-1, d) 2295 self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c']) 2296 2297 def test_bug_200709_iter_comment(self): 2298 a = ET.Element('a') 2299 b = ET.SubElement(a, 'b') 2300 comment_b = ET.Comment("TEST-b") 2301 b.append(comment_b) 2302 self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment]) 2303 2304 # -------------------------------------------------------------------- 2305 # reported on bugs.python.org 2306 2307 def test_bug_1534630(self): 2308 bob = ET.TreeBuilder() 2309 e = bob.data("data") 2310 e = bob.start("tag", {}) 2311 e = bob.end("tag") 2312 e = bob.close() 2313 self.assertEqual(serialize(e), '<tag />') 2314 2315 def test_issue6233(self): 2316 e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>" 2317 b'<body>t\xc3\xa3g</body>') 2318 self.assertEqual(ET.tostring(e, 'ascii'), 2319 b"<?xml version='1.0' encoding='ascii'?>\n" 2320 b'<body>tãg</body>') 2321 e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>" 2322 b'<body>t\xe3g</body>') 2323 self.assertEqual(ET.tostring(e, 'ascii'), 2324 b"<?xml version='1.0' encoding='ascii'?>\n" 2325 b'<body>tãg</body>') 2326 2327 def test_issue6565(self): 2328 elem = ET.XML("<body><tag/></body>") 2329 self.assertEqual(summarize_list(elem), ['tag']) 2330 newelem = ET.XML(SAMPLE_XML) 2331 elem[:] = newelem[:] 2332 self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section']) 2333 2334 def test_issue10777(self): 2335 # Registering a namespace twice caused a "dictionary changed size during 2336 # iteration" bug. 
2337 2338 ET.register_namespace('test10777', 'http://myuri/') 2339 ET.register_namespace('test10777', 'http://myuri/') 2340 2341 def test_lost_text(self): 2342 # Issue #25902: Borrowed text can disappear 2343 class Text: 2344 def __bool__(self): 2345 e.text = 'changed' 2346 return True 2347 2348 e = ET.Element('tag') 2349 e.text = Text() 2350 i = e.itertext() 2351 t = next(i) 2352 self.assertIsInstance(t, Text) 2353 self.assertIsInstance(e.text, str) 2354 self.assertEqual(e.text, 'changed') 2355 2356 def test_lost_tail(self): 2357 # Issue #25902: Borrowed tail can disappear 2358 class Text: 2359 def __bool__(self): 2360 e[0].tail = 'changed' 2361 return True 2362 2363 e = ET.Element('root') 2364 e.append(ET.Element('tag')) 2365 e[0].tail = Text() 2366 i = e.itertext() 2367 t = next(i) 2368 self.assertIsInstance(t, Text) 2369 self.assertIsInstance(e[0].tail, str) 2370 self.assertEqual(e[0].tail, 'changed') 2371 2372 def test_lost_elem(self): 2373 # Issue #25902: Borrowed element can disappear 2374 class Tag: 2375 def __eq__(self, other): 2376 e[0] = ET.Element('changed') 2377 next(i) 2378 return True 2379 2380 e = ET.Element('root') 2381 e.append(ET.Element(Tag())) 2382 e.append(ET.Element('tag')) 2383 i = e.iter('tag') 2384 try: 2385 t = next(i) 2386 except ValueError: 2387 self.skipTest('generators are not reentrant') 2388 self.assertIsInstance(t.tag, Tag) 2389 self.assertIsInstance(e[0].tag, str) 2390 self.assertEqual(e[0].tag, 'changed') 2391 2392 def check_expat224_utf8_bug(self, text): 2393 xml = b'<a b="%s"/>' % text 2394 root = ET.XML(xml) 2395 self.assertEqual(root.get('b'), text.decode('utf-8')) 2396 2397 def test_expat224_utf8_bug(self): 2398 # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder. 2399 # Check that Expat 2.2.4 fixed the bug. 2400 # 2401 # Test buffer bounds at odd and even positions. 
2402 2403 text = b'\xc3\xa0' * 1024 2404 self.check_expat224_utf8_bug(text) 2405 2406 text = b'x' + b'\xc3\xa0' * 1024 2407 self.check_expat224_utf8_bug(text) 2408 2409 def test_expat224_utf8_bug_file(self): 2410 with open(UTF8_BUG_XMLFILE, 'rb') as fp: 2411 raw = fp.read() 2412 root = ET.fromstring(raw) 2413 xmlattr = root.get('b') 2414 2415 # "Parse" manually the XML file to extract the value of the 'b' 2416 # attribute of the <a b='xxx' /> XML element 2417 text = raw.decode('utf-8').strip() 2418 text = text.replace('\r\n', ' ') 2419 text = text[6:-4] 2420 self.assertEqual(root.get('b'), text) 2421 2422 def test_39495_treebuilder_start(self): 2423 self.assertRaises(TypeError, ET.TreeBuilder().start, "tag") 2424 self.assertRaises(TypeError, ET.TreeBuilder().start, "tag", None) 2425 2426 def test_issue123213_correct_extend_exception(self): 2427 # Does not hide the internal exception when extending the element 2428 self.assertRaises(ZeroDivisionError, ET.Element('tag').extend, 2429 (1/0 for i in range(2))) 2430 2431 # Still raises the TypeError when extending with a non-iterable 2432 self.assertRaises(TypeError, ET.Element('tag').extend, None) 2433 2434 # Preserves the TypeError message when extending with a generator 2435 def f(): 2436 raise TypeError("mymessage") 2437 2438 self.assertRaisesRegex( 2439 TypeError, 'mymessage', 2440 ET.Element('tag').extend, (f() for i in range(2))) 2441 2442 2443 2444# -------------------------------------------------------------------- 2445 2446 2447class BasicElementTest(ElementTestCase, unittest.TestCase): 2448 2449 def test___init__(self): 2450 tag = "foo" 2451 attrib = { "zix": "wyp" } 2452 2453 element_foo = ET.Element(tag, attrib) 2454 2455 # traits of an element 2456 self.assertIsInstance(element_foo, ET.Element) 2457 self.assertIn("tag", dir(element_foo)) 2458 self.assertIn("attrib", dir(element_foo)) 2459 self.assertIn("text", dir(element_foo)) 2460 self.assertIn("tail", dir(element_foo)) 2461 2462 # string attributes have 
expected values 2463 self.assertEqual(element_foo.tag, tag) 2464 self.assertIsNone(element_foo.text) 2465 self.assertIsNone(element_foo.tail) 2466 2467 # attrib is a copy 2468 self.assertIsNot(element_foo.attrib, attrib) 2469 self.assertEqual(element_foo.attrib, attrib) 2470 2471 # attrib isn't linked 2472 attrib["bar"] = "baz" 2473 self.assertIsNot(element_foo.attrib, attrib) 2474 self.assertNotEqual(element_foo.attrib, attrib) 2475 2476 def test___copy__(self): 2477 element_foo = ET.Element("foo", { "zix": "wyp" }) 2478 element_foo.append(ET.Element("bar", { "baz": "qix" })) 2479 2480 element_foo2 = copy.copy(element_foo) 2481 2482 # elements are not the same 2483 self.assertIsNot(element_foo2, element_foo) 2484 2485 # string attributes are equal 2486 self.assertEqual(element_foo2.tag, element_foo.tag) 2487 self.assertEqual(element_foo2.text, element_foo.text) 2488 self.assertEqual(element_foo2.tail, element_foo.tail) 2489 2490 # number of children is the same 2491 self.assertEqual(len(element_foo2), len(element_foo)) 2492 2493 # children are the same 2494 for (child1, child2) in itertools.zip_longest(element_foo, element_foo2): 2495 self.assertIs(child1, child2) 2496 2497 # attrib is a copy 2498 self.assertEqual(element_foo2.attrib, element_foo.attrib) 2499 2500 def test___deepcopy__(self): 2501 element_foo = ET.Element("foo", { "zix": "wyp" }) 2502 element_foo.append(ET.Element("bar", { "baz": "qix" })) 2503 2504 element_foo2 = copy.deepcopy(element_foo) 2505 2506 # elements are not the same 2507 self.assertIsNot(element_foo2, element_foo) 2508 2509 # string attributes are equal 2510 self.assertEqual(element_foo2.tag, element_foo.tag) 2511 self.assertEqual(element_foo2.text, element_foo.text) 2512 self.assertEqual(element_foo2.tail, element_foo.tail) 2513 2514 # number of children is the same 2515 self.assertEqual(len(element_foo2), len(element_foo)) 2516 2517 # children are not the same 2518 for (child1, child2) in itertools.zip_longest(element_foo, 
element_foo2): 2519 self.assertIsNot(child1, child2) 2520 2521 # attrib is a copy 2522 self.assertIsNot(element_foo2.attrib, element_foo.attrib) 2523 self.assertEqual(element_foo2.attrib, element_foo.attrib) 2524 2525 # attrib isn't linked 2526 element_foo.attrib["bar"] = "baz" 2527 self.assertIsNot(element_foo2.attrib, element_foo.attrib) 2528 self.assertNotEqual(element_foo2.attrib, element_foo.attrib) 2529 2530 def test_augmentation_type_errors(self): 2531 e = ET.Element('joe') 2532 self.assertRaises(TypeError, e.append, 'b') 2533 self.assertRaises(TypeError, e.extend, [ET.Element('bar'), 'foo']) 2534 self.assertRaises(TypeError, e.insert, 0, 'foo') 2535 e[:] = [ET.Element('bar')] 2536 with self.assertRaises(TypeError): 2537 e[0] = 'foo' 2538 with self.assertRaises(TypeError): 2539 e[:] = [ET.Element('bar'), 'foo'] 2540 2541 if hasattr(e, '__setstate__'): 2542 state = { 2543 'tag': 'tag', 2544 '_children': [None], # non-Element 2545 'attrib': 'attr', 2546 'tail': 'tail', 2547 'text': 'text', 2548 } 2549 self.assertRaises(TypeError, e.__setstate__, state) 2550 2551 if hasattr(e, '__deepcopy__'): 2552 class E(ET.Element): 2553 def __deepcopy__(self, memo): 2554 return None # non-Element 2555 e[:] = [E('bar')] 2556 self.assertRaises(TypeError, copy.deepcopy, e) 2557 2558 def test_cyclic_gc(self): 2559 class Dummy: 2560 pass 2561 2562 # Test the shortest cycle: d->element->d 2563 d = Dummy() 2564 d.dummyref = ET.Element('joe', attr=d) 2565 wref = weakref.ref(d) 2566 del d 2567 gc_collect() 2568 self.assertIsNone(wref()) 2569 2570 # A longer cycle: d->e->e2->d 2571 e = ET.Element('joe') 2572 d = Dummy() 2573 d.dummyref = e 2574 wref = weakref.ref(d) 2575 e2 = ET.SubElement(e, 'foo', attr=d) 2576 del d, e, e2 2577 gc_collect() 2578 self.assertIsNone(wref()) 2579 2580 # A cycle between Element objects as children of one another 2581 # e1->e2->e3->e1 2582 e1 = ET.Element('e1') 2583 e2 = ET.Element('e2') 2584 e3 = ET.Element('e3') 2585 e3.append(e1) 2586 e2.append(e3) 
2587 e1.append(e2) 2588 wref = weakref.ref(e1) 2589 del e1, e2, e3 2590 gc_collect() 2591 self.assertIsNone(wref()) 2592 2593 def test_weakref(self): 2594 flag = False 2595 def wref_cb(w): 2596 nonlocal flag 2597 flag = True 2598 e = ET.Element('e') 2599 wref = weakref.ref(e, wref_cb) 2600 self.assertEqual(wref().tag, 'e') 2601 del e 2602 gc_collect() # For PyPy or other GCs. 2603 self.assertEqual(flag, True) 2604 self.assertEqual(wref(), None) 2605 2606 def test_get_keyword_args(self): 2607 e1 = ET.Element('foo' , x=1, y=2, z=3) 2608 self.assertEqual(e1.get('x', default=7), 1) 2609 self.assertEqual(e1.get('w', default=7), 7) 2610 2611 def test_pickle(self): 2612 # issue #16076: the C implementation wasn't pickleable. 2613 for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): 2614 for dumper, loader in product(self.modules, repeat=2): 2615 e = dumper.Element('foo', bar=42) 2616 e.text = "text goes here" 2617 e.tail = "opposite of head" 2618 dumper.SubElement(e, 'child').append(dumper.Element('grandchild')) 2619 e.append(dumper.Element('child')) 2620 e.findall('.//grandchild')[0].set('attr', 'other value') 2621 2622 e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree', 2623 dumper, loader, proto) 2624 2625 self.assertEqual(e2.tag, 'foo') 2626 self.assertEqual(e2.attrib['bar'], 42) 2627 self.assertEqual(len(e2), 2) 2628 self.assertEqualElements(e, e2) 2629 2630 def test_pickle_issue18997(self): 2631 for proto in range(2, pickle.HIGHEST_PROTOCOL + 1): 2632 for dumper, loader in product(self.modules, repeat=2): 2633 XMLTEXT = """<?xml version="1.0"?> 2634 <group><dogs>4</dogs> 2635 </group>""" 2636 e1 = dumper.fromstring(XMLTEXT) 2637 self.assertEqual(e1.__getstate__()['tag'], 'group') 2638 e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree', 2639 dumper, loader, proto) 2640 self.assertEqual(e2.tag, 'group') 2641 self.assertEqual(e2[0].tag, 'dogs') 2642 2643 2644class BadElementTest(ElementTestCase, unittest.TestCase): 2645 def test_extend_mutable_list(self): 2646 
class X: 2647 @property 2648 def __class__(self): 2649 L[:] = [ET.Element('baz')] 2650 return ET.Element 2651 L = [X()] 2652 e = ET.Element('foo') 2653 try: 2654 e.extend(L) 2655 except TypeError: 2656 pass 2657 2658 class Y(X, ET.Element): 2659 pass 2660 L = [Y('x')] 2661 e = ET.Element('foo') 2662 e.extend(L) 2663 2664 def test_extend_mutable_list2(self): 2665 class X: 2666 @property 2667 def __class__(self): 2668 del L[:] 2669 return ET.Element 2670 L = [X(), ET.Element('baz')] 2671 e = ET.Element('foo') 2672 try: 2673 e.extend(L) 2674 except TypeError: 2675 pass 2676 2677 class Y(X, ET.Element): 2678 pass 2679 L = [Y('bar'), ET.Element('baz')] 2680 e = ET.Element('foo') 2681 e.extend(L) 2682 2683 def test_remove_with_mutating(self): 2684 class X(ET.Element): 2685 def __eq__(self, o): 2686 del e[:] 2687 return False 2688 e = ET.Element('foo') 2689 e.extend([X('bar')]) 2690 self.assertRaises(ValueError, e.remove, ET.Element('baz')) 2691 2692 e = ET.Element('foo') 2693 e.extend([ET.Element('bar')]) 2694 self.assertRaises(ValueError, e.remove, X('baz')) 2695 2696 @support.infinite_recursion(25) 2697 def test_recursive_repr(self): 2698 # Issue #25455 2699 e = ET.Element('foo') 2700 with swap_attr(e, 'tag', e): 2701 with self.assertRaises(RuntimeError): 2702 repr(e) # Should not crash 2703 2704 def test_element_get_text(self): 2705 # Issue #27863 2706 class X(str): 2707 def __del__(self): 2708 try: 2709 elem.text 2710 except NameError: 2711 pass 2712 2713 b = ET.TreeBuilder() 2714 b.start('tag', {}) 2715 b.data('ABCD') 2716 b.data(X('EFGH')) 2717 b.data('IJKL') 2718 b.end('tag') 2719 2720 elem = b.close() 2721 self.assertEqual(elem.text, 'ABCDEFGHIJKL') 2722 2723 def test_element_get_tail(self): 2724 # Issue #27863 2725 class X(str): 2726 def __del__(self): 2727 try: 2728 elem[0].tail 2729 except NameError: 2730 pass 2731 2732 b = ET.TreeBuilder() 2733 b.start('root', {}) 2734 b.start('tag', {}) 2735 b.end('tag') 2736 b.data('ABCD') 2737 b.data(X('EFGH')) 2738 
b.data('IJKL') 2739 b.end('root') 2740 2741 elem = b.close() 2742 self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL') 2743 2744 def test_subscr(self): 2745 # Issue #27863 2746 class X: 2747 def __index__(self): 2748 del e[:] 2749 return 1 2750 2751 e = ET.Element('elem') 2752 e.append(ET.Element('child')) 2753 e[:X()] # shouldn't crash 2754 2755 e.append(ET.Element('child')) 2756 e[0:10:X()] # shouldn't crash 2757 2758 def test_ass_subscr(self): 2759 # Issue #27863 2760 class X: 2761 def __index__(self): 2762 e[:] = [] 2763 return 1 2764 2765 e = ET.Element('elem') 2766 for _ in range(10): 2767 e.insert(0, ET.Element('child')) 2768 2769 e[0:10:X()] = [] # shouldn't crash 2770 2771 def test_treebuilder_start(self): 2772 # Issue #27863 2773 def element_factory(x, y): 2774 return [] 2775 b = ET.TreeBuilder(element_factory=element_factory) 2776 2777 b.start('tag', {}) 2778 b.data('ABCD') 2779 self.assertRaises(AttributeError, b.start, 'tag2', {}) 2780 del b 2781 gc_collect() 2782 2783 def test_treebuilder_end(self): 2784 # Issue #27863 2785 def element_factory(x, y): 2786 return [] 2787 b = ET.TreeBuilder(element_factory=element_factory) 2788 2789 b.start('tag', {}) 2790 b.data('ABCD') 2791 self.assertRaises(AttributeError, b.end, 'tag') 2792 del b 2793 gc_collect() 2794 2795 2796class MutatingElementPath(str): 2797 def __new__(cls, elem, *args): 2798 self = str.__new__(cls, *args) 2799 self.elem = elem 2800 return self 2801 def __eq__(self, o): 2802 del self.elem[:] 2803 return True 2804MutatingElementPath.__hash__ = str.__hash__ 2805 2806class BadElementPath(str): 2807 def __eq__(self, o): 2808 raise 1/0 2809BadElementPath.__hash__ = str.__hash__ 2810 2811class BadElementPathTest(ElementTestCase, unittest.TestCase): 2812 def setUp(self): 2813 super().setUp() 2814 from xml.etree import ElementPath 2815 self.path_cache = ElementPath._cache 2816 ElementPath._cache = {} 2817 2818 def tearDown(self): 2819 from xml.etree import ElementPath 2820 ElementPath._cache = 
self.path_cache 2821 super().tearDown() 2822 2823 def test_find_with_mutating(self): 2824 e = ET.Element('foo') 2825 e.extend([ET.Element('bar')]) 2826 e.find(MutatingElementPath(e, 'x')) 2827 2828 def test_find_with_error(self): 2829 e = ET.Element('foo') 2830 e.extend([ET.Element('bar')]) 2831 try: 2832 e.find(BadElementPath('x')) 2833 except ZeroDivisionError: 2834 pass 2835 2836 def test_findtext_with_mutating(self): 2837 e = ET.Element('foo') 2838 e.extend([ET.Element('bar')]) 2839 e.findtext(MutatingElementPath(e, 'x')) 2840 2841 def test_findtext_with_error(self): 2842 e = ET.Element('foo') 2843 e.extend([ET.Element('bar')]) 2844 try: 2845 e.findtext(BadElementPath('x')) 2846 except ZeroDivisionError: 2847 pass 2848 2849 def test_findtext_with_falsey_text_attribute(self): 2850 root_elem = ET.Element('foo') 2851 sub_elem = ET.SubElement(root_elem, 'bar') 2852 falsey = ["", 0, False, [], (), {}] 2853 for val in falsey: 2854 sub_elem.text = val 2855 self.assertEqual(root_elem.findtext('./bar'), val) 2856 2857 def test_findtext_with_none_text_attribute(self): 2858 root_elem = ET.Element('foo') 2859 sub_elem = ET.SubElement(root_elem, 'bar') 2860 sub_elem.text = None 2861 self.assertEqual(root_elem.findtext('./bar'), '') 2862 2863 def test_findall_with_mutating(self): 2864 e = ET.Element('foo') 2865 e.extend([ET.Element('bar')]) 2866 e.findall(MutatingElementPath(e, 'x')) 2867 2868 def test_findall_with_error(self): 2869 e = ET.Element('foo') 2870 e.extend([ET.Element('bar')]) 2871 try: 2872 e.findall(BadElementPath('x')) 2873 except ZeroDivisionError: 2874 pass 2875 2876 2877class ElementTreeTypeTest(unittest.TestCase): 2878 def test_istype(self): 2879 self.assertIsInstance(ET.ParseError, type) 2880 self.assertIsInstance(ET.QName, type) 2881 self.assertIsInstance(ET.ElementTree, type) 2882 self.assertIsInstance(ET.Element, type) 2883 self.assertIsInstance(ET.TreeBuilder, type) 2884 self.assertIsInstance(ET.XMLParser, type) 2885 2886 def 
test_Element_subclass_trivial(self): 2887 class MyElement(ET.Element): 2888 pass 2889 2890 mye = MyElement('foo') 2891 self.assertIsInstance(mye, ET.Element) 2892 self.assertIsInstance(mye, MyElement) 2893 self.assertEqual(mye.tag, 'foo') 2894 2895 # test that attribute assignment works (issue 14849) 2896 mye.text = "joe" 2897 self.assertEqual(mye.text, "joe") 2898 2899 def test_Element_subclass_constructor(self): 2900 class MyElement(ET.Element): 2901 def __init__(self, tag, attrib={}, **extra): 2902 super(MyElement, self).__init__(tag + '__', attrib, **extra) 2903 2904 mye = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4) 2905 self.assertEqual(mye.tag, 'foo__') 2906 self.assertEqual(sorted(mye.items()), 2907 [('a', 1), ('b', 2), ('c', 3), ('d', 4)]) 2908 2909 def test_Element_subclass_new_method(self): 2910 class MyElement(ET.Element): 2911 def newmethod(self): 2912 return self.tag 2913 2914 mye = MyElement('joe') 2915 self.assertEqual(mye.newmethod(), 'joe') 2916 2917 def test_Element_subclass_find(self): 2918 class MyElement(ET.Element): 2919 pass 2920 2921 e = ET.Element('foo') 2922 e.text = 'text' 2923 sub = MyElement('bar') 2924 sub.text = 'subtext' 2925 e.append(sub) 2926 self.assertEqual(e.findtext('bar'), 'subtext') 2927 self.assertEqual(e.find('bar').tag, 'bar') 2928 found = list(e.findall('bar')) 2929 self.assertEqual(len(found), 1, found) 2930 self.assertEqual(found[0].tag, 'bar') 2931 2932 2933class ElementFindTest(unittest.TestCase): 2934 def test_find_simple(self): 2935 e = ET.XML(SAMPLE_XML) 2936 self.assertEqual(e.find('tag').tag, 'tag') 2937 self.assertEqual(e.find('section/tag').tag, 'tag') 2938 self.assertEqual(e.find('./tag').tag, 'tag') 2939 2940 e[2] = ET.XML(SAMPLE_SECTION) 2941 self.assertEqual(e.find('section/nexttag').tag, 'nexttag') 2942 2943 self.assertEqual(e.findtext('./tag'), 'text') 2944 self.assertEqual(e.findtext('section/tag'), 'subtext') 2945 2946 # section/nexttag is found but has no text 2947 
class ElementFindTest(unittest.TestCase):
    """Tests for find(), findtext() and findall() path expressions."""

    def test_find_simple(self):
        e = ET.XML(SAMPLE_XML)
        self.assertEqual(e.find('tag').tag, 'tag')
        self.assertEqual(e.find('section/tag').tag, 'tag')
        self.assertEqual(e.find('./tag').tag, 'tag')

        # Swap the third child for the richer SAMPLE_SECTION tree.
        e[2] = ET.XML(SAMPLE_SECTION)
        self.assertEqual(e.find('section/nexttag').tag, 'nexttag')

        self.assertEqual(e.findtext('./tag'), 'text')
        self.assertEqual(e.findtext('section/tag'), 'subtext')

        # section/nexttag is found but has no text
        self.assertEqual(e.findtext('section/nexttag'), '')
        self.assertEqual(e.findtext('section/nexttag', 'default'), '')

        # tog doesn't exist and 'default' kicks in
        self.assertIsNone(e.findtext('tog'))
        self.assertEqual(e.findtext('tog', 'default'), 'default')

        # Issue #16922
        self.assertEqual(ET.XML('<tag><empty /></tag>').findtext('empty'), '')

    def test_find_xpath(self):
        LINEAR_XML = '''
        <body>
            <tag class='a'/>
            <tag class='b'/>
            <tag class='c'/>
            <tag class='d'/>
        </body>'''
        e = ET.XML(LINEAR_XML)

        # Test for numeric indexing and last()
        self.assertEqual(e.find('./tag[1]').attrib['class'], 'a')
        self.assertEqual(e.find('./tag[2]').attrib['class'], 'b')
        self.assertEqual(e.find('./tag[last()]').attrib['class'], 'd')
        self.assertEqual(e.find('./tag[last()-1]').attrib['class'], 'c')
        self.assertEqual(e.find('./tag[last()-2]').attrib['class'], 'b')

        # Out-of-range or malformed positions are rejected as syntax errors.
        self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[0]')
        self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[-1]')
        self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]')
        self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]')

    def test_findall(self):
        e = ET.XML(SAMPLE_XML)
        e[2] = ET.XML(SAMPLE_SECTION)
        self.assertEqual(summarize_list(e.findall('.')), ['body'])
        self.assertEqual(summarize_list(e.findall('tag')), ['tag', 'tag'])
        self.assertEqual(summarize_list(e.findall('tog')), [])
        self.assertEqual(summarize_list(e.findall('tog/foo')), [])
        self.assertEqual(summarize_list(e.findall('*')),
            ['tag', 'tag', 'section'])
        self.assertEqual(summarize_list(e.findall('.//tag')),
            ['tag'] * 4)
        self.assertEqual(summarize_list(e.findall('section/tag')), ['tag'])
        self.assertEqual(summarize_list(e.findall('section//tag')), ['tag'] * 2)
        self.assertEqual(summarize_list(e.findall('section/*')),
            ['tag', 'nexttag', 'nextsection'])
        self.assertEqual(summarize_list(e.findall('section//*')),
            ['tag', 'nexttag', 'nextsection', 'tag'])
        self.assertEqual(summarize_list(e.findall('section/.//*')),
            ['tag', 'nexttag', 'nextsection', 'tag'])
        self.assertEqual(summarize_list(e.findall('*/*')),
            ['tag', 'nexttag', 'nextsection'])
        self.assertEqual(summarize_list(e.findall('*//*')),
            ['tag', 'nexttag', 'nextsection', 'tag'])
        self.assertEqual(summarize_list(e.findall('*/tag')), ['tag'])
        self.assertEqual(summarize_list(e.findall('*/./tag')), ['tag'])
        self.assertEqual(summarize_list(e.findall('./tag')), ['tag'] * 2)
        self.assertEqual(summarize_list(e.findall('././tag')), ['tag'] * 2)

        # Attribute and child-element predicates.
        self.assertEqual(summarize_list(e.findall('.//tag[@class]')),
            ['tag'] * 3)
        self.assertEqual(summarize_list(e.findall('.//tag[@class="a"]')),
            ['tag'])
        self.assertEqual(summarize_list(e.findall('.//tag[@class!="a"]')),
            ['tag'] * 2)
        self.assertEqual(summarize_list(e.findall('.//tag[@class="b"]')),
            ['tag'] * 2)
        self.assertEqual(summarize_list(e.findall('.//tag[@class!="b"]')),
            ['tag'])
        self.assertEqual(summarize_list(e.findall('.//tag[@id]')),
            ['tag'])
        self.assertEqual(summarize_list(e.findall('.//section[tag]')),
            ['section'])
        self.assertEqual(summarize_list(e.findall('.//section[element]')), [])
        self.assertEqual(summarize_list(e.findall('../tag')), [])
        self.assertEqual(summarize_list(e.findall('section/../tag')),
            ['tag'] * 2)
        self.assertEqual(e.findall('section//'), e.findall('section//*'))

        # Child-text predicates, with whitespace in various positions.
        self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")),
            ['section'])
        self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")),
            ['section'])
        self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")),
            ['section'])
        self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
            ['section'])
        self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")),
            ['section'])

        # Negations of above tests. They match nothing because the sole section
        # tag has subtext.
        self.assertEqual(summarize_list(e.findall(".//section[tag!='subtext']")),
            [])
        self.assertEqual(summarize_list(e.findall(".//section[tag !='subtext']")),
            [])
        self.assertEqual(summarize_list(e.findall(".//section[tag!= 'subtext']")),
            [])
        self.assertEqual(summarize_list(e.findall(".//section[tag != 'subtext']")),
            [])
        self.assertEqual(summarize_list(e.findall(".//section[ tag != 'subtext' ]")),
            [])

        # Own-text predicates ('.').
        self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")),
            ['tag'])
        self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")),
            ['tag'])
        self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')),
            ['tag'])
        self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')),
            ['tag'])
        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
            ['tag'])
        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext ']")),
            [])
        self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")),
            [])

        # Negations of above tests.
        # Matches everything but the tag containing subtext
        self.assertEqual(summarize_list(e.findall(".//tag[.!='subtext']")),
            ['tag'] * 3)
        self.assertEqual(summarize_list(e.findall(".//tag[. !='subtext']")),
            ['tag'] * 3)
        self.assertEqual(summarize_list(e.findall('.//tag[.!= "subtext"]')),
            ['tag'] * 3)
        self.assertEqual(summarize_list(e.findall('.//tag[ . != "subtext" ]')),
            ['tag'] * 3)
        self.assertEqual(summarize_list(e.findall(".//tag[. != 'subtext']")),
            ['tag'] * 3)
        # Matches all tags.
        self.assertEqual(summarize_list(e.findall(".//tag[. != 'subtext ']")),
            ['tag'] * 4)
        self.assertEqual(summarize_list(e.findall(".//tag[.!= ' subtext']")),
            ['tag'] * 4)

        # duplicate section => 2x tag matches
        e[1] = e[2]
        self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
            ['section', 'section'])
        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
            ['tag', 'tag'])

    def test_test_find_with_ns(self):
        # Unqualified names do not match elements in a default namespace;
        # the '{uri}tag' form does.
        e = ET.XML(SAMPLE_XML_NS)
        self.assertEqual(summarize_list(e.findall('tag')), [])
        self.assertEqual(
            summarize_list(e.findall("{http://effbot.org/ns}tag")),
            ['{http://effbot.org/ns}tag'] * 2)
        self.assertEqual(
            summarize_list(e.findall(".//{http://effbot.org/ns}tag")),
            ['{http://effbot.org/ns}tag'] * 3)

    def test_findall_different_nsmaps(self):
        root = ET.XML('''
            <a xmlns:x="X" xmlns:y="Y">
                <x:b><c/></x:b>
                <b/>
                <c><x:b/><b/></c><y:b/>
            </a>''')
        # The same prefix is resolved through whichever map is passed in;
        # an '' key supplies the namespace for unprefixed names.
        nsmap = {'xx': 'X'}
        self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
        self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
        nsmap = {'xx': 'Y'}
        self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
        self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
        nsmap = {'xx': 'X', '': 'Y'}
        self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
        self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)

    def test_findall_wildcard(self):
        root = ET.XML('''
            <a xmlns:x="X" xmlns:y="Y">
                <x:b><c/></x:b>
                <b/>
                <c><x:b/><b/></c><y:b/>
            </a>''')
        root.append(ET.Comment('test'))

        # Direct children only.
        self.assertEqual(summarize_list(root.findall("{*}b")),
                         ['{X}b', 'b', '{Y}b'])
        self.assertEqual(summarize_list(root.findall("{*}c")),
                         ['c'])
        self.assertEqual(summarize_list(root.findall("{X}*")),
                         ['{X}b'])
        self.assertEqual(summarize_list(root.findall("{Y}*")),
                         ['{Y}b'])
        self.assertEqual(summarize_list(root.findall("{}*")),
                         ['b', 'c'])
        self.assertEqual(summarize_list(root.findall("{}b")),  # only for consistency
                         ['b'])
        self.assertEqual(summarize_list(root.findall("{}b")),
                         summarize_list(root.findall("b")))
        self.assertEqual(summarize_list(root.findall("{*}*")),
                         ['{X}b', 'b', 'c', '{Y}b'])
        # This is an unfortunate difference, but that's how find('*') works.
        self.assertEqual(summarize_list(root.findall("{*}*") + [root[-1]]),
                         summarize_list(root.findall("*")))

        # All descendants.
        self.assertEqual(summarize_list(root.findall(".//{*}b")),
                         ['{X}b', 'b', '{X}b', 'b', '{Y}b'])
        self.assertEqual(summarize_list(root.findall(".//{*}c")),
                         ['c', 'c'])
        self.assertEqual(summarize_list(root.findall(".//{X}*")),
                         ['{X}b', '{X}b'])
        self.assertEqual(summarize_list(root.findall(".//{Y}*")),
                         ['{Y}b'])
        self.assertEqual(summarize_list(root.findall(".//{}*")),
                         ['c', 'b', 'c', 'b'])
        self.assertEqual(summarize_list(root.findall(".//{}b")),  # only for consistency
                         ['b', 'b'])
        self.assertEqual(summarize_list(root.findall(".//{}b")),
                         summarize_list(root.findall(".//b")))

    def test_bad_find(self):
        e = ET.XML(SAMPLE_XML)
        with self.assertRaisesRegex(SyntaxError, 'cannot use absolute path'):
            e.findall('/tag')

    def test_find_through_ElementTree(self):
        e = ET.XML(SAMPLE_XML)
        self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag')
        self.assertEqual(ET.ElementTree(e).findtext('tag'), 'text')
        self.assertEqual(summarize_list(ET.ElementTree(e).findall('tag')),
            ['tag'] * 2)
        # this produces a warning
        # The library's message has TWO spaces after "version."; msg is used
        # as a regular expression searched in the warning text, so both
        # spaces must be present for it to match.
        msg = ("This search is broken in 1.3 and earlier, and will be fixed "
               "in a future version.  If you rely on the current behaviour, "
               "change it to '.+'")
        with self.assertWarnsRegex(FutureWarning, msg):
            it = ET.ElementTree(e).findall('//tag')
        self.assertEqual(summarize_list(it), ['tag'] * 3)
class ElementIterTest(unittest.TestCase):
    """Tests for Element.iter(), Element.itertext() and related iterators."""

    def _ilist(self, elem, tag=None):
        # Summarised list of everything elem.iter(tag) yields.
        return summarize_list(elem.iter(tag))

    def test_basic(self):
        doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
        self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
        self.assertEqual(self._ilist(doc.find('body')), ['body', 'i'])
        self.assertEqual(next(doc.iter()).tag, 'html')
        self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
        self.assertEqual(''.join(doc.find('body').itertext()),
            'this is a paragraph.')
        self.assertEqual(next(doc.itertext()), 'this is a ')

        # iterparse should return an iterator
        sourcefile = serialize(doc, to_string=False)
        self.assertEqual(next(ET.iterparse(sourcefile))[0], 'end')

        # With an explicit parser too (issue #9708)
        sourcefile = serialize(doc, to_string=False)
        parser = ET.XMLParser(target=ET.TreeBuilder())
        self.assertEqual(next(ET.iterparse(sourcefile, parser=parser))[0], 'end')

        tree = ET.ElementTree(None)
        self.assertRaises(AttributeError, tree.iter)

        # Issue #16913
        # The ampersands have to be written as &amp; in the document: a bare
        # '&' is not well-formed XML and would make ET.XML() fail before
        # itertext() is ever exercised.
        doc = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
        self.assertEqual(''.join(doc.itertext()), 'a&b&c&')

    def test_corners(self):
        # single root, no subelements
        a = ET.Element('a')
        self.assertEqual(self._ilist(a), ['a'])

        # one child
        b = ET.SubElement(a, 'b')
        self.assertEqual(self._ilist(a), ['a', 'b'])

        # one child and one grandchild
        c = ET.SubElement(b, 'c')
        self.assertEqual(self._ilist(a), ['a', 'b', 'c'])

        # two children, only first with grandchild
        d = ET.SubElement(a, 'd')
        self.assertEqual(self._ilist(a), ['a', 'b', 'c', 'd'])

        # replace first child by second
        a[0] = a[1]
        del a[1]
        self.assertEqual(self._ilist(a), ['a', 'd'])

    def test_iter_by_tag(self):
        doc = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3)
        self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)

        # test that iter also accepts 'tag' as a keyword arg
        self.assertEqual(
            summarize_list(doc.iter(tag='room')),
            ['room'] * 3)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(summarize_list(doc.iter()), all_tags)
        self.assertEqual(self._ilist(doc), all_tags)
        self.assertEqual(self._ilist(doc, '*'), all_tags)

    def test_copy(self):
        # The iterator returned by iter() cannot be shallow-copied.
        a = ET.Element('a')
        it = a.iter()
        with self.assertRaises(TypeError):
            copy.copy(it)

    def test_pickle(self):
        # ...nor pickled, under any protocol.
        a = ET.Element('a')
        it = a.iter()
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            with self.assertRaises((TypeError, pickle.PicklingError)):
                pickle.dumps(it, proto)
class TreeBuilderTest(unittest.TestCase):
    """Tests for ET.TreeBuilder and for custom parser targets."""

    sample1 = ('<!DOCTYPE html PUBLIC'
        ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        '<html>text<div>subtext</div>tail</html>')

    sample2 = '''<toplevel>sometext</toplevel>'''

    def _check_sample1_element(self, e):
        # e is expected to be the tree built from sample1.
        self.assertEqual(e.tag, 'html')
        self.assertEqual(e.text, 'text')
        self.assertEqual(e.tail, None)
        self.assertEqual(e.attrib, {})
        children = list(e)
        self.assertEqual(len(children), 1)
        child = children[0]
        self.assertEqual(child.tag, 'div')
        self.assertEqual(child.text, 'subtext')
        self.assertEqual(child.tail, 'tail')
        self.assertEqual(child.attrib, {})

    def test_dummy_builder(self):
        class BaseDummyBuilder:
            def close(self):
                return 42

        class DummyBuilder(BaseDummyBuilder):
            data = start = end = lambda *a: None

        # parser.close() returns whatever the target's close() returns ...
        parser = ET.XMLParser(target=DummyBuilder())
        parser.feed(self.sample1)
        self.assertEqual(parser.close(), 42)

        parser = ET.XMLParser(target=BaseDummyBuilder())
        parser.feed(self.sample1)
        self.assertEqual(parser.close(), 42)

        # ... and None when the target has no close() at all.
        parser = ET.XMLParser(target=object())
        parser.feed(self.sample1)
        self.assertIsNone(parser.close())

    def test_treebuilder_comment(self):
        b = ET.TreeBuilder()
        self.assertEqual(b.comment('ctext').tag, ET.Comment)
        self.assertEqual(b.comment('ctext').text, 'ctext')

        b = ET.TreeBuilder(comment_factory=ET.Comment)
        self.assertEqual(b.comment('ctext').tag, ET.Comment)
        self.assertEqual(b.comment('ctext').text, 'ctext')

        b = ET.TreeBuilder(comment_factory=len)
        self.assertEqual(b.comment('ctext'), len('ctext'))

    def test_treebuilder_pi(self):
        b = ET.TreeBuilder()
        self.assertEqual(b.pi('target', None).tag, ET.PI)
        self.assertEqual(b.pi('target', None).text, 'target')

        b = ET.TreeBuilder(pi_factory=ET.PI)
        self.assertEqual(b.pi('target').tag, ET.PI)
        self.assertEqual(b.pi('target').text, "target")
        self.assertEqual(b.pi('pitarget', ' text ').tag, ET.PI)
        # ET.PI joins target and text with a single space; the text here
        # already starts with a space, so TWO spaces follow "pitarget".
        self.assertEqual(b.pi('pitarget', ' text ').text, "pitarget  text ")

        b = ET.TreeBuilder(pi_factory=lambda target, text: (len(target), text))
        self.assertEqual(b.pi('target'), (len('target'), None))
        self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text '))

    def test_late_tail(self):
        # Issue #37399: The tail of an ignored comment could overwrite the text before it.
        class TreeBuilderSubclass(ET.TreeBuilder):
            pass

        xml = "<a>text<!-- comment -->tail</a>"
        a = ET.fromstring(xml)
        self.assertEqual(a.text, "texttail")

        parser = ET.XMLParser(target=TreeBuilderSubclass())
        parser.feed(xml)
        a = parser.close()
        self.assertEqual(a.text, "texttail")

        xml = "<a>text<?pi data?>tail</a>"
        a = ET.fromstring(xml)
        self.assertEqual(a.text, "texttail")

        xml = "<a>text<?pi data?>tail</a>"
        parser = ET.XMLParser(target=TreeBuilderSubclass())
        parser.feed(xml)
        a = parser.close()
        self.assertEqual(a.text, "texttail")

    def test_late_tail_mix_pi_comments(self):
        # Issue #37399: The tail of an ignored comment could overwrite the text before it.
        # Test appending tails to comments/pis.
        class TreeBuilderSubclass(ET.TreeBuilder):
            pass

        xml = "<a>text<?pi1?> <!-- comment -->\n<?pi2?>tail</a>"
        parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True))
        parser.feed(xml)
        a = parser.close()
        self.assertEqual(a[0].text, ' comment ')
        self.assertEqual(a[0].tail, '\ntail')
        self.assertEqual(a.text, "text ")

        parser = ET.XMLParser(target=TreeBuilderSubclass(insert_comments=True))
        parser.feed(xml)
        a = parser.close()
        self.assertEqual(a[0].text, ' comment ')
        self.assertEqual(a[0].tail, '\ntail')
        self.assertEqual(a.text, "text ")

        xml = "<a>text<!-- comment -->\n<?pi data?>tail</a>"
        parser = ET.XMLParser(target=ET.TreeBuilder(insert_pis=True))
        parser.feed(xml)
        a = parser.close()
        self.assertEqual(a[0].text, 'pi data')
        self.assertEqual(a[0].tail, 'tail')
        self.assertEqual(a.text, "text\n")

        parser = ET.XMLParser(target=TreeBuilderSubclass(insert_pis=True))
        parser.feed(xml)
        a = parser.close()
        self.assertEqual(a[0].text, 'pi data')
        self.assertEqual(a[0].tail, 'tail')
        self.assertEqual(a.text, "text\n")

    def test_treebuilder_elementfactory_none(self):
        # element_factory=None builds the same tree as the default.
        parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None))
        parser.feed(self.sample1)
        e = parser.close()
        self._check_sample1_element(e)

    def test_subclass(self):
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        tb = MyTreeBuilder()
        self.assertEqual(tb.foobar(10), 20)

        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample1)

        e = parser.close()
        self._check_sample1_element(e)

    def test_subclass_comment_pi(self):
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        tb = MyTreeBuilder(comment_factory=ET.Comment, pi_factory=ET.PI)
        self.assertEqual(tb.foobar(10), 20)

        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample1)
        # A comment and a PI after the root element must not disturb the tree.
        parser.feed('<!-- a comment--><?and a pi?>')

        e = parser.close()
        self._check_sample1_element(e)

    def test_element_factory(self):
        lst = []
        def myfactory(tag, attrib):
            nonlocal lst
            lst.append(tag)
            return ET.Element(tag, attrib)

        tb = ET.TreeBuilder(element_factory=myfactory)
        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample2)
        parser.close()

        # The factory was called exactly once, for the single element.
        self.assertEqual(lst, ['toplevel'])

    def _check_element_factory_class(self, cls):
        # Build sample1 with cls as the element factory and verify both the
        # type of the root and the shape of the tree.
        tb = ET.TreeBuilder(element_factory=cls)

        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample1)
        e = parser.close()
        self.assertIsInstance(e, cls)
        self._check_sample1_element(e)

    def test_element_factory_subclass(self):
        class MyElement(ET.Element):
            pass
        self._check_element_factory_class(MyElement)

    def test_element_factory_pure_python_subclass(self):
        # Mimic SimpleTAL's behaviour (issue #16089): both versions of
        # TreeBuilder should be able to cope with a subclass of the
        # pure Python Element class.
        base = ET._Element_Py
        # Not from a C extension
        self.assertEqual(base.__module__, 'xml.etree.ElementTree')
        # Force some multiple inheritance with a C class to make things
        # more interesting.
        class MyElement(base, ValueError):
            pass
        self._check_element_factory_class(MyElement)

    def test_doctype(self):
        class DoctypeParser:
            _doctype = None

            def doctype(self, name, pubid, system):
                self._doctype = (name, pubid, system)

            def close(self):
                return self._doctype

        parser = ET.XMLParser(target=DoctypeParser())
        parser.feed(self.sample1)

        self.assertEqual(parser.close(),
            ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))

    def test_builder_lookup_errors(self):
        class RaisingBuilder:
            # Target whose attribute lookup raises `what` for the single
            # name `raise_in` and yields a do-nothing handler otherwise.
            def __init__(self, raise_in=None, what=ValueError):
                self.raise_in = raise_in
                self.what = what

            def __getattr__(self, name):
                if name == self.raise_in:
                    raise self.what(self.raise_in)
                def handle(*args):
                    pass
                return handle

        ET.XMLParser(target=RaisingBuilder())
        # cET also checks for 'close' and 'doctype', PyET does it only at need
        for event in ('start', 'data', 'end', 'comment', 'pi'):
            with self.assertRaisesRegex(ValueError, event):
                ET.XMLParser(target=RaisingBuilder(event))

        # AttributeError during lookup just means "handler not provided".
        ET.XMLParser(target=RaisingBuilder(what=AttributeError))
        for event in ('start', 'data', 'end', 'comment', 'pi'):
            parser = ET.XMLParser(target=RaisingBuilder(event, what=AttributeError))
            parser.feed(self.sample1)
            self.assertIsNone(parser.close())
class XMLParserTest(unittest.TestCase):
    """Tests for ET.XMLParser construction, subclassing and doctype handling."""

    sample1 = b'<file><line>22</line></file>'
    sample2 = (
        b'<!DOCTYPE html PUBLIC'
        b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        b'<html>text</html>'
    )
    sample3 = (
        '<?xml version="1.0" encoding="iso-8859-1"?>\n'
        '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>'
    )

    def _check_sample_element(self, e):
        # e is expected to be the tree built from sample1.
        self.assertEqual(e.tag, 'file')
        first_child = e[0]
        self.assertEqual(first_child.tag, 'line')
        self.assertEqual(first_child.text, '22')

    def test_constructor_args(self):
        builder = ET.TreeBuilder()
        parser = ET.XMLParser(encoding='utf-8', target=builder)
        parser.feed(self.sample1)
        root = parser.close()
        self._check_sample_element(root)

    def test_subclass(self):
        class MyParser(ET.XMLParser):
            pass

        parser = MyParser()
        parser.feed(self.sample1)
        root = parser.close()
        self._check_sample_element(root)

    def test_doctype_warning(self):
        # Parsing a document with a DOCTYPE must not emit a
        # DeprecationWarning (it would be raised as an error here).
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            parser = ET.XMLParser()
            parser.feed(self.sample2)
            parser.close()

    def test_subclass_doctype(self):
        expected_doctype = (
            'html',
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd',
        )
        parser_call = None

        class MyParserWithDoctype(ET.XMLParser):
            def doctype(self, *args, **kwargs):
                nonlocal parser_call
                parser_call = (args, kwargs)

        # A doctype() method on the parser subclass triggers a warning and
        # is never called.
        parser = MyParserWithDoctype()
        with self.assertWarnsRegex(RuntimeWarning, 'doctype'):
            parser.feed(self.sample2)
        parser.close()
        self.assertIsNone(parser_call)

        # With a target that defines doctype(), the target receives the
        # call and no warning is issued.
        parser_call = target_call = None
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            warnings.simplefilter('error', RuntimeWarning)

            class DoctypeParser:
                def doctype(self, name, pubid, system):
                    nonlocal target_call
                    target_call = (name, pubid, system)

            parser = MyParserWithDoctype(target=DoctypeParser())
            parser.feed(self.sample2)
            parser.close()
            self.assertIsNone(parser_call)
            self.assertEqual(target_call, expected_doctype)

    def test_inherited_doctype(self):
        '''Ensure that ordinary usage is not deprecated (Issue 19176)'''
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            warnings.simplefilter('error', RuntimeWarning)

            class MyParserWithoutDoctype(ET.XMLParser):
                pass

            parser = MyParserWithoutDoctype()
            parser.feed(self.sample2)
            parser.close()

    def test_parse_string(self):
        # sample3 is a str, so the encoding named in its XML declaration
        # must not be applied to the non-Latin-1 characters it contains.
        money = '$\xa3\u20ac\U0001017b'
        parser = ET.XMLParser(target=ET.TreeBuilder())
        parser.feed(self.sample3)
        root = parser.close()
        self.assertEqual(root.tag, 'money')
        self.assertEqual(root.attrib['value'], money)
        self.assertEqual(root.text, money)
class NamespaceParseTest(unittest.TestCase):
    """Checks findall() with '{uri}tag' paths on a multi-namespace document."""

    def test_find_with_namespace(self):
        prefix_map = {'h': 'hello', 'f': 'foo'}
        doc = ET.fromstring(SAMPLE_XML_NS_ELEMS)

        # (path, number of matching elements)
        expectations = (
            ('{hello}table', 1),
            ('.//{hello}td', 2),
            ('.//{foo}name', 1),
        )
        for path, count in expectations:
            self.assertEqual(len(doc.findall(path, prefix_map)), count)
class ElementSlicingTest(unittest.TestCase):
    """Tests for indexing, slicing, slice deletion and slice assignment."""

    def _elem_tags(self, elemlist):
        return [elem.tag for elem in elemlist]

    def _subelem_tags(self, elem):
        return self._elem_tags(list(elem))

    def _make_elem_with_children(self, numchildren):
        """Return an Element tagged 'a' holding *numchildren* children.

        The children are tagged 'a0', 'a1', ... in order.
        """
        parent = ET.Element('a')
        for index in range(numchildren):
            ET.SubElement(parent, f'a{index}')
        return parent

    def test_getslice_single_index(self):
        e = self._make_elem_with_children(10)

        self.assertEqual(e[1].tag, 'a1')
        self.assertEqual(e[-2].tag, 'a8')

        with self.assertRaises(IndexError):
            e[12]
        with self.assertRaises(IndexError):
            e[-12]

    def test_getslice_range(self):
        e = self._make_elem_with_children(6)

        cases = (
            (slice(3, None), ['a3', 'a4', 'a5']),
            (slice(3, 6), ['a3', 'a4', 'a5']),
            (slice(3, 16), ['a3', 'a4', 'a5']),
            (slice(3, 5), ['a3', 'a4']),
            (slice(3, -1), ['a3', 'a4']),
            (slice(None, 2), ['a0', 'a1']),
        )
        for index, expected in cases:
            self.assertEqual(self._elem_tags(e[index]), expected)

    def test_getslice_steps(self):
        e = self._make_elem_with_children(10)

        cases = (
            (slice(8, 10, 1), ['a8', 'a9']),
            (slice(None, None, 3), ['a0', 'a3', 'a6', 'a9']),
            (slice(None, None, 8), ['a0', 'a8']),
            (slice(1, None, 8), ['a1', 'a9']),
            # Steps far beyond the sequence length select only the start.
            (slice(3, None, sys.maxsize), ['a3']),
            (slice(3, None, sys.maxsize << 64), ['a3']),
        )
        for index, expected in cases:
            self.assertEqual(self._elem_tags(e[index]), expected)

    def test_getslice_negative_steps(self):
        e = self._make_elem_with_children(4)

        cases = (
            (slice(None, None, -1), ['a3', 'a2', 'a1', 'a0']),
            (slice(None, None, -2), ['a3', 'a1']),
            (slice(3, None, -sys.maxsize), ['a3']),
            (slice(3, None, -sys.maxsize - 1), ['a3']),
            (slice(3, None, -sys.maxsize << 64), ['a3']),
        )
        for index, expected in cases:
            self.assertEqual(self._elem_tags(e[index]), expected)

    def test_delslice(self):
        # (number of children, slice to delete, tags left afterwards)
        cases = (
            (4, slice(0, 2), ['a2', 'a3']),
            (4, slice(0, None), []),
            (4, slice(None, None, -1), []),
            (4, slice(None, None, -2), ['a0', 'a2']),
            (4, slice(1, None, 2), ['a0', 'a2']),
            (2, slice(None, None, 2), ['a1']),
        )
        for count, index, remaining in cases:
            e = self._make_elem_with_children(count)
            del e[index]
            self.assertEqual(self._subelem_tags(e), remaining)

    def test_setslice_single_index(self):
        e = self._make_elem_with_children(4)
        e[1] = ET.Element('b')
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])

        e[-2] = ET.Element('c')
        after_second_write = ['a0', 'b', 'c', 'a3']
        self.assertEqual(self._subelem_tags(e), after_second_write)

        # Out-of-range writes fail and leave the children untouched.
        with self.assertRaises(IndexError):
            e[5] = ET.Element('d')
        with self.assertRaises(IndexError):
            e[-5] = ET.Element('d')
        self.assertEqual(self._subelem_tags(e), after_second_write)

    def test_setslice_range(self):
        # Same length as the slice.
        e = self._make_elem_with_children(4)
        e[1:3] = [ET.Element('b%s' % i) for i in range(2)]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'a3'])

        # Shorter than the slice.
        e = self._make_elem_with_children(4)
        e[1:3] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a3'])

        # Longer than the slice.
        e = self._make_elem_with_children(4)
        e[1:3] = [ET.Element('b%s' % i) for i in range(3)]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'b2', 'a3'])

    def test_setslice_steps(self):
        e = self._make_elem_with_children(6)
        e[1:5:2] = [ET.Element('b%s' % i) for i in range(2)]
        self.assertEqual(self._subelem_tags(e),
                         ['a0', 'b0', 'a2', 'b1', 'a4', 'a5'])

        # Extended slices require a replacement of exactly matching length.
        e = self._make_elem_with_children(6)
        wrong_sized = (
            [ET.Element('b')],
            [ET.Element('b%s' % i) for i in range(3)],
            [],
        )
        for replacement in wrong_sized:
            with self.assertRaises(ValueError):
                e[1:5:2] = replacement
        self.assertEqual(self._subelem_tags(e),
                         ['a0', 'a1', 'a2', 'a3', 'a4', 'a5'])

        e = self._make_elem_with_children(4)
        e[1::sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
        e[1::sys.maxsize << 64] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])

    def test_setslice_negative_steps(self):
        e = self._make_elem_with_children(4)
        e[2:0:-1] = [ET.Element('b%s' % i) for i in range(2)]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b1', 'b0', 'a3'])

        e = self._make_elem_with_children(4)
        wrong_sized = (
            [ET.Element('b')],
            [ET.Element('b%s' % i) for i in range(3)],
            [],
        )
        for replacement in wrong_sized:
            with self.assertRaises(ValueError):
                e[2:0:-1] = replacement
        self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3'])

        e = self._make_elem_with_children(4)
        e[1::-sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
        e[1::-sys.maxsize - 1] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])
        e[1::-sys.maxsize << 64] = [ET.Element('d')]
        self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3'])

    def test_issue123213_setslice_exception(self):
        e = ET.Element('tag')
        # Does not hide the internal exception when assigning to the element
        with self.assertRaises(ZeroDivisionError):
            e[:1] = (1/0 for i in range(2))

        # Still raises the TypeError when assigning with a non-iterable
        with self.assertRaises(TypeError):
            e[:1] = None

        # Preserve the original TypeError message when assigning.
        def raise_type_error():
            raise TypeError("mymessage")

        with self.assertRaisesRegex(TypeError, 'mymessage'):
            e[:1] = (raise_type_error() for i in range(2))
ET.Element("tag") 3831 elem.text = '\xe5\xf6\xf6<>' 3832 self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6<></tag>') 3833 self.assertEqual(serialize(elem, encoding="utf-8"), 3834 b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6<></tag>') 3835 self.assertEqual(serialize(elem, encoding="us-ascii"), 3836 b'<tag>åöö<></tag>') 3837 for enc in ("iso-8859-1", "utf-16", "utf-32"): 3838 self.assertEqual(serialize(elem, encoding=enc), 3839 ("<?xml version='1.0' encoding='%s'?>\n" 3840 "<tag>åöö<></tag>" % enc).encode(enc)) 3841 3842 elem = ET.Element("tag") 3843 elem.attrib["key"] = '\xe5\xf6\xf6<>' 3844 self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6<>" />') 3845 self.assertEqual(serialize(elem, encoding="utf-8"), 3846 b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6<>" />') 3847 self.assertEqual(serialize(elem, encoding="us-ascii"), 3848 b'<tag key="åöö<>" />') 3849 for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"): 3850 self.assertEqual(serialize(elem, encoding=enc), 3851 ("<?xml version='1.0' encoding='%s'?>\n" 3852 "<tag key=\"åöö<>\" />" % enc).encode(enc)) 3853 3854 def test_write_to_filename(self): 3855 self.addCleanup(os_helper.unlink, TESTFN) 3856 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3857 tree.write(TESTFN) 3858 with open(TESTFN, 'rb') as f: 3859 self.assertEqual(f.read(), b'''<site>ø</site>''') 3860 3861 def test_write_to_filename_with_encoding(self): 3862 self.addCleanup(os_helper.unlink, TESTFN) 3863 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3864 tree.write(TESTFN, encoding='utf-8') 3865 with open(TESTFN, 'rb') as f: 3866 self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''') 3867 3868 tree.write(TESTFN, encoding='ISO-8859-1') 3869 with open(TESTFN, 'rb') as f: 3870 self.assertEqual(f.read(), convlinesep( 3871 b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n''' 3872 b'''<site>\xf8</site>''')) 3873 3874 def test_write_to_filename_as_unicode(self): 3875 self.addCleanup(os_helper.unlink, TESTFN) 3876 with open(TESTFN, 'w') as 
f: 3877 encoding = f.encoding 3878 os_helper.unlink(TESTFN) 3879 3880 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3881 tree.write(TESTFN, encoding='unicode') 3882 with open(TESTFN, 'rb') as f: 3883 self.assertEqual(f.read(), b"<site>\xc3\xb8</site>") 3884 3885 def test_write_to_text_file(self): 3886 self.addCleanup(os_helper.unlink, TESTFN) 3887 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3888 with open(TESTFN, 'w', encoding='utf-8') as f: 3889 tree.write(f, encoding='unicode') 3890 self.assertFalse(f.closed) 3891 with open(TESTFN, 'rb') as f: 3892 self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''') 3893 3894 with open(TESTFN, 'w', encoding='ascii', errors='xmlcharrefreplace') as f: 3895 tree.write(f, encoding='unicode') 3896 self.assertFalse(f.closed) 3897 with open(TESTFN, 'rb') as f: 3898 self.assertEqual(f.read(), b'''<site>ø</site>''') 3899 3900 with open(TESTFN, 'w', encoding='ISO-8859-1') as f: 3901 tree.write(f, encoding='unicode') 3902 self.assertFalse(f.closed) 3903 with open(TESTFN, 'rb') as f: 3904 self.assertEqual(f.read(), b'''<site>\xf8</site>''') 3905 3906 def test_write_to_binary_file(self): 3907 self.addCleanup(os_helper.unlink, TESTFN) 3908 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3909 with open(TESTFN, 'wb') as f: 3910 tree.write(f) 3911 self.assertFalse(f.closed) 3912 with open(TESTFN, 'rb') as f: 3913 self.assertEqual(f.read(), b'''<site>ø</site>''') 3914 3915 def test_write_to_binary_file_with_encoding(self): 3916 self.addCleanup(os_helper.unlink, TESTFN) 3917 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3918 with open(TESTFN, 'wb') as f: 3919 tree.write(f, encoding='utf-8') 3920 self.assertFalse(f.closed) 3921 with open(TESTFN, 'rb') as f: 3922 self.assertEqual(f.read(), b'''<site>\xc3\xb8</site>''') 3923 3924 with open(TESTFN, 'wb') as f: 3925 tree.write(f, encoding='ISO-8859-1') 3926 self.assertFalse(f.closed) 3927 with open(TESTFN, 'rb') as f: 3928 self.assertEqual(f.read(), 3929 
b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n''' 3930 b'''<site>\xf8</site>''') 3931 3932 def test_write_to_binary_file_with_bom(self): 3933 self.addCleanup(os_helper.unlink, TESTFN) 3934 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3935 # test BOM writing to buffered file 3936 with open(TESTFN, 'wb') as f: 3937 tree.write(f, encoding='utf-16') 3938 self.assertFalse(f.closed) 3939 with open(TESTFN, 'rb') as f: 3940 self.assertEqual(f.read(), 3941 '''<?xml version='1.0' encoding='utf-16'?>\n''' 3942 '''<site>\xf8</site>'''.encode("utf-16")) 3943 # test BOM writing to non-buffered file 3944 with open(TESTFN, 'wb', buffering=0) as f: 3945 tree.write(f, encoding='utf-16') 3946 self.assertFalse(f.closed) 3947 with open(TESTFN, 'rb') as f: 3948 self.assertEqual(f.read(), 3949 '''<?xml version='1.0' encoding='utf-16'?>\n''' 3950 '''<site>\xf8</site>'''.encode("utf-16")) 3951 3952 def test_read_from_stringio(self): 3953 tree = ET.ElementTree() 3954 stream = io.StringIO('''<?xml version="1.0"?><site></site>''') 3955 tree.parse(stream) 3956 self.assertEqual(tree.getroot().tag, 'site') 3957 3958 def test_write_to_stringio(self): 3959 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3960 stream = io.StringIO() 3961 tree.write(stream, encoding='unicode') 3962 self.assertEqual(stream.getvalue(), '''<site>\xf8</site>''') 3963 3964 def test_read_from_bytesio(self): 3965 tree = ET.ElementTree() 3966 raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''') 3967 tree.parse(raw) 3968 self.assertEqual(tree.getroot().tag, 'site') 3969 3970 def test_write_to_bytesio(self): 3971 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3972 raw = io.BytesIO() 3973 tree.write(raw) 3974 self.assertEqual(raw.getvalue(), b'''<site>ø</site>''') 3975 3976 class dummy: 3977 pass 3978 3979 def test_read_from_user_text_reader(self): 3980 stream = io.StringIO('''<?xml version="1.0"?><site></site>''') 3981 reader = self.dummy() 3982 reader.read = stream.read 3983 tree = 
ET.ElementTree() 3984 tree.parse(reader) 3985 self.assertEqual(tree.getroot().tag, 'site') 3986 3987 def test_write_to_user_text_writer(self): 3988 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 3989 stream = io.StringIO() 3990 writer = self.dummy() 3991 writer.write = stream.write 3992 tree.write(writer, encoding='unicode') 3993 self.assertEqual(stream.getvalue(), '''<site>\xf8</site>''') 3994 3995 def test_read_from_user_binary_reader(self): 3996 raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''') 3997 reader = self.dummy() 3998 reader.read = raw.read 3999 tree = ET.ElementTree() 4000 tree.parse(reader) 4001 self.assertEqual(tree.getroot().tag, 'site') 4002 tree = ET.ElementTree() 4003 4004 def test_write_to_user_binary_writer(self): 4005 tree = ET.ElementTree(ET.XML('''<site>\xf8</site>''')) 4006 raw = io.BytesIO() 4007 writer = self.dummy() 4008 writer.write = raw.write 4009 tree.write(writer) 4010 self.assertEqual(raw.getvalue(), b'''<site>ø</site>''') 4011 4012 def test_write_to_user_binary_writer_with_bom(self): 4013 tree = ET.ElementTree(ET.XML('''<site />''')) 4014 raw = io.BytesIO() 4015 writer = self.dummy() 4016 writer.write = raw.write 4017 writer.seekable = lambda: True 4018 writer.tell = raw.tell 4019 tree.write(writer, encoding="utf-16") 4020 self.assertEqual(raw.getvalue(), 4021 '''<?xml version='1.0' encoding='utf-16'?>\n''' 4022 '''<site />'''.encode("utf-16")) 4023 4024 def test_tostringlist_invariant(self): 4025 root = ET.fromstring('<tag>foo</tag>') 4026 self.assertEqual( 4027 ET.tostring(root, 'unicode'), 4028 ''.join(ET.tostringlist(root, 'unicode'))) 4029 self.assertEqual( 4030 ET.tostring(root, 'utf-16'), 4031 b''.join(ET.tostringlist(root, 'utf-16'))) 4032 4033 def test_short_empty_elements(self): 4034 root = ET.fromstring('<tag>a<x />b<y></y>c</tag>') 4035 self.assertEqual( 4036 ET.tostring(root, 'unicode'), 4037 '<tag>a<x />b<y />c</tag>') 4038 self.assertEqual( 4039 ET.tostring(root, 'unicode', 
short_empty_elements=True), 4040 '<tag>a<x />b<y />c</tag>') 4041 self.assertEqual( 4042 ET.tostring(root, 'unicode', short_empty_elements=False), 4043 '<tag>a<x></x>b<y></y>c</tag>') 4044 4045 4046class ParseErrorTest(unittest.TestCase): 4047 def test_subclass(self): 4048 self.assertIsInstance(ET.ParseError(), SyntaxError) 4049 4050 def _get_error(self, s): 4051 try: 4052 ET.fromstring(s) 4053 except ET.ParseError as e: 4054 return e 4055 4056 def test_error_position(self): 4057 self.assertEqual(self._get_error('foo').position, (1, 0)) 4058 self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5)) 4059 self.assertEqual(self._get_error('foobar<').position, (1, 6)) 4060 4061 def test_error_code(self): 4062 import xml.parsers.expat.errors as ERRORS 4063 self.assertEqual(self._get_error('foo').code, 4064 ERRORS.codes[ERRORS.XML_ERROR_SYNTAX]) 4065 4066 4067class KeywordArgsTest(unittest.TestCase): 4068 # Test various issues with keyword arguments passed to ET.Element 4069 # constructor and methods 4070 def test_issue14818(self): 4071 x = ET.XML("<a>foo</a>") 4072 self.assertEqual(x.find('a', None), 4073 x.find(path='a', namespaces=None)) 4074 self.assertEqual(x.findtext('a', None, None), 4075 x.findtext(path='a', default=None, namespaces=None)) 4076 self.assertEqual(x.findall('a', None), 4077 x.findall(path='a', namespaces=None)) 4078 self.assertEqual(list(x.iterfind('a', None)), 4079 list(x.iterfind(path='a', namespaces=None))) 4080 4081 self.assertEqual(ET.Element('a').attrib, {}) 4082 elements = [ 4083 ET.Element('a', dict(href="#", id="foo")), 4084 ET.Element('a', attrib=dict(href="#", id="foo")), 4085 ET.Element('a', dict(href="#"), id="foo"), 4086 ET.Element('a', href="#", id="foo"), 4087 ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"), 4088 ] 4089 for e in elements: 4090 self.assertEqual(e.tag, 'a') 4091 self.assertEqual(e.attrib, dict(href="#", id="foo")) 4092 4093 e2 = ET.SubElement(elements[0], 'foobar', attrib={'key1': 'value1'}) 
4094 self.assertEqual(e2.attrib['key1'], 'value1') 4095 4096 with self.assertRaisesRegex(TypeError, 'must be dict, not str'): 4097 ET.Element('a', "I'm not a dict") 4098 with self.assertRaisesRegex(TypeError, 'must be dict, not str'): 4099 ET.Element('a', attrib="I'm not a dict") 4100 4101# -------------------------------------------------------------------- 4102 4103class NoAcceleratorTest(unittest.TestCase): 4104 @classmethod 4105 def setUpClass(cls): 4106 if ET is not pyET: 4107 raise unittest.SkipTest('only for the Python version') 4108 4109 # Test that the C accelerator was not imported for pyET 4110 def test_correct_import_pyET(self): 4111 # The type of methods defined in Python code is types.FunctionType, 4112 # while the type of methods defined inside _elementtree is 4113 # <class 'wrapper_descriptor'> 4114 self.assertIsInstance(pyET.Element.__init__, types.FunctionType) 4115 self.assertIsInstance(pyET.XMLParser.__init__, types.FunctionType) 4116 4117# -------------------------------------------------------------------- 4118 4119class BoolTest(unittest.TestCase): 4120 def test_warning(self): 4121 e = ET.fromstring('<a style="new"></a>') 4122 msg = ( 4123 r"Testing an element's truth value will always return True in " 4124 r"future versions. 
" 4125 r"Use specific 'len\(elem\)' or 'elem is not None' test instead.") 4126 with self.assertWarnsRegex(DeprecationWarning, msg): 4127 result = bool(e) 4128 # Emulate prior behavior for now 4129 self.assertIs(result, False) 4130 4131 # Element with children 4132 ET.SubElement(e, 'b') 4133 with self.assertWarnsRegex(DeprecationWarning, msg): 4134 new_result = bool(e) 4135 self.assertIs(new_result, True) 4136 4137# -------------------------------------------------------------------- 4138 4139def c14n_roundtrip(xml, **options): 4140 return pyET.canonicalize(xml, **options) 4141 4142 4143class C14NTest(unittest.TestCase): 4144 maxDiff = None 4145 4146 # 4147 # simple roundtrip tests (from c14n.py) 4148 4149 def test_simple_roundtrip(self): 4150 # Basics 4151 self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>') 4152 self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"), # FIXME 4153 '<doc xmlns="uri"></doc>') 4154 self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"), 4155 '<prefix:doc xmlns:prefix="uri"></prefix:doc>') 4156 self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"), 4157 '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>') 4158 self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"), 4159 '<elem></elem>') 4160 4161 # C14N spec 4162 self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"), 4163 '<doc>Hello, world!</doc>') 4164 self.assertEqual(c14n_roundtrip("<value>2</value>"), 4165 '<value>2</value>') 4166 self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'), 4167 '<compute>value>"0" && value<"10" ?"valid":"error"</compute>') 4168 self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" && value<"10" ?"valid":"error"'>valid</compute>'''), 4169 '<compute expr="value>"0" && value<"10" 
?"valid":"error"">valid</compute>') 4170 self.assertEqual(c14n_roundtrip("<norm attr=' '   
	 ' '/>"), 4171 '<norm attr=" \' 
	 \' "></norm>') 4172 self.assertEqual(c14n_roundtrip("<normNames attr=' A   
	 B '/>"), 4173 '<normNames attr=" A 
	 B "></normNames>') 4174 self.assertEqual(c14n_roundtrip("<normId id=' '   
	 ' '/>"), 4175 '<normId id=" \' 
	 \' "></normId>') 4176 4177 # fragments from PJ's tests 4178 #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"), 4179 #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>') 4180 4181 # Namespace issues 4182 xml = '<X xmlns="http://nps/a"><Y targets="abc,xyz"></Y></X>' 4183 self.assertEqual(c14n_roundtrip(xml), xml) 4184 xml = '<X xmlns="http://nps/a"><Y xmlns="http://nsp/b" targets="abc,xyz"></Y></X>' 4185 self.assertEqual(c14n_roundtrip(xml), xml) 4186 xml = '<X xmlns="http://nps/a"><Y xmlns:b="http://nsp/b" b:targets="abc,xyz"></Y></X>' 4187 self.assertEqual(c14n_roundtrip(xml), xml) 4188 4189 def test_c14n_exclusion(self): 4190 xml = textwrap.dedent("""\ 4191 <root xmlns:x="http://example.com/x"> 4192 <a x:attr="attrx"> 4193 <b>abtext</b> 4194 </a> 4195 <b>btext</b> 4196 <c> 4197 <x:d>dtext</x:d> 4198 </c> 4199 </root> 4200 """) 4201 self.assertEqual( 4202 c14n_roundtrip(xml, strip_text=True), 4203 '<root>' 4204 '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>' 4205 '<b>btext</b>' 4206 '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>' 4207 '</root>') 4208 self.assertEqual( 4209 c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']), 4210 '<root>' 4211 '<a><b>abtext</b></a>' 4212 '<b>btext</b>' 4213 '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>' 4214 '</root>') 4215 self.assertEqual( 4216 c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']), 4217 '<root>' 4218 '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>' 4219 '<b>btext</b>' 4220 '<c></c>' 4221 '</root>') 4222 self.assertEqual( 4223 c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'], 4224 exclude_tags=['{http://example.com/x}d']), 4225 '<root>' 4226 
'<a><b>abtext</b></a>' 4227 '<b>btext</b>' 4228 '<c></c>' 4229 '</root>') 4230 self.assertEqual( 4231 c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']), 4232 '<root>' 4233 '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>' 4234 '</root>') 4235 self.assertEqual( 4236 c14n_roundtrip(xml, exclude_tags=['a', 'b']), 4237 '<root>\n' 4238 ' \n' 4239 ' \n' 4240 ' <c>\n' 4241 ' <x:d xmlns:x="http://example.com/x">dtext</x:d>\n' 4242 ' </c>\n' 4243 '</root>') 4244 self.assertEqual( 4245 c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']), 4246 '<root>' 4247 '<a xmlns:x="http://example.com/x" x:attr="attrx"></a>' 4248 '<c></c>' 4249 '</root>') 4250 self.assertEqual( 4251 c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']), 4252 '<root>\n' 4253 ' <a xmlns:x="http://example.com/x" x:attr="attrx">\n' 4254 ' \n' 4255 ' </a>\n' 4256 ' \n' 4257 ' <c>\n' 4258 ' \n' 4259 ' </c>\n' 4260 '</root>') 4261 4262 # 4263 # basic method=c14n tests from the c14n 2.0 specification. uses 4264 # test files under xmltestdata/c14n-20. 4265 4266 # note that this uses generated C14N versions of the standard ET.write 4267 # output, not roundtripped C14N (see above). 
4268 4269 def test_xml_c14n2(self): 4270 datadir = findfile("c14n-20", subdir="xmltestdata") 4271 full_path = partial(os.path.join, datadir) 4272 4273 files = [filename[:-4] for filename in sorted(os.listdir(datadir)) 4274 if filename.endswith('.xml')] 4275 input_files = [ 4276 filename for filename in files 4277 if filename.startswith('in') 4278 ] 4279 configs = { 4280 filename: { 4281 # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite> 4282 option.tag.split('}')[-1]: ((option.text or '').strip(), option) 4283 for option in ET.parse(full_path(filename) + ".xml").getroot() 4284 } 4285 for filename in files 4286 if filename.startswith('c14n') 4287 } 4288 4289 tests = { 4290 input_file: [ 4291 (filename, configs[filename.rsplit('_', 1)[-1]]) 4292 for filename in files 4293 if filename.startswith(f'out_{input_file}_') 4294 and filename.rsplit('_', 1)[-1] in configs 4295 ] 4296 for input_file in input_files 4297 } 4298 4299 # Make sure we found all test cases. 4300 self.assertEqual(30, len([ 4301 output_file for output_files in tests.values() 4302 for output_file in output_files])) 4303 4304 def get_option(config, option_name, default=None): 4305 return config.get(option_name, (default, ()))[0] 4306 4307 for input_file, output_files in tests.items(): 4308 for output_file, config in output_files: 4309 keep_comments = get_option( 4310 config, 'IgnoreComments') == 'true' # no, it's right :) 4311 strip_text = get_option( 4312 config, 'TrimTextNodes') == 'true' 4313 rewrite_prefixes = get_option( 4314 config, 'PrefixRewrite') == 'sequential' 4315 if 'QNameAware' in config: 4316 qattrs = [ 4317 f"{{{el.get('NS')}}}{el.get('Name')}" 4318 for el in config['QNameAware'][1].findall( 4319 '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr') 4320 ] 4321 qtags = [ 4322 f"{{{el.get('NS')}}}{el.get('Name')}" 4323 for el in config['QNameAware'][1].findall( 4324 '{http://www.w3.org/2010/xml-c14n2}Element') 4325 ] 4326 else: 4327 qtags = qattrs = None 4328 4329 # Build subtest 
description from config. 4330 config_descr = ','.join( 4331 f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}" 4332 for name, (value, children) in sorted(config.items()) 4333 ) 4334 4335 with self.subTest(f"{output_file}({config_descr})"): 4336 if input_file == 'inNsRedecl' and not rewrite_prefixes: 4337 self.skipTest( 4338 f"Redeclared namespace handling is not supported in {output_file}") 4339 if input_file == 'inNsSuperfluous' and not rewrite_prefixes: 4340 self.skipTest( 4341 f"Redeclared namespace handling is not supported in {output_file}") 4342 if 'QNameAware' in config and config['QNameAware'][1].find( 4343 '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None: 4344 self.skipTest( 4345 f"QName rewriting in XPath text is not supported in {output_file}") 4346 4347 f = full_path(input_file + ".xml") 4348 if input_file == 'inC14N5': 4349 # Hack: avoid setting up external entity resolution in the parser. 4350 with open(full_path('world.txt'), 'rb') as entity_file: 4351 with open(f, 'rb') as f: 4352 f = io.BytesIO(f.read().replace(b'&ent2;', entity_file.read())) 4353 4354 text = ET.canonicalize( 4355 from_file=f, 4356 with_comments=keep_comments, 4357 strip_text=strip_text, 4358 rewrite_prefixes=rewrite_prefixes, 4359 qname_aware_tags=qtags, qname_aware_attrs=qattrs) 4360 4361 with open(full_path(output_file + ".xml"), 'r', encoding='utf8') as f: 4362 expected = f.read() 4363 if input_file == 'inC14N3': 4364 # FIXME: cET resolves default attributes but ET does not! 4365 expected = expected.replace(' attr="default"', '') 4366 text = text.replace(' attr="default"', '') 4367 self.assertEqual(expected, text) 4368 4369# -------------------------------------------------------------------- 4370 4371def setUpModule(module=None): 4372 # When invoked without a module, runs the Python ET tests by loading pyET. 4373 # Otherwise, uses the given module as the ET. 
4374 global pyET 4375 pyET = import_fresh_module('xml.etree.ElementTree', 4376 blocked=['_elementtree']) 4377 if module is None: 4378 module = pyET 4379 4380 global ET 4381 ET = module 4382 4383 # don't interfere with subsequent tests 4384 def cleanup(): 4385 global ET, pyET 4386 ET = pyET = None 4387 unittest.addModuleCleanup(cleanup) 4388 4389 # Provide default namespace mapping and path cache. 4390 from xml.etree import ElementPath 4391 nsmap = ET.register_namespace._namespace_map 4392 # Copy the default namespace mapping 4393 nsmap_copy = nsmap.copy() 4394 unittest.addModuleCleanup(nsmap.update, nsmap_copy) 4395 unittest.addModuleCleanup(nsmap.clear) 4396 4397 # Copy the path cache (should be empty) 4398 path_cache = ElementPath._cache 4399 unittest.addModuleCleanup(setattr, ElementPath, "_cache", path_cache) 4400 ElementPath._cache = path_cache.copy() 4401 4402 # Align the Comment/PI factories. 4403 if hasattr(ET, '_set_factories'): 4404 old_factories = ET._set_factories(ET.Comment, ET.PI) 4405 unittest.addModuleCleanup(ET._set_factories, *old_factories) 4406 4407 4408if __name__ == '__main__': 4409 unittest.main() 4410