• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# IMPORTANT: the same tests are run from "test_xml_etree_c" in order
2# to ensure consistency between the C implementation and the Python
3# implementation.
4#
5# For this purpose, the module-level "ET" symbol is temporarily
6# monkey-patched when running the "test_xml_etree_c" test suite.
7
8import copy
9import functools
10import html
11import io
12import itertools
13import locale
14import operator
15import os
16import pickle
17import sys
18import textwrap
19import types
20import unittest
21import warnings
22import weakref
23
24from functools import partial
25from itertools import product, islice
26from test import support
27from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
28
29# pyET is the pure-Python implementation.
30#
31# ET is pyET in test_xml_etree and is the C accelerated version in
32# test_xml_etree_c.
# Both names start out as None placeholders.  Per the header comment, "ET" is
# monkey-patched by the test runner; that assignment is not visible here.
pyET = None
ET = None

# Path of the plain sample document in the "xmltestdata" directory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
try:
    # Skip the whole module when the fixture path cannot be encoded as UTF-8.
    SIMPLE_XMLFILE.encode("utf-8")
except UnicodeEncodeError:
    raise unittest.SkipTest("filename is not encodable to utf8")
# Path of the namespaced sample document.
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
# NOTE(review): going by the file name, presumably a regression fixture for
# an Expat 2.2.4 UTF-8 bug -- confirm against the test that uses it.
UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")
43
# Un-namespaced document: <body> with two <tag> children and a <section>
# that wraps one more <tag>.
SAMPLE_XML = """\
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
"""

# Stand-alone <section> with a <tag>, a <nexttag> and a nested <nextsection>.
SAMPLE_SECTION = """\
<section>
  <tag class='b' id='inner'>subtext</tag>
  <nexttag />
  <nextsection>
    <tag />
  </nextsection>
</section>
"""

# Same shape as SAMPLE_XML but inside a default namespace and without
# attributes.  Note the literal starts with a newline (no backslash).
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
"""

# Two sibling <table> elements bound to different prefixed namespaces
# ("hello" and "foo") under an un-namespaced <root>.
SAMPLE_XML_NS_ELEMS = """
<root>
<h:table xmlns:h="hello">
  <h:tr>
    <h:td>Apples</h:td>
    <h:td>Bananas</h:td>
  </h:tr>
</h:table>

<f:table xmlns:f="foo">
  <f:name>African Coffee Table</f:name>
  <f:width>80</f:width>
  <f:length>120</f:length>
</f:table>
</root>
"""

# Document whose DOCTYPE declares and expands an external parameter entity,
# and whose body references the general entity "&entity;".
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""

# Document whose general entity "entity" is declared as an external SYSTEM
# entity pointing at a file:// URL.
EXTERNAL_ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
]>
<document>&entity;</document>
"""
105
def checkwarnings(*filters, quiet=False):
    """Return a decorator that runs a test inside support.check_warnings().

    *filters* and *quiet* are passed straight through to
    support.check_warnings().  The wrapper forwards every argument to the
    decorated test and discards its return value.
    """
    def decorator(test):
        @functools.wraps(test)
        def newtest(*args, **kwargs):
            with support.check_warnings(*filters, quiet=quiet):
                test(*args, **kwargs)
        return newtest
    return decorator
114
115
116class ModuleTest(unittest.TestCase):
117    def test_sanity(self):
118        # Import sanity.
119
120        from xml.etree import ElementTree
121        from xml.etree import ElementInclude
122        from xml.etree import ElementPath
123
124    def test_all(self):
125        names = ("xml.etree.ElementTree", "_elementtree")
126        support.check__all__(self, ET, names, blacklist=("HTML_EMPTY",))
127
128
def serialize(elem, to_string=True, encoding='unicode', **options):
    """Write *elem* through ET.ElementTree.write() into an in-memory buffer.

    A text buffer is used when *encoding* is 'unicode', a bytes buffer
    otherwise.  Extra keyword *options* are forwarded to write().

    Returns the buffer contents when *to_string* is true; otherwise the
    buffer itself, rewound to position 0.
    """
    buffer = io.StringIO() if encoding == 'unicode' else io.BytesIO()
    ET.ElementTree(elem).write(buffer, encoding=encoding, **options)
    if not to_string:
        buffer.seek(0)
        return buffer
    return buffer.getvalue()
141
def summarize_list(seq):
    """Return the list of ``.tag`` values of the items in *seq*."""
    return list(map(operator.attrgetter('tag'), seq))
144
145
class ElementTestCase:
    """Mixin providing pickling and element-comparison helpers."""

    @classmethod
    def setUpClass(cls):
        cls.modules = {pyET, ET}

    def pickleRoundTrip(self, obj, name, dumper, loader, proto):
        """Pickle *obj* with one module installed and unpickle with another.

        *dumper* is placed in ``sys.modules[name]`` while dumping with
        protocol *proto*, *loader* while loading.  The previous
        ``sys.modules[name]`` entry is restored in all cases.

        Returns the unpickled object.  A pickle.PicklingError is re-raised
        as support.TestFailed, chained to the original error.
        """
        original = sys.modules[name]
        try:
            sys.modules[name] = dumper
            payload = pickle.dumps(obj, proto)
            sys.modules[name] = loader
            return pickle.loads(payload)
        except pickle.PicklingError as pe:
            # pyET must be second, because pyET may be (equal to) ET.
            labels = {ET: "cET", pyET: "pyET"}
            raise support.TestFailed("Failed to round-trip %r from %r to %r"
                                     % (obj,
                                        labels.get(dumper, dumper),
                                        labels.get(loader, loader))) from pe
        finally:
            sys.modules[name] = original

    def assertEqualElements(self, alice, bob):
        """Assert that two elements are recursively equal.

        Both must be Element instances (of ET or pyET), have the same number
        of children with pairwise-equal children, and agree on tag, tail,
        text and attrib.
        """
        element_types = (ET.Element, pyET.Element)
        self.assertIsInstance(alice, element_types)
        self.assertIsInstance(bob, element_types)
        self.assertEqual(len(list(alice)), len(list(bob)))
        for left, right in zip(alice, bob):
            self.assertEqualElements(left, right)
        snapshot = operator.attrgetter('tag', 'tail', 'text', 'attrib')
        self.assertEqual(snapshot(alice), snapshot(bob))
177
178# --------------------------------------------------------------------
179# element tree tests
180
181class ElementTreeTest(unittest.TestCase):
182
183    def serialize_check(self, elem, expected):
184        self.assertEqual(serialize(elem), expected)
185
186    def test_interface(self):
187        # Test element tree interface.
188
189        def check_string(string):
190            len(string)
191            for char in string:
192                self.assertEqual(len(char), 1,
193                        msg="expected one-character string, got %r" % char)
194            new_string = string + ""
195            new_string = string + " "
196            string[:0]
197
198        def check_mapping(mapping):
199            len(mapping)
200            keys = mapping.keys()
201            items = mapping.items()
202            for key in keys:
203                item = mapping[key]
204            mapping["key"] = "value"
205            self.assertEqual(mapping["key"], "value",
206                    msg="expected value string, got %r" % mapping["key"])
207
208        def check_element(element):
209            self.assertTrue(ET.iselement(element), msg="not an element")
210            direlem = dir(element)
211            for attr in 'tag', 'attrib', 'text', 'tail':
212                self.assertTrue(hasattr(element, attr),
213                        msg='no %s member' % attr)
214                self.assertIn(attr, direlem,
215                        msg='no %s visible by dir' % attr)
216
217            check_string(element.tag)
218            check_mapping(element.attrib)
219            if element.text is not None:
220                check_string(element.text)
221            if element.tail is not None:
222                check_string(element.tail)
223            for elem in element:
224                check_element(elem)
225
226        element = ET.Element("tag")
227        check_element(element)
228        tree = ET.ElementTree(element)
229        check_element(tree.getroot())
230        element = ET.Element("t\xe4g", key="value")
231        tree = ET.ElementTree(element)
232        self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$")
233        element = ET.Element("tag", key="value")
234
235        # Make sure all standard element methods exist.
236
237        def check_method(method):
238            self.assertTrue(hasattr(method, '__call__'),
239                    msg="%s not callable" % method)
240
241        check_method(element.append)
242        check_method(element.extend)
243        check_method(element.insert)
244        check_method(element.remove)
245        check_method(element.getchildren)
246        check_method(element.find)
247        check_method(element.iterfind)
248        check_method(element.findall)
249        check_method(element.findtext)
250        check_method(element.clear)
251        check_method(element.get)
252        check_method(element.set)
253        check_method(element.keys)
254        check_method(element.items)
255        check_method(element.iter)
256        check_method(element.itertext)
257        check_method(element.getiterator)
258
259        # These methods return an iterable. See bug 6472.
260
261        def check_iter(it):
262            check_method(it.__next__)
263
264        check_iter(element.iterfind("tag"))
265        check_iter(element.iterfind("*"))
266        check_iter(tree.iterfind("tag"))
267        check_iter(tree.iterfind("*"))
268
269        # These aliases are provided:
270
271        self.assertEqual(ET.XML, ET.fromstring)
272        self.assertEqual(ET.PI, ET.ProcessingInstruction)
273
274    def test_set_attribute(self):
275        element = ET.Element('tag')
276
277        self.assertEqual(element.tag, 'tag')
278        element.tag = 'Tag'
279        self.assertEqual(element.tag, 'Tag')
280        element.tag = 'TAG'
281        self.assertEqual(element.tag, 'TAG')
282
283        self.assertIsNone(element.text)
284        element.text = 'Text'
285        self.assertEqual(element.text, 'Text')
286        element.text = 'TEXT'
287        self.assertEqual(element.text, 'TEXT')
288
289        self.assertIsNone(element.tail)
290        element.tail = 'Tail'
291        self.assertEqual(element.tail, 'Tail')
292        element.tail = 'TAIL'
293        self.assertEqual(element.tail, 'TAIL')
294
295        self.assertEqual(element.attrib, {})
296        element.attrib = {'a': 'b', 'c': 'd'}
297        self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'})
298        element.attrib = {'A': 'B', 'C': 'D'}
299        self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'})
300
301    def test_simpleops(self):
302        # Basic method sanity checks.
303
304        elem = ET.XML("<body><tag/></body>")
305        self.serialize_check(elem, '<body><tag /></body>')
306        e = ET.Element("tag2")
307        elem.append(e)
308        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
309        elem.remove(e)
310        self.serialize_check(elem, '<body><tag /></body>')
311        elem.insert(0, e)
312        self.serialize_check(elem, '<body><tag2 /><tag /></body>')
313        elem.remove(e)
314        elem.extend([e])
315        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
316        elem.remove(e)
317
318        element = ET.Element("tag", key="value")
319        self.serialize_check(element, '<tag key="value" />') # 1
320        subelement = ET.Element("subtag")
321        element.append(subelement)
322        self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2
323        element.insert(0, subelement)
324        self.serialize_check(element,
325                '<tag key="value"><subtag /><subtag /></tag>') # 3
326        element.remove(subelement)
327        self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4
328        element.remove(subelement)
329        self.serialize_check(element, '<tag key="value" />') # 5
330        with self.assertRaises(ValueError) as cm:
331            element.remove(subelement)
332        self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
333        self.serialize_check(element, '<tag key="value" />') # 6
334        element[0:0] = [subelement, subelement, subelement]
335        self.serialize_check(element[1], '<subtag />')
336        self.assertEqual(element[1:9], [element[1], element[2]])
337        self.assertEqual(element[:9:2], [element[0], element[2]])
338        del element[1:2]
339        self.serialize_check(element,
340                '<tag key="value"><subtag /><subtag /></tag>')
341
342    def test_cdata(self):
343        # Test CDATA handling (etc).
344
345        self.serialize_check(ET.XML("<tag>hello</tag>"),
346                '<tag>hello</tag>')
347        self.serialize_check(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"),
348                '<tag>hello</tag>')
349        self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"),
350                '<tag>hello</tag>')
351
352    def test_file_init(self):
353        stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8"))
354        tree = ET.ElementTree(file=stringfile)
355        self.assertEqual(tree.find("tag").tag, 'tag')
356        self.assertEqual(tree.find("section/tag").tag, 'tag')
357
358        tree = ET.ElementTree(file=SIMPLE_XMLFILE)
359        self.assertEqual(tree.find("element").tag, 'element')
360        self.assertEqual(tree.find("element/../empty-element").tag,
361                'empty-element')
362
363    def test_path_cache(self):
364        # Check that the path cache behaves sanely.
365
366        from xml.etree import ElementPath
367
368        elem = ET.XML(SAMPLE_XML)
369        for i in range(10): ET.ElementTree(elem).find('./'+str(i))
370        cache_len_10 = len(ElementPath._cache)
371        for i in range(10): ET.ElementTree(elem).find('./'+str(i))
372        self.assertEqual(len(ElementPath._cache), cache_len_10)
373        for i in range(20): ET.ElementTree(elem).find('./'+str(i))
374        self.assertGreater(len(ElementPath._cache), cache_len_10)
375        for i in range(600): ET.ElementTree(elem).find('./'+str(i))
376        self.assertLess(len(ElementPath._cache), 500)
377
378    def test_copy(self):
379        # Test copy handling (etc).
380
381        import copy
382        e1 = ET.XML("<tag>hello<foo/></tag>")
383        e2 = copy.copy(e1)
384        e3 = copy.deepcopy(e1)
385        e1.find("foo").tag = "bar"
386        self.serialize_check(e1, '<tag>hello<bar /></tag>')
387        self.serialize_check(e2, '<tag>hello<bar /></tag>')
388        self.serialize_check(e3, '<tag>hello<foo /></tag>')
389
390    def test_attrib(self):
391        # Test attribute handling.
392
393        elem = ET.Element("tag")
394        elem.get("key") # 1.1
395        self.assertEqual(elem.get("key", "default"), 'default') # 1.2
396
397        elem.set("key", "value")
398        self.assertEqual(elem.get("key"), 'value') # 1.3
399
400        elem = ET.Element("tag", key="value")
401        self.assertEqual(elem.get("key"), 'value') # 2.1
402        self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2
403
404        attrib = {"key": "value"}
405        elem = ET.Element("tag", attrib)
406        attrib.clear() # check for aliasing issues
407        self.assertEqual(elem.get("key"), 'value') # 3.1
408        self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2
409
410        attrib = {"key": "value"}
411        elem = ET.Element("tag", **attrib)
412        attrib.clear() # check for aliasing issues
413        self.assertEqual(elem.get("key"), 'value') # 4.1
414        self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2
415
416        elem = ET.Element("tag", {"key": "other"}, key="value")
417        self.assertEqual(elem.get("key"), 'value') # 5.1
418        self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2
419
420        elem = ET.Element('test')
421        elem.text = "aa"
422        elem.set('testa', 'testval')
423        elem.set('testb', 'test2')
424        self.assertEqual(ET.tostring(elem),
425                b'<test testa="testval" testb="test2">aa</test>')
426        self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
427        self.assertEqual(sorted(elem.items()),
428                [('testa', 'testval'), ('testb', 'test2')])
429        self.assertEqual(elem.attrib['testb'], 'test2')
430        elem.attrib['testb'] = 'test1'
431        elem.attrib['testc'] = 'test2'
432        self.assertEqual(ET.tostring(elem),
433                b'<test testa="testval" testb="test1" testc="test2">aa</test>')
434
435        elem = ET.Element('test')
436        elem.set('a', '\r')
437        elem.set('b', '\r\n')
438        elem.set('c', '\t\n\r ')
439        elem.set('d', '\n\n')
440        self.assertEqual(ET.tostring(elem),
441                b'<test a="&#10;" b="&#10;" c="&#09;&#10;&#10; " d="&#10;&#10;" />')
442
443    def test_makeelement(self):
444        # Test makeelement handling.
445
446        elem = ET.Element("tag")
447        attrib = {"key": "value"}
448        subelem = elem.makeelement("subtag", attrib)
449        self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing")
450        elem.append(subelem)
451        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
452
453        elem.clear()
454        self.serialize_check(elem, '<tag />')
455        elem.append(subelem)
456        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
457        elem.extend([subelem, subelem])
458        self.serialize_check(elem,
459            '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>')
460        elem[:] = [subelem]
461        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
462        elem[:] = tuple([subelem])
463        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
464
465    def test_parsefile(self):
466        # Test parsing from file.
467
468        tree = ET.parse(SIMPLE_XMLFILE)
469        stream = io.StringIO()
470        tree.write(stream, encoding='unicode')
471        self.assertEqual(stream.getvalue(),
472                '<root>\n'
473                '   <element key="value">text</element>\n'
474                '   <element>text</element>tail\n'
475                '   <empty-element />\n'
476                '</root>')
477        tree = ET.parse(SIMPLE_NS_XMLFILE)
478        stream = io.StringIO()
479        tree.write(stream, encoding='unicode')
480        self.assertEqual(stream.getvalue(),
481                '<ns0:root xmlns:ns0="namespace">\n'
482                '   <ns0:element key="value">text</ns0:element>\n'
483                '   <ns0:element>text</ns0:element>tail\n'
484                '   <ns0:empty-element />\n'
485                '</ns0:root>')
486
487        with open(SIMPLE_XMLFILE) as f:
488            data = f.read()
489
490        parser = ET.XMLParser()
491        self.assertRegex(parser.version, r'^Expat ')
492        parser.feed(data)
493        self.serialize_check(parser.close(),
494                '<root>\n'
495                '   <element key="value">text</element>\n'
496                '   <element>text</element>tail\n'
497                '   <empty-element />\n'
498                '</root>')
499
500        target = ET.TreeBuilder()
501        parser = ET.XMLParser(target=target)
502        parser.feed(data)
503        self.serialize_check(parser.close(),
504                '<root>\n'
505                '   <element key="value">text</element>\n'
506                '   <element>text</element>tail\n'
507                '   <empty-element />\n'
508                '</root>')
509
510    def test_parseliteral(self):
511        element = ET.XML("<html><body>text</body></html>")
512        self.assertEqual(ET.tostring(element, encoding='unicode'),
513                '<html><body>text</body></html>')
514        element = ET.fromstring("<html><body>text</body></html>")
515        self.assertEqual(ET.tostring(element, encoding='unicode'),
516                '<html><body>text</body></html>')
517        sequence = ["<html><body>", "text</bo", "dy></html>"]
518        element = ET.fromstringlist(sequence)
519        self.assertEqual(ET.tostring(element),
520                b'<html><body>text</body></html>')
521        self.assertEqual(b"".join(ET.tostringlist(element)),
522                b'<html><body>text</body></html>')
523        self.assertEqual(ET.tostring(element, "ascii"),
524                b"<?xml version='1.0' encoding='ascii'?>\n"
525                b"<html><body>text</body></html>")
526        _, ids = ET.XMLID("<html><body>text</body></html>")
527        self.assertEqual(len(ids), 0)
528        _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
529        self.assertEqual(len(ids), 1)
530        self.assertEqual(ids["body"].tag, 'body')
531
    def test_iterparse(self):
        # Test iterparse interface.

        iterparse = ET.iterparse

        # Default events: only "end" events are reported.
        context = iterparse(SIMPLE_XMLFILE)
        action, elem = next(context)
        self.assertEqual((action, elem.tag), ('end', 'element'))
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('end', 'element'),
                ('end', 'empty-element'),
                ('end', 'root'),
            ])
        # After exhaustion the root element is exposed on the iterator.
        self.assertEqual(context.root.tag, 'root')

        context = iterparse(SIMPLE_NS_XMLFILE)
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('end', '{namespace}element'),
                ('end', '{namespace}element'),
                ('end', '{namespace}empty-element'),
                ('end', '{namespace}root'),
            ])

        # An empty events tuple yields nothing, whether passed positionally...
        events = ()
        context = iterparse(SIMPLE_XMLFILE, events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [])

        # ... or by keyword.
        events = ()
        context = iterparse(SIMPLE_XMLFILE, events=events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [])

        events = ("start", "end")
        context = iterparse(SIMPLE_XMLFILE, events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('start', 'root'),
                ('start', 'element'),
                ('end', 'element'),
                ('start', 'element'),
                ('end', 'element'),
                ('start', 'empty-element'),
                ('end', 'empty-element'),
                ('end', 'root'),
            ])

        # Namespace events carry a (prefix, uri) tuple or None instead of an
        # element, so only start/end payloads are reduced to their tag.
        events = ("start", "end", "start-ns", "end-ns")
        context = iterparse(SIMPLE_NS_XMLFILE, events)
        self.assertEqual([(action, elem.tag) if action in ("start", "end")
                                             else (action, elem)
                          for action, elem in context], [
                ('start-ns', ('', 'namespace')),
                ('start', '{namespace}root'),
                ('start', '{namespace}element'),
                ('end', '{namespace}element'),
                ('start', '{namespace}element'),
                ('end', '{namespace}element'),
                ('start', '{namespace}empty-element'),
                ('end', '{namespace}empty-element'),
                ('end', '{namespace}root'),
                ('end-ns', None),
            ])

        # An empty default namespace declaration still produces ns events.
        events = ('start-ns', 'end-ns')
        context = iterparse(io.StringIO(r"<root xmlns=''/>"), events)
        res = [action for action, elem in context]
        self.assertEqual(res, ['start-ns', 'end-ns'])

        # An unknown event name is rejected; a caller-supplied file object
        # must be left open.
        events = ("start", "end", "bogus")
        with open(SIMPLE_XMLFILE, "rb") as f:
            with self.assertRaises(ValueError) as cm:
                iterparse(f, events)
            self.assertFalse(f.closed)
        self.assertEqual(str(cm.exception), "unknown event 'bogus'")

        # Same failure when given a file name: no ResourceWarning may be
        # raised.  cm is deleted inside the check so the captured exception
        # is released before the check ends.
        with support.check_no_resource_warning(self):
            with self.assertRaises(ValueError) as cm:
                iterparse(SIMPLE_XMLFILE, events)
            self.assertEqual(str(cm.exception), "unknown event 'bogus'")
            del cm

        # Non-ASCII namespace URI (via character reference) and prefix in an
        # iso-8859-1 encoded document.
        source = io.BytesIO(
            b"<?xml version='1.0' encoding='iso-8859-1'?>\n"
            b"<body xmlns='http://&#233;ffbot.org/ns'\n"
            b"      xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
        events = ("start-ns",)
        context = iterparse(source, events)
        self.assertEqual([(action, elem) for action, elem in context], [
                ('start-ns', ('', 'http://\xe9ffbot.org/ns')),
                ('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
            ])

        # Trailing junk: the document element is delivered first, and the
        # ParseError surfaces on the following next().
        source = io.StringIO("<document />junk")
        it = iterparse(source)
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'document'))
        with self.assertRaises(ET.ParseError) as cm:
            next(it)
        self.assertEqual(str(cm.exception),
                'junk after document element: line 1, column 12')

        # Same scenario with a file name: no ResourceWarning may be raised
        # once cm and it are deleted.
        self.addCleanup(support.unlink, TESTFN)
        with open(TESTFN, "wb") as f:
            f.write(b"<document />junk")
        it = iterparse(TESTFN)
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'document'))
        with support.check_no_resource_warning(self):
            with self.assertRaises(ET.ParseError) as cm:
                next(it)
            self.assertEqual(str(cm.exception),
                    'junk after document element: line 1, column 12')
            del cm, it
643
644    def test_writefile(self):
645        elem = ET.Element("tag")
646        elem.text = "text"
647        self.serialize_check(elem, '<tag>text</tag>')
648        ET.SubElement(elem, "subtag").text = "subtext"
649        self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>')
650
651        # Test tag suppression
652        elem.tag = None
653        self.serialize_check(elem, 'text<subtag>subtext</subtag>')
654        elem.insert(0, ET.Comment("comment"))
655        self.serialize_check(elem,
656                'text<!--comment--><subtag>subtext</subtag>')     # assumes 1.3
657
658        elem[0] = ET.PI("key", "value")
659        self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>')
660
661    def test_custom_builder(self):
662        # Test parser w. custom builder.
663
664        with open(SIMPLE_XMLFILE) as f:
665            data = f.read()
666        class Builder(list):
667            def start(self, tag, attrib):
668                self.append(("start", tag))
669            def end(self, tag):
670                self.append(("end", tag))
671            def data(self, text):
672                pass
673        builder = Builder()
674        parser = ET.XMLParser(target=builder)
675        parser.feed(data)
676        self.assertEqual(builder, [
677                ('start', 'root'),
678                ('start', 'element'),
679                ('end', 'element'),
680                ('start', 'element'),
681                ('end', 'element'),
682                ('start', 'empty-element'),
683                ('end', 'empty-element'),
684                ('end', 'root'),
685            ])
686
687        with open(SIMPLE_NS_XMLFILE) as f:
688            data = f.read()
689        class Builder(list):
690            def start(self, tag, attrib):
691                self.append(("start", tag))
692            def end(self, tag):
693                self.append(("end", tag))
694            def data(self, text):
695                pass
696            def pi(self, target, data):
697                self.append(("pi", target, data))
698            def comment(self, data):
699                self.append(("comment", data))
700            def start_ns(self, prefix, uri):
701                self.append(("start-ns", prefix, uri))
702            def end_ns(self, prefix):
703                self.append(("end-ns", prefix))
704        builder = Builder()
705        parser = ET.XMLParser(target=builder)
706        parser.feed(data)
707        self.assertEqual(builder, [
708                ('pi', 'pi', 'data'),
709                ('comment', ' comment '),
710                ('start-ns', '', 'namespace'),
711                ('start', '{namespace}root'),
712                ('start', '{namespace}element'),
713                ('end', '{namespace}element'),
714                ('start', '{namespace}element'),
715                ('end', '{namespace}element'),
716                ('start', '{namespace}empty-element'),
717                ('end', '{namespace}empty-element'),
718                ('end', '{namespace}root'),
719                ('end-ns', ''),
720            ])
721
722    def test_custom_builder_only_end_ns(self):
723        class Builder(list):
724            def end_ns(self, prefix):
725                self.append(("end-ns", prefix))
726
727        builder = Builder()
728        parser = ET.XMLParser(target=builder)
729        parser.feed(textwrap.dedent("""\
730            <?pi data?>
731            <!-- comment -->
732            <root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'>
733               <a:element key='value'>text</a:element>
734               <p:element>text</p:element>tail
735               <empty-element/>
736            </root>
737            """))
738        self.assertEqual(builder, [
739                ('end-ns', 'a'),
740                ('end-ns', 'p'),
741                ('end-ns', ''),
742            ])
743
744    # Element.getchildren() and ElementTree.getiterator() are deprecated.
745    @checkwarnings(("This method will be removed in future versions.  "
746                    "Use .+ instead.",
747                    DeprecationWarning))
748    def test_getchildren(self):
749        # Test Element.getchildren()
750
751        with open(SIMPLE_XMLFILE, "rb") as f:
752            tree = ET.parse(f)
753        self.assertEqual([summarize_list(elem.getchildren())
754                          for elem in tree.getroot().iter()], [
755                ['element', 'element', 'empty-element'],
756                [],
757                [],
758                [],
759            ])
760        self.assertEqual([summarize_list(elem.getchildren())
761                          for elem in tree.getiterator()], [
762                ['element', 'element', 'empty-element'],
763                [],
764                [],
765                [],
766            ])
767
768        elem = ET.XML(SAMPLE_XML)
769        self.assertEqual(len(elem.getchildren()), 3)
770        self.assertEqual(len(elem[2].getchildren()), 1)
771        self.assertEqual(elem[:], elem.getchildren())
772        child1 = elem[0]
773        child2 = elem[2]
774        del elem[1:2]
775        self.assertEqual(len(elem.getchildren()), 2)
776        self.assertEqual(child1, elem[0])
777        self.assertEqual(child2, elem[1])
778        elem[0:2] = [child2, child1]
779        self.assertEqual(child2, elem[0])
780        self.assertEqual(child1, elem[1])
781        self.assertNotEqual(child1, elem[0])
782        elem.clear()
783        self.assertEqual(elem.getchildren(), [])
784
785    def test_writestring(self):
786        elem = ET.XML("<html><body>text</body></html>")
787        self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
788        elem = ET.fromstring("<html><body>text</body></html>")
789        self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
790
791    def test_tostring_default_namespace(self):
792        elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
793        self.assertEqual(
794            ET.tostring(elem, encoding='unicode'),
795            '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
796        )
797        self.assertEqual(
798            ET.tostring(elem, encoding='unicode', default_namespace='http://effbot.org/ns'),
799            '<body xmlns="http://effbot.org/ns"><tag /></body>'
800        )
801
802    def test_tostring_default_namespace_different_namespace(self):
803        elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
804        self.assertEqual(
805            ET.tostring(elem, encoding='unicode', default_namespace='foobar'),
806            '<ns1:body xmlns="foobar" xmlns:ns1="http://effbot.org/ns"><ns1:tag /></ns1:body>'
807        )
808
809    def test_tostring_default_namespace_original_no_namespace(self):
810        elem = ET.XML('<body><tag/></body>')
811        EXPECTED_MSG = '^cannot use non-qualified names with default_namespace option$'
812        with self.assertRaisesRegex(ValueError, EXPECTED_MSG):
813            ET.tostring(elem, encoding='unicode', default_namespace='foobar')
814
815    def test_tostring_no_xml_declaration(self):
816        elem = ET.XML('<body><tag/></body>')
817        self.assertEqual(
818            ET.tostring(elem, encoding='unicode'),
819            '<body><tag /></body>'
820        )
821
822    def test_tostring_xml_declaration(self):
823        elem = ET.XML('<body><tag/></body>')
824        self.assertEqual(
825            ET.tostring(elem, encoding='utf8', xml_declaration=True),
826            b"<?xml version='1.0' encoding='utf8'?>\n<body><tag /></body>"
827        )
828
829    def test_tostring_xml_declaration_unicode_encoding(self):
830        elem = ET.XML('<body><tag/></body>')
831        preferredencoding = locale.getpreferredencoding()
832        self.assertEqual(
833            f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>",
834            ET.tostring(elem, encoding='unicode', xml_declaration=True)
835        )
836
837    def test_tostring_xml_declaration_cases(self):
838        elem = ET.XML('<body><tag>ø</tag></body>')
839        preferredencoding = locale.getpreferredencoding()
840        TESTCASES = [
841        #   (expected_retval,                  encoding, xml_declaration)
842            # ... xml_declaration = None
843            (b'<body><tag>&#248;</tag></body>', None, None),
844            (b'<body><tag>\xc3\xb8</tag></body>', 'UTF-8', None),
845            (b'<body><tag>&#248;</tag></body>', 'US-ASCII', None),
846            (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
847             b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', None),
848            ('<body><tag>ø</tag></body>', 'unicode', None),
849
850            # ... xml_declaration = False
851            (b"<body><tag>&#248;</tag></body>", None, False),
852            (b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', False),
853            (b"<body><tag>&#248;</tag></body>", 'US-ASCII', False),
854            (b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', False),
855            ("<body><tag>ø</tag></body>", 'unicode', False),
856
857            # ... xml_declaration = True
858            (b"<?xml version='1.0' encoding='us-ascii'?>\n"
859             b"<body><tag>&#248;</tag></body>", None, True),
860            (b"<?xml version='1.0' encoding='UTF-8'?>\n"
861             b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', True),
862            (b"<?xml version='1.0' encoding='US-ASCII'?>\n"
863             b"<body><tag>&#248;</tag></body>", 'US-ASCII', True),
864            (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
865             b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', True),
866            (f"<?xml version='1.0' encoding='{preferredencoding}'?>\n"
867             "<body><tag>ø</tag></body>", 'unicode', True),
868
869        ]
870        for expected_retval, encoding, xml_declaration in TESTCASES:
871            with self.subTest(f'encoding={encoding} '
872                              f'xml_declaration={xml_declaration}'):
873                self.assertEqual(
874                    ET.tostring(
875                        elem,
876                        encoding=encoding,
877                        xml_declaration=xml_declaration
878                    ),
879                    expected_retval
880                )
881
882    def test_tostringlist_default_namespace(self):
883        elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
884        self.assertEqual(
885            ''.join(ET.tostringlist(elem, encoding='unicode')),
886            '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
887        )
888        self.assertEqual(
889            ''.join(ET.tostringlist(elem, encoding='unicode', default_namespace='http://effbot.org/ns')),
890            '<body xmlns="http://effbot.org/ns"><tag /></body>'
891        )
892
893    def test_tostringlist_xml_declaration(self):
894        elem = ET.XML('<body><tag/></body>')
895        self.assertEqual(
896            ''.join(ET.tostringlist(elem, encoding='unicode')),
897            '<body><tag /></body>'
898        )
899        self.assertEqual(
900            b''.join(ET.tostringlist(elem, xml_declaration=True)),
901            b"<?xml version='1.0' encoding='us-ascii'?>\n<body><tag /></body>"
902        )
903
904        preferredencoding = locale.getpreferredencoding()
905        stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True)
906        self.assertEqual(
907            ''.join(stringlist),
908            f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>"
909        )
910        self.assertRegex(stringlist[0], r"^<\?xml version='1.0' encoding='.+'?>")
911        self.assertEqual(['<body', '>', '<tag', ' />', '</body>'], stringlist[1:])
912
913    def test_encoding(self):
914        def check(encoding, body=''):
915            xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" %
916                   (encoding, body))
917            self.assertEqual(ET.XML(xml.encode(encoding)).text, body)
918            self.assertEqual(ET.XML(xml).text, body)
919        check("ascii", 'a')
920        check("us-ascii", 'a')
921        check("iso-8859-1", '\xbd')
922        check("iso-8859-15", '\u20ac')
923        check("cp437", '\u221a')
924        check("mac-roman", '\u02da')
925
926        def xml(encoding):
927            return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
928        def bxml(encoding):
929            return xml(encoding).encode(encoding)
930        supported_encodings = [
931            'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
932            'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
933            'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
934            'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
935            'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
936            'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
937            'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
938            'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
939            'cp1256', 'cp1257', 'cp1258',
940            'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
941            'mac-roman', 'mac-turkish',
942            'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
943            'iso2022-jp-3', 'iso2022-jp-ext',
944            'koi8-r', 'koi8-t', 'koi8-u', 'kz1048',
945            'hz', 'ptcp154',
946        ]
947        for encoding in supported_encodings:
948            self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
949
950        unsupported_ascii_compatible_encodings = [
951            'big5', 'big5hkscs',
952            'cp932', 'cp949', 'cp950',
953            'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
954            'gb2312', 'gbk', 'gb18030',
955            'iso2022-kr', 'johab',
956            'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
957            'utf-7',
958        ]
959        for encoding in unsupported_ascii_compatible_encodings:
960            self.assertRaises(ValueError, ET.XML, bxml(encoding))
961
962        unsupported_ascii_incompatible_encodings = [
963            'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
964            'utf_32', 'utf_32_be', 'utf_32_le',
965        ]
966        for encoding in unsupported_ascii_incompatible_encodings:
967            self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
968
969        self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
970        self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
971
972    def test_methods(self):
973        # Test serialization methods.
974
975        e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
976        e.tail = "\n"
977        self.assertEqual(serialize(e),
978                '<html><link /><script>1 &lt; 2</script></html>\n')
979        self.assertEqual(serialize(e, method=None),
980                '<html><link /><script>1 &lt; 2</script></html>\n')
981        self.assertEqual(serialize(e, method="xml"),
982                '<html><link /><script>1 &lt; 2</script></html>\n')
983        self.assertEqual(serialize(e, method="html"),
984                '<html><link><script>1 < 2</script></html>\n')
985        self.assertEqual(serialize(e, method="text"), '1 < 2\n')
986
987    def test_issue18347(self):
988        e = ET.XML('<html><CamelCase>text</CamelCase></html>')
989        self.assertEqual(serialize(e),
990                '<html><CamelCase>text</CamelCase></html>')
991        self.assertEqual(serialize(e, method="html"),
992                '<html><CamelCase>text</CamelCase></html>')
993
994    def test_entity(self):
995        # Test entity handling.
996
997        # 1) good entities
998
999        e = ET.XML("<document title='&#x8230;'>test</document>")
1000        self.assertEqual(serialize(e, encoding="us-ascii"),
1001                b'<document title="&#33328;">test</document>')
1002        self.serialize_check(e, '<document title="\u8230">test</document>')
1003
1004        # 2) bad entities
1005
1006        with self.assertRaises(ET.ParseError) as cm:
1007            ET.XML("<document>&entity;</document>")
1008        self.assertEqual(str(cm.exception),
1009                'undefined entity: line 1, column 10')
1010
1011        with self.assertRaises(ET.ParseError) as cm:
1012            ET.XML(ENTITY_XML)
1013        self.assertEqual(str(cm.exception),
1014                'undefined entity &entity;: line 5, column 10')
1015
1016        # 3) custom entity
1017
1018        parser = ET.XMLParser()
1019        parser.entity["entity"] = "text"
1020        parser.feed(ENTITY_XML)
1021        root = parser.close()
1022        self.serialize_check(root, '<document>text</document>')
1023
1024        # 4) external (SYSTEM) entity
1025
1026        with self.assertRaises(ET.ParseError) as cm:
1027            ET.XML(EXTERNAL_ENTITY_XML)
1028        self.assertEqual(str(cm.exception),
1029                'undefined entity &entity;: line 4, column 10')
1030
1031    def test_namespace(self):
1032        # Test namespace issues.
1033
1034        # 1) xml namespace
1035
1036        elem = ET.XML("<tag xml:lang='en' />")
1037        self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1
1038
1039        # 2) other "well-known" namespaces
1040
1041        elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
1042        self.serialize_check(elem,
1043            '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1
1044
1045        elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
1046        self.serialize_check(elem,
1047            '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2
1048
1049        elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
1050        self.serialize_check(elem,
1051            '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3
1052
1053        # 3) unknown namespaces
1054        elem = ET.XML(SAMPLE_XML_NS)
1055        self.serialize_check(elem,
1056            '<ns0:body xmlns:ns0="http://effbot.org/ns">\n'
1057            '  <ns0:tag>text</ns0:tag>\n'
1058            '  <ns0:tag />\n'
1059            '  <ns0:section>\n'
1060            '    <ns0:tag>subtext</ns0:tag>\n'
1061            '  </ns0:section>\n'
1062            '</ns0:body>')
1063
1064    def test_qname(self):
1065        # Test QName handling.
1066
1067        # 1) decorated tags
1068
1069        elem = ET.Element("{uri}tag")
1070        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1
1071        elem = ET.Element(ET.QName("{uri}tag"))
1072        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2
1073        elem = ET.Element(ET.QName("uri", "tag"))
1074        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3
1075        elem = ET.Element(ET.QName("uri", "tag"))
1076        subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
1077        subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
1078        self.serialize_check(elem,
1079            '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4
1080
1081        # 2) decorated attributes
1082
1083        elem.clear()
1084        elem.attrib["{uri}key"] = "value"
1085        self.serialize_check(elem,
1086            '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1
1087
1088        elem.clear()
1089        elem.attrib[ET.QName("{uri}key")] = "value"
1090        self.serialize_check(elem,
1091            '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2
1092
1093        # 3) decorated values are not converted by default, but the
1094        # QName wrapper can be used for values
1095
1096        elem.clear()
1097        elem.attrib["{uri}key"] = "{uri}value"
1098        self.serialize_check(elem,
1099            '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1
1100
1101        elem.clear()
1102        elem.attrib["{uri}key"] = ET.QName("{uri}value")
1103        self.serialize_check(elem,
1104            '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2
1105
1106        elem.clear()
1107        subelem = ET.Element("tag")
1108        subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
1109        elem.append(subelem)
1110        elem.append(subelem)
1111        self.serialize_check(elem,
1112            '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">'
1113            '<tag ns1:key="ns2:value" />'
1114            '<tag ns1:key="ns2:value" />'
1115            '</ns0:tag>') # 3.3
1116
1117        # 4) Direct QName tests
1118
1119        self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
1120        self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
1121        q1 = ET.QName('ns', 'tag')
1122        q2 = ET.QName('ns', 'tag')
1123        self.assertEqual(q1, q2)
1124        q2 = ET.QName('ns', 'other-tag')
1125        self.assertNotEqual(q1, q2)
1126        self.assertNotEqual(q1, 'ns:tag')
1127        self.assertEqual(q1, '{ns}tag')
1128
1129    def test_doctype_public(self):
1130        # Test PUBLIC doctype.
1131
1132        elem = ET.XML('<!DOCTYPE html PUBLIC'
1133                ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
1134                ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
1135                '<html>text</html>')
1136
1137    def test_xpath_tokenizer(self):
1138        # Test the XPath tokenizer.
1139        from xml.etree import ElementPath
1140        def check(p, expected, namespaces=None):
1141            self.assertEqual([op or tag
1142                              for op, tag in ElementPath.xpath_tokenizer(p, namespaces)],
1143                             expected)
1144
1145        # tests from the xml specification
1146        check("*", ['*'])
1147        check("text()", ['text', '()'])
1148        check("@name", ['@', 'name'])
1149        check("@*", ['@', '*'])
1150        check("para[1]", ['para', '[', '1', ']'])
1151        check("para[last()]", ['para', '[', 'last', '()', ']'])
1152        check("*/para", ['*', '/', 'para'])
1153        check("/doc/chapter[5]/section[2]",
1154              ['/', 'doc', '/', 'chapter', '[', '5', ']',
1155               '/', 'section', '[', '2', ']'])
1156        check("chapter//para", ['chapter', '//', 'para'])
1157        check("//para", ['//', 'para'])
1158        check("//olist/item", ['//', 'olist', '/', 'item'])
1159        check(".", ['.'])
1160        check(".//para", ['.', '//', 'para'])
1161        check("..", ['..'])
1162        check("../@lang", ['..', '/', '@', 'lang'])
1163        check("chapter[title]", ['chapter', '[', 'title', ']'])
1164        check("employee[@secretary and @assistant]", ['employee',
1165              '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
1166
1167        # additional tests
1168        check("@{ns}attr", ['@', '{ns}attr'])
1169        check("{http://spam}egg", ['{http://spam}egg'])
1170        check("./spam.egg", ['.', '/', 'spam.egg'])
1171        check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
1172
1173        # wildcard tags
1174        check("{ns}*", ['{ns}*'])
1175        check("{}*", ['{}*'])
1176        check("{*}tag", ['{*}tag'])
1177        check("{*}*", ['{*}*'])
1178        check(".//{*}tag", ['.', '//', '{*}tag'])
1179
1180        # namespace prefix resolution
1181        check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'],
1182              {'xsd': 'http://www.w3.org/2001/XMLSchema'})
1183        check("type", ['{http://www.w3.org/2001/XMLSchema}type'],
1184              {'': 'http://www.w3.org/2001/XMLSchema'})
1185        check("@xsd:type", ['@', '{http://www.w3.org/2001/XMLSchema}type'],
1186              {'xsd': 'http://www.w3.org/2001/XMLSchema'})
1187        check("@type", ['@', 'type'],
1188              {'': 'http://www.w3.org/2001/XMLSchema'})
1189        check("@{*}type", ['@', '{*}type'],
1190              {'': 'http://www.w3.org/2001/XMLSchema'})
1191        check("@{ns}attr", ['@', '{ns}attr'],
1192              {'': 'http://www.w3.org/2001/XMLSchema',
1193               'ns': 'http://www.w3.org/2001/XMLSchema'})
1194
1195    def test_processinginstruction(self):
1196        # Test ProcessingInstruction directly
1197
1198        self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
1199                b'<?test instruction?>')
1200        self.assertEqual(ET.tostring(ET.PI('test', 'instruction')),
1201                b'<?test instruction?>')
1202
1203        # Issue #2746
1204
1205        self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
1206                b'<?test <testing&>?>')
1207        self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
1208                b"<?xml version='1.0' encoding='latin-1'?>\n"
1209                b"<?test <testing&>\xe3?>")
1210
1211    def test_html_empty_elems_serialization(self):
1212        # issue 15970
1213        # from http://www.w3.org/TR/html401/index/elements.html
1214        for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
1215                        'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']:
1216            for elem in [element, element.lower()]:
1217                expected = '<%s>' % elem
1218                serialized = serialize(ET.XML('<%s />' % elem), method='html')
1219                self.assertEqual(serialized, expected)
1220                serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)),
1221                                       method='html')
1222                self.assertEqual(serialized, expected)
1223
1224    def test_dump_attribute_order(self):
1225        # See BPO 34160
1226        e = ET.Element('cirriculum', status='public', company='example')
1227        with support.captured_stdout() as stdout:
1228            ET.dump(e)
1229        self.assertEqual(stdout.getvalue(),
1230                         '<cirriculum status="public" company="example" />\n')
1231
1232    def test_tree_write_attribute_order(self):
1233        # See BPO 34160
1234        root = ET.Element('cirriculum', status='public', company='example')
1235        self.assertEqual(serialize(root),
1236                         '<cirriculum status="public" company="example" />')
1237        self.assertEqual(serialize(root, method='html'),
1238                '<cirriculum status="public" company="example"></cirriculum>')
1239
1240
class XMLPullParserTest(unittest.TestCase):
    # Tests for ET.XMLPullParser: data is pushed in with feed() and the
    # events collected so far are drained with read_events().

    def _feed(self, parser, data, chunk_size=None):
        # Feed *data* in a single call, or in consecutive slices of
        # *chunk_size* characters when a chunk size is given.
        if chunk_size is None:
            parser.feed(data)
            return
        for start in range(0, len(data), chunk_size):
            parser.feed(data[start:start + chunk_size])

    def assert_events(self, parser, expected, max_events=None):
        # Compare pending events as (event, (tag, text)) pairs.
        pending = islice(parser.read_events(), max_events)
        observed = [(event, (elem.tag, elem.text)) for event, elem in pending]
        self.assertEqual(observed, expected)

    def assert_event_tuples(self, parser, expected, max_events=None):
        # Compare pending events exactly as the parser reports them.
        observed = list(islice(parser.read_events(), max_events))
        self.assertEqual(observed, expected)

    def assert_event_tags(self, parser, expected, max_events=None):
        # Compare pending events as (action, tag) pairs.
        pending = islice(parser.read_events(), max_events)
        observed = [(action, elem.tag) for action, elem in pending]
        self.assertEqual(observed, expected)

    def test_simple_xml(self):
        # The default parser reports only 'end' events, however the input
        # is chunked.
        for chunk_size in (None, 1, 5):
            with self.subTest(chunk_size=chunk_size):
                pull = ET.XMLPullParser()
                self.assert_event_tags(pull, [])
                self._feed(pull, "<!-- comment -->\n", chunk_size)
                self.assert_event_tags(pull, [])
                self._feed(pull,
                           "<root>\n  <element key='value'>text</element",
                           chunk_size)
                self.assert_event_tags(pull, [])
                self._feed(pull, ">\n", chunk_size)
                self.assert_event_tags(pull, [('end', 'element')])
                self._feed(pull, "<element>text</element>tail\n", chunk_size)
                self._feed(pull, "<empty-element/>\n", chunk_size)
                self.assert_event_tags(pull, [
                    ('end', 'element'),
                    ('end', 'empty-element'),
                ])
                self._feed(pull, "</root>\n", chunk_size)
                self.assert_event_tags(pull, [('end', 'root')])
                self.assertIsNone(pull.close())

    def test_feed_while_iterating(self):
        # An event iterator obtained before feeding keeps yielding events
        # produced by later feed() calls.
        pull = ET.XMLPullParser()
        events = pull.read_events()
        self._feed(pull, "<root>\n  <element key='value'>text</element>\n")
        action, elem = next(events)
        self.assertEqual((action, elem.tag), ('end', 'element'))
        self._feed(pull, "</root>\n")
        action, elem = next(events)
        self.assertEqual((action, elem.tag), ('end', 'root'))
        with self.assertRaises(StopIteration):
            next(events)

    def test_simple_xml_with_ns(self):
        # Tags are reported in "{namespace}tag" form.
        pull = ET.XMLPullParser()
        self.assert_event_tags(pull, [])
        self._feed(pull, "<!-- comment -->\n")
        self.assert_event_tags(pull, [])
        self._feed(pull, "<root xmlns='namespace'>\n")
        self.assert_event_tags(pull, [])
        self._feed(pull, "<element key='value'>text</element")
        self.assert_event_tags(pull, [])
        self._feed(pull, ">\n")
        self.assert_event_tags(pull, [('end', '{namespace}element')])
        self._feed(pull, "<element>text</element>tail\n")
        self._feed(pull, "<empty-element/>\n")
        self.assert_event_tags(pull, [
            ('end', '{namespace}element'),
            ('end', '{namespace}empty-element'),
        ])
        self._feed(pull, "</root>\n")
        self.assert_event_tags(pull, [('end', '{namespace}root')])
        self.assertIsNone(pull.close())

    def test_ns_events(self):
        # Only namespace events were requested, so element events are
        # not reported at all.
        pull = ET.XMLPullParser(events=('start-ns', 'end-ns'))
        self._feed(pull, "<!-- comment -->\n")
        self._feed(pull, "<root xmlns='namespace'>\n")
        started = list(pull.read_events())
        self.assertEqual(started, [('start-ns', ('', 'namespace'))])
        self._feed(pull, "<element key='value'>text</element")
        self._feed(pull, ">\n")
        self._feed(pull, "<element>text</element>tail\n")
        self._feed(pull, "<empty-element/>\n")
        self._feed(pull, "</root>\n")
        ended = list(pull.read_events())
        self.assertEqual(ended, [('end-ns', None)])
        self.assertIsNone(pull.close())

    def test_ns_events_start(self):
        # 'start-ns' events precede the 'start' event of the declaring tag.
        pull = ET.XMLPullParser(events=('start-ns', 'start', 'end'))
        self._feed(pull, "<tag xmlns='abc' xmlns:p='xyz'>\n")
        self.assert_event_tuples(pull, [
            ('start-ns', ('', 'abc')),
            ('start-ns', ('p', 'xyz')),
        ], max_events=2)
        self.assert_event_tags(pull, [
            ('start', '{abc}tag'),
        ], max_events=1)

        self._feed(pull, "<child />\n")
        self.assert_event_tags(pull, [
            ('start', '{abc}child'),
            ('end', '{abc}child'),
        ])

        self._feed(pull, "</tag>\n")
        pull.close()
        self.assert_event_tags(pull, [
            ('end', '{abc}tag'),
        ])

    def test_ns_events_start_end(self):
        # With 'end-ns' requested too, one 'end-ns' event per declared
        # namespace follows the 'end' event of the declaring tag.
        pull = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns'))
        self._feed(pull, "<tag xmlns='abc' xmlns:p='xyz'>\n")
        self.assert_event_tuples(pull, [
            ('start-ns', ('', 'abc')),
            ('start-ns', ('p', 'xyz')),
        ], max_events=2)
        self.assert_event_tags(pull, [
            ('start', '{abc}tag'),
        ], max_events=1)

        self._feed(pull, "<child />\n")
        self.assert_event_tags(pull, [
            ('start', '{abc}child'),
            ('end', '{abc}child'),
        ])

        self._feed(pull, "</tag>\n")
        pull.close()
        self.assert_event_tags(pull, [
            ('end', '{abc}tag'),
        ], max_events=1)
        self.assert_event_tuples(pull, [
            ('end-ns', None),
            ('end-ns', None),
        ])

    def test_events(self):
        # An empty event selection reports nothing.
        pull = ET.XMLPullParser(events=())
        self._feed(pull, "<root/>\n")
        self.assert_event_tags(pull, [])

        # Comments are not reported unless 'comment' is requested.
        pull = ET.XMLPullParser(events=('start', 'end'))
        self._feed(pull, "<!-- text here -->\n")
        self.assert_events(pull, [])

        # 'start' and 'end' together.
        pull = ET.XMLPullParser(events=('start', 'end'))
        self._feed(pull, "<root>\n")
        self.assert_event_tags(pull, [('start', 'root')])
        self._feed(pull, "<element key='value'>text</element")
        self.assert_event_tags(pull, [('start', 'element')])
        self._feed(pull, ">\n")
        self.assert_event_tags(pull, [('end', 'element')])
        self._feed(pull,
                   "<element xmlns='foo'>text<empty-element/></element>tail\n")
        self.assert_event_tags(pull, [
            ('start', '{foo}element'),
            ('start', '{foo}empty-element'),
            ('end', '{foo}empty-element'),
            ('end', '{foo}element'),
        ])
        self._feed(pull, "</root>")
        self.assertIsNone(pull.close())
        self.assert_event_tags(pull, [('end', 'root')])

        # 'start' only.
        pull = ET.XMLPullParser(events=('start',))
        self._feed(pull, "<!-- comment -->\n")
        self.assert_event_tags(pull, [])
        self._feed(pull, "<root>\n")
        self.assert_event_tags(pull, [('start', 'root')])
        self._feed(pull, "<element key='value'>text</element")
        self.assert_event_tags(pull, [('start', 'element')])
        self._feed(pull, ">\n")
        self.assert_event_tags(pull, [])
        self._feed(pull,
                   "<element xmlns='foo'>text<empty-element/></element>tail\n")
        self.assert_event_tags(pull, [
            ('start', '{foo}element'),
            ('start', '{foo}empty-element'),
        ])
        self._feed(pull, "</root>")
        self.assertIsNone(pull.close())

    def test_events_comment(self):
        # Comments are reported before, inside and after the root element.
        pull = ET.XMLPullParser(events=('start', 'comment', 'end'))
        self._feed(pull, "<!-- text here -->\n")
        self.assert_events(pull, [('comment', (ET.Comment, ' text here '))])
        self._feed(pull, "<!-- more text here -->\n")
        self.assert_events(pull, [('comment', (ET.Comment, ' more text here '))])
        self._feed(pull, "<root-tag>text")
        self.assert_event_tags(pull, [('start', 'root-tag')])
        self._feed(pull, "<!-- inner comment-->\n")
        self.assert_events(pull, [('comment', (ET.Comment, ' inner comment'))])
        self._feed(pull, "</root-tag>\n")
        self.assert_event_tags(pull, [('end', 'root-tag')])
        self._feed(pull, "<!-- outer comment -->\n")
        self.assert_events(pull, [('comment', (ET.Comment, ' outer comment '))])

        # 'comment' as the only requested event.
        pull = ET.XMLPullParser(events=('comment',))
        self._feed(pull, "<!-- text here -->\n")
        self.assert_events(pull, [('comment', (ET.Comment, ' text here '))])

    def test_events_pi(self):
        # Processing instructions are reported with ET.PI as their tag.
        pull = ET.XMLPullParser(events=('start', 'pi', 'end'))
        self._feed(pull, "<?pitarget?>\n")
        self.assert_events(pull, [('pi', (ET.PI, 'pitarget'))])

        pull = ET.XMLPullParser(events=('pi',))
        self._feed(pull, "<?pitarget some text ?>\n")
        self.assert_events(pull, [('pi', (ET.PI, 'pitarget some text '))])

    def test_events_sequence(self):
        # Test that events can be given as something other than a tuple
        # or list: first a set, then a hand-written iterator.
        eventset = {'end', 'start'}
        pull = ET.XMLPullParser(events=eventset)
        self._feed(pull, "<foo>bar</foo>")
        self.assert_event_tags(pull, [('start', 'foo'), ('end', 'foo')])

        class DummyIter:
            def __init__(self):
                self.events = iter(['start', 'end', 'start-ns'])

            def __iter__(self):
                return self

            def __next__(self):
                return next(self.events)

        pull = ET.XMLPullParser(events=DummyIter())
        self._feed(pull, "<foo>bar</foo>")
        self.assert_event_tags(pull, [('start', 'foo'), ('end', 'foo')])

    def test_unknown_event(self):
        # An unrecognised event name is rejected at construction time.
        requested = ('start', 'end', 'bogus')
        with self.assertRaises(ValueError):
            ET.XMLPullParser(events=requested)
1482
1483
#
# xinclude tests (samples from appendix C of the xinclude specification)

# Sample documents keyed by the resource name used to look them up.
XINCLUDE = {
    "C1.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml"/>
</document>
""",

    "disclaimer.xml": """\
<?xml version='1.0'?>
<disclaimer>
  <p>The opinions represented herein represent those of the individual
  and should not be interpreted as official policy endorsed by this
  organization.</p>
</disclaimer>
""",

    "C2.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been accessed
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
""",

    "count.txt": "324387",

    "C2b.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been <em>accessed</em>
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
""",

    "C3.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source of the "data.xml" resource:</p>
  <example><xi:include href="data.xml" parse="text"/></example>
</document>
""",

    "data.xml": """\
<?xml version='1.0'?>
<data>
  <item><![CDATA[Brooks & Shields]]></item>
</data>
""",

    "C5.xml": """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
  <xi:include href="example.txt" parse="text">
    <xi:fallback>
      <xi:include href="fallback-example.txt" parse="text">
        <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
      </xi:include>
    </xi:fallback>
  </xi:include>
</div>
""",

    # The href is the path of the simple.xml test file, escaped so that
    # it is safe inside a quoted attribute value.
    "default.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>Example.</p>
  <xi:include href="{}"/>
</document>
""".format(html.escape(SIMPLE_XMLFILE, True)),
}
1559
1560#
1561# badly formatted xi:include tags
1562
# Documents whose xi: markup is invalid, keyed by a resource name.
XINCLUDE_BAD = {
    # xi:include carrying an unsupported parse attribute value.
    "B1.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
""",

    # xi:fallback that is not a child of xi:include.
    "B2.xml": """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
    <xi:fallback></xi:fallback>
</div>
""",
}
1579
class XIncludeTest(unittest.TestCase):
    """Exercise xml.etree.ElementInclude against the XINCLUDE samples."""

    def xinclude_loader(self, href, parse="xml", encoding=None):
        """Serve *href* from the in-memory XINCLUDE table.

        Returns a parsed element when *parse* is "xml" and the raw string
        otherwise.  Raises OSError when *href* is not in the table.
        *encoding* is accepted but not used.
        """
        try:
            payload = XINCLUDE[href]
        except KeyError:
            raise OSError("resource not found")
        return ET.XML(payload) if parse == "xml" else payload

    def none_loader(self, href, parser, encoding=None):
        """Loader that never finds anything: it always returns None."""
        return None

    def _my_loader(self, href, parse):
        # Used to avoid a test-dependency problem where the default loader
        # of ElementInclude uses the pyET parser for cET tests.
        if parse != 'xml':
            return None
        with open(href, 'rb') as stream:
            return ET.parse(stream).getroot()

    def test_xinclude_default(self):
        from xml.etree import ElementInclude

        expected = (
            '<document>\n'
            '  <p>Example.</p>\n'
            '  <root>\n'
            '   <element key="value">text</element>\n'
            '   <element>text</element>tail\n'
            '   <empty-element />\n'
            '</root>\n'
            '</document>')

        doc = self.xinclude_loader('default.xml')
        ElementInclude.include(doc, self._my_loader)
        self.assertEqual(serialize(doc), expected)

    def test_xinclude(self):
        from xml.etree import ElementInclude

        def expand(name):
            # Load a sample, resolve its xi:include elements in place and
            # return the serialized result.
            tree = self.xinclude_loader(name)
            ElementInclude.include(tree, self.xinclude_loader)
            return serialize(tree)

        # Basic inclusion example (XInclude C.1)
        self.assertEqual(expand("C1.xml"),
            '<document>\n'
            '  <p>120 Mz is adequate for an average home user.</p>\n'
            '  <disclaimer>\n'
            '  <p>The opinions represented herein represent those of the individual\n'
            '  and should not be interpreted as official policy endorsed by this\n'
            '  organization.</p>\n'
            '</disclaimer>\n'
            '</document>') # C1

        # Textual inclusion example (XInclude C.2)
        self.assertEqual(expand("C2.xml"),
            '<document>\n'
            '  <p>This document has been accessed\n'
            '  324387 times.</p>\n'
            '</document>') # C2

        # Textual inclusion after sibling element (based on modified XInclude C.2)
        self.assertEqual(expand("C2b.xml"),
            '<document>\n'
            '  <p>This document has been <em>accessed</em>\n'
            '  324387 times.</p>\n'
            '</document>') # C2b

        # Textual inclusion of XML example (XInclude C.3)
        self.assertEqual(expand("C3.xml"),
            '<document>\n'
            '  <p>The following is the source of the "data.xml" resource:</p>\n'
            "  <example>&lt;?xml version='1.0'?&gt;\n"
            '&lt;data&gt;\n'
            '  &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;\n'
            '&lt;/data&gt;\n'
            '</example>\n'
            '</document>') # C3

        # Fallback example (XInclude C.5)
        # Note! Fallback support is not yet implemented
        document = self.xinclude_loader("C5.xml")
        with self.assertRaises(OSError) as caught:
            ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(str(caught.exception), 'resource not found')
        # The failed include must leave the document as it was parsed.
        self.assertEqual(serialize(document),
            '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n'
            '  <ns0:include href="example.txt" parse="text">\n'
            '    <ns0:fallback>\n'
            '      <ns0:include href="fallback-example.txt" parse="text">\n'
            '        <ns0:fallback><a href="mailto:bob@example.org">Report error</a></ns0:fallback>\n'
            '      </ns0:include>\n'
            '    </ns0:fallback>\n'
            '  </ns0:include>\n'
            '</div>') # C5

    def test_xinclude_failures(self):
        from xml.etree import ElementInclude

        # (document source, expected FatalIncludeError message), checked in
        # this order with a loader that never returns a resource.
        cases = (
            # Failure to locate included XML file.
            (XINCLUDE["C1.xml"],
                "cannot load 'disclaimer.xml' as 'xml'"),
            # Failure to locate included text file.
            (XINCLUDE["C2.xml"],
                "cannot load 'count.txt' as 'text'"),
            # Bad parse type.
            (XINCLUDE_BAD["B1.xml"],
                "unknown parse type in xi:include tag ('BAD_TYPE')"),
            # xi:fallback outside xi:include.
            (XINCLUDE_BAD["B2.xml"],
                "xi:fallback tag must be child of xi:include "
                "('{http://www.w3.org/2001/XInclude}fallback')"),
        )
        for source, message in cases:
            document = ET.XML(source)
            with self.assertRaises(ElementInclude.FatalIncludeError) as caught:
                ElementInclude.include(document, loader=self.none_loader)
            self.assertEqual(str(caught.exception), message)
1712
1713# --------------------------------------------------------------------
1714# reported bugs
1715
class BugsTest(unittest.TestCase):
    """Regression tests for individually reported ElementTree bugs."""

    def test_bug_xmltoolkit21(self):
        # marshaller gives obscure errors for non-string values

        def assert_unserializable(elem):
            with self.assertRaises(TypeError) as caught:
                serialize(elem)
            self.assertEqual(str(caught.exception),
                             'cannot serialize 123 (type int)')

        assert_unserializable(ET.Element(123))  # tag

        elem = ET.Element("elem")
        elem.text = 123
        assert_unserializable(elem)  # text

        elem = ET.Element("elem")
        elem.tail = 123
        assert_unserializable(elem)  # tail

        elem = ET.Element("elem")
        elem.set(123, "123")
        assert_unserializable(elem)  # attribute key

        elem = ET.Element("elem")
        elem.set("123", 123)
        assert_unserializable(elem)  # attribute value

    def test_bug_xmltoolkit25(self):
        # typo in ElementTree.findtext

        tree = ET.ElementTree(ET.XML(SAMPLE_XML))
        self.assertEqual(tree.findtext("tag"), 'text')
        self.assertEqual(tree.findtext("section/tag"), 'subtext')

    def test_bug_xmltoolkit28(self):
        # .//tag causes exceptions

        doc = ET.XML("<doc><table><tbody/></table></doc>")
        self.assertEqual(summarize_list(doc.findall(".//thead")), [])
        self.assertEqual(summarize_list(doc.findall(".//tbody")), ['tbody'])

    def test_bug_xmltoolkitX1(self):
        # dump() doesn't flush the output buffer

        doc = ET.XML("<doc><table><tbody/></table></doc>")
        with support.captured_stdout() as stdout:
            ET.dump(doc)
            self.assertEqual(stdout.getvalue(),
                             '<doc><table><tbody /></table></doc>\n')

    def test_bug_xmltoolkit39(self):
        # non-ASCII element and attribute names didn't work

        prolog = b"<?xml version='1.0' encoding='iso-8859-1'?>"

        elem = ET.XML(prolog + b"<t\xe4g />")
        self.assertEqual(ET.tostring(elem, "utf-8"), b'<t\xc3\xa4g />')

        elem = ET.XML(prolog + b"<tag \xe4ttr='v&#228;lue' />")
        self.assertEqual(elem.attrib, {'\xe4ttr': 'v\xe4lue'})
        self.assertEqual(ET.tostring(elem, "utf-8"),
                         b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')

        elem = ET.XML(prolog + b'<t\xe4g>text</t\xe4g>')
        self.assertEqual(ET.tostring(elem, "utf-8"),
                         b'<t\xc3\xa4g>text</t\xc3\xa4g>')

        elem = ET.Element("t\u00e4g")
        self.assertEqual(ET.tostring(elem, "utf-8"), b'<t\xc3\xa4g />')

        elem = ET.Element("tag")
        elem.set("\u00e4ttr", "v\u00e4lue")
        self.assertEqual(ET.tostring(elem, "utf-8"),
                         b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')

    def test_bug_xmltoolkit54(self):
        # problems handling internally defined entities

        doc = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]>"
                     '<doc>&ldots;</doc>')
        self.assertEqual(serialize(doc, encoding="us-ascii"),
                         b'<doc>&#33328;</doc>')
        self.assertEqual(serialize(doc), '<doc>\u8230</doc>')

    def test_bug_xmltoolkit55(self):
        # make sure we're reporting the first error, not the last

        with self.assertRaises(ET.ParseError) as caught:
            ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>"
                   b'<doc>&ldots;&ndots;&rdots;</doc>')
        self.assertEqual(str(caught.exception),
                         'undefined entity &ldots;: line 1, column 36')

    def test_bug_xmltoolkit60(self):
        # Handle crash in stream source.

        class ExceptionFile:
            def read(self, x):
                raise OSError

        with self.assertRaises(OSError):
            ET.parse(ExceptionFile())

    def test_bug_xmltoolkit62(self):
        # Don't crash when using custom entities.

        ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'}
        parser = ET.XMLParser()
        parser.entity.update(ENTITIES)
        parser.feed("""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
<patent-application-publication>
<subdoc-abstract>
<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
</subdoc-abstract>
</patent-application-publication>""")
        root = parser.close()
        self.assertEqual(root.find('.//paragraph').text,
            'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.')

    @unittest.skipIf(sys.gettrace(), "Skips under coverage.")
    def test_bug_xmltoolkit63(self):
        # Check reference leak.
        def build_and_discard():
            tree = ET.TreeBuilder()
            tree.start("tag", {})
            tree.data("text")
            tree.end("tag")

        # Run once before sampling the refcount, then compare the count
        # after many further runs.
        build_and_discard()
        count = sys.getrefcount(None)
        for _ in range(1000):
            build_and_discard()
        self.assertEqual(sys.getrefcount(None), count)

    def test_bug_200708_newline(self):
        # Preserve newlines in attributes.

        serialized = b'<SomeTag text="def _f():&#10;  return 3&#10;" />'

        e = ET.Element('SomeTag', text="def _f():\n  return 3\n")
        self.assertEqual(ET.tostring(e), serialized)
        self.assertEqual(ET.XML(ET.tostring(e)).get("text"),
                         'def _f():\n  return 3\n')
        self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))), serialized)

    def test_bug_200708_close(self):
        # Test default builder.
        parser = ET.XMLParser() # default
        parser.feed("<element>some text</element>")
        self.assertEqual(parser.close().tag, 'element')

        # Test custom builder.
        class EchoTarget:
            def close(self):
                return ET.Element("element") # simulate root
        parser = ET.XMLParser(target=EchoTarget())
        parser.feed("<element>some text</element>")
        self.assertEqual(parser.close().tag, 'element')

    def test_bug_200709_default_namespace(self):
        e = ET.Element("{default}elem")
        ET.SubElement(e, "{default}elem")
        self.assertEqual(serialize(e, default_namespace="default"), # 1
                '<elem xmlns="default"><elem /></elem>')

        e = ET.Element("{default}elem")
        ET.SubElement(e, "{default}elem")
        ET.SubElement(e, "{not-default}elem")
        self.assertEqual(serialize(e, default_namespace="default"), # 2
            '<elem xmlns="default" xmlns:ns1="not-default">'
            '<elem />'
            '<ns1:elem />'
            '</elem>')

        e = ET.Element("{default}elem")
        ET.SubElement(e, "{default}elem")
        ET.SubElement(e, "elem") # unprefixed name
        with self.assertRaises(ValueError) as caught:
            serialize(e, default_namespace="default") # 3
        self.assertEqual(str(caught.exception),
                'cannot use non-qualified names with default_namespace option')

    def test_bug_200709_register_namespace(self):
        e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
        self.assertEqual(ET.tostring(e),
            b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />')
        ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
        e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
        self.assertEqual(ET.tostring(e),
            b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />')

        # And the Dublin Core namespace is in the default list:

        e = ET.Element("{http://purl.org/dc/elements/1.1/}title")
        self.assertEqual(ET.tostring(e),
            b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />')

    def test_bug_200709_element_comment(self):
        # Not sure if this can be fixed, really (since the serializer needs
        # ET.Comment, not cET.comment).

        a = ET.Element('a')
        a.append(ET.Comment('foo'))
        self.assertEqual(a[0].tag, ET.Comment)

        a = ET.Element('a')
        a.append(ET.PI('foo'))
        self.assertEqual(a[0].tag, ET.PI)

    def test_bug_200709_element_insert(self):
        a = ET.Element('a')
        ET.SubElement(a, 'b')
        ET.SubElement(a, 'c')
        d = ET.Element('d')
        a.insert(0, d)
        self.assertEqual(summarize_list(a), ['d', 'b', 'c'])
        a.insert(-1, d)
        self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c'])

    def test_bug_200709_iter_comment(self):
        a = ET.Element('a')
        b = ET.SubElement(a, 'b')
        b.append(ET.Comment("TEST-b"))
        self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment])

    # --------------------------------------------------------------------
    # reported on bugs.python.org

    def test_bug_1534630(self):
        # Character data is fed before any element has been started.
        bob = ET.TreeBuilder()
        bob.data("data")
        bob.start("tag", {})
        bob.end("tag")
        e = bob.close()
        self.assertEqual(serialize(e), '<tag />')

    def test_issue6233(self):
        expected = (b"<?xml version='1.0' encoding='ascii'?>\n"
                    b'<body>t&#227;g</body>')

        e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>"
                   b'<body>t\xc3\xa3g</body>')
        self.assertEqual(ET.tostring(e, 'ascii'), expected)

        e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
                   b'<body>t\xe3g</body>')
        self.assertEqual(ET.tostring(e, 'ascii'), expected)

    def test_issue3151(self):
        e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
        self.assertEqual(e.tag, '{${stuff}}localname')
        # The tree object itself is not inspected; it is only constructed.
        ET.ElementTree(e)
        self.assertEqual(ET.tostring(e),
                         b'<ns0:localname xmlns:ns0="${stuff}" />')

    def test_issue6565(self):
        elem = ET.XML("<body><tag/></body>")
        self.assertEqual(summarize_list(elem), ['tag'])
        newelem = ET.XML(SAMPLE_XML)
        elem[:] = newelem[:]
        self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section'])

    def test_issue10777(self):
        # Registering a namespace twice caused a "dictionary changed size during
        # iteration" bug.

        for _ in range(2):
            ET.register_namespace('test10777', 'http://myuri/')

    def test_lost_text(self):
        # Issue #25902: Borrowed text can disappear
        class Text:
            def __bool__(self):
                # Replace the text from inside the truth test.
                e.text = 'changed'
                return True

        e = ET.Element('tag')
        e.text = Text()
        texts = e.itertext()
        first = next(texts)
        self.assertIsInstance(first, Text)
        self.assertIsInstance(e.text, str)
        self.assertEqual(e.text, 'changed')

    def test_lost_tail(self):
        # Issue #25902: Borrowed tail can disappear
        class Text:
            def __bool__(self):
                # Replace the tail from inside the truth test.
                e[0].tail = 'changed'
                return True

        e = ET.Element('root')
        e.append(ET.Element('tag'))
        e[0].tail = Text()
        texts = e.itertext()
        first = next(texts)
        self.assertIsInstance(first, Text)
        self.assertIsInstance(e[0].tail, str)
        self.assertEqual(e[0].tail, 'changed')

    def test_lost_elem(self):
        # Issue #25902: Borrowed element can disappear
        class Tag:
            def __eq__(self, other):
                # Replace the element and advance the same iterator from
                # inside the tag comparison.
                e[0] = ET.Element('changed')
                next(walker)
                return True

        e = ET.Element('root')
        e.append(ET.Element(Tag()))
        e.append(ET.Element('tag'))
        walker = e.iter('tag')
        try:
            found = next(walker)
        except ValueError:
            self.skipTest('generators are not reentrant')
        self.assertIsInstance(found.tag, Tag)
        self.assertIsInstance(e[0].tag, str)
        self.assertEqual(e[0].tag, 'changed')

    def check_expat224_utf8_bug(self, text):
        # *text* is UTF-8 encoded bytes placed verbatim in an attribute; the
        # parsed attribute must equal its decoded form.
        root = ET.XML(b'<a b="%s"/>' % text)
        self.assertEqual(root.get('b'), text.decode('utf-8'))

    def test_expat224_utf8_bug(self):
        # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
        # Check that Expat 2.2.4 fixed the bug.
        #
        # Test buffer bounds at odd and even positions.

        text = b'\xc3\xa0' * 1024
        self.check_expat224_utf8_bug(text)

        text = b'x' + b'\xc3\xa0' * 1024
        self.check_expat224_utf8_bug(text)

    def test_expat224_utf8_bug_file(self):
        with open(UTF8_BUG_XMLFILE, 'rb') as fp:
            raw = fp.read()
        xmlattr = ET.fromstring(raw).get('b')

        # "Parse" manually the XML file to extract the value of the 'b'
        # attribute of the <a b='xxx' /> XML element
        text = raw.decode('utf-8').strip()
        text = text.replace('\r\n', ' ')
        text = text[6:-4]
        self.assertEqual(xmlattr, text)
2068
2069
2070
2071# --------------------------------------------------------------------
2072
2073
class BasicElementTest(ElementTestCase, unittest.TestCase):
    """Construction, copying, garbage collection and pickling of Element."""

    def test___init__(self):
        tag = "foo"
        attrib = {"zix": "wyp"}

        element_foo = ET.Element(tag, attrib)

        # traits of an element
        self.assertIsInstance(element_foo, ET.Element)
        for trait in ("tag", "attrib", "text", "tail"):
            self.assertIn(trait, dir(element_foo))

        # string attributes have expected values
        self.assertEqual(element_foo.tag, tag)
        self.assertIsNone(element_foo.text)
        self.assertIsNone(element_foo.tail)

        # attrib is a copy
        self.assertIsNot(element_foo.attrib, attrib)
        self.assertEqual(element_foo.attrib, attrib)

        # attrib isn't linked
        attrib["bar"] = "baz"
        self.assertIsNot(element_foo.attrib, attrib)
        self.assertNotEqual(element_foo.attrib, attrib)

    def test___copy__(self):
        original = ET.Element("foo", {"zix": "wyp"})
        original.append(ET.Element("bar", {"baz": "qix"}))

        duplicate = copy.copy(original)

        # elements are not the same
        self.assertIsNot(duplicate, original)

        # string attributes are equal
        self.assertEqual(duplicate.tag, original.tag)
        self.assertEqual(duplicate.text, original.text)
        self.assertEqual(duplicate.tail, original.tail)

        # number of children is the same
        self.assertEqual(len(duplicate), len(original))

        # children are the same
        for child1, child2 in itertools.zip_longest(original, duplicate):
            self.assertIs(child1, child2)

        # attrib is a copy
        self.assertEqual(duplicate.attrib, original.attrib)

    def test___deepcopy__(self):
        original = ET.Element("foo", {"zix": "wyp"})
        original.append(ET.Element("bar", {"baz": "qix"}))

        duplicate = copy.deepcopy(original)

        # elements are not the same
        self.assertIsNot(duplicate, original)

        # string attributes are equal
        self.assertEqual(duplicate.tag, original.tag)
        self.assertEqual(duplicate.text, original.text)
        self.assertEqual(duplicate.tail, original.tail)

        # number of children is the same
        self.assertEqual(len(duplicate), len(original))

        # children are not the same
        for child1, child2 in itertools.zip_longest(original, duplicate):
            self.assertIsNot(child1, child2)

        # attrib is a copy
        self.assertIsNot(duplicate.attrib, original.attrib)
        self.assertEqual(duplicate.attrib, original.attrib)

        # attrib isn't linked
        original.attrib["bar"] = "baz"
        self.assertIsNot(duplicate.attrib, original.attrib)
        self.assertNotEqual(duplicate.attrib, original.attrib)

    def test_augmentation_type_errors(self):
        e = ET.Element('joe')
        with self.assertRaises(TypeError):
            e.append('b')
        with self.assertRaises(TypeError):
            e.extend([ET.Element('bar'), 'foo'])
        with self.assertRaises(TypeError):
            e.insert(0, 'foo')
        e[:] = [ET.Element('bar')]
        with self.assertRaises(TypeError):
            e[0] = 'foo'
        with self.assertRaises(TypeError):
            e[:] = [ET.Element('bar'), 'foo']

        # Only checked when the implementation under test provides the hook.
        if hasattr(e, '__setstate__'):
            state = {
                'tag': 'tag',
                '_children': [None],  # non-Element
                'attrib': 'attr',
                'tail': 'tail',
                'text': 'text',
            }
            with self.assertRaises(TypeError):
                e.__setstate__(state)

        if hasattr(e, '__deepcopy__'):
            class E(ET.Element):
                def __deepcopy__(self, memo):
                    return None  # non-Element
            e[:] = [E('bar')]
            with self.assertRaises(TypeError):
                copy.deepcopy(e)

    def test_cyclic_gc(self):
        class Dummy:
            pass

        # Test the shortest cycle: d->element->d
        d = Dummy()
        d.dummyref = ET.Element('joe', attr=d)
        wref = weakref.ref(d)
        del d
        gc_collect()
        self.assertIsNone(wref())

        # A longer cycle: d->e->e2->d
        e = ET.Element('joe')
        d = Dummy()
        d.dummyref = e
        wref = weakref.ref(d)
        e2 = ET.SubElement(e, 'foo', attr=d)
        del d, e, e2
        gc_collect()
        self.assertIsNone(wref())

        # A cycle between Element objects as children of one another
        # e1->e2->e3->e1
        e1 = ET.Element('e1')
        e2 = ET.Element('e2')
        e3 = ET.Element('e3')
        e3.append(e1)
        e2.append(e3)
        e1.append(e2)
        wref = weakref.ref(e1)
        del e1, e2, e3
        gc_collect()
        self.assertIsNone(wref())

    def test_weakref(self):
        callback_ran = False

        def wref_cb(w):
            nonlocal callback_ran
            callback_ran = True

        e = ET.Element('e')
        wref = weakref.ref(e, wref_cb)
        self.assertEqual(wref().tag, 'e')
        del e
        # Dropping the last reference frees the element immediately only
        # under reference counting; force a collection so the callback has
        # run on other garbage collectors too.
        gc_collect()
        self.assertTrue(callback_ran)
        self.assertIsNone(wref())

    def test_get_keyword_args(self):
        e1 = ET.Element('foo', x=1, y=2, z=3)
        self.assertEqual(e1.get('x', default=7), 1)
        self.assertEqual(e1.get('w', default=7), 7)

    def test_pickle(self):
        # issue #16076: the C implementation wasn't pickleable.
        protocols = range(2, pickle.HIGHEST_PROTOCOL + 1)
        # Every protocol, with every (dumper, loader) pairing of the modules.
        for proto, dumper, loader in product(protocols,
                                             self.modules, self.modules):
            e = dumper.Element('foo', bar=42)
            e.text = "text goes here"
            e.tail = "opposite of head"
            dumper.SubElement(e, 'child').append(dumper.Element('grandchild'))
            e.append(dumper.Element('child'))
            e.findall('.//grandchild')[0].set('attr', 'other value')

            e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree',
                                      dumper, loader, proto)

            self.assertEqual(e2.tag, 'foo')
            self.assertEqual(e2.attrib['bar'], 42)
            self.assertEqual(len(e2), 2)
            self.assertEqualElements(e, e2)

    def test_pickle_issue18997(self):
        XMLTEXT = """<?xml version="1.0"?>
                    <group><dogs>4</dogs>
                    </group>"""
        protocols = range(2, pickle.HIGHEST_PROTOCOL + 1)
        for proto, dumper, loader in product(protocols,
                                             self.modules, self.modules):
            e1 = dumper.fromstring(XMLTEXT)
            if hasattr(e1, '__getstate__'):
                self.assertEqual(e1.__getstate__()['tag'], 'group')
            e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree',
                                      dumper, loader, proto)
            self.assertEqual(e2.tag, 'group')
            self.assertEqual(e2[0].tag, 'dogs')
2269
2270
2271class BadElementTest(ElementTestCase, unittest.TestCase):
2272    def test_extend_mutable_list(self):
2273        class X:
2274            @property
2275            def __class__(self):
2276                L[:] = [ET.Element('baz')]
2277                return ET.Element
2278        L = [X()]
2279        e = ET.Element('foo')
2280        try:
2281            e.extend(L)
2282        except TypeError:
2283            pass
2284
2285        class Y(X, ET.Element):
2286            pass
2287        L = [Y('x')]
2288        e = ET.Element('foo')
2289        e.extend(L)
2290
2291    def test_extend_mutable_list2(self):
2292        class X:
2293            @property
2294            def __class__(self):
2295                del L[:]
2296                return ET.Element
2297        L = [X(), ET.Element('baz')]
2298        e = ET.Element('foo')
2299        try:
2300            e.extend(L)
2301        except TypeError:
2302            pass
2303
2304        class Y(X, ET.Element):
2305            pass
2306        L = [Y('bar'), ET.Element('baz')]
2307        e = ET.Element('foo')
2308        e.extend(L)
2309
2310    def test_remove_with_mutating(self):
2311        class X(ET.Element):
2312            def __eq__(self, o):
2313                del e[:]
2314                return False
2315        e = ET.Element('foo')
2316        e.extend([X('bar')])
2317        self.assertRaises(ValueError, e.remove, ET.Element('baz'))
2318
2319        e = ET.Element('foo')
2320        e.extend([ET.Element('bar')])
2321        self.assertRaises(ValueError, e.remove, X('baz'))
2322
2323    def test_recursive_repr(self):
2324        # Issue #25455
2325        e = ET.Element('foo')
2326        with swap_attr(e, 'tag', e):
2327            with self.assertRaises(RuntimeError):
2328                repr(e)  # Should not crash
2329
2330    def test_element_get_text(self):
2331        # Issue #27863
2332        class X(str):
2333            def __del__(self):
2334                try:
2335                    elem.text
2336                except NameError:
2337                    pass
2338
2339        b = ET.TreeBuilder()
2340        b.start('tag', {})
2341        b.data('ABCD')
2342        b.data(X('EFGH'))
2343        b.data('IJKL')
2344        b.end('tag')
2345
2346        elem = b.close()
2347        self.assertEqual(elem.text, 'ABCDEFGHIJKL')
2348
2349    def test_element_get_tail(self):
2350        # Issue #27863
2351        class X(str):
2352            def __del__(self):
2353                try:
2354                    elem[0].tail
2355                except NameError:
2356                    pass
2357
2358        b = ET.TreeBuilder()
2359        b.start('root', {})
2360        b.start('tag', {})
2361        b.end('tag')
2362        b.data('ABCD')
2363        b.data(X('EFGH'))
2364        b.data('IJKL')
2365        b.end('root')
2366
2367        elem = b.close()
2368        self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL')
2369
2370    def test_subscr(self):
2371        # Issue #27863
2372        class X:
2373            def __index__(self):
2374                del e[:]
2375                return 1
2376
2377        e = ET.Element('elem')
2378        e.append(ET.Element('child'))
2379        e[:X()]  # shouldn't crash
2380
2381        e.append(ET.Element('child'))
2382        e[0:10:X()]  # shouldn't crash
2383
2384    def test_ass_subscr(self):
2385        # Issue #27863
2386        class X:
2387            def __index__(self):
2388                e[:] = []
2389                return 1
2390
2391        e = ET.Element('elem')
2392        for _ in range(10):
2393            e.insert(0, ET.Element('child'))
2394
2395        e[0:10:X()] = []  # shouldn't crash
2396
2397    def test_treebuilder_start(self):
2398        # Issue #27863
2399        def element_factory(x, y):
2400            return []
2401        b = ET.TreeBuilder(element_factory=element_factory)
2402
2403        b.start('tag', {})
2404        b.data('ABCD')
2405        self.assertRaises(AttributeError, b.start, 'tag2', {})
2406        del b
2407        gc_collect()
2408
2409    def test_treebuilder_end(self):
2410        # Issue #27863
2411        def element_factory(x, y):
2412            return []
2413        b = ET.TreeBuilder(element_factory=element_factory)
2414
2415        b.start('tag', {})
2416        b.data('ABCD')
2417        self.assertRaises(AttributeError, b.end, 'tag')
2418        del b
2419        gc_collect()
2420
2421
class MutatingElementPath(str):
    # A path string that remembers an element; every equality comparison
    # deletes all items of that element and claims the operands are equal.

    def __new__(cls, elem, *args):
        path = super().__new__(cls, *args)
        path.elem = elem
        return path

    def __eq__(self, o):
        del self.elem[:]
        return True

    # Keep the str hash; a class that defines __eq__ alone gets no __hash__.
    __hash__ = str.__hash__
2431
class BadElementPath(str):
    # A path string whose equality comparison always fails with
    # ZeroDivisionError (raised while evaluating 1/0).

    def __eq__(self, o):
        raise 1/0

    # Keep the str hash; a class that defines __eq__ alone gets no __hash__.
    __hash__ = str.__hash__
2436
class BadElementPathTest(ElementTestCase, unittest.TestCase):
    # Calls find(), findtext() and findall() with path objects whose
    # __eq__ either empties the searched element or raises.

    def setUp(self):
        super().setUp()
        # Save the ElementPath cache and start each test with an empty one.
        from xml.etree import ElementPath
        self.path_cache, ElementPath._cache = ElementPath._cache, {}

    def tearDown(self):
        # Put the cache saved by setUp() back.
        from xml.etree import ElementPath
        ElementPath._cache = self.path_cache
        super().tearDown()

    @staticmethod
    def _foo_with_bar():
        # Build <foo> holding a single <bar> child.
        parent = ET.Element('foo')
        parent.extend([ET.Element('bar')])
        return parent

    def test_find_with_mutating(self):
        parent = self._foo_with_bar()
        parent.find(MutatingElementPath(parent, 'x'))

    def test_find_with_error(self):
        parent = self._foo_with_bar()
        try:
            parent.find(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findtext_with_mutating(self):
        parent = self._foo_with_bar()
        parent.findtext(MutatingElementPath(parent, 'x'))

    def test_findtext_with_error(self):
        parent = self._foo_with_bar()
        try:
            parent.findtext(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findall_with_mutating(self):
        parent = self._foo_with_bar()
        parent.findall(MutatingElementPath(parent, 'x'))

    def test_findall_with_error(self):
        parent = self._foo_with_bar()
        try:
            parent.findall(BadElementPath('x'))
        except ZeroDivisionError:
            pass
2487
2488
class ElementTreeTypeTest(unittest.TestCase):
    # Checks that the public ET names are classes and that ET.Element can
    # be subclassed.

    def test_istype(self):
        public_classes = (
            ET.ParseError,
            ET.QName,
            ET.ElementTree,
            ET.Element,
            ET.TreeBuilder,
            ET.XMLParser,
        )
        for candidate in public_classes:
            self.assertIsInstance(candidate, type)

    def test_Element_subclass_trivial(self):
        class MyElement(ET.Element):
            pass

        instance = MyElement('foo')
        for expected_type in (ET.Element, MyElement):
            self.assertIsInstance(instance, expected_type)
        self.assertEqual(instance.tag, 'foo')

        # test that attribute assignment works (issue 14849)
        instance.text = "joe"
        self.assertEqual(instance.text, "joe")

    def test_Element_subclass_constructor(self):
        # The subclass decorates the tag before delegating to Element.
        class MyElement(ET.Element):
            def __init__(self, tag, attrib={}, **extra):
                super().__init__(tag + '__', attrib, **extra)

        instance = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4)
        self.assertEqual(instance.tag, 'foo__')
        expected_items = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
        self.assertEqual(sorted(instance.items()), expected_items)

    def test_Element_subclass_new_method(self):
        class MyElement(ET.Element):
            def newmethod(self):
                return self.tag

        self.assertEqual(MyElement('joe').newmethod(), 'joe')

    def test_Element_subclass_find(self):
        class MyElement(ET.Element):
            pass

        # A plain Element parent holding one subclass child.
        parent = ET.Element('foo')
        parent.text = 'text'
        child = MyElement('bar')
        child.text = 'subtext'
        parent.append(child)

        self.assertEqual(parent.findtext('bar'), 'subtext')
        self.assertEqual(parent.find('bar').tag, 'bar')
        matches = list(parent.findall('bar'))
        self.assertEqual(len(matches), 1, matches)
        self.assertEqual(matches[0].tag, 'bar')
2543
2544
class ElementFindTest(unittest.TestCase):
    # Tests for find(), findtext() and findall() on elements and trees.

    def _check_findall(self, elem, cases):
        # 'cases' is a sequence of (path, expected summary) pairs that are
        # checked in order against elem.findall(path).
        for path, expected in cases:
            self.assertEqual(summarize_list(elem.findall(path)), expected)

    def test_find_simple(self):
        root = ET.XML(SAMPLE_XML)
        for path in ('tag', 'section/tag', './tag'):
            self.assertEqual(root.find(path).tag, 'tag')

        # Swap the third child for the richer sample section.
        root[2] = ET.XML(SAMPLE_SECTION)
        self.assertEqual(root.find('section/nexttag').tag, 'nexttag')

        self.assertEqual(root.findtext('./tag'), 'text')
        self.assertEqual(root.findtext('section/tag'), 'subtext')

        # section/nexttag is found but has no text
        self.assertEqual(root.findtext('section/nexttag'), '')
        self.assertEqual(root.findtext('section/nexttag', 'default'), '')

        # tog doesn't exist and 'default' kicks in
        self.assertIsNone(root.findtext('tog'))
        self.assertEqual(root.findtext('tog', 'default'), 'default')

        # Issue #16922
        empty_parent = ET.XML('<tag><empty /></tag>')
        self.assertEqual(empty_parent.findtext('empty'), '')

    def test_find_xpath(self):
        LINEAR_XML = '''
        <body>
            <tag class='a'/>
            <tag class='b'/>
            <tag class='c'/>
            <tag class='d'/>
        </body>'''
        root = ET.XML(LINEAR_XML)

        # Test for numeric indexing and last()
        positional = (
            ('./tag[1]', 'a'),
            ('./tag[2]', 'b'),
            ('./tag[last()]', 'd'),
            ('./tag[last()-1]', 'c'),
            ('./tag[last()-2]', 'b'),
        )
        for path, expected_class in positional:
            self.assertEqual(root.find(path).attrib['class'], expected_class)

        # Positions that must be rejected as XPath syntax errors.
        rejected = ('./tag[0]', './tag[-1]', './tag[last()-0]',
                    './tag[last()+1]')
        for path in rejected:
            with self.assertRaisesRegex(SyntaxError, 'XPath'):
                root.find(path)

    def test_findall(self):
        root = ET.XML(SAMPLE_XML)
        root[2] = ET.XML(SAMPLE_SECTION)

        # Plain paths and wildcards.
        self._check_findall(root, (
            ('.', ['body']),
            ('tag', ['tag', 'tag']),
            ('tog', []),
            ('tog/foo', []),
            ('*', ['tag', 'tag', 'section']),
            ('.//tag', ['tag'] * 4),
            ('section/tag', ['tag']),
            ('section//tag', ['tag'] * 2),
            ('section/*', ['tag', 'nexttag', 'nextsection']),
            ('section//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('section/.//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('*/*', ['tag', 'nexttag', 'nextsection']),
            ('*//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('*/tag', ['tag']),
            ('*/./tag', ['tag']),
            ('./tag', ['tag'] * 2),
            ('././tag', ['tag'] * 2),
        ))

        # Attribute and child predicates, parent steps.
        self._check_findall(root, (
            ('.//tag[@class]', ['tag'] * 3),
            ('.//tag[@class="a"]', ['tag']),
            ('.//tag[@class="b"]', ['tag'] * 2),
            ('.//tag[@id]', ['tag']),
            ('.//section[tag]', ['section']),
            ('.//section[element]', []),
            ('../tag', []),
            ('section/../tag', ['tag'] * 2),
        ))
        self.assertEqual(root.findall('section//'),
                         root.findall('section//*'))

        # Child-text predicates with varying whitespace around '='.
        self._check_findall(root, (
            (".//section[tag='subtext']", ['section']),
            (".//section[tag ='subtext']", ['section']),
            (".//section[tag= 'subtext']", ['section']),
            (".//section[tag = 'subtext']", ['section']),
            (".//section[ tag = 'subtext' ]", ['section']),
        ))

        # Own-text predicates; whitespace inside the quotes is significant.
        self._check_findall(root, (
            (".//tag[.='subtext']", ['tag']),
            (".//tag[. ='subtext']", ['tag']),
            ('.//tag[.= "subtext"]', ['tag']),
            ('.//tag[ . = "subtext" ]', ['tag']),
            (".//tag[. = 'subtext']", ['tag']),
            (".//tag[. = 'subtext ']", []),
            (".//tag[.= ' subtext']", []),
        ))

        # duplicate section => 2x tag matches
        root[1] = root[2]
        self._check_findall(root, (
            (".//section[tag = 'subtext']", ['section', 'section']),
            (".//tag[. = 'subtext']", ['tag', 'tag']),
        ))

    def test_test_find_with_ns(self):
        root = ET.XML(SAMPLE_XML_NS)
        qualified = '{http://effbot.org/ns}tag'
        self._check_findall(root, (
            ('tag', []),
            ("{http://effbot.org/ns}tag", [qualified] * 2),
            (".//{http://effbot.org/ns}tag", [qualified] * 3),
        ))

    def test_findall_different_nsmaps(self):
        root = ET.XML('''
            <a xmlns:x="X" xmlns:y="Y">
                <x:b><c/></x:b>
                <b/>
                <c><x:b/><b/></c><y:b/>
            </a>''')
        # (namespace map, matches for ".//xx:b", matches for ".//b")
        scenarios = (
            ({'xx': 'X'}, 2, 2),
            ({'xx': 'Y'}, 1, 2),
            ({'xx': 'X', '': 'Y'}, 2, 1),
        )
        for nsmap, prefixed_count, plain_count in scenarios:
            prefixed = root.findall(".//xx:b", namespaces=nsmap)
            self.assertEqual(len(prefixed), prefixed_count)
            plain = root.findall(".//b", namespaces=nsmap)
            self.assertEqual(len(plain), plain_count)

    def test_findall_wildcard(self):
        root = ET.XML('''
            <a xmlns:x="X" xmlns:y="Y">
                <x:b><c/></x:b>
                <b/>
                <c><x:b/><b/></c><y:b/>
            </a>''')
        root.append(ET.Comment('test'))

        def found(path):
            return summarize_list(root.findall(path))

        # Direct children.
        self._check_findall(root, (
            ("{*}b", ['{X}b', 'b', '{Y}b']),
            ("{*}c", ['c']),
            ("{X}*", ['{X}b']),
            ("{Y}*", ['{Y}b']),
            ("{}*", ['b', 'c']),
            ("{}b", ['b']),  # only for consistency
        ))
        self.assertEqual(found("{}b"), found("b"))
        self._check_findall(root, (
            ("{*}*", ['{X}b', 'b', 'c', '{Y}b']),
        ))
        # This is an unfortunate difference, but that's how find('*') works.
        self.assertEqual(summarize_list(root.findall("{*}*") + [root[-1]]),
                         found("*"))

        # All descendants.
        self._check_findall(root, (
            (".//{*}b", ['{X}b', 'b', '{X}b', 'b', '{Y}b']),
            (".//{*}c", ['c', 'c']),
            (".//{X}*", ['{X}b', '{X}b']),
            (".//{Y}*", ['{Y}b']),
            (".//{}*", ['c', 'b', 'c', 'b']),
            (".//{}b", ['b', 'b']),  # only for consistency
        ))
        self.assertEqual(found(".//{}b"), found(".//b"))

    def test_bad_find(self):
        root = ET.XML(SAMPLE_XML)
        self.assertRaisesRegex(SyntaxError, 'cannot use absolute path',
                               root.findall, '/tag')

    def test_find_through_ElementTree(self):
        root = ET.XML(SAMPLE_XML)

        found = ET.ElementTree(root).find('tag')
        self.assertEqual(found.tag, 'tag')

        text = ET.ElementTree(root).findtext('tag')
        self.assertEqual(text, 'text')

        matches = ET.ElementTree(root).findall('tag')
        self.assertEqual(summarize_list(matches), ['tag'] * 2)

        # this produces a warning
        msg = ("This search is broken in 1.3 and earlier, and will be fixed "
               "in a future version.  If you rely on the current behaviour, "
               "change it to '.+'")
        with self.assertWarnsRegex(FutureWarning, msg):
            matches = ET.ElementTree(root).findall('//tag')
        self.assertEqual(summarize_list(matches), ['tag'] * 3)
2757
2758
class ElementIterTest(unittest.TestCase):
    # Tests for Element.iter(), itertext(), getiterator() and iterparse().

    def _ilist(self, elem, tag=None):
        # Summary of everything elem.iter(tag) yields.
        iterator = elem.iter(tag)
        return summarize_list(iterator)

    def test_basic(self):
        doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
        body = doc.find('body')

        self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
        self.assertEqual(self._ilist(body), ['body', 'i'])
        self.assertEqual(next(doc.iter()).tag, 'html')
        self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
        self.assertEqual(''.join(doc.find('body').itertext()),
                         'this is a paragraph.')
        self.assertEqual(next(doc.itertext()), 'this is a ')

        # iterparse should return an iterator
        source = serialize(doc, to_string=False)
        first_event = next(ET.iterparse(source))[0]
        self.assertEqual(first_event, 'end')

        # With an explicit parser too (issue #9708)
        source = serialize(doc, to_string=False)
        parser = ET.XMLParser(target=ET.TreeBuilder())
        first_event = next(ET.iterparse(source, parser=parser))[0]
        self.assertEqual(first_event, 'end')

        # A tree built around None raises AttributeError from iter().
        tree = ET.ElementTree(None)
        with self.assertRaises(AttributeError):
            tree.iter()

        # Issue #16913
        doc = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
        self.assertEqual(''.join(doc.itertext()), 'a&b&c&')

    def test_corners(self):
        # single root, no subelements
        root = ET.Element('a')
        self.assertEqual(self._ilist(root), ['a'])

        # one child
        first = ET.SubElement(root, 'b')
        self.assertEqual(self._ilist(root), ['a', 'b'])

        # one child and one grandchild
        grandchild = ET.SubElement(first, 'c')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c'])

        # two children, only first with grandchild
        second = ET.SubElement(root, 'd')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c', 'd'])

        # replace first child by second
        root[0] = root[1]
        del root[1]
        self.assertEqual(self._ilist(root), ['a', 'd'])

    def test_iter_by_tag(self):
        doc = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        for tag, count in (('room', 3), ('house', 2)):
            self.assertEqual(self._ilist(doc, tag), [tag] * count)

        # test that iter also accepts 'tag' as a keyword arg
        by_keyword = doc.iter(tag='room')
        self.assertEqual(summarize_list(by_keyword), ['room'] * 3)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(summarize_list(doc.iter()), all_tags)
        self.assertEqual(self._ilist(doc), all_tags)
        self.assertEqual(self._ilist(doc, '*'), all_tags)

    # Element.getiterator() is deprecated.
    @checkwarnings(("This method will be removed in future versions.  "
                    "Use .+ instead.", DeprecationWarning))
    def test_getiterator(self):
        doc = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        for tag, count in (('room', 3), ('house', 2)):
            self.assertEqual(summarize_list(doc.getiterator(tag)),
                             [tag] * count)

        # test that getiterator also accepts 'tag' as a keyword arg
        by_keyword = doc.getiterator(tag='room')
        self.assertEqual(summarize_list(by_keyword), ['room'] * 3)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(summarize_list(doc.getiterator()), all_tags)
        for wildcard in (None, '*'):
            self.assertEqual(summarize_list(doc.getiterator(wildcard)),
                             all_tags)

    def test_copy(self):
        # Copying an element iterator raises TypeError.
        iterator = ET.Element('a').iter()
        self.assertRaises(TypeError, copy.copy, iterator)

    def test_pickle(self):
        # Pickling an element iterator fails under every protocol.
        iterator = ET.Element('a').iter()
        failures = (TypeError, pickle.PicklingError)
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            self.assertRaises(failures, pickle.dumps, iterator, proto)
2887
2888
2889class TreeBuilderTest(unittest.TestCase):
2890    sample1 = ('<!DOCTYPE html PUBLIC'
2891        ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
2892        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
2893        '<html>text<div>subtext</div>tail</html>')
2894
2895    sample2 = '''<toplevel>sometext</toplevel>'''
2896
2897    def _check_sample1_element(self, e):
2898        self.assertEqual(e.tag, 'html')
2899        self.assertEqual(e.text, 'text')
2900        self.assertEqual(e.tail, None)
2901        self.assertEqual(e.attrib, {})
2902        children = list(e)
2903        self.assertEqual(len(children), 1)
2904        child = children[0]
2905        self.assertEqual(child.tag, 'div')
2906        self.assertEqual(child.text, 'subtext')
2907        self.assertEqual(child.tail, 'tail')
2908        self.assertEqual(child.attrib, {})
2909
2910    def test_dummy_builder(self):
2911        class BaseDummyBuilder:
2912            def close(self):
2913                return 42
2914
2915        class DummyBuilder(BaseDummyBuilder):
2916            data = start = end = lambda *a: None
2917
2918        parser = ET.XMLParser(target=DummyBuilder())
2919        parser.feed(self.sample1)
2920        self.assertEqual(parser.close(), 42)
2921
2922        parser = ET.XMLParser(target=BaseDummyBuilder())
2923        parser.feed(self.sample1)
2924        self.assertEqual(parser.close(), 42)
2925
2926        parser = ET.XMLParser(target=object())
2927        parser.feed(self.sample1)
2928        self.assertIsNone(parser.close())
2929
2930    def test_treebuilder_comment(self):
2931        b = ET.TreeBuilder()
2932        self.assertEqual(b.comment('ctext').tag, ET.Comment)
2933        self.assertEqual(b.comment('ctext').text, 'ctext')
2934
2935        b = ET.TreeBuilder(comment_factory=ET.Comment)
2936        self.assertEqual(b.comment('ctext').tag, ET.Comment)
2937        self.assertEqual(b.comment('ctext').text, 'ctext')
2938
2939        b = ET.TreeBuilder(comment_factory=len)
2940        self.assertEqual(b.comment('ctext'), len('ctext'))
2941
2942    def test_treebuilder_pi(self):
2943        b = ET.TreeBuilder()
2944        self.assertEqual(b.pi('target', None).tag, ET.PI)
2945        self.assertEqual(b.pi('target', None).text, 'target')
2946
2947        b = ET.TreeBuilder(pi_factory=ET.PI)
2948        self.assertEqual(b.pi('target').tag, ET.PI)
2949        self.assertEqual(b.pi('target').text, "target")
2950        self.assertEqual(b.pi('pitarget', ' text ').tag, ET.PI)
2951        self.assertEqual(b.pi('pitarget', ' text ').text, "pitarget  text ")
2952
2953        b = ET.TreeBuilder(pi_factory=lambda target, text: (len(target), text))
2954        self.assertEqual(b.pi('target'), (len('target'), None))
2955        self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text '))
2956
2957    def test_late_tail(self):
2958        # Issue #37399: The tail of an ignored comment could overwrite the text before it.
2959        class TreeBuilderSubclass(ET.TreeBuilder):
2960            pass
2961
2962        xml = "<a>text<!-- comment -->tail</a>"
2963        a = ET.fromstring(xml)
2964        self.assertEqual(a.text, "texttail")
2965
2966        parser = ET.XMLParser(target=TreeBuilderSubclass())
2967        parser.feed(xml)
2968        a = parser.close()
2969        self.assertEqual(a.text, "texttail")
2970
2971        xml = "<a>text<?pi data?>tail</a>"
2972        a = ET.fromstring(xml)
2973        self.assertEqual(a.text, "texttail")
2974
2975        xml = "<a>text<?pi data?>tail</a>"
2976        parser = ET.XMLParser(target=TreeBuilderSubclass())
2977        parser.feed(xml)
2978        a = parser.close()
2979        self.assertEqual(a.text, "texttail")
2980
2981    def test_late_tail_mix_pi_comments(self):
2982        # Issue #37399: The tail of an ignored comment could overwrite the text before it.
2983        # Test appending tails to comments/pis.
2984        class TreeBuilderSubclass(ET.TreeBuilder):
2985            pass
2986
2987        xml = "<a>text<?pi1?> <!-- comment -->\n<?pi2?>tail</a>"
2988        parser = ET.XMLParser(target=ET.TreeBuilder(insert_comments=True))
2989        parser.feed(xml)
2990        a = parser.close()
2991        self.assertEqual(a[0].text, ' comment ')
2992        self.assertEqual(a[0].tail, '\ntail')
2993        self.assertEqual(a.text, "text ")
2994
2995        parser = ET.XMLParser(target=TreeBuilderSubclass(insert_comments=True))
2996        parser.feed(xml)
2997        a = parser.close()
2998        self.assertEqual(a[0].text, ' comment ')
2999        self.assertEqual(a[0].tail, '\ntail')
3000        self.assertEqual(a.text, "text ")
3001
3002        xml = "<a>text<!-- comment -->\n<?pi data?>tail</a>"
3003        parser = ET.XMLParser(target=ET.TreeBuilder(insert_pis=True))
3004        parser.feed(xml)
3005        a = parser.close()
3006        self.assertEqual(a[0].text, 'pi data')
3007        self.assertEqual(a[0].tail, 'tail')
3008        self.assertEqual(a.text, "text\n")
3009
3010        parser = ET.XMLParser(target=TreeBuilderSubclass(insert_pis=True))
3011        parser.feed(xml)
3012        a = parser.close()
3013        self.assertEqual(a[0].text, 'pi data')
3014        self.assertEqual(a[0].tail, 'tail')
3015        self.assertEqual(a.text, "text\n")
3016
3017    def test_treebuilder_elementfactory_none(self):
3018        parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None))
3019        parser.feed(self.sample1)
3020        e = parser.close()
3021        self._check_sample1_element(e)
3022
3023    def test_subclass(self):
3024        class MyTreeBuilder(ET.TreeBuilder):
3025            def foobar(self, x):
3026                return x * 2
3027
3028        tb = MyTreeBuilder()
3029        self.assertEqual(tb.foobar(10), 20)
3030
3031        parser = ET.XMLParser(target=tb)
3032        parser.feed(self.sample1)
3033
3034        e = parser.close()
3035        self._check_sample1_element(e)
3036
3037    def test_subclass_comment_pi(self):
3038        class MyTreeBuilder(ET.TreeBuilder):
3039            def foobar(self, x):
3040                return x * 2
3041
3042        tb = MyTreeBuilder(comment_factory=ET.Comment, pi_factory=ET.PI)
3043        self.assertEqual(tb.foobar(10), 20)
3044
3045        parser = ET.XMLParser(target=tb)
3046        parser.feed(self.sample1)
3047        parser.feed('<!-- a comment--><?and a pi?>')
3048
3049        e = parser.close()
3050        self._check_sample1_element(e)
3051
3052    def test_element_factory(self):
3053        lst = []
3054        def myfactory(tag, attrib):
3055            nonlocal lst
3056            lst.append(tag)
3057            return ET.Element(tag, attrib)
3058
3059        tb = ET.TreeBuilder(element_factory=myfactory)
3060        parser = ET.XMLParser(target=tb)
3061        parser.feed(self.sample2)
3062        parser.close()
3063
3064        self.assertEqual(lst, ['toplevel'])
3065
3066    def _check_element_factory_class(self, cls):
3067        tb = ET.TreeBuilder(element_factory=cls)
3068
3069        parser = ET.XMLParser(target=tb)
3070        parser.feed(self.sample1)
3071        e = parser.close()
3072        self.assertIsInstance(e, cls)
3073        self._check_sample1_element(e)
3074
3075    def test_element_factory_subclass(self):
3076        class MyElement(ET.Element):
3077            pass
3078        self._check_element_factory_class(MyElement)
3079
3080    def test_element_factory_pure_python_subclass(self):
3081        # Mimick SimpleTAL's behaviour (issue #16089): both versions of
3082        # TreeBuilder should be able to cope with a subclass of the
3083        # pure Python Element class.
3084        base = ET._Element_Py
3085        # Not from a C extension
3086        self.assertEqual(base.__module__, 'xml.etree.ElementTree')
3087        # Force some multiple inheritance with a C class to make things
3088        # more interesting.
3089        class MyElement(base, ValueError):
3090            pass
3091        self._check_element_factory_class(MyElement)
3092
3093    def test_doctype(self):
3094        class DoctypeParser:
3095            _doctype = None
3096
3097            def doctype(self, name, pubid, system):
3098                self._doctype = (name, pubid, system)
3099
3100            def close(self):
3101                return self._doctype
3102
3103        parser = ET.XMLParser(target=DoctypeParser())
3104        parser.feed(self.sample1)
3105
3106        self.assertEqual(parser.close(),
3107            ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
3108             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
3109
3110    def test_builder_lookup_errors(self):
3111        class RaisingBuilder:
3112            def __init__(self, raise_in=None, what=ValueError):
3113                self.raise_in = raise_in
3114                self.what = what
3115
3116            def __getattr__(self, name):
3117                if name == self.raise_in:
3118                    raise self.what(self.raise_in)
3119                def handle(*args):
3120                    pass
3121                return handle
3122
3123        ET.XMLParser(target=RaisingBuilder())
3124        # cET also checks for 'close' and 'doctype', PyET does it only at need
3125        for event in ('start', 'data', 'end', 'comment', 'pi'):
3126            with self.assertRaisesRegex(ValueError, event):
3127                ET.XMLParser(target=RaisingBuilder(event))
3128
3129        ET.XMLParser(target=RaisingBuilder(what=AttributeError))
3130        for event in ('start', 'data', 'end', 'comment', 'pi'):
3131            parser = ET.XMLParser(target=RaisingBuilder(event, what=AttributeError))
3132            parser.feed(self.sample1)
3133            self.assertIsNone(parser.close())
3134
3135
class XMLParserTest(unittest.TestCase):
    # Documents shared by the tests below: a plain two-level document,
    # a document with a DOCTYPE declaration, and a str document that
    # declares iso-8859-1 but contains characters outside that charset.
    sample1 = b'<file><line>22</line></file>'
    sample2 = (b'<!DOCTYPE html PUBLIC'
        b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        b'<html>text</html>')
    sample3 = ('<?xml version="1.0" encoding="iso-8859-1"?>\n'
        '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>')

    def _check_sample_element(self, e):
        # Verify that `e` looks like the root element parsed from sample1.
        self.assertEqual(e.tag, 'file')
        first_child = e[0]
        self.assertEqual(first_child.tag, 'line')
        self.assertEqual(first_child.text, '22')

    def test_constructor_args(self):
        # Both constructor arguments are given by keyword.
        builder = ET.TreeBuilder()
        parser2 = ET.XMLParser(encoding='utf-8', target=builder)
        parser2.feed(self.sample1)
        root = parser2.close()
        self._check_sample_element(root)

    def test_subclass(self):
        # A trivial subclass must parse exactly like XMLParser itself.
        class MyParser(ET.XMLParser):
            pass

        parser = MyParser()
        parser.feed(self.sample1)
        root = parser.close()
        self._check_sample_element(root)

    def test_doctype_warning(self):
        # Parsing a DOCTYPE with the stock parser must not emit a
        # DeprecationWarning (it would be raised as an error here).
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            parser = ET.XMLParser()
            parser.feed(self.sample2)
            parser.close()

    def test_subclass_doctype(self):
        expected_doctype = (
            'html',
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd',
        )
        parser_calls = None

        class MyParserWithDoctype(ET.XMLParser):
            def doctype(self, *args, **kwargs):
                nonlocal parser_calls
                parser_calls = (args, kwargs)

        # doctype() defined on the parser subclass is never called; a
        # RuntimeWarning mentioning 'doctype' is expected instead.
        parser = MyParserWithDoctype()
        with self.assertWarnsRegex(RuntimeWarning, 'doctype'):
            parser.feed(self.sample2)
        parser.close()
        self.assertIsNone(parser_calls)

        parser_calls = target_calls = None

        class DoctypeParser:
            def doctype(self, name, pubid, system):
                nonlocal target_calls
                target_calls = (name, pubid, system)

        # When the target provides doctype(), the target receives the
        # declaration and no warning may be emitted.
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            warnings.simplefilter('error', RuntimeWarning)

            parser = MyParserWithDoctype(target=DoctypeParser())
            parser.feed(self.sample2)
            parser.close()
            self.assertIsNone(parser_calls)
            self.assertEqual(target_calls, expected_doctype)

    def test_inherited_doctype(self):
        '''Ensure that ordinary usage is not deprecated (Issue 19176)'''
        class MyParserWithoutDoctype(ET.XMLParser):
            pass

        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            warnings.simplefilter('error', RuntimeWarning)
            parser = MyParserWithoutDoctype()
            parser.feed(self.sample2)
            parser.close()

    def test_parse_string(self):
        # Feeding a str (rather than bytes) must keep every character,
        # whatever encoding the XML declaration names.
        money = '$\xa3\u20ac\U0001017b'
        parser = ET.XMLParser(target=ET.TreeBuilder())
        parser.feed(self.sample3)
        root = parser.close()
        self.assertEqual(root.tag, 'money')
        self.assertEqual(root.attrib['value'], money)
        self.assertEqual(root.text, money)
3218
3219
class NamespaceParseTest(unittest.TestCase):
    def test_find_with_namespace(self):
        # Look up namespaced elements through "{uri}tag" paths while a
        # prefix map is also handed to findall().
        prefixes = {'h': 'hello', 'f': 'foo'}
        root = ET.fromstring(SAMPLE_XML_NS_ELEMS)

        expected_counts = (
            ('{hello}table', 1),
            ('.//{hello}td', 2),
            ('.//{foo}name', 1),
        )
        for path, count in expected_counts:
            matches = root.findall(path, prefixes)
            self.assertEqual(len(matches), count)
3228
3229
class ElementSlicingTest(unittest.TestCase):
    def _elem_tags(self, elemlist):
        # Collect the tags of the given elements, preserving order.
        tags = []
        for elem in elemlist:
            tags.append(elem.tag)
        return tags

    def _subelem_tags(self, elem):
        # Tags of the direct children of `elem`.
        children = list(elem)
        return self._elem_tags(children)

    def _make_elem_with_children(self, numchildren):
        """Build an Element tagged 'a' holding `numchildren` children
           tagged 'a0', 'a1', ... in that order.

        """
        parent = ET.Element('a')
        for index in range(numchildren):
            ET.SubElement(parent, f'a{index}')
        return parent

    def test_getslice_single_index(self):
        elem = self._make_elem_with_children(10)

        self.assertEqual(elem[1].tag, 'a1')
        self.assertEqual(elem[-2].tag, 'a8')

        # Indexes past either end must be rejected.
        for bad_index in (12, -12):
            with self.assertRaises(IndexError):
                elem[bad_index]

    def test_getslice_range(self):
        elem = self._make_elem_with_children(6)

        cases = (
            (slice(3, None), ['a3', 'a4', 'a5']),
            (slice(3, 6), ['a3', 'a4', 'a5']),
            (slice(3, 16), ['a3', 'a4', 'a5']),
            (slice(3, 5), ['a3', 'a4']),
            (slice(3, -1), ['a3', 'a4']),
            (slice(None, 2), ['a0', 'a1']),
        )
        for index, expected in cases:
            self.assertEqual(self._elem_tags(elem[index]), expected)

    def test_getslice_steps(self):
        elem = self._make_elem_with_children(10)

        # The last two cases use steps far larger than the child count.
        cases = (
            (slice(8, 10, 1), ['a8', 'a9']),
            (slice(None, None, 3), ['a0', 'a3', 'a6', 'a9']),
            (slice(None, None, 8), ['a0', 'a8']),
            (slice(1, None, 8), ['a1', 'a9']),
            (slice(3, None, sys.maxsize), ['a3']),
            (slice(3, None, sys.maxsize << 64), ['a3']),
        )
        for index, expected in cases:
            self.assertEqual(self._elem_tags(elem[index]), expected)

    def test_getslice_negative_steps(self):
        elem = self._make_elem_with_children(4)

        cases = (
            (slice(None, None, -1), ['a3', 'a2', 'a1', 'a0']),
            (slice(None, None, -2), ['a3', 'a1']),
            (slice(3, None, -sys.maxsize), ['a3']),
            (slice(3, None, -sys.maxsize - 1), ['a3']),
            (slice(3, None, -sys.maxsize << 64), ['a3']),
        )
        for index, expected in cases:
            self.assertEqual(self._elem_tags(elem[index]), expected)

    def test_delslice(self):
        # Each case: (number of children, slice to delete, remaining tags).
        cases = (
            (4, slice(0, 2), ['a2', 'a3']),
            (4, slice(0, None), []),
            (4, slice(None, None, -1), []),
            (4, slice(None, None, -2), ['a0', 'a2']),
            (4, slice(1, None, 2), ['a0', 'a2']),
            (2, slice(None, None, 2), ['a1']),
        )
        for count, index, remaining in cases:
            elem = self._make_elem_with_children(count)
            del elem[index]
            self.assertEqual(self._subelem_tags(elem), remaining)

    def test_setslice_single_index(self):
        elem = self._make_elem_with_children(4)

        elem[1] = ET.Element('b')
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'a2', 'a3'])

        elem[-2] = ET.Element('c')
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'c', 'a3'])

        # Out-of-range assignment must fail and leave the children alone.
        for bad_index in (5, -5):
            with self.assertRaises(IndexError):
                elem[bad_index] = ET.Element('d')
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'c', 'a3'])

    def test_setslice_range(self):
        # Replace the same two-child range with as many, fewer and more
        # elements than it held.
        cases = (
            (['b0', 'b1'], ['a0', 'b0', 'b1', 'a3']),
            (['b'], ['a0', 'b', 'a3']),
            (['b0', 'b1', 'b2'], ['a0', 'b0', 'b1', 'b2', 'a3']),
        )
        for new_tags, expected in cases:
            elem = self._make_elem_with_children(4)
            elem[1:3] = [ET.Element(tag) for tag in new_tags]
            self.assertEqual(self._subelem_tags(elem), expected)

    def test_setslice_steps(self):
        def b_elements(count):
            return [ET.Element(f'b{n}') for n in range(count)]

        elem = self._make_elem_with_children(6)
        elem[1:5:2] = b_elements(2)
        self.assertEqual(self._subelem_tags(elem),
                         ['a0', 'b0', 'a2', 'b1', 'a4', 'a5'])

        # An extended slice only accepts a replacement of the same length;
        # a failed assignment must not modify the element.
        elem = self._make_elem_with_children(6)
        for wrong_size in ([ET.Element('b')], b_elements(3), []):
            with self.assertRaises(ValueError):
                elem[1:5:2] = wrong_size
        self.assertEqual(self._subelem_tags(elem),
                         ['a0', 'a1', 'a2', 'a3', 'a4', 'a5'])

        elem = self._make_elem_with_children(4)
        elem[1::sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'a2', 'a3'])
        elem[1::sys.maxsize << 64] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'c', 'a2', 'a3'])

    def test_setslice_negative_steps(self):
        def b_elements(count):
            return [ET.Element(f'b{n}') for n in range(count)]

        elem = self._make_elem_with_children(4)
        elem[2:0:-1] = b_elements(2)
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b1', 'b0', 'a3'])

        # Same length rule as for positive steps.
        elem = self._make_elem_with_children(4)
        for wrong_size in ([ET.Element('b')], b_elements(3), []):
            with self.assertRaises(ValueError):
                elem[2:0:-1] = wrong_size
        self.assertEqual(self._subelem_tags(elem), ['a0', 'a1', 'a2', 'a3'])

        elem = self._make_elem_with_children(4)
        elem[1::-sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'a2', 'a3'])
        elem[1::-sys.maxsize - 1] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'c', 'a2', 'a3'])
        elem[1::-sys.maxsize << 64] = [ET.Element('d')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'd', 'a2', 'a3'])
3378
3379
class IOTest(unittest.TestCase):
    # Serialization and parsing through the kinds of sources and sinks
    # exercised below: file names, text and binary file objects,
    # in-memory streams, and bare user objects that only expose the
    # read()/write() callables attached to them.

    def test_encoding(self):
        # Test encoding issues.
        #
        # The expectations below encode three rules: "utf-8" and
        # "us-ascii" output carries no XML declaration, the other
        # encodings carry one naming the requested encoding, and
        # "us-ascii" writes non-ASCII characters as numeric references.
        # Every per-encoding check runs in its own subTest so that one
        # failing encoding is reported by name and does not hide the rest.
        elem = ET.Element("tag")
        elem.text = "abc"
        self.assertEqual(serialize(elem), '<tag>abc</tag>')
        for enc in ("utf-8", "us-ascii"):
            with self.subTest(enc):
                self.assertEqual(serialize(elem, encoding=enc),
                        b'<tag>abc</tag>')
                self.assertEqual(serialize(elem, encoding=enc.upper()),
                        b'<tag>abc</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            with self.subTest(enc):
                self.assertEqual(serialize(elem, encoding=enc),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag>abc</tag>" % enc).encode(enc))
                upper = enc.upper()
                self.assertEqual(serialize(elem, encoding=upper),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag>abc</tag>" % upper).encode(enc))

        # Markup characters in text.
        elem = ET.Element("tag")
        elem.text = "<&\"\'>"
        self.assertEqual(serialize(elem), '<tag>&lt;&amp;"\'&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag>&lt;&amp;"\'&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag>&lt;&amp;"\'&gt;</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            with self.subTest(enc, case="markup in text"):
                self.assertEqual(serialize(elem, encoding=enc),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag>&lt;&amp;\"'&gt;</tag>" % enc).encode(enc))

        # Markup characters in an attribute value.
        elem = ET.Element("tag")
        elem.attrib["key"] = "<&\"\'>"
        self.assertEqual(serialize(elem), '<tag key="&lt;&amp;&quot;\'&gt;" />')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag key="&lt;&amp;&quot;\'&gt;" />')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag key="&lt;&amp;&quot;\'&gt;" />')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            with self.subTest(enc, case="markup in attribute"):
                self.assertEqual(serialize(elem, encoding=enc),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag key=\"&lt;&amp;&quot;'&gt;\" />" % enc).encode(enc))

        # Non-ASCII characters in text.
        elem = ET.Element("tag")
        elem.text = '\xe5\xf6\xf6<>'
        self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6&lt;&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            with self.subTest(enc, case="non-ASCII text"):
                self.assertEqual(serialize(elem, encoding=enc),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag>åöö&lt;&gt;</tag>" % enc).encode(enc))

        # Non-ASCII characters in an attribute value.
        elem = ET.Element("tag")
        elem.attrib["key"] = '\xe5\xf6\xf6<>'
        self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6&lt;&gt;" />')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag key="&#229;&#246;&#246;&lt;&gt;" />')
        for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"):
            with self.subTest(enc, case="non-ASCII attribute"):
                self.assertEqual(serialize(elem, encoding=enc),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag key=\"åöö&lt;&gt;\" />" % enc).encode(enc))

    def test_write_to_filename(self):
        # write() given a file name.
        self.addCleanup(support.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        tree.write(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_text_file(self):
        # write() given an open text file must leave the file open.
        self.addCleanup(support.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        with open(TESTFN, 'w', encoding='utf-8') as f:
            tree.write(f, encoding='unicode')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_binary_file(self):
        # write() given an open binary file must leave the file open.
        self.addCleanup(support.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        with open(TESTFN, 'wb') as f:
            tree.write(f)
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_binary_file_with_bom(self):
        # The file content must equal a single .encode("utf-16") of the
        # whole document, for buffered and unbuffered files alike.
        self.addCleanup(support.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        # test BOM writing to buffered file
        with open(TESTFN, 'wb') as f:
            tree.write(f, encoding='utf-16')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(),
                    '''<?xml version='1.0' encoding='utf-16'?>\n'''
                    '''<site />'''.encode("utf-16"))
        # test BOM writing to non-buffered file
        with open(TESTFN, 'wb', buffering=0) as f:
            tree.write(f, encoding='utf-16')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(),
                    '''<?xml version='1.0' encoding='utf-16'?>\n'''
                    '''<site />'''.encode("utf-16"))

    def test_read_from_stringio(self):
        tree = ET.ElementTree()
        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
        tree.parse(stream)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_stringio(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        stream = io.StringIO()
        tree.write(stream, encoding='unicode')
        self.assertEqual(stream.getvalue(), '''<site />''')

    def test_read_from_bytesio(self):
        tree = ET.ElementTree()
        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
        tree.parse(raw)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_bytesio(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        tree.write(raw)
        self.assertEqual(raw.getvalue(), b'''<site />''')

    # Bare object: the "user reader/writer" tests attach only the
    # callables they need (read, write, seekable, tell) to an instance.
    class dummy:
        pass

    def test_read_from_user_text_reader(self):
        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = stream.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_user_text_writer(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        stream = io.StringIO()
        writer = self.dummy()
        writer.write = stream.write
        tree.write(writer, encoding='unicode')
        self.assertEqual(stream.getvalue(), '''<site />''')

    def test_read_from_user_binary_reader(self):
        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = raw.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_user_binary_writer(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        writer = self.dummy()
        writer.write = raw.write
        tree.write(writer)
        self.assertEqual(raw.getvalue(), b'''<site />''')

    def test_write_to_user_binary_writer_with_bom(self):
        # Here the writer also exposes seekable() and tell().
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        writer = self.dummy()
        writer.write = raw.write
        writer.seekable = lambda: True
        writer.tell = raw.tell
        tree.write(writer, encoding="utf-16")
        self.assertEqual(raw.getvalue(),
                '''<?xml version='1.0' encoding='utf-16'?>\n'''
                '''<site />'''.encode("utf-16"))

    def test_tostringlist_invariant(self):
        # Joining tostringlist() output must reproduce tostring().
        root = ET.fromstring('<tag>foo</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode'),
            ''.join(ET.tostringlist(root, 'unicode')))
        self.assertEqual(
            ET.tostring(root, 'utf-16'),
            b''.join(ET.tostringlist(root, 'utf-16')))

    def test_short_empty_elements(self):
        # The default output must match short_empty_elements=True.
        root = ET.fromstring('<tag>a<x />b<y></y>c</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode'),
            '<tag>a<x />b<y />c</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode', short_empty_elements=True),
            '<tag>a<x />b<y />c</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode', short_empty_elements=False),
            '<tag>a<x></x>b<y></y>c</tag>')
3587
3588
class ParseErrorTest(unittest.TestCase):
    def test_subclass(self):
        self.assertIsInstance(ET.ParseError(), SyntaxError)

    def _get_error(self, s):
        # Parse `s`, which is expected to be malformed, and return the
        # ParseError that was raised.
        #
        # If parsing unexpectedly succeeds the test fails here with a
        # clear assertion message, instead of returning None and letting
        # the caller crash with an AttributeError on `.position`/`.code`.
        with self.assertRaises(ET.ParseError) as caught:
            ET.fromstring(s)
        return caught.exception

    def test_error_position(self):
        # `position` is compared against the expected pair for each input.
        self.assertEqual(self._get_error('foo').position, (1, 0))
        self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5))
        self.assertEqual(self._get_error('foobar<').position, (1, 6))

    def test_error_code(self):
        # `code` must match expat's numeric code for a syntax error.
        import xml.parsers.expat.errors as ERRORS
        self.assertEqual(self._get_error('foo').code,
                ERRORS.codes[ERRORS.XML_ERROR_SYNTAX])
3608
3609
class KeywordArgsTest(unittest.TestCase):
    # Test various issues with keyword arguments passed to ET.Element
    # constructor and methods
    def test_issue14818(self):
        root = ET.XML("<a>foo</a>")

        # Positional and keyword spellings of the lookup arguments must
        # give the same result.
        by_position = root.find('a', None)
        by_keyword = root.find(path='a', namespaces=None)
        self.assertEqual(by_position, by_keyword)

        by_position = root.findtext('a', None, None)
        by_keyword = root.findtext(path='a', default=None, namespaces=None)
        self.assertEqual(by_position, by_keyword)

        by_position = root.findall('a', None)
        by_keyword = root.findall(path='a', namespaces=None)
        self.assertEqual(by_position, by_keyword)

        by_position = list(root.iterfind('a', None))
        by_keyword = list(root.iterfind(path='a', namespaces=None))
        self.assertEqual(by_position, by_keyword)

        self.assertEqual(ET.Element('a').attrib, {})

        # Every way of spelling the attributes must produce the same element.
        variants = [
            ET.Element('a', {'href': "#", 'id': "foo"}),
            ET.Element('a', attrib={'href': "#", 'id': "foo"}),
            ET.Element('a', {'href': "#"}, id="foo"),
            ET.Element('a', href="#", id="foo"),
            ET.Element('a', {'href': "#", 'id': "foo"}, href="#", id="foo"),
        ]
        for elem in variants:
            self.assertEqual(elem.tag, 'a')
            self.assertEqual(elem.attrib, {'href': "#", 'id': "foo"})

        child = ET.SubElement(variants[0], 'foobar', attrib={'key1': 'value1'})
        self.assertEqual(child.attrib['key1'], 'value1')

        # A non-dict attrib must be rejected, positionally or by keyword.
        not_a_dict = "I'm not a dict"
        with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
            ET.Element('a', not_a_dict)
        with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
            ET.Element('a', attrib=not_a_dict)
3643
3644# --------------------------------------------------------------------
3645
class NoAcceleratorTest(unittest.TestCase):
    def setUp(self):
        # Skip when pyET is unset (falsy).
        if pyET:
            return
        self.skipTest('only for the Python version')

    # Test that the C accelerator was not imported for pyET
    def test_correct_import_pyET(self):
        # The type of methods defined in Python code is types.FunctionType,
        # while the type of methods defined inside _elementtree is
        # <class 'wrapper_descriptor'>
        for cls in (pyET.Element, pyET.XMLParser):
            self.assertIsInstance(cls.__init__, types.FunctionType)
3658
3659
3660# --------------------------------------------------------------------
3661
def c14n_roundtrip(xml, **options):
    """Return the result of pyET.canonicalize() for *xml*.

    All keyword *options* are forwarded unchanged to canonicalize().
    """
    canonical_form = pyET.canonicalize(xml, **options)
    return canonical_form
3664
3665
3666class C14NTest(unittest.TestCase):
3667    maxDiff = None
3668
3669    #
3670    # simple roundtrip tests (from c14n.py)
3671
3672    def test_simple_roundtrip(self):
3673        # Basics
3674        self.assertEqual(c14n_roundtrip("<doc/>"), '<doc></doc>')
3675        self.assertEqual(c14n_roundtrip("<doc xmlns='uri'/>"), # FIXME
3676                '<doc xmlns="uri"></doc>')
3677        self.assertEqual(c14n_roundtrip("<prefix:doc xmlns:prefix='uri'/>"),
3678            '<prefix:doc xmlns:prefix="uri"></prefix:doc>')
3679        self.assertEqual(c14n_roundtrip("<doc xmlns:prefix='uri'><prefix:bar/></doc>"),
3680            '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>')
3681        self.assertEqual(c14n_roundtrip("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />"),
3682            '<elem></elem>')
3683
3684        # C14N spec
3685        self.assertEqual(c14n_roundtrip("<doc>Hello, world!<!-- Comment 1 --></doc>"),
3686            '<doc>Hello, world!</doc>')
3687        self.assertEqual(c14n_roundtrip("<value>&#x32;</value>"),
3688            '<value>2</value>')
3689        self.assertEqual(c14n_roundtrip('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>'),
3690            '<compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>')
3691        self.assertEqual(c14n_roundtrip('''<compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>'''),
3692            '<compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>')
3693        self.assertEqual(c14n_roundtrip("<norm attr=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>"),
3694            '<norm attr=" \'    &#xD;&#xA;&#x9;   \' "></norm>')
3695        self.assertEqual(c14n_roundtrip("<normNames attr='   A   &#x20;&#13;&#xa;&#9;   B   '/>"),
3696            '<normNames attr="   A    &#xD;&#xA;&#x9;   B   "></normNames>')
3697        self.assertEqual(c14n_roundtrip("<normId id=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>"),
3698            '<normId id=" \'    &#xD;&#xA;&#x9;   \' "></normId>')
3699
3700        # fragments from PJ's tests
3701        #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"),
3702        #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>')
3703
3704    def test_c14n_exclusion(self):
3705        xml = textwrap.dedent("""\
3706        <root xmlns:x="http://example.com/x">
3707            <a x:attr="attrx">
3708                <b>abtext</b>
3709            </a>
3710            <b>btext</b>
3711            <c>
3712                <x:d>dtext</x:d>
3713            </c>
3714        </root>
3715        """)
3716        self.assertEqual(
3717            c14n_roundtrip(xml, strip_text=True),
3718            '<root>'
3719            '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
3720            '<b>btext</b>'
3721            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
3722            '</root>')
3723        self.assertEqual(
3724            c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr']),
3725            '<root>'
3726            '<a><b>abtext</b></a>'
3727            '<b>btext</b>'
3728            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
3729            '</root>')
3730        self.assertEqual(
3731            c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d']),
3732            '<root>'
3733            '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
3734            '<b>btext</b>'
3735            '<c></c>'
3736            '</root>')
3737        self.assertEqual(
3738            c14n_roundtrip(xml, strip_text=True, exclude_attrs=['{http://example.com/x}attr'],
3739                           exclude_tags=['{http://example.com/x}d']),
3740            '<root>'
3741            '<a><b>abtext</b></a>'
3742            '<b>btext</b>'
3743            '<c></c>'
3744            '</root>')
3745        self.assertEqual(
3746            c14n_roundtrip(xml, strip_text=True, exclude_tags=['a', 'b']),
3747            '<root>'
3748            '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
3749            '</root>')
3750        self.assertEqual(
3751            c14n_roundtrip(xml, exclude_tags=['a', 'b']),
3752            '<root>\n'
3753            '    \n'
3754            '    \n'
3755            '    <c>\n'
3756            '        <x:d xmlns:x="http://example.com/x">dtext</x:d>\n'
3757            '    </c>\n'
3758            '</root>')
3759        self.assertEqual(
3760            c14n_roundtrip(xml, strip_text=True, exclude_tags=['{http://example.com/x}d', 'b']),
3761            '<root>'
3762            '<a xmlns:x="http://example.com/x" x:attr="attrx"></a>'
3763            '<c></c>'
3764            '</root>')
3765        self.assertEqual(
3766            c14n_roundtrip(xml, exclude_tags=['{http://example.com/x}d', 'b']),
3767            '<root>\n'
3768            '    <a xmlns:x="http://example.com/x" x:attr="attrx">\n'
3769            '        \n'
3770            '    </a>\n'
3771            '    \n'
3772            '    <c>\n'
3773            '        \n'
3774            '    </c>\n'
3775            '</root>')
3776
3777    #
3778    # basic method=c14n tests from the c14n 2.0 specification.  uses
3779    # test files under xmltestdata/c14n-20.
3780
3781    # note that this uses generated C14N versions of the standard ET.write
3782    # output, not roundtripped C14N (see above).
3783
3784    def test_xml_c14n2(self):
3785        datadir = findfile("c14n-20", subdir="xmltestdata")
3786        full_path = partial(os.path.join, datadir)
3787
3788        files = [filename[:-4] for filename in sorted(os.listdir(datadir))
3789                 if filename.endswith('.xml')]
3790        input_files = [
3791            filename for filename in files
3792            if filename.startswith('in')
3793        ]
3794        configs = {
3795            filename: {
3796                # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
3797                option.tag.split('}')[-1]: ((option.text or '').strip(), option)
3798                for option in ET.parse(full_path(filename) + ".xml").getroot()
3799            }
3800            for filename in files
3801            if filename.startswith('c14n')
3802        }
3803
3804        tests = {
3805            input_file: [
3806                (filename, configs[filename.rsplit('_', 1)[-1]])
3807                for filename in files
3808                if filename.startswith(f'out_{input_file}_')
3809                and filename.rsplit('_', 1)[-1] in configs
3810            ]
3811            for input_file in input_files
3812        }
3813
3814        # Make sure we found all test cases.
3815        self.assertEqual(30, len([
3816            output_file for output_files in tests.values()
3817            for output_file in output_files]))
3818
3819        def get_option(config, option_name, default=None):
3820            return config.get(option_name, (default, ()))[0]
3821
3822        for input_file, output_files in tests.items():
3823            for output_file, config in output_files:
3824                keep_comments = get_option(
3825                    config, 'IgnoreComments') == 'true'  # no, it's right :)
3826                strip_text = get_option(
3827                    config, 'TrimTextNodes') == 'true'
3828                rewrite_prefixes = get_option(
3829                    config, 'PrefixRewrite') == 'sequential'
3830                if 'QNameAware' in config:
3831                    qattrs = [
3832                        f"{{{el.get('NS')}}}{el.get('Name')}"
3833                        for el in config['QNameAware'][1].findall(
3834                            '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr')
3835                    ]
3836                    qtags = [
3837                        f"{{{el.get('NS')}}}{el.get('Name')}"
3838                        for el in config['QNameAware'][1].findall(
3839                            '{http://www.w3.org/2010/xml-c14n2}Element')
3840                    ]
3841                else:
3842                    qtags = qattrs = None
3843
3844                # Build subtest description from config.
3845                config_descr = ','.join(
3846                    f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}"
3847                    for name, (value, children) in sorted(config.items())
3848                )
3849
3850                with self.subTest(f"{output_file}({config_descr})"):
3851                    if input_file == 'inNsRedecl' and not rewrite_prefixes:
3852                        self.skipTest(
3853                            f"Redeclared namespace handling is not supported in {output_file}")
3854                    if input_file == 'inNsSuperfluous' and not rewrite_prefixes:
3855                        self.skipTest(
3856                            f"Redeclared namespace handling is not supported in {output_file}")
3857                    if 'QNameAware' in config and config['QNameAware'][1].find(
3858                            '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None:
3859                        self.skipTest(
3860                            f"QName rewriting in XPath text is not supported in {output_file}")
3861
3862                    f = full_path(input_file + ".xml")
3863                    if input_file == 'inC14N5':
3864                        # Hack: avoid setting up external entity resolution in the parser.
3865                        with open(full_path('world.txt'), 'rb') as entity_file:
3866                            with open(f, 'rb') as f:
3867                                f = io.BytesIO(f.read().replace(b'&ent2;', entity_file.read()))
3868
3869                    text = ET.canonicalize(
3870                        from_file=f,
3871                        with_comments=keep_comments,
3872                        strip_text=strip_text,
3873                        rewrite_prefixes=rewrite_prefixes,
3874                        qname_aware_tags=qtags, qname_aware_attrs=qattrs)
3875
3876                    with open(full_path(output_file + ".xml"), 'r', encoding='utf8') as f:
3877                        expected = f.read()
3878                        if input_file == 'inC14N3':
3879                            # FIXME: cET resolves default attributes but ET does not!
3880                            expected = expected.replace(' attr="default"', '')
3881                            text = text.replace(' attr="default"', '')
3882                    self.assertEqual(expected, text)
3883
3884# --------------------------------------------------------------------
3885
3886
def test_main(module=None):
    """Run the ElementTree test classes against *module*.

    A fresh pure-Python ``xml.etree.ElementTree`` (imported with
    ``_elementtree`` blocked) is always bound to the module-level ``pyET``.
    When *module* is None that same module is also used as ``ET``;
    otherwise *module* becomes ``ET``.

    The namespace map, the ElementPath cache and (if the module provides
    ``_set_factories``) the Comment/PI factories are saved before the run
    and restored afterwards.  ``ET`` and ``pyET`` are reset to None on exit
    so that subsequent tests are not affected.
    """
    global pyET, ET
    pyET = import_fresh_module('xml.etree.ElementTree',
                               blocked=['_elementtree'])
    ET = pyET if module is None else module

    test_classes = [
        ModuleTest,
        ElementSlicingTest,
        BasicElementTest,
        BadElementTest,
        BadElementPathTest,
        ElementTreeTest,
        IOTest,
        ParseErrorTest,
        XIncludeTest,
        ElementTreeTypeTest,
        ElementFindTest,
        ElementIterTest,
        TreeBuilderTest,
        XMLParserTest,
        XMLPullParserTest,
        BugsTest,
        KeywordArgsTest,
        C14NTest,
        ]

    # NoAcceleratorTest is only added when ET is not the freshly imported
    # pure-Python module, i.e. when a different module was passed in.  We
    # can't use skipUnless here, because pyET is filled in only after the
    # module is loaded.
    if pyET is not ET:
        test_classes.append(NoAcceleratorTest)

    # Snapshot the default namespace mapping and the path cache (the cache
    # should be empty) so they can be restored after the run.
    from xml.etree import ElementPath
    nsmap = ET.register_namespace._namespace_map
    nsmap_copy = nsmap.copy()
    path_cache = ElementPath._cache
    old_factories = None

    # All state mutations happen inside the try block so that a failure
    # during setup (e.g. in _set_factories) is rolled back as well.
    try:
        # Let the tests work on a copy of the path cache.
        ElementPath._cache = path_cache.copy()
        # Align the Comment/PI factories.
        if hasattr(ET, '_set_factories'):
            old_factories = ET._set_factories(ET.Comment, ET.PI)
        support.run_unittest(*test_classes)
    finally:
        # Restore mapping and path cache
        nsmap.clear()
        nsmap.update(nsmap_copy)
        ElementPath._cache = path_cache
        if old_factories is not None:
            ET._set_factories(*old_factories)
        # don't interfere with subsequent tests
        ET = pyET = None
3954
3955
# When executed directly as a script, run test_main() with its default
# argument (module=None).
if __name__ == '__main__':
    test_main()
3958