• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# IMPORTANT: the same tests are run from "test_xml_etree_c" in order
2# to ensure consistency between the C implementation and the Python
3# implementation.
4#
5# For this purpose, the module-level "ET" symbol is temporarily
6# monkey-patched when running the "test_xml_etree_c" test suite.
7
8import copy
9import functools
10import html
11import io
12import operator
13import pickle
14import sys
15import types
16import unittest
17import warnings
18import weakref
19
20from itertools import product
21from test import support
22from test.support import TESTFN, findfile, import_fresh_module, gc_collect, swap_attr
23
24# pyET is the pure-Python implementation.
25#
26# ET is pyET in test_xml_etree and is the C accelerated version in
27# test_xml_etree_c.
# Placeholders only: both names start out as None in this file and have to be
# rebound to an ElementTree implementation before any test can use them (see
# the comment above for which implementation each one is meant to hold).
pyET = None
ET = None

# Path of the plain (non-namespaced) XML fixture, looked up through
# test.support.findfile in the "xmltestdata" subdirectory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
try:
    # Only the path string itself is checked here, not the file's contents.
    SIMPLE_XMLFILE.encode("utf-8")
except UnicodeEncodeError:
    # Raised at import time, so the whole module is skipped.
    raise unittest.SkipTest("filename is not encodable to utf8")
# Remaining fixture files, located the same way.
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")
38
# <body> with three children: two <tag> elements and a <section> that wraps
# one more <tag>.
SAMPLE_XML = """\
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
"""

# Stand-alone <section> with a <tag>, an empty <nexttag> and a nested
# <nextsection>.
SAMPLE_SECTION = """\
<section>
  <tag class='b' id='inner'>subtext</tag>
  <nexttag />
  <nextsection>
    <tag />
  </nextsection>
</section>
"""

# Same element layout as SAMPLE_XML but without attributes and with a default
# namespace declared on <body>.  Note the literal starts with a newline.
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
"""

# Two <table> elements under one root, each using its own namespace prefix
# ("h" -> "hello", "f" -> "foo").  Note the literal starts with a newline.
SAMPLE_XML_NS_ELEMS = """
<root>
<h:table xmlns:h="hello">
  <h:tr>
    <h:td>Apples</h:td>
    <h:td>Bananas</h:td>
  </h:tr>
</h:table>

<f:table xmlns:f="foo">
  <f:name>African Coffee Table</f:name>
  <f:width>80</f:width>
  <f:length>120</f:length>
</f:table>
</root>
"""

# Document whose DOCTYPE pulls in an external parameter entity and whose body
# references &entity;, which the document itself never declares.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""

# Document declaring &entity; as an external entity with a file:// system
# identifier and referencing it in the body.
EXTERNAL_ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
]>
<document>&entity;</document>
"""
100
def checkwarnings(*filters, quiet=False):
    """Build a decorator that runs a callable under support.check_warnings.

    *filters* and *quiet* are forwarded unchanged to
    ``support.check_warnings`` every time the decorated callable is invoked.
    The wrapper copies the wrapped callable's metadata but does not pass on
    its return value.
    """
    def decorator(test):
        @functools.wraps(test)
        def wrapper(*args, **kwargs):
            with support.check_warnings(*filters, quiet=quiet):
                test(*args, **kwargs)
        return wrapper
    return decorator
109
110
class ModuleTest(unittest.TestCase):
    def test_sanity(self):
        # Import sanity: the three xml.etree submodules must be importable.

        from xml.etree import ElementTree, ElementInclude, ElementPath

    def test_all(self):
        # Hand the __all__ check for ET over to support.check__all__, with
        # HTML_EMPTY passed as the blacklist.
        support.check__all__(
            self,
            ET,
            ("xml.etree.ElementTree", "_elementtree"),
            blacklist=("HTML_EMPTY",),
        )
122
123
def serialize(elem, to_string=True, encoding='unicode', **options):
    """Write *elem* through ``ET.ElementTree(...).write`` into a memory buffer.

    A text buffer is used when *encoding* is ``'unicode'``; any other
    encoding gets a bytes buffer.  Extra keyword *options* are passed on to
    ``write``.  Returns the buffer's contents when *to_string* is true,
    otherwise the buffer object itself, rewound to position 0.
    """
    buffer = io.StringIO() if encoding == 'unicode' else io.BytesIO()
    ET.ElementTree(elem).write(buffer, encoding=encoding, **options)
    if not to_string:
        buffer.seek(0)
        return buffer
    return buffer.getvalue()
136
def summarize_list(seq):
    """Return a list holding the ``tag`` attribute of every item in *seq*."""
    tags = []
    for node in seq:
        tags.append(node.tag)
    return tags
139
140
class ElementTestCase:
    @classmethod
    def setUpClass(cls):
        # A set, so the two names collapse into a single entry whenever they
        # refer to the same module.
        cls.modules = {pyET, ET}

    def pickleRoundTrip(self, obj, name, dumper, loader, proto):
        """Pickle *obj* with one module installed and unpickle with another.

        *dumper* is placed in ``sys.modules[name]`` while dumping with
        protocol *proto*, then *loader* is placed there while loading.  The
        original ``sys.modules[name]`` entry is restored in every case.  A
        ``pickle.PicklingError`` is re-raised as ``support.TestFailed``.
        Returns the unpickled object.
        """
        original_module = sys.modules[name]
        try:
            sys.modules[name] = dumper
            payload = pickle.dumps(obj, proto)
            sys.modules[name] = loader
            restored = pickle.loads(payload)
        except pickle.PicklingError as pe:
            # pyET must be second, because pyET may be (equal to) ET.
            labels = {ET: "cET", pyET: "pyET"}
            message = "Failed to round-trip %r from %r to %r" % (
                obj,
                labels.get(dumper, dumper),
                labels.get(loader, loader),
            )
            raise support.TestFailed(message) from pe
        finally:
            sys.modules[name] = original_module
        return restored

    def assertEqualElements(self, alice, bob):
        """Assert that *alice* and *bob* are equal elements.

        Both must be instances of ``ET.Element`` or ``pyET.Element``, have
        the same number of children, recursively equal children, and equal
        tag, tail, text and attrib.
        """
        element_types = (ET.Element, pyET.Element)
        self.assertIsInstance(alice, element_types)
        self.assertIsInstance(bob, element_types)
        self.assertEqual(len(list(alice)), len(list(bob)))
        for left, right in zip(alice, bob):
            self.assertEqualElements(left, right)
        snapshot = operator.attrgetter('tag', 'tail', 'text', 'attrib')
        self.assertEqual(snapshot(alice), snapshot(bob))
172
173# --------------------------------------------------------------------
174# element tree tests
175
176class ElementTreeTest(unittest.TestCase):
177
178    def serialize_check(self, elem, expected):
179        self.assertEqual(serialize(elem), expected)
180
181    def test_interface(self):
182        # Test element tree interface.
183
184        def check_string(string):
185            len(string)
186            for char in string:
187                self.assertEqual(len(char), 1,
188                        msg="expected one-character string, got %r" % char)
189            new_string = string + ""
190            new_string = string + " "
191            string[:0]
192
193        def check_mapping(mapping):
194            len(mapping)
195            keys = mapping.keys()
196            items = mapping.items()
197            for key in keys:
198                item = mapping[key]
199            mapping["key"] = "value"
200            self.assertEqual(mapping["key"], "value",
201                    msg="expected value string, got %r" % mapping["key"])
202
203        def check_element(element):
204            self.assertTrue(ET.iselement(element), msg="not an element")
205            direlem = dir(element)
206            for attr in 'tag', 'attrib', 'text', 'tail':
207                self.assertTrue(hasattr(element, attr),
208                        msg='no %s member' % attr)
209                self.assertIn(attr, direlem,
210                        msg='no %s visible by dir' % attr)
211
212            check_string(element.tag)
213            check_mapping(element.attrib)
214            if element.text is not None:
215                check_string(element.text)
216            if element.tail is not None:
217                check_string(element.tail)
218            for elem in element:
219                check_element(elem)
220
221        element = ET.Element("tag")
222        check_element(element)
223        tree = ET.ElementTree(element)
224        check_element(tree.getroot())
225        element = ET.Element("t\xe4g", key="value")
226        tree = ET.ElementTree(element)
227        self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$")
228        element = ET.Element("tag", key="value")
229
230        # Make sure all standard element methods exist.
231
232        def check_method(method):
233            self.assertTrue(hasattr(method, '__call__'),
234                    msg="%s not callable" % method)
235
236        check_method(element.append)
237        check_method(element.extend)
238        check_method(element.insert)
239        check_method(element.remove)
240        check_method(element.getchildren)
241        check_method(element.find)
242        check_method(element.iterfind)
243        check_method(element.findall)
244        check_method(element.findtext)
245        check_method(element.clear)
246        check_method(element.get)
247        check_method(element.set)
248        check_method(element.keys)
249        check_method(element.items)
250        check_method(element.iter)
251        check_method(element.itertext)
252        check_method(element.getiterator)
253
254        # These methods return an iterable. See bug 6472.
255
256        def check_iter(it):
257            check_method(it.__next__)
258
259        check_iter(element.iterfind("tag"))
260        check_iter(element.iterfind("*"))
261        check_iter(tree.iterfind("tag"))
262        check_iter(tree.iterfind("*"))
263
264        # These aliases are provided:
265
266        self.assertEqual(ET.XML, ET.fromstring)
267        self.assertEqual(ET.PI, ET.ProcessingInstruction)
268
269    def test_set_attribute(self):
270        element = ET.Element('tag')
271
272        self.assertEqual(element.tag, 'tag')
273        element.tag = 'Tag'
274        self.assertEqual(element.tag, 'Tag')
275        element.tag = 'TAG'
276        self.assertEqual(element.tag, 'TAG')
277
278        self.assertIsNone(element.text)
279        element.text = 'Text'
280        self.assertEqual(element.text, 'Text')
281        element.text = 'TEXT'
282        self.assertEqual(element.text, 'TEXT')
283
284        self.assertIsNone(element.tail)
285        element.tail = 'Tail'
286        self.assertEqual(element.tail, 'Tail')
287        element.tail = 'TAIL'
288        self.assertEqual(element.tail, 'TAIL')
289
290        self.assertEqual(element.attrib, {})
291        element.attrib = {'a': 'b', 'c': 'd'}
292        self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'})
293        element.attrib = {'A': 'B', 'C': 'D'}
294        self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'})
295
296    def test_simpleops(self):
297        # Basic method sanity checks.
298
299        elem = ET.XML("<body><tag/></body>")
300        self.serialize_check(elem, '<body><tag /></body>')
301        e = ET.Element("tag2")
302        elem.append(e)
303        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
304        elem.remove(e)
305        self.serialize_check(elem, '<body><tag /></body>')
306        elem.insert(0, e)
307        self.serialize_check(elem, '<body><tag2 /><tag /></body>')
308        elem.remove(e)
309        elem.extend([e])
310        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
311        elem.remove(e)
312
313        element = ET.Element("tag", key="value")
314        self.serialize_check(element, '<tag key="value" />') # 1
315        subelement = ET.Element("subtag")
316        element.append(subelement)
317        self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2
318        element.insert(0, subelement)
319        self.serialize_check(element,
320                '<tag key="value"><subtag /><subtag /></tag>') # 3
321        element.remove(subelement)
322        self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4
323        element.remove(subelement)
324        self.serialize_check(element, '<tag key="value" />') # 5
325        with self.assertRaises(ValueError) as cm:
326            element.remove(subelement)
327        self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
328        self.serialize_check(element, '<tag key="value" />') # 6
329        element[0:0] = [subelement, subelement, subelement]
330        self.serialize_check(element[1], '<subtag />')
331        self.assertEqual(element[1:9], [element[1], element[2]])
332        self.assertEqual(element[:9:2], [element[0], element[2]])
333        del element[1:2]
334        self.serialize_check(element,
335                '<tag key="value"><subtag /><subtag /></tag>')
336
337    def test_cdata(self):
338        # Test CDATA handling (etc).
339
340        self.serialize_check(ET.XML("<tag>hello</tag>"),
341                '<tag>hello</tag>')
342        self.serialize_check(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"),
343                '<tag>hello</tag>')
344        self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"),
345                '<tag>hello</tag>')
346
347    def test_file_init(self):
348        stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8"))
349        tree = ET.ElementTree(file=stringfile)
350        self.assertEqual(tree.find("tag").tag, 'tag')
351        self.assertEqual(tree.find("section/tag").tag, 'tag')
352
353        tree = ET.ElementTree(file=SIMPLE_XMLFILE)
354        self.assertEqual(tree.find("element").tag, 'element')
355        self.assertEqual(tree.find("element/../empty-element").tag,
356                'empty-element')
357
358    def test_path_cache(self):
359        # Check that the path cache behaves sanely.
360
361        from xml.etree import ElementPath
362
363        elem = ET.XML(SAMPLE_XML)
364        for i in range(10): ET.ElementTree(elem).find('./'+str(i))
365        cache_len_10 = len(ElementPath._cache)
366        for i in range(10): ET.ElementTree(elem).find('./'+str(i))
367        self.assertEqual(len(ElementPath._cache), cache_len_10)
368        for i in range(20): ET.ElementTree(elem).find('./'+str(i))
369        self.assertGreater(len(ElementPath._cache), cache_len_10)
370        for i in range(600): ET.ElementTree(elem).find('./'+str(i))
371        self.assertLess(len(ElementPath._cache), 500)
372
373    def test_copy(self):
374        # Test copy handling (etc).
375
376        import copy
377        e1 = ET.XML("<tag>hello<foo/></tag>")
378        e2 = copy.copy(e1)
379        e3 = copy.deepcopy(e1)
380        e1.find("foo").tag = "bar"
381        self.serialize_check(e1, '<tag>hello<bar /></tag>')
382        self.serialize_check(e2, '<tag>hello<bar /></tag>')
383        self.serialize_check(e3, '<tag>hello<foo /></tag>')
384
385    def test_attrib(self):
386        # Test attribute handling.
387
388        elem = ET.Element("tag")
389        elem.get("key") # 1.1
390        self.assertEqual(elem.get("key", "default"), 'default') # 1.2
391
392        elem.set("key", "value")
393        self.assertEqual(elem.get("key"), 'value') # 1.3
394
395        elem = ET.Element("tag", key="value")
396        self.assertEqual(elem.get("key"), 'value') # 2.1
397        self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2
398
399        attrib = {"key": "value"}
400        elem = ET.Element("tag", attrib)
401        attrib.clear() # check for aliasing issues
402        self.assertEqual(elem.get("key"), 'value') # 3.1
403        self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2
404
405        attrib = {"key": "value"}
406        elem = ET.Element("tag", **attrib)
407        attrib.clear() # check for aliasing issues
408        self.assertEqual(elem.get("key"), 'value') # 4.1
409        self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2
410
411        elem = ET.Element("tag", {"key": "other"}, key="value")
412        self.assertEqual(elem.get("key"), 'value') # 5.1
413        self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2
414
415        elem = ET.Element('test')
416        elem.text = "aa"
417        elem.set('testa', 'testval')
418        elem.set('testb', 'test2')
419        self.assertEqual(ET.tostring(elem),
420                b'<test testa="testval" testb="test2">aa</test>')
421        self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
422        self.assertEqual(sorted(elem.items()),
423                [('testa', 'testval'), ('testb', 'test2')])
424        self.assertEqual(elem.attrib['testb'], 'test2')
425        elem.attrib['testb'] = 'test1'
426        elem.attrib['testc'] = 'test2'
427        self.assertEqual(ET.tostring(elem),
428                b'<test testa="testval" testb="test1" testc="test2">aa</test>')
429
430        elem = ET.Element('test')
431        elem.set('a', '\r')
432        elem.set('b', '\r\n')
433        elem.set('c', '\t\n\r ')
434        elem.set('d', '\n\n')
435        self.assertEqual(ET.tostring(elem),
436                b'<test a="&#10;" b="&#10;" c="&#09;&#10;&#10; " d="&#10;&#10;" />')
437
438    def test_makeelement(self):
439        # Test makeelement handling.
440
441        elem = ET.Element("tag")
442        attrib = {"key": "value"}
443        subelem = elem.makeelement("subtag", attrib)
444        self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing")
445        elem.append(subelem)
446        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
447
448        elem.clear()
449        self.serialize_check(elem, '<tag />')
450        elem.append(subelem)
451        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
452        elem.extend([subelem, subelem])
453        self.serialize_check(elem,
454            '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>')
455        elem[:] = [subelem]
456        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
457        elem[:] = tuple([subelem])
458        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
459
460    def test_parsefile(self):
461        # Test parsing from file.
462
463        tree = ET.parse(SIMPLE_XMLFILE)
464        stream = io.StringIO()
465        tree.write(stream, encoding='unicode')
466        self.assertEqual(stream.getvalue(),
467                '<root>\n'
468                '   <element key="value">text</element>\n'
469                '   <element>text</element>tail\n'
470                '   <empty-element />\n'
471                '</root>')
472        tree = ET.parse(SIMPLE_NS_XMLFILE)
473        stream = io.StringIO()
474        tree.write(stream, encoding='unicode')
475        self.assertEqual(stream.getvalue(),
476                '<ns0:root xmlns:ns0="namespace">\n'
477                '   <ns0:element key="value">text</ns0:element>\n'
478                '   <ns0:element>text</ns0:element>tail\n'
479                '   <ns0:empty-element />\n'
480                '</ns0:root>')
481
482        with open(SIMPLE_XMLFILE) as f:
483            data = f.read()
484
485        parser = ET.XMLParser()
486        self.assertRegex(parser.version, r'^Expat ')
487        parser.feed(data)
488        self.serialize_check(parser.close(),
489                '<root>\n'
490                '   <element key="value">text</element>\n'
491                '   <element>text</element>tail\n'
492                '   <empty-element />\n'
493                '</root>')
494
495        target = ET.TreeBuilder()
496        parser = ET.XMLParser(target=target)
497        parser.feed(data)
498        self.serialize_check(parser.close(),
499                '<root>\n'
500                '   <element key="value">text</element>\n'
501                '   <element>text</element>tail\n'
502                '   <empty-element />\n'
503                '</root>')
504
505    def test_parseliteral(self):
506        element = ET.XML("<html><body>text</body></html>")
507        self.assertEqual(ET.tostring(element, encoding='unicode'),
508                '<html><body>text</body></html>')
509        element = ET.fromstring("<html><body>text</body></html>")
510        self.assertEqual(ET.tostring(element, encoding='unicode'),
511                '<html><body>text</body></html>')
512        sequence = ["<html><body>", "text</bo", "dy></html>"]
513        element = ET.fromstringlist(sequence)
514        self.assertEqual(ET.tostring(element),
515                b'<html><body>text</body></html>')
516        self.assertEqual(b"".join(ET.tostringlist(element)),
517                b'<html><body>text</body></html>')
518        self.assertEqual(ET.tostring(element, "ascii"),
519                b"<?xml version='1.0' encoding='ascii'?>\n"
520                b"<html><body>text</body></html>")
521        _, ids = ET.XMLID("<html><body>text</body></html>")
522        self.assertEqual(len(ids), 0)
523        _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
524        self.assertEqual(len(ids), 1)
525        self.assertEqual(ids["body"].tag, 'body')
526
    def test_iterparse(self):
        # Test iterparse interface.

        iterparse = ET.iterparse

        # No events argument: only 'end' events are expected, and the root
        # is available as context.root once the iterator is exhausted.
        context = iterparse(SIMPLE_XMLFILE)
        action, elem = next(context)
        self.assertEqual((action, elem.tag), ('end', 'element'))
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('end', 'element'),
                ('end', 'empty-element'),
                ('end', 'root'),
            ])
        self.assertEqual(context.root.tag, 'root')

        # Same default events on the namespaced file: tags are expected in
        # '{namespace}name' form.
        context = iterparse(SIMPLE_NS_XMLFILE)
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('end', '{namespace}element'),
                ('end', '{namespace}element'),
                ('end', '{namespace}empty-element'),
                ('end', '{namespace}root'),
            ])

        # An empty events tuple, passed positionally, yields nothing.
        events = ()
        context = iterparse(SIMPLE_XMLFILE, events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [])

        # Same, with events passed by keyword.
        events = ()
        context = iterparse(SIMPLE_XMLFILE, events=events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [])

        # Both 'start' and 'end' requested.
        events = ("start", "end")
        context = iterparse(SIMPLE_XMLFILE, events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('start', 'root'),
                ('start', 'element'),
                ('end', 'element'),
                ('start', 'element'),
                ('end', 'element'),
                ('start', 'empty-element'),
                ('end', 'empty-element'),
                ('end', 'root'),
            ])

        # Namespace events: for 'start-ns'/'end-ns' the second item is not an
        # element, so it is compared as-is instead of through .tag.
        events = ("start", "end", "start-ns", "end-ns")
        context = iterparse(SIMPLE_NS_XMLFILE, events)
        self.assertEqual([(action, elem.tag) if action in ("start", "end")
                                             else (action, elem)
                          for action, elem in context], [
                ('start-ns', ('', 'namespace')),
                ('start', '{namespace}root'),
                ('start', '{namespace}element'),
                ('end', '{namespace}element'),
                ('start', '{namespace}element'),
                ('end', '{namespace}element'),
                ('start', '{namespace}empty-element'),
                ('end', '{namespace}empty-element'),
                ('end', '{namespace}root'),
                ('end-ns', None),
            ])

        # An empty default-namespace declaration still produces both
        # namespace events.
        events = ('start-ns', 'end-ns')
        context = iterparse(io.StringIO(r"<root xmlns=''/>"), events)
        res = [action for action, elem in context]
        self.assertEqual(res, ['start-ns', 'end-ns'])

        # An unknown event name raises ValueError; a file object supplied by
        # the caller must still be open afterwards.
        events = ("start", "end", "bogus")
        with open(SIMPLE_XMLFILE, "rb") as f:
            with self.assertRaises(ValueError) as cm:
                iterparse(f, events)
            self.assertFalse(f.closed)
        self.assertEqual(str(cm.exception), "unknown event 'bogus'")

        # Same error when a file name is given; the call runs under
        # support.check_no_resource_warning.  cm is deleted inside that
        # block, which drops its reference to the caught exception.
        with support.check_no_resource_warning(self):
            with self.assertRaises(ValueError) as cm:
                iterparse(SIMPLE_XMLFILE, events)
            self.assertEqual(str(cm.exception), "unknown event 'bogus'")
            del cm

        # Non-ASCII namespace URI (via a character reference) and prefix in
        # an iso-8859-1 encoded document.
        source = io.BytesIO(
            b"<?xml version='1.0' encoding='iso-8859-1'?>\n"
            b"<body xmlns='http://&#233;ffbot.org/ns'\n"
            b"      xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
        events = ("start-ns",)
        context = iterparse(source, events)
        self.assertEqual([(action, elem) for action, elem in context], [
                ('start-ns', ('', 'http://\xe9ffbot.org/ns')),
                ('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
            ])

        # Trailing junk: the document element is reported first, and the
        # ParseError is raised by the following next() call.
        source = io.StringIO("<document />junk")
        it = iterparse(source)
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'document'))
        with self.assertRaises(ET.ParseError) as cm:
            next(it)
        self.assertEqual(str(cm.exception),
                'junk after document element: line 1, column 12')

        # Same scenario with a real file opened by iterparse itself, run
        # under support.check_no_resource_warning.  TESTFN is removed by the
        # cleanup registered here.
        self.addCleanup(support.unlink, TESTFN)
        with open(TESTFN, "wb") as f:
            f.write(b"<document />junk")
        it = iterparse(TESTFN)
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'document'))
        with support.check_no_resource_warning(self):
            with self.assertRaises(ET.ParseError) as cm:
                next(it)
            self.assertEqual(str(cm.exception),
                    'junk after document element: line 1, column 12')
            # Drop the references to the exception holder and the iterator
            # before the warning check ends.
            del cm, it
638
639    def test_writefile(self):
640        elem = ET.Element("tag")
641        elem.text = "text"
642        self.serialize_check(elem, '<tag>text</tag>')
643        ET.SubElement(elem, "subtag").text = "subtext"
644        self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>')
645
646        # Test tag suppression
647        elem.tag = None
648        self.serialize_check(elem, 'text<subtag>subtext</subtag>')
649        elem.insert(0, ET.Comment("comment"))
650        self.serialize_check(elem,
651                'text<!--comment--><subtag>subtext</subtag>')     # assumes 1.3
652
653        elem[0] = ET.PI("key", "value")
654        self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>')
655
656    def test_custom_builder(self):
657        # Test parser w. custom builder.
658
659        with open(SIMPLE_XMLFILE) as f:
660            data = f.read()
661        class Builder(list):
662            def start(self, tag, attrib):
663                self.append(("start", tag))
664            def end(self, tag):
665                self.append(("end", tag))
666            def data(self, text):
667                pass
668        builder = Builder()
669        parser = ET.XMLParser(target=builder)
670        parser.feed(data)
671        self.assertEqual(builder, [
672                ('start', 'root'),
673                ('start', 'element'),
674                ('end', 'element'),
675                ('start', 'element'),
676                ('end', 'element'),
677                ('start', 'empty-element'),
678                ('end', 'empty-element'),
679                ('end', 'root'),
680            ])
681
682        with open(SIMPLE_NS_XMLFILE) as f:
683            data = f.read()
684        class Builder(list):
685            def start(self, tag, attrib):
686                self.append(("start", tag))
687            def end(self, tag):
688                self.append(("end", tag))
689            def data(self, text):
690                pass
691            def pi(self, target, data):
692                self.append(("pi", target, data))
693            def comment(self, data):
694                self.append(("comment", data))
695        builder = Builder()
696        parser = ET.XMLParser(target=builder)
697        parser.feed(data)
698        self.assertEqual(builder, [
699                ('pi', 'pi', 'data'),
700                ('comment', ' comment '),
701                ('start', '{namespace}root'),
702                ('start', '{namespace}element'),
703                ('end', '{namespace}element'),
704                ('start', '{namespace}element'),
705                ('end', '{namespace}element'),
706                ('start', '{namespace}empty-element'),
707                ('end', '{namespace}empty-element'),
708                ('end', '{namespace}root'),
709            ])
710
711
712    # Element.getchildren() and ElementTree.getiterator() are deprecated.
713    @checkwarnings(("This method will be removed in future versions.  "
714                    "Use .+ instead.",
715                    (DeprecationWarning, PendingDeprecationWarning)))
716    def test_getchildren(self):
717        # Test Element.getchildren()
718
719        with open(SIMPLE_XMLFILE, "rb") as f:
720            tree = ET.parse(f)
721        self.assertEqual([summarize_list(elem.getchildren())
722                          for elem in tree.getroot().iter()], [
723                ['element', 'element', 'empty-element'],
724                [],
725                [],
726                [],
727            ])
728        self.assertEqual([summarize_list(elem.getchildren())
729                          for elem in tree.getiterator()], [
730                ['element', 'element', 'empty-element'],
731                [],
732                [],
733                [],
734            ])
735
736        elem = ET.XML(SAMPLE_XML)
737        self.assertEqual(len(elem.getchildren()), 3)
738        self.assertEqual(len(elem[2].getchildren()), 1)
739        self.assertEqual(elem[:], elem.getchildren())
740        child1 = elem[0]
741        child2 = elem[2]
742        del elem[1:2]
743        self.assertEqual(len(elem.getchildren()), 2)
744        self.assertEqual(child1, elem[0])
745        self.assertEqual(child2, elem[1])
746        elem[0:2] = [child2, child1]
747        self.assertEqual(child2, elem[0])
748        self.assertEqual(child1, elem[1])
749        self.assertNotEqual(child1, elem[0])
750        elem.clear()
751        self.assertEqual(elem.getchildren(), [])
752
753    def test_writestring(self):
754        elem = ET.XML("<html><body>text</body></html>")
755        self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
756        elem = ET.fromstring("<html><body>text</body></html>")
757        self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
758
759    def test_encoding(self):
760        def check(encoding, body=''):
761            xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" %
762                   (encoding, body))
763            self.assertEqual(ET.XML(xml.encode(encoding)).text, body)
764            self.assertEqual(ET.XML(xml).text, body)
765        check("ascii", 'a')
766        check("us-ascii", 'a')
767        check("iso-8859-1", '\xbd')
768        check("iso-8859-15", '\u20ac')
769        check("cp437", '\u221a')
770        check("mac-roman", '\u02da')
771
772        def xml(encoding):
773            return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
774        def bxml(encoding):
775            return xml(encoding).encode(encoding)
776        supported_encodings = [
777            'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
778            'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
779            'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
780            'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
781            'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
782            'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
783            'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
784            'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
785            'cp1256', 'cp1257', 'cp1258',
786            'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
787            'mac-roman', 'mac-turkish',
788            'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
789            'iso2022-jp-3', 'iso2022-jp-ext',
790            'koi8-r', 'koi8-t', 'koi8-u', 'kz1048',
791            'hz', 'ptcp154',
792        ]
793        for encoding in supported_encodings:
794            self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
795
796        unsupported_ascii_compatible_encodings = [
797            'big5', 'big5hkscs',
798            'cp932', 'cp949', 'cp950',
799            'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
800            'gb2312', 'gbk', 'gb18030',
801            'iso2022-kr', 'johab',
802            'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
803            'utf-7',
804        ]
805        for encoding in unsupported_ascii_compatible_encodings:
806            self.assertRaises(ValueError, ET.XML, bxml(encoding))
807
808        unsupported_ascii_incompatible_encodings = [
809            'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
810            'utf_32', 'utf_32_be', 'utf_32_le',
811        ]
812        for encoding in unsupported_ascii_incompatible_encodings:
813            self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
814
815        self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
816        self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
817
818    def test_methods(self):
819        # Test serialization methods.
820
821        e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
822        e.tail = "\n"
823        self.assertEqual(serialize(e),
824                '<html><link /><script>1 &lt; 2</script></html>\n')
825        self.assertEqual(serialize(e, method=None),
826                '<html><link /><script>1 &lt; 2</script></html>\n')
827        self.assertEqual(serialize(e, method="xml"),
828                '<html><link /><script>1 &lt; 2</script></html>\n')
829        self.assertEqual(serialize(e, method="html"),
830                '<html><link><script>1 < 2</script></html>\n')
831        self.assertEqual(serialize(e, method="text"), '1 < 2\n')
832
833    def test_issue18347(self):
834        e = ET.XML('<html><CamelCase>text</CamelCase></html>')
835        self.assertEqual(serialize(e),
836                '<html><CamelCase>text</CamelCase></html>')
837        self.assertEqual(serialize(e, method="html"),
838                '<html><CamelCase>text</CamelCase></html>')
839
840    def test_entity(self):
841        # Test entity handling.
842
843        # 1) good entities
844
845        e = ET.XML("<document title='&#x8230;'>test</document>")
846        self.assertEqual(serialize(e, encoding="us-ascii"),
847                b'<document title="&#33328;">test</document>')
848        self.serialize_check(e, '<document title="\u8230">test</document>')
849
850        # 2) bad entities
851
852        with self.assertRaises(ET.ParseError) as cm:
853            ET.XML("<document>&entity;</document>")
854        self.assertEqual(str(cm.exception),
855                'undefined entity: line 1, column 10')
856
857        with self.assertRaises(ET.ParseError) as cm:
858            ET.XML(ENTITY_XML)
859        self.assertEqual(str(cm.exception),
860                'undefined entity &entity;: line 5, column 10')
861
862        # 3) custom entity
863
864        parser = ET.XMLParser()
865        parser.entity["entity"] = "text"
866        parser.feed(ENTITY_XML)
867        root = parser.close()
868        self.serialize_check(root, '<document>text</document>')
869
870        # 4) external (SYSTEM) entity
871
872        with self.assertRaises(ET.ParseError) as cm:
873            ET.XML(EXTERNAL_ENTITY_XML)
874        self.assertEqual(str(cm.exception),
875                'undefined entity &entity;: line 4, column 10')
876
877    def test_namespace(self):
878        # Test namespace issues.
879
880        # 1) xml namespace
881
882        elem = ET.XML("<tag xml:lang='en' />")
883        self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1
884
885        # 2) other "well-known" namespaces
886
887        elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
888        self.serialize_check(elem,
889            '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1
890
891        elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
892        self.serialize_check(elem,
893            '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2
894
895        elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
896        self.serialize_check(elem,
897            '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3
898
899        # 3) unknown namespaces
900        elem = ET.XML(SAMPLE_XML_NS)
901        self.serialize_check(elem,
902            '<ns0:body xmlns:ns0="http://effbot.org/ns">\n'
903            '  <ns0:tag>text</ns0:tag>\n'
904            '  <ns0:tag />\n'
905            '  <ns0:section>\n'
906            '    <ns0:tag>subtext</ns0:tag>\n'
907            '  </ns0:section>\n'
908            '</ns0:body>')
909
910    def test_qname(self):
911        # Test QName handling.
912
913        # 1) decorated tags
914
915        elem = ET.Element("{uri}tag")
916        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1
917        elem = ET.Element(ET.QName("{uri}tag"))
918        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2
919        elem = ET.Element(ET.QName("uri", "tag"))
920        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3
921        elem = ET.Element(ET.QName("uri", "tag"))
922        subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
923        subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
924        self.serialize_check(elem,
925            '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4
926
927        # 2) decorated attributes
928
929        elem.clear()
930        elem.attrib["{uri}key"] = "value"
931        self.serialize_check(elem,
932            '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1
933
934        elem.clear()
935        elem.attrib[ET.QName("{uri}key")] = "value"
936        self.serialize_check(elem,
937            '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2
938
939        # 3) decorated values are not converted by default, but the
940        # QName wrapper can be used for values
941
942        elem.clear()
943        elem.attrib["{uri}key"] = "{uri}value"
944        self.serialize_check(elem,
945            '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1
946
947        elem.clear()
948        elem.attrib["{uri}key"] = ET.QName("{uri}value")
949        self.serialize_check(elem,
950            '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2
951
952        elem.clear()
953        subelem = ET.Element("tag")
954        subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
955        elem.append(subelem)
956        elem.append(subelem)
957        self.serialize_check(elem,
958            '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">'
959            '<tag ns1:key="ns2:value" />'
960            '<tag ns1:key="ns2:value" />'
961            '</ns0:tag>') # 3.3
962
963        # 4) Direct QName tests
964
965        self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
966        self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
967        q1 = ET.QName('ns', 'tag')
968        q2 = ET.QName('ns', 'tag')
969        self.assertEqual(q1, q2)
970        q2 = ET.QName('ns', 'other-tag')
971        self.assertNotEqual(q1, q2)
972        self.assertNotEqual(q1, 'ns:tag')
973        self.assertEqual(q1, '{ns}tag')
974
975    def test_doctype_public(self):
976        # Test PUBLIC doctype.
977
978        elem = ET.XML('<!DOCTYPE html PUBLIC'
979                ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
980                ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
981                '<html>text</html>')
982
983    def test_xpath_tokenizer(self):
984        # Test the XPath tokenizer.
985        from xml.etree import ElementPath
986        def check(p, expected):
987            self.assertEqual([op or tag
988                              for op, tag in ElementPath.xpath_tokenizer(p)],
989                             expected)
990
991        # tests from the xml specification
992        check("*", ['*'])
993        check("text()", ['text', '()'])
994        check("@name", ['@', 'name'])
995        check("@*", ['@', '*'])
996        check("para[1]", ['para', '[', '1', ']'])
997        check("para[last()]", ['para', '[', 'last', '()', ']'])
998        check("*/para", ['*', '/', 'para'])
999        check("/doc/chapter[5]/section[2]",
1000              ['/', 'doc', '/', 'chapter', '[', '5', ']',
1001               '/', 'section', '[', '2', ']'])
1002        check("chapter//para", ['chapter', '//', 'para'])
1003        check("//para", ['//', 'para'])
1004        check("//olist/item", ['//', 'olist', '/', 'item'])
1005        check(".", ['.'])
1006        check(".//para", ['.', '//', 'para'])
1007        check("..", ['..'])
1008        check("../@lang", ['..', '/', '@', 'lang'])
1009        check("chapter[title]", ['chapter', '[', 'title', ']'])
1010        check("employee[@secretary and @assistant]", ['employee',
1011              '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
1012
1013        # additional tests
1014        check("{http://spam}egg", ['{http://spam}egg'])
1015        check("./spam.egg", ['.', '/', 'spam.egg'])
1016        check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
1017
1018    def test_processinginstruction(self):
1019        # Test ProcessingInstruction directly
1020
1021        self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
1022                b'<?test instruction?>')
1023        self.assertEqual(ET.tostring(ET.PI('test', 'instruction')),
1024                b'<?test instruction?>')
1025
1026        # Issue #2746
1027
1028        self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
1029                b'<?test <testing&>?>')
1030        self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
1031                b"<?xml version='1.0' encoding='latin-1'?>\n"
1032                b"<?test <testing&>\xe3?>")
1033
1034    def test_html_empty_elems_serialization(self):
1035        # issue 15970
1036        # from http://www.w3.org/TR/html401/index/elements.html
1037        for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
1038                        'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']:
1039            for elem in [element, element.lower()]:
1040                expected = '<%s>' % elem
1041                serialized = serialize(ET.XML('<%s />' % elem), method='html')
1042                self.assertEqual(serialized, expected)
1043                serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)),
1044                                       method='html')
1045                self.assertEqual(serialized, expected)
1046
1047
class XMLPullParserTest(unittest.TestCase):
    # Exercises ET.XMLPullParser: markup goes in through feed() and the
    # queued events come back out through read_events().

    def _feed(self, parser, data, chunk_size=None):
        # Hand data to the parser in one call, or in consecutive slices of
        # chunk_size characters when a chunk size is given.
        if chunk_size is None:
            parser.feed(data)
            return
        for start in range(0, len(data), chunk_size):
            stop = start + chunk_size
            parser.feed(data[start:stop])

    def assert_event_tags(self, parser, expected):
        # Drain the parser's pending events and compare them, reduced to
        # (action, tag) pairs, with expected.
        seen = []
        for action, elem in parser.read_events():
            seen.append((action, elem.tag))
        self.assertEqual(seen, expected)

    def test_simple_xml(self):
        # The same scenario is run with the data fed whole, one character
        # at a time, and in five-character slices.
        for size in (None, 1, 5):
            with self.subTest(chunk_size=size):
                parser = ET.XMLPullParser()

                def push(markup):
                    self._feed(parser, markup, size)

                def expect(*events):
                    self.assert_event_tags(parser, list(events))

                expect()
                push("<!-- comment -->\n")
                expect()
                push("<root>\n  <element key='value'>text</element")
                # The closing ">" has not been fed yet, so no event.
                expect()
                push(">\n")
                expect(('end', 'element'))
                push("<element>text</element>tail\n")
                push("<empty-element/>\n")
                expect(('end', 'element'),
                       ('end', 'empty-element'))
                push("</root>\n")
                expect(('end', 'root'))
                self.assertIsNone(parser.close())

    def test_feed_while_iterating(self):
        # One read_events() iterator is kept across two feed() calls.
        parser = ET.XMLPullParser()
        pending = parser.read_events()
        script = [
            ("<root>\n  <element key='value'>text</element>\n", 'element'),
            ("</root>\n", 'root'),
        ]
        for markup, closed_tag in script:
            self._feed(parser, markup)
            action, elem = next(pending)
            self.assertEqual((action, elem.tag), ('end', closed_tag))
        self.assertRaises(StopIteration, next, pending)

    def test_simple_xml_with_ns(self):
        # Same scenario as test_simple_xml, with a default namespace on
        # the root: the expected tags carry the "{namespace}" prefix.
        parser = ET.XMLPullParser()

        def push(markup):
            self._feed(parser, markup)

        def expect(*events):
            self.assert_event_tags(parser, list(events))

        expect()
        push("<!-- comment -->\n")
        expect()
        push("<root xmlns='namespace'>\n")
        expect()
        push("<element key='value'>text</element")
        expect()
        push(">\n")
        expect(('end', '{namespace}element'))
        push("<element>text</element>tail\n")
        push("<empty-element/>\n")
        expect(('end', '{namespace}element'),
               ('end', '{namespace}empty-element'))
        push("</root>\n")
        expect(('end', '{namespace}root'))
        self.assertIsNone(parser.close())

    def test_ns_events(self):
        # Only namespace events are requested, so only those are queued.
        parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
        for markup in ("<!-- comment -->\n",
                       "<root xmlns='namespace'>\n"):
            self._feed(parser, markup)
        opened = list(parser.read_events())
        self.assertEqual(opened, [('start-ns', ('', 'namespace'))])
        for markup in ("<element key='value'>text</element",
                       ">\n",
                       "<element>text</element>tail\n",
                       "<empty-element/>\n",
                       "</root>\n"):
            self._feed(parser, markup)
        closed = list(parser.read_events())
        self.assertEqual(closed, [('end-ns', None)])
        self.assertIsNone(parser.close())

    def test_events(self):
        def play(parser, script):
            # Feed each chunk, then check the events queued by it.
            for markup, expected in script:
                self._feed(parser, markup)
                self.assert_event_tags(parser, expected)

        namespaced = "<element xmlns='foo'>text<empty-element/></element>tail\n"

        # No events requested: nothing is ever queued.
        silent = ET.XMLPullParser(events=())
        play(silent, [("<root/>\n", [])])

        # Both 'start' and 'end' requested.
        both = ET.XMLPullParser(events=('start', 'end'))
        play(both, [
            ("<!-- comment -->\n", []),
            ("<root>\n", [('start', 'root')]),
            ("<element key='value'>text</element", [('start', 'element')]),
            (">\n", [('end', 'element')]),
            (namespaced, [
                ('start', '{foo}element'),
                ('start', '{foo}empty-element'),
                ('end', '{foo}empty-element'),
                ('end', '{foo}element'),
                ]),
            ])
        self._feed(both, "</root>")
        self.assertIsNone(both.close())
        # The final event is read after close().
        self.assert_event_tags(both, [('end', 'root')])

        # Only 'start' requested.
        starts_only = ET.XMLPullParser(events=('start',))
        play(starts_only, [
            ("<!-- comment -->\n", []),
            ("<root>\n", [('start', 'root')]),
            ("<element key='value'>text</element", [('start', 'element')]),
            (">\n", []),
            (namespaced, [
                ('start', '{foo}element'),
                ('start', '{foo}empty-element'),
                ]),
            ])
        self._feed(starts_only, "</root>")
        self.assertIsNone(starts_only.close())

    def test_events_sequence(self):
        # The events argument is given first as a set and then as a
        # hand-written iterator object instead of a tuple or list.
        markup = "<foo>bar</foo>"
        start_and_end = [('start', 'foo'), ('end', 'foo')]

        as_set = {'end', 'start'}
        parser = ET.XMLPullParser(events=as_set)
        self._feed(parser, markup)
        self.assert_event_tags(parser, start_and_end)

        class EventNames:
            # Iterator over three event names.
            def __init__(self):
                self._names = iter(['start', 'end', 'start-ns'])
            def __iter__(self):
                return self
            def __next__(self):
                return next(self._names)

        parser = ET.XMLPullParser(events=EventNames())
        self._feed(parser, markup)
        self.assert_event_tags(parser, start_and_end)

    def test_unknown_event(self):
        # An unrecognised event name is rejected at construction time.
        self.assertRaises(ValueError, ET.XMLPullParser,
                          events=('start', 'end', 'bogus'))
1200
1201
1202#
1203# xinclude tests (samples from appendix C of the xinclude specification)
1204
# In-memory resources for the XInclude tests, keyed by the name that the
# href attributes refer to.
XINCLUDE = {
    "C1.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml"/>
</document>
""",

    "disclaimer.xml": """\
<?xml version='1.0'?>
<disclaimer>
  <p>The opinions represented herein represent those of the individual
  and should not be interpreted as official policy endorsed by this
  organization.</p>
</disclaimer>
""",

    "C2.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been accessed
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
""",

    "count.txt": "324387",

    "C2b.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been <em>accessed</em>
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
""",

    "C3.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source of the "data.xml" resource:</p>
  <example><xi:include href="data.xml" parse="text"/></example>
</document>
""",

    "data.xml": """\
<?xml version='1.0'?>
<data>
  <item><![CDATA[Brooks & Shields]]></item>
</data>
""",

    "C5.xml": """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
  <xi:include href="example.txt" parse="text">
    <xi:fallback>
      <xi:include href="fallback-example.txt" parse="text">
        <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
      </xi:include>
    </xi:fallback>
  </xi:include>
</div>
""",

    # The href is filled in with the escaped path of the simple.xml
    # test data file.
    "default.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>Example.</p>
  <xi:include href="{}"/>
</document>
""".format(html.escape(SIMPLE_XMLFILE, True)),
}

#
# badly formatted xi:include tags

XINCLUDE_BAD = {
    # parse attribute with a value other than "xml" or "text"
    "B1.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
""",

    # xi:fallback that is not a child of xi:include
    "B2.xml": """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
    <xi:fallback></xi:fallback>
</div>
""",
}
1297
class XIncludeTest(unittest.TestCase):
    # Tests for xml.etree.ElementInclude.include(), driven by loaders
    # defined on this class instead of the module's default loader.

    def xinclude_loader(self, href, parse="xml", encoding=None):
        # Look href up in the XINCLUDE table.  A missing entry is reported
        # as OSError; XML resources are returned parsed, anything else is
        # returned as the stored string.  encoding is accepted but unused.
        try:
            data = XINCLUDE[href]
        except KeyError:
            raise OSError("resource not found")
        if parse == "xml":
            data = ET.XML(data)
        return data

    def none_loader(self, href, parser, encoding=None):
        # Loader that never produces a resource.
        return None

    def _my_loader(self, href, parse, encoding=None):
        # Used to avoid a test-dependency problem where the default loader
        # of ElementInclude uses the pyET parser for cET tests.
        #
        # encoding is accepted (and ignored) because ElementInclude.include
        # passes it as a third argument for parse="text" includes; without
        # the parameter such an include would fail with TypeError inside
        # the loader call.
        if parse == 'xml':
            with open(href, 'rb') as f:
                return ET.parse(f).getroot()
        else:
            return None

    def test_xinclude_default(self):
        # Include a file from disk: default.xml points at SIMPLE_XMLFILE.
        from xml.etree import ElementInclude
        doc = self.xinclude_loader('default.xml')
        ElementInclude.include(doc, self._my_loader)
        self.assertEqual(serialize(doc),
            '<document>\n'
            '  <p>Example.</p>\n'
            '  <root>\n'
            '   <element key="value">text</element>\n'
            '   <element>text</element>tail\n'
            '   <empty-element />\n'
            '</root>\n'
            '</document>')

    def test_xinclude(self):
        from xml.etree import ElementInclude

        # Basic inclusion example (XInclude C.1)
        document = self.xinclude_loader("C1.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            '  <p>120 Mz is adequate for an average home user.</p>\n'
            '  <disclaimer>\n'
            '  <p>The opinions represented herein represent those of the individual\n'
            '  and should not be interpreted as official policy endorsed by this\n'
            '  organization.</p>\n'
            '</disclaimer>\n'
            '</document>') # C1

        # Textual inclusion example (XInclude C.2)
        document = self.xinclude_loader("C2.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            '  <p>This document has been accessed\n'
            '  324387 times.</p>\n'
            '</document>') # C2

        # Textual inclusion after sibling element (based on modified XInclude C.2)
        document = self.xinclude_loader("C2b.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            '  <p>This document has been <em>accessed</em>\n'
            '  324387 times.</p>\n'
            '</document>') # C2b

        # Textual inclusion of XML example (XInclude C.3)
        document = self.xinclude_loader("C3.xml")
        ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(serialize(document),
            '<document>\n'
            '  <p>The following is the source of the "data.xml" resource:</p>\n'
            "  <example>&lt;?xml version='1.0'?&gt;\n"
            '&lt;data&gt;\n'
            '  &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;\n'
            '&lt;/data&gt;\n'
            '</example>\n'
            '</document>') # C3

        # Fallback example (XInclude C.5)
        # Note! Fallback support is not yet implemented
        document = self.xinclude_loader("C5.xml")
        with self.assertRaises(OSError) as cm:
            ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(str(cm.exception), 'resource not found')
        # The document must be left untouched by the failed include.
        self.assertEqual(serialize(document),
            '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n'
            '  <ns0:include href="example.txt" parse="text">\n'
            '    <ns0:fallback>\n'
            '      <ns0:include href="fallback-example.txt" parse="text">\n'
            '        <ns0:fallback><a href="mailto:bob@example.org">Report error</a></ns0:fallback>\n'
            '      </ns0:include>\n'
            '    </ns0:fallback>\n'
            '  </ns0:include>\n'
            '</div>') # C5

    def test_xinclude_failures(self):
        from xml.etree import ElementInclude

        # Test failure to locate included XML file.
        document = ET.XML(XINCLUDE["C1.xml"])
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception),
                "cannot load 'disclaimer.xml' as 'xml'")

        # Test failure to locate included text file.
        document = ET.XML(XINCLUDE["C2.xml"])
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception),
                "cannot load 'count.txt' as 'text'")

        # Test bad parse type.
        document = ET.XML(XINCLUDE_BAD["B1.xml"])
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception),
                "unknown parse type in xi:include tag ('BAD_TYPE')")

        # Test xi:fallback outside xi:include.
        document = ET.XML(XINCLUDE_BAD["B2.xml"])
        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
            ElementInclude.include(document, loader=self.none_loader)
        self.assertEqual(str(cm.exception),
                "xi:fallback tag must be child of xi:include "
                "('{http://www.w3.org/2001/XInclude}fallback')")
1430
1431# --------------------------------------------------------------------
1432# reported bugs
1433
1434class BugsTest(unittest.TestCase):
1435
1436    def test_bug_xmltoolkit21(self):
1437        # marshaller gives obscure errors for non-string values
1438
1439        def check(elem):
1440            with self.assertRaises(TypeError) as cm:
1441                serialize(elem)
1442            self.assertEqual(str(cm.exception),
1443                    'cannot serialize 123 (type int)')
1444
1445        elem = ET.Element(123)
1446        check(elem) # tag
1447
1448        elem = ET.Element("elem")
1449        elem.text = 123
1450        check(elem) # text
1451
1452        elem = ET.Element("elem")
1453        elem.tail = 123
1454        check(elem) # tail
1455
1456        elem = ET.Element("elem")
1457        elem.set(123, "123")
1458        check(elem) # attribute key
1459
1460        elem = ET.Element("elem")
1461        elem.set("123", 123)
1462        check(elem) # attribute value
1463
1464    def test_bug_xmltoolkit25(self):
1465        # typo in ElementTree.findtext
1466
1467        elem = ET.XML(SAMPLE_XML)
1468        tree = ET.ElementTree(elem)
1469        self.assertEqual(tree.findtext("tag"), 'text')
1470        self.assertEqual(tree.findtext("section/tag"), 'subtext')
1471
1472    def test_bug_xmltoolkit28(self):
1473        # .//tag causes exceptions
1474
1475        tree = ET.XML("<doc><table><tbody/></table></doc>")
1476        self.assertEqual(summarize_list(tree.findall(".//thead")), [])
1477        self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody'])
1478
1479    def test_bug_xmltoolkitX1(self):
1480        # dump() doesn't flush the output buffer
1481
1482        tree = ET.XML("<doc><table><tbody/></table></doc>")
1483        with support.captured_stdout() as stdout:
1484            ET.dump(tree)
1485            self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n')
1486
1487    def test_bug_xmltoolkit39(self):
1488        # non-ascii element and attribute names doesn't work
1489
1490        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
1491        self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')
1492
1493        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
1494                      b"<tag \xe4ttr='v&#228;lue' />")
1495        self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'})
1496        self.assertEqual(ET.tostring(tree, "utf-8"),
1497                b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')
1498
1499        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
1500                      b'<t\xe4g>text</t\xe4g>')
1501        self.assertEqual(ET.tostring(tree, "utf-8"),
1502                b'<t\xc3\xa4g>text</t\xc3\xa4g>')
1503
1504        tree = ET.Element("t\u00e4g")
1505        self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')
1506
1507        tree = ET.Element("tag")
1508        tree.set("\u00e4ttr", "v\u00e4lue")
1509        self.assertEqual(ET.tostring(tree, "utf-8"),
1510                b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')
1511
1512    def test_bug_xmltoolkit54(self):
1513        # problems handling internally defined entities
1514
1515        e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]>"
1516                   '<doc>&ldots;</doc>')
1517        self.assertEqual(serialize(e, encoding="us-ascii"),
1518                b'<doc>&#33328;</doc>')
1519        self.assertEqual(serialize(e), '<doc>\u8230</doc>')
1520
1521    def test_bug_xmltoolkit55(self):
1522        # make sure we're reporting the first error, not the last
1523
1524        with self.assertRaises(ET.ParseError) as cm:
1525            ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>"
1526                   b'<doc>&ldots;&ndots;&rdots;</doc>')
1527        self.assertEqual(str(cm.exception),
1528                'undefined entity &ldots;: line 1, column 36')
1529
1530    def test_bug_xmltoolkit60(self):
1531        # Handle crash in stream source.
1532
1533        class ExceptionFile:
1534            def read(self, x):
1535                raise OSError
1536
1537        self.assertRaises(OSError, ET.parse, ExceptionFile())
1538
1539    def test_bug_xmltoolkit62(self):
1540        # Don't crash when using custom entities.
1541
1542        ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'}
1543        parser = ET.XMLParser()
1544        parser.entity.update(ENTITIES)
1545        parser.feed("""<?xml version="1.0" encoding="UTF-8"?>
1546<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
1547<patent-application-publication>
1548<subdoc-abstract>
1549<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
1550</subdoc-abstract>
1551</patent-application-publication>""")
1552        t = parser.close()
1553        self.assertEqual(t.find('.//paragraph').text,
1554            'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.')
1555
1556    @unittest.skipIf(sys.gettrace(), "Skips under coverage.")
1557    def test_bug_xmltoolkit63(self):
1558        # Check reference leak.
1559        def xmltoolkit63():
1560            tree = ET.TreeBuilder()
1561            tree.start("tag", {})
1562            tree.data("text")
1563            tree.end("tag")
1564
1565        xmltoolkit63()
1566        count = sys.getrefcount(None)
1567        for i in range(1000):
1568            xmltoolkit63()
1569        self.assertEqual(sys.getrefcount(None), count)
1570
1571    def test_bug_200708_newline(self):
1572        # Preserve newlines in attributes.
1573
1574        e = ET.Element('SomeTag', text="def _f():\n  return 3\n")
1575        self.assertEqual(ET.tostring(e),
1576                b'<SomeTag text="def _f():&#10;  return 3&#10;" />')
1577        self.assertEqual(ET.XML(ET.tostring(e)).get("text"),
1578                'def _f():\n  return 3\n')
1579        self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))),
1580                b'<SomeTag text="def _f():&#10;  return 3&#10;" />')
1581
1582    def test_bug_200708_close(self):
1583        # Test default builder.
1584        parser = ET.XMLParser() # default
1585        parser.feed("<element>some text</element>")
1586        self.assertEqual(parser.close().tag, 'element')
1587
1588        # Test custom builder.
1589        class EchoTarget:
1590            def close(self):
1591                return ET.Element("element") # simulate root
1592        parser = ET.XMLParser(target=EchoTarget())
1593        parser.feed("<element>some text</element>")
1594        self.assertEqual(parser.close().tag, 'element')
1595
1596    def test_bug_200709_default_namespace(self):
1597        e = ET.Element("{default}elem")
1598        s = ET.SubElement(e, "{default}elem")
1599        self.assertEqual(serialize(e, default_namespace="default"), # 1
1600                '<elem xmlns="default"><elem /></elem>')
1601
1602        e = ET.Element("{default}elem")
1603        s = ET.SubElement(e, "{default}elem")
1604        s = ET.SubElement(e, "{not-default}elem")
1605        self.assertEqual(serialize(e, default_namespace="default"), # 2
1606            '<elem xmlns="default" xmlns:ns1="not-default">'
1607            '<elem />'
1608            '<ns1:elem />'
1609            '</elem>')
1610
1611        e = ET.Element("{default}elem")
1612        s = ET.SubElement(e, "{default}elem")
1613        s = ET.SubElement(e, "elem") # unprefixed name
1614        with self.assertRaises(ValueError) as cm:
1615            serialize(e, default_namespace="default") # 3
1616        self.assertEqual(str(cm.exception),
1617                'cannot use non-qualified names with default_namespace option')
1618
1619    def test_bug_200709_register_namespace(self):
1620        e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
1621        self.assertEqual(ET.tostring(e),
1622            b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />')
1623        ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
1624        e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
1625        self.assertEqual(ET.tostring(e),
1626            b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />')
1627
1628        # And the Dublin Core namespace is in the default list:
1629
1630        e = ET.Element("{http://purl.org/dc/elements/1.1/}title")
1631        self.assertEqual(ET.tostring(e),
1632            b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />')
1633
1634    def test_bug_200709_element_comment(self):
1635        # Not sure if this can be fixed, really (since the serializer needs
1636        # ET.Comment, not cET.comment).
1637
1638        a = ET.Element('a')
1639        a.append(ET.Comment('foo'))
1640        self.assertEqual(a[0].tag, ET.Comment)
1641
1642        a = ET.Element('a')
1643        a.append(ET.PI('foo'))
1644        self.assertEqual(a[0].tag, ET.PI)
1645
1646    def test_bug_200709_element_insert(self):
1647        a = ET.Element('a')
1648        b = ET.SubElement(a, 'b')
1649        c = ET.SubElement(a, 'c')
1650        d = ET.Element('d')
1651        a.insert(0, d)
1652        self.assertEqual(summarize_list(a), ['d', 'b', 'c'])
1653        a.insert(-1, d)
1654        self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c'])
1655
1656    def test_bug_200709_iter_comment(self):
1657        a = ET.Element('a')
1658        b = ET.SubElement(a, 'b')
1659        comment_b = ET.Comment("TEST-b")
1660        b.append(comment_b)
1661        self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment])
1662
1663    # --------------------------------------------------------------------
1664    # reported on bugs.python.org
1665
1666    def test_bug_1534630(self):
1667        bob = ET.TreeBuilder()
1668        e = bob.data("data")
1669        e = bob.start("tag", {})
1670        e = bob.end("tag")
1671        e = bob.close()
1672        self.assertEqual(serialize(e), '<tag />')
1673
1674    def test_issue6233(self):
1675        e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>"
1676                   b'<body>t\xc3\xa3g</body>')
1677        self.assertEqual(ET.tostring(e, 'ascii'),
1678                b"<?xml version='1.0' encoding='ascii'?>\n"
1679                b'<body>t&#227;g</body>')
1680        e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
1681                   b'<body>t\xe3g</body>')
1682        self.assertEqual(ET.tostring(e, 'ascii'),
1683                b"<?xml version='1.0' encoding='ascii'?>\n"
1684                b'<body>t&#227;g</body>')
1685
1686    def test_issue3151(self):
1687        e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
1688        self.assertEqual(e.tag, '{${stuff}}localname')
1689        t = ET.ElementTree(e)
1690        self.assertEqual(ET.tostring(e), b'<ns0:localname xmlns:ns0="${stuff}" />')
1691
1692    def test_issue6565(self):
1693        elem = ET.XML("<body><tag/></body>")
1694        self.assertEqual(summarize_list(elem), ['tag'])
1695        newelem = ET.XML(SAMPLE_XML)
1696        elem[:] = newelem[:]
1697        self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section'])
1698
1699    def test_issue10777(self):
1700        # Registering a namespace twice caused a "dictionary changed size during
1701        # iteration" bug.
1702
1703        ET.register_namespace('test10777', 'http://myuri/')
1704        ET.register_namespace('test10777', 'http://myuri/')
1705
    def test_lost_text(self):
        # Issue #25902: Borrowed text can disappear
        #
        # The element's text is an object whose __bool__() replaces e.text
        # while it is being examined.  The test checks that itertext()
        # still yields the original Text object and that the replacement
        # string is what ends up stored on the element.
        class Text:
            def __bool__(self):
                # Replace the text of the element created below.
                e.text = 'changed'
                return True

        e = ET.Element('tag')
        e.text = Text()
        i = e.itertext()
        # presumably itertext() tests the text for truthiness here, which
        # is what runs Text.__bool__() -- TODO confirm
        t = next(i)
        self.assertIsInstance(t, Text)
        self.assertIsInstance(e.text, str)
        self.assertEqual(e.text, 'changed')
1720
    def test_lost_tail(self):
        # Issue #25902: Borrowed tail can disappear
        #
        # Same scenario as for text, but for the tail of a child element:
        # the tail object's __bool__() replaces e[0].tail while the root's
        # itertext() is running.
        class Text:
            def __bool__(self):
                # Replace the tail of the first child of the root below.
                e[0].tail = 'changed'
                return True

        e = ET.Element('root')
        e.append(ET.Element('tag'))
        e[0].tail = Text()
        i = e.itertext()
        # The first item yielded must still be the original Text object,
        # while the element now holds the replacement string.
        t = next(i)
        self.assertIsInstance(t, Text)
        self.assertIsInstance(e[0].tail, str)
        self.assertEqual(e[0].tail, 'changed')
1736
    def test_lost_elem(self):
        # Issue #25902: Borrowed element can disappear
        #
        # The first child's tag is an object whose __eq__() replaces that
        # child and re-enters the iterator that is doing the comparison.
        class Tag:
            def __eq__(self, other):
                # Replace the element being compared, then advance the
                # same iterator from inside the comparison.
                e[0] = ET.Element('changed')
                next(i)
                return True

        e = ET.Element('root')
        e.append(ET.Element(Tag()))
        e.append(ET.Element('tag'))
        i = e.iter('tag')
        try:
            t = next(i)
        except ValueError:
            # Raised when iter() is a generator that refuses re-entry;
            # the scenario cannot be exercised in that case.
            self.skipTest('generators are not reentrant')
        # The element handed out is the original one (with the Tag
        # object), while the tree now contains the replacement.
        self.assertIsInstance(t.tag, Tag)
        self.assertIsInstance(e[0].tag, str)
        self.assertEqual(e[0].tag, 'changed')
1756
1757    def check_expat224_utf8_bug(self, text):
1758        xml = b'<a b="%s"/>' % text
1759        root = ET.XML(xml)
1760        self.assertEqual(root.get('b'), text.decode('utf-8'))
1761
1762    def test_expat224_utf8_bug(self):
1763        # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
1764        # Check that Expat 2.2.4 fixed the bug.
1765        #
1766        # Test buffer bounds at odd and even positions.
1767
1768        text = b'\xc3\xa0' * 1024
1769        self.check_expat224_utf8_bug(text)
1770
1771        text = b'x' + b'\xc3\xa0' * 1024
1772        self.check_expat224_utf8_bug(text)
1773
1774    def test_expat224_utf8_bug_file(self):
1775        with open(UTF8_BUG_XMLFILE, 'rb') as fp:
1776            raw = fp.read()
1777        root = ET.fromstring(raw)
1778        xmlattr = root.get('b')
1779
1780        # "Parse" manually the XML file to extract the value of the 'b'
1781        # attribute of the <a b='xxx' /> XML element
1782        text = raw.decode('utf-8').strip()
1783        text = text.replace('\r\n', ' ')
1784        text = text[6:-4]
1785        self.assertEqual(root.get('b'), text)
1786
1787
1788
1789# --------------------------------------------------------------------
1790
1791
class BasicElementTest(ElementTestCase, unittest.TestCase):
    # Basic Element behaviour: argument type checks, interaction with the
    # garbage collector and weak references, keyword arguments to get(),
    # and pickling across the modules listed in self.modules.

    def test_augmentation_type_errors(self):
        # append(), extend() and insert() only accept Element children.
        e = ET.Element('joe')
        self.assertRaises(TypeError, e.append, 'b')
        self.assertRaises(TypeError, e.extend, [ET.Element('bar'), 'foo'])
        self.assertRaises(TypeError, e.insert, 0, 'foo')

    def test_cyclic_gc(self):
        # Reference cycles that run through Element objects must be
        # reclaimed by the garbage collector.
        class Dummy:
            pass

        # Test the shortest cycle: d->element->d
        d = Dummy()
        d.dummyref = ET.Element('joe', attr=d)
        wref = weakref.ref(d)
        del d
        gc_collect()
        self.assertIsNone(wref())

        # A longer cycle: d->e->e2->d
        e = ET.Element('joe')
        d = Dummy()
        d.dummyref = e
        wref = weakref.ref(d)
        e2 = ET.SubElement(e, 'foo', attr=d)
        del d, e, e2
        gc_collect()
        self.assertIsNone(wref())

        # A cycle between Element objects as children of one another
        # e1->e2->e3->e1
        e1 = ET.Element('e1')
        e2 = ET.Element('e2')
        e3 = ET.Element('e3')
        e1.append(e2)
        # e2 must hold e3 (not itself) to close the three-element cycle
        # described above; otherwise e1 is not part of any cycle.
        e2.append(e3)
        e3.append(e1)
        wref = weakref.ref(e1)
        del e1, e2, e3
        gc_collect()
        self.assertIsNone(wref())

    def test_weakref(self):
        # An Element can be weakly referenced, and the callback runs once
        # the element is gone.
        flag = False

        def wref_cb(w):
            nonlocal flag
            flag = True

        e = ET.Element('e')
        wref = weakref.ref(e, wref_cb)
        self.assertEqual(wref().tag, 'e')
        del e
        # Do not rely on reference counting alone to finalize the element.
        gc_collect()
        self.assertTrue(flag)
        self.assertIsNone(wref())

    def test_get_keyword_args(self):
        # get() accepts its fallback value as the keyword 'default'.
        e1 = ET.Element('foo', x=1, y=2, z=3)
        self.assertEqual(e1.get('x', default=7), 1)
        self.assertEqual(e1.get('w', default=7), 7)

    def test_pickle(self):
        # issue #16076: the C implementation wasn't pickleable.
        #
        # Every (dumper, loader) pair from self.modules is tried with
        # every pickle protocol from 2 upwards.
        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
            for dumper, loader in product(self.modules, repeat=2):
                e = dumper.Element('foo', bar=42)
                e.text = "text goes here"
                e.tail = "opposite of head"
                dumper.SubElement(e, 'child').append(dumper.Element('grandchild'))
                e.append(dumper.Element('child'))
                e.findall('.//grandchild')[0].set('attr', 'other value')

                e2 = self.pickleRoundTrip(e, 'xml.etree.ElementTree',
                                          dumper, loader, proto)

                self.assertEqual(e2.tag, 'foo')
                self.assertEqual(e2.attrib['bar'], 42)
                self.assertEqual(len(e2), 2)
                self.assertEqualElements(e, e2)

    def test_pickle_issue18997(self):
        # Pickle round trip of an element produced by the parser.
        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
            for dumper, loader in product(self.modules, repeat=2):
                XMLTEXT = """<?xml version="1.0"?>
                    <group><dogs>4</dogs>
                    </group>"""
                e1 = dumper.fromstring(XMLTEXT)
                # Only checked when the element exposes __getstate__().
                if hasattr(e1, '__getstate__'):
                    self.assertEqual(e1.__getstate__()['tag'], 'group')
                e2 = self.pickleRoundTrip(e1, 'xml.etree.ElementTree',
                                          dumper, loader, proto)
                self.assertEqual(e2.tag, 'group')
                self.assertEqual(e2[0].tag, 'dogs')
1883
1884
class BadElementTest(ElementTestCase, unittest.TestCase):
    # Regression tests in which user code mutates an element (or the
    # arguments of a call) from inside a callback.  Most of them make no
    # assertion: they pass as long as nothing crashes or raises an
    # unexpected exception.

    def test_extend_mutable_list(self):
        # The list passed to extend() has its contents replaced from
        # inside a __class__ property of one of its items.
        class X:
            @property
            def __class__(self):
                L[:] = [ET.Element('baz')]
                return ET.Element
        L = [X()]
        e = ET.Element('foo')
        # A TypeError is an acceptable outcome here.
        try:
            e.extend(L)
        except TypeError:
            pass

        # Same scenario with an item that really is an Element subclass;
        # here extend() is expected not to raise.
        class Y(X, ET.Element):
            pass
        L = [Y('x')]
        e = ET.Element('foo')
        e.extend(L)

    def test_extend_mutable_list2(self):
        # Like test_extend_mutable_list, but the list is emptied instead
        # of having its contents replaced.
        class X:
            @property
            def __class__(self):
                del L[:]
                return ET.Element
        L = [X(), ET.Element('baz')]
        e = ET.Element('foo')
        # A TypeError is an acceptable outcome here.
        try:
            e.extend(L)
        except TypeError:
            pass

        class Y(X, ET.Element):
            pass
        L = [Y('bar'), ET.Element('baz')]
        e = ET.Element('foo')
        e.extend(L)

    def test_remove_with_mutating(self):
        # remove() must raise ValueError when the comparison used to
        # locate the child empties the parent and reports "not equal".
        class X(ET.Element):
            def __eq__(self, o):
                del e[:]
                return False
        # The mutating element is the existing child ...
        e = ET.Element('foo')
        e.extend([X('bar')])
        self.assertRaises(ValueError, e.remove, ET.Element('baz'))

        # ... and then the argument of remove().
        e = ET.Element('foo')
        e.extend([ET.Element('bar')])
        self.assertRaises(ValueError, e.remove, X('baz'))

    def test_recursive_repr(self):
        # Issue #25455
        # An element whose tag is temporarily the element itself must make
        # repr() raise RuntimeError.
        e = ET.Element('foo')
        with swap_attr(e, 'tag', e):
            with self.assertRaises(RuntimeError):
                repr(e)  # Should not crash

    def test_element_get_text(self):
        # Issue #27863
        # One of the data chunks is a str subclass whose __del__() reads
        # elem.text.
        class X(str):
            def __del__(self):
                # 'elem' may not be bound yet when this runs.
                try:
                    elem.text
                except NameError:
                    pass

        b = ET.TreeBuilder()
        b.start('tag', {})
        b.data('ABCD')
        b.data(X('EFGH'))
        b.data('IJKL')
        b.end('tag')

        elem = b.close()
        # The three chunks are joined into a single text value.
        self.assertEqual(elem.text, 'ABCDEFGHIJKL')

    def test_element_get_tail(self):
        # Issue #27863
        # Same as test_element_get_text, but the data follows a closed
        # child element and therefore becomes that child's tail.
        class X(str):
            def __del__(self):
                # 'elem' may not be bound yet when this runs.
                try:
                    elem[0].tail
                except NameError:
                    pass

        b = ET.TreeBuilder()
        b.start('root', {})
        b.start('tag', {})
        b.end('tag')
        b.data('ABCD')
        b.data(X('EFGH'))
        b.data('IJKL')
        b.end('root')

        elem = b.close()
        self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL')

    def test_element_iter(self):
        # Issue #27863
        # Force a non-Element object into the list of children and check
        # that iterating over it raises AttributeError.
        state = {
            'tag': 'tag',
            '_children': [None],  # non-Element
            'attrib': 'attr',
            'tail': 'tail',
            'text': 'text',
        }

        e = ET.Element('tag')
        # Use __setstate__() when the element has one; otherwise install
        # the state as the instance dictionary.
        try:
            e.__setstate__(state)
        except AttributeError:
            e.__dict__ = state

        it = e.iter()
        # The element itself comes first; the bogus child fails next.
        self.assertIs(next(it), e)
        self.assertRaises(AttributeError, next, it)

    def test_subscr(self):
        # Issue #27863
        # Slice bounds and steps whose __index__() empties the element
        # while the slice is being evaluated.
        class X:
            def __index__(self):
                del e[:]
                return 1

        e = ET.Element('elem')
        e.append(ET.Element('child'))
        e[:X()]  # shouldn't crash

        e.append(ET.Element('child'))
        e[0:10:X()]  # shouldn't crash

    def test_ass_subscr(self):
        # Issue #27863
        # Slice assignment whose step's __index__() empties the element.
        class X:
            def __index__(self):
                e[:] = []
                return 1

        e = ET.Element('elem')
        for _ in range(10):
            e.insert(0, ET.Element('child'))

        e[0:10:X()] = []  # shouldn't crash

    def test_treebuilder_start(self):
        # Issue #27863
        # The element factory returns plain lists instead of elements;
        # starting a second element must raise AttributeError.
        def element_factory(x, y):
            return []
        b = ET.TreeBuilder(element_factory=element_factory)

        b.start('tag', {})
        b.data('ABCD')
        self.assertRaises(AttributeError, b.start, 'tag2', {})
        # Dispose of the builder and run a collection before returning.
        del b
        gc_collect()

    def test_treebuilder_end(self):
        # Issue #27863
        # Same setup as test_treebuilder_start, but the failure is
        # expected when the element is ended.
        def element_factory(x, y):
            return []
        b = ET.TreeBuilder(element_factory=element_factory)

        b.start('tag', {})
        b.data('ABCD')
        self.assertRaises(AttributeError, b.end, 'tag')
        # Dispose of the builder and run a collection before returning.
        del b
        gc_collect()
2054
2055
class MutatingElementPath(str):
    # A path string that remembers an element (or any object supporting
    # slice deletion) and empties it whenever the path is compared for
    # equality; the comparison itself always succeeds.

    # A class that defines __eq__ loses the inherited __hash__, so the
    # str hash is restored explicitly.
    __hash__ = str.__hash__

    def __new__(cls, elem, *args):
        path = super().__new__(cls, *args)
        path.elem = elem
        return path

    def __eq__(self, o):
        del self.elem[:]
        return True
2065
class BadElementPath(str):
    # A path string whose equality comparison always fails with
    # ZeroDivisionError.

    # A class that defines __eq__ loses the inherited __hash__, so the
    # str hash is restored explicitly.
    __hash__ = str.__hash__

    def __eq__(self, o):
        # Evaluating the division is what raises.
        1 / 0
2070
class BadElementPathTest(ElementTestCase, unittest.TestCase):
    # find(), findtext() and findall() called with path objects that
    # misbehave when compared.  Each test runs against an empty
    # ElementPath cache; the previous cache is restored afterwards.

    def setUp(self):
        super().setUp()
        from xml.etree import ElementPath
        # Remember the shared cache, then swap in a fresh one.
        self.path_cache = ElementPath._cache
        ElementPath._cache = {}

    def tearDown(self):
        from xml.etree import ElementPath
        ElementPath._cache = self.path_cache
        super().tearDown()

    def _parent_with_child(self):
        # Build <foo><bar/></foo>.
        parent = ET.Element('foo')
        parent.extend([ET.Element('bar')])
        return parent

    def _search_ignoring_zero_division(self, search):
        # Run *search* with a path whose comparison raises; the raised
        # ZeroDivisionError, if any, is deliberately ignored.
        try:
            search(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_find_with_mutating(self):
        parent = self._parent_with_child()
        parent.find(MutatingElementPath(parent, 'x'))

    def test_find_with_error(self):
        parent = self._parent_with_child()
        self._search_ignoring_zero_division(parent.find)

    def test_findtext_with_mutating(self):
        parent = self._parent_with_child()
        parent.findtext(MutatingElementPath(parent, 'x'))

    def test_findtext_with_error(self):
        parent = self._parent_with_child()
        self._search_ignoring_zero_division(parent.findtext)

    def test_findall_with_mutating(self):
        parent = self._parent_with_child()
        parent.findall(MutatingElementPath(parent, 'x'))

    def test_findall_with_error(self):
        parent = self._parent_with_child()
        self._search_ignoring_zero_division(parent.findall)
2121
2122
class ElementTreeTypeTest(unittest.TestCase):
    # The public names of the module are classes, and Element can be
    # subclassed in the usual ways.

    def test_istype(self):
        public_classes = (
            ET.ParseError,
            ET.QName,
            ET.ElementTree,
            ET.Element,
            ET.TreeBuilder,
            ET.XMLParser,
        )
        for klass in public_classes:
            self.assertIsInstance(klass, type)

    def test_Element_subclass_trivial(self):
        class MyElement(ET.Element):
            pass

        instance = MyElement('foo')
        for klass in (ET.Element, MyElement):
            self.assertIsInstance(instance, klass)
        self.assertEqual(instance.tag, 'foo')

        # test that attribute assignment works (issue 14849)
        instance.text = "joe"
        self.assertEqual(instance.text, "joe")

    def test_Element_subclass_constructor(self):
        # A subclass may override __init__ and forward to the base class.
        class MyElement(ET.Element):
            def __init__(self, tag, attrib={}, **extra):
                super().__init__(tag + '__', attrib, **extra)

        instance = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4)
        self.assertEqual(instance.tag, 'foo__')

        # Attributes come from both the dict and the keyword arguments.
        attributes = sorted(instance.items())
        self.assertEqual(
            attributes,
            [('a', 1), ('b', 2), ('c', 3), ('d', 4)])

    def test_Element_subclass_new_method(self):
        class MyElement(ET.Element):
            def newmethod(self):
                return self.tag

        instance = MyElement('joe')
        self.assertEqual(instance.newmethod(), 'joe')

    def test_Element_subclass_find(self):
        # Subclass instances are found like ordinary child elements.
        class MyElement(ET.Element):
            pass

        parent = ET.Element('foo')
        parent.text = 'text'
        child = MyElement('bar')
        child.text = 'subtext'
        parent.append(child)

        self.assertEqual(parent.findtext('bar'), 'subtext')
        self.assertEqual(parent.find('bar').tag, 'bar')
        matches = list(parent.findall('bar'))
        self.assertEqual(len(matches), 1, matches)
        self.assertEqual(matches[0].tag, 'bar')
2177
2178
class ElementFindTest(unittest.TestCase):
    # find(), findtext() and findall() with the supported path syntax.

    def test_find_simple(self):
        body = ET.XML(SAMPLE_XML)
        for path in ('tag', 'section/tag', './tag'):
            self.assertEqual(body.find(path).tag, 'tag')

        body[2] = ET.XML(SAMPLE_SECTION)
        self.assertEqual(body.find('section/nexttag').tag, 'nexttag')

        for path, text in (('./tag', 'text'), ('section/tag', 'subtext')):
            self.assertEqual(body.findtext(path), text)

        # section/nexttag is found but has no text
        self.assertEqual(body.findtext('section/nexttag'), '')
        self.assertEqual(body.findtext('section/nexttag', 'default'), '')

        # tog doesn't exist and 'default' kicks in
        self.assertIsNone(body.findtext('tog'))
        self.assertEqual(body.findtext('tog', 'default'), 'default')

        # Issue #16922
        with_empty = ET.XML('<tag><empty /></tag>')
        self.assertEqual(with_empty.findtext('empty'), '')

    def test_find_xpath(self):
        LINEAR_XML = '''
        <body>
            <tag class='a'/>
            <tag class='b'/>
            <tag class='c'/>
            <tag class='d'/>
        </body>'''
        body = ET.XML(LINEAR_XML)

        # Test for numeric indexing and last()
        positional = (
            ('./tag[1]', 'a'),
            ('./tag[2]', 'b'),
            ('./tag[last()]', 'd'),
            ('./tag[last()-1]', 'c'),
            ('./tag[last()-2]', 'b'),
        )
        for path, css_class in positional:
            self.assertEqual(body.find(path).attrib['class'], css_class)

        # Positions outside the supported forms are rejected.
        rejected = (
            './tag[0]',
            './tag[-1]',
            './tag[last()-0]',
            './tag[last()+1]',
        )
        for path in rejected:
            self.assertRaisesRegex(SyntaxError, 'XPath', body.find, path)

    def test_findall(self):
        body = ET.XML(SAMPLE_XML)
        body[2] = ET.XML(SAMPLE_SECTION)

        def check(path, expected):
            # Compare the tags of everything *path* matches below body.
            self.assertEqual(summarize_list(body.findall(path)), expected)

        # Plain paths, wildcards and descendant searches.
        structural = (
            ('.', ['body']),
            ('tag', ['tag', 'tag']),
            ('tog', []),
            ('tog/foo', []),
            ('*', ['tag', 'tag', 'section']),
            ('.//tag', ['tag'] * 4),
            ('section/tag', ['tag']),
            ('section//tag', ['tag'] * 2),
            ('section/*', ['tag', 'nexttag', 'nextsection']),
            ('section//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('section/.//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('*/*', ['tag', 'nexttag', 'nextsection']),
            ('*//*', ['tag', 'nexttag', 'nextsection', 'tag']),
            ('*/tag', ['tag']),
            ('*/./tag', ['tag']),
            ('./tag', ['tag'] * 2),
            ('././tag', ['tag'] * 2),
        )
        for path, expected in structural:
            check(path, expected)

        # Attribute and child predicates, and parent steps.
        predicates = (
            ('.//tag[@class]', ['tag'] * 3),
            ('.//tag[@class="a"]', ['tag']),
            ('.//tag[@class="b"]', ['tag'] * 2),
            ('.//tag[@id]', ['tag']),
            ('.//section[tag]', ['section']),
            ('.//section[element]', []),
            ('../tag', []),
            ('section/../tag', ['tag'] * 2),
        )
        for path, expected in predicates:
            check(path, expected)
        self.assertEqual(body.findall('section//'), body.findall('section//*'))

        # Child-text predicates, with whitespace variations.
        child_text = (
            ".//section[tag='subtext']",
            ".//section[tag ='subtext']",
            ".//section[tag= 'subtext']",
            ".//section[tag = 'subtext']",
            ".//section[ tag = 'subtext' ]",
        )
        for path in child_text:
            check(path, ['section'])

        # Own-text predicates; whitespace inside the quotes is significant.
        own_text = (
            (".//tag[.='subtext']", ['tag']),
            (".//tag[. ='subtext']", ['tag']),
            ('.//tag[.= "subtext"]', ['tag']),
            ('.//tag[ . = "subtext" ]', ['tag']),
            (".//tag[. = 'subtext']", ['tag']),
            (".//tag[. = 'subtext ']", []),
            (".//tag[.= ' subtext']", []),
        )
        for path, expected in own_text:
            check(path, expected)

        # duplicate section => 2x tag matches
        body[1] = body[2]
        check(".//section[tag = 'subtext']", ['section', 'section'])
        check(".//tag[. = 'subtext']", ['tag', 'tag'])

    def test_test_find_with_ns(self):
        body = ET.XML(SAMPLE_XML_NS)
        # An unqualified name does not match namespaced elements.
        self.assertEqual(summarize_list(body.findall('tag')), [])

        direct = summarize_list(body.findall("{http://effbot.org/ns}tag"))
        self.assertEqual(direct, ['{http://effbot.org/ns}tag'] * 2)

        nested = summarize_list(body.findall(".//{http://effbot.org/ns}tag"))
        self.assertEqual(nested, ['{http://effbot.org/ns}tag'] * 3)

    def test_findall_different_nsmaps(self):
        root = ET.XML('''
            <a xmlns:x="X" xmlns:y="Y">
                <x:b><c/></x:b>
                <b/>
                <c><x:b/><b/></c><y:b/>
            </a>''')
        # The same prefix bound to different URIs selects different
        # elements; the unprefixed search is unaffected by the mapping.
        for uri, prefixed_count in (('X', 2), ('Y', 1)):
            nsmap = {'xx': uri}
            prefixed = root.findall(".//xx:b", namespaces=nsmap)
            self.assertEqual(len(prefixed), prefixed_count)
            unprefixed = root.findall(".//b", namespaces=nsmap)
            self.assertEqual(len(unprefixed), 2)

    def test_bad_find(self):
        body = ET.XML(SAMPLE_XML)
        self.assertRaisesRegex(SyntaxError, 'cannot use absolute path',
                               body.findall, '/tag')

    def test_find_through_ElementTree(self):
        body = ET.XML(SAMPLE_XML)

        first = ET.ElementTree(body).find('tag')
        self.assertEqual(first.tag, 'tag')

        first_text = ET.ElementTree(body).findtext('tag')
        self.assertEqual(first_text, 'text')

        direct = ET.ElementTree(body).findall('tag')
        self.assertEqual(summarize_list(direct), ['tag'] * 2)

        # this produces a warning
        msg = ("This search is broken in 1.3 and earlier, and will be fixed "
               "in a future version.  If you rely on the current behaviour, "
               "change it to '.+'")
        with self.assertWarnsRegex(FutureWarning, msg):
            everywhere = ET.ElementTree(body).findall('//tag')
        self.assertEqual(summarize_list(everywhere), ['tag'] * 3)
2344
2345
class ElementIterTest(unittest.TestCase):
    """Checks for Element.iter(), Element.itertext() and ET.iterparse()."""

    # Markup shared by the tag-filtering tests below.
    _HOUSES_XML = '''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>'''

    def _ilist(self, elem, tag=None):
        """Summarize the elements yielded by ``elem.iter(tag)``."""
        found = elem.iter(tag)
        return summarize_list(found)

    def test_basic(self):
        root = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
        body = root.find('body')
        self.assertEqual(self._ilist(root), ['html', 'body', 'i'])
        self.assertEqual(self._ilist(body), ['body', 'i'])
        first_elem = next(root.iter())
        self.assertEqual(first_elem.tag, 'html')
        self.assertEqual(''.join(root.itertext()), 'this is a paragraph...')
        self.assertEqual(''.join(body.itertext()), 'this is a paragraph.')
        first_text = next(root.itertext())
        self.assertEqual(first_text, 'this is a ')

        # iterparse should return an iterator
        source = serialize(root, to_string=False)
        first_event = next(ET.iterparse(source))
        self.assertEqual(first_event[0], 'end')

        # With an explicit parser too (issue #9708)
        source = serialize(root, to_string=False)
        explicit_parser = ET.XMLParser(target=ET.TreeBuilder())
        first_event = next(ET.iterparse(source, parser=explicit_parser))
        self.assertEqual(first_event[0], 'end')

        # A tree without a root cannot be iterated.
        rootless = ET.ElementTree(None)
        with self.assertRaises(AttributeError):
            rootless.iter()

        # Issue #16913
        root = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
        self.assertEqual(''.join(root.itertext()), 'a&b&c&')

    def test_corners(self):
        # single root, no subelements
        root = ET.Element('a')
        self.assertEqual(self._ilist(root), ['a'])

        # one child
        child = ET.SubElement(root, 'b')
        self.assertEqual(self._ilist(root), ['a', 'b'])

        # one child and one grandchild
        ET.SubElement(child, 'c')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c'])

        # two children, only first with grandchild
        ET.SubElement(root, 'd')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c', 'd'])

        # replace first child by second
        root[0] = root[1]
        del root[1]
        self.assertEqual(self._ilist(root), ['a', 'd'])

    def test_iter_by_tag(self):
        doc = ET.XML(self._HOUSES_XML)

        self.assertEqual(self._ilist(doc, 'room'), ['room'] * 3)
        self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)

        # test that iter also accepts 'tag' as a keyword arg
        by_keyword = doc.iter(tag='room')
        self.assertEqual(summarize_list(by_keyword), ['room'] * 3)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(summarize_list(doc.iter()), all_tags)
        self.assertEqual(self._ilist(doc), all_tags)
        self.assertEqual(self._ilist(doc, '*'), all_tags)

    # Element.getiterator() is deprecated.
    @checkwarnings(("This method will be removed in future versions.  "
                    "Use .+ instead.", PendingDeprecationWarning))
    def test_getiterator(self):
        doc = ET.XML(self._HOUSES_XML)

        rooms = doc.getiterator('room')
        self.assertEqual(summarize_list(rooms), ['room'] * 3)
        houses = doc.getiterator('house')
        self.assertEqual(summarize_list(houses), ['house'] * 2)

        # test that getiterator also accepts 'tag' as a keyword arg
        by_keyword = doc.getiterator(tag='room')
        self.assertEqual(summarize_list(by_keyword), ['room'] * 3)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(summarize_list(doc.getiterator()), all_tags)
        self.assertEqual(summarize_list(doc.getiterator(None)), all_tags)
        self.assertEqual(summarize_list(doc.getiterator('*')), all_tags)

    def test_copy(self):
        # Element iterators must refuse to be copied.
        iterator = ET.Element('a').iter()
        self.assertRaises(TypeError, copy.copy, iterator)

    def test_pickle(self):
        # Element iterators must refuse to be pickled, at every protocol.
        iterator = ET.Element('a').iter()
        unpicklable = (TypeError, pickle.PicklingError)
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            self.assertRaises(unpicklable, pickle.dumps, iterator, proto)
2474
2475
class TreeBuilderTest(unittest.TestCase):
    """Checks for ET.TreeBuilder and for user-supplied parser targets."""

    sample1 = ('<!DOCTYPE html PUBLIC'
        ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        '<html>text<div>subtext</div>tail</html>')

    sample2 = '<toplevel>sometext</toplevel>'

    def _check_sample1_element(self, e):
        """Assert that *e* is the tree expected from parsing ``sample1``."""
        # The root element itself.
        self.assertEqual(e.tag, 'html')
        self.assertEqual(e.text, 'text')
        self.assertEqual(e.tail, None)
        self.assertEqual(e.attrib, {})
        # Its single <div> child.
        kids = list(e)
        self.assertEqual(len(kids), 1)
        div = kids[0]
        self.assertEqual(div.tag, 'div')
        self.assertEqual(div.text, 'subtext')
        self.assertEqual(div.tail, 'tail')
        self.assertEqual(div.attrib, {})

    def test_dummy_builder(self):
        class BaseDummyBuilder:
            def close(self):
                return 42

        class DummyBuilder(BaseDummyBuilder):
            # Event handlers that accept anything and do nothing.
            def data(self, *args):
                return None

            def start(self, *args):
                return None

            def end(self, *args):
                return None

        # A target's close() result is what parser.close() returns, whether
        # or not the target defines the other handlers.
        for target in (DummyBuilder(), BaseDummyBuilder()):
            parser = ET.XMLParser(target=target)
            parser.feed(self.sample1)
            self.assertEqual(parser.close(), 42)

        # A target with no handlers at all yields None.
        parser = ET.XMLParser(target=object())
        parser.feed(self.sample1)
        self.assertIsNone(parser.close())

    def test_treebuilder_elementfactory_none(self):
        builder = ET.TreeBuilder(element_factory=None)
        parser = ET.XMLParser(target=builder)
        parser.feed(self.sample1)
        self._check_sample1_element(parser.close())

    def test_subclass(self):
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        builder = MyTreeBuilder()
        self.assertEqual(builder.foobar(10), 20)

        parser = ET.XMLParser(target=builder)
        parser.feed(self.sample1)

        self._check_sample1_element(parser.close())

    def test_element_factory(self):
        created_tags = []

        def myfactory(tag, attrib):
            # Record each tag the builder asks the factory to create.
            created_tags.append(tag)
            return ET.Element(tag, attrib)

        parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=myfactory))
        parser.feed(self.sample2)
        parser.close()

        self.assertEqual(created_tags, ['toplevel'])

    def _check_element_factory_class(self, cls):
        """Parse ``sample1`` with *cls* as element factory and check the tree."""
        parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=cls))
        parser.feed(self.sample1)
        root = parser.close()
        self.assertIsInstance(root, cls)
        self._check_sample1_element(root)

    def test_element_factory_subclass(self):
        class MyElement(ET.Element):
            pass

        self._check_element_factory_class(MyElement)

    def test_element_factory_pure_python_subclass(self):
        # Mimic SimpleTAL's behaviour (issue #16089): both versions of
        # TreeBuilder should be able to cope with a subclass of the
        # pure Python Element class.
        py_element = ET._Element_Py
        # Not from a C extension
        self.assertEqual(py_element.__module__, 'xml.etree.ElementTree')

        # Force some multiple inheritance with a C class to make things
        # more interesting.
        class MyElement(py_element, ValueError):
            pass

        self._check_element_factory_class(MyElement)

    def test_doctype(self):
        class DoctypeParser:
            _doctype = None

            def doctype(self, name, pubid, system):
                self._doctype = (name, pubid, system)

            def close(self):
                return self._doctype

        parser = ET.XMLParser(target=DoctypeParser())
        parser.feed(self.sample1)

        expected = (
            'html',
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd',
        )
        self.assertEqual(parser.close(), expected)

    def test_builder_lookup_errors(self):
        class RaisingBuilder:
            """Target whose attribute lookup raises for one chosen name."""

            def __init__(self, raise_in=None, what=ValueError):
                self.raise_in = raise_in
                self.what = what

            def __getattr__(self, name):
                if name == self.raise_in:
                    raise self.what(self.raise_in)
                # Any other handler exists and ignores its arguments.
                return lambda *args: None

        events = ('start', 'data', 'end', 'comment', 'pi')

        ET.XMLParser(target=RaisingBuilder())
        # cET also checks for 'close' and 'doctype', PyET does it only at need
        for event in events:
            with self.assertRaisesRegex(ValueError, event):
                ET.XMLParser(target=RaisingBuilder(event))

        # An AttributeError during lookup is tolerated by the parser.
        ET.XMLParser(target=RaisingBuilder(what=AttributeError))
        for event in events:
            target = RaisingBuilder(event, what=AttributeError)
            parser = ET.XMLParser(target=target)
            parser.feed(self.sample1)
            self.assertIsNone(parser.close())
2619
2620
2621class XMLParserTest(unittest.TestCase):
2622    sample1 = b'<file><line>22</line></file>'
2623    sample2 = (b'<!DOCTYPE html PUBLIC'
2624        b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
2625        b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
2626        b'<html>text</html>')
2627    sample3 = ('<?xml version="1.0" encoding="iso-8859-1"?>\n'
2628        '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>')
2629
2630    def _check_sample_element(self, e):
2631        self.assertEqual(e.tag, 'file')
2632        self.assertEqual(e[0].tag, 'line')
2633        self.assertEqual(e[0].text, '22')
2634
2635    def test_constructor_args(self):
2636        # Positional args. The first (html) is not supported, but should be
2637        # nevertheless correctly accepted.
2638        with self.assertWarnsRegex(DeprecationWarning, r'\bhtml\b'):
2639            parser = ET.XMLParser(None, ET.TreeBuilder(), 'utf-8')
2640        parser.feed(self.sample1)
2641        self._check_sample_element(parser.close())
2642
2643        # Now as keyword args.
2644        parser2 = ET.XMLParser(encoding='utf-8',
2645                               target=ET.TreeBuilder())
2646        parser2.feed(self.sample1)
2647        self._check_sample_element(parser2.close())
2648
2649    def test_subclass(self):
2650        class MyParser(ET.XMLParser):
2651            pass
2652        parser = MyParser()
2653        parser.feed(self.sample1)
2654        self._check_sample_element(parser.close())
2655
2656    def test_doctype_warning(self):
2657        parser = ET.XMLParser()
2658        with self.assertWarns(DeprecationWarning):
2659            parser.doctype('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
2660                'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')
2661        parser.feed('<html/>')
2662        parser.close()
2663
2664        with warnings.catch_warnings():
2665            warnings.simplefilter('error', DeprecationWarning)
2666            parser = ET.XMLParser()
2667            parser.feed(self.sample2)
2668            parser.close()
2669
2670    def test_subclass_doctype(self):
2671        _doctype = None
2672        class MyParserWithDoctype(ET.XMLParser):
2673            def doctype(self, name, pubid, system):
2674                nonlocal _doctype
2675                _doctype = (name, pubid, system)
2676
2677        parser = MyParserWithDoctype()
2678        with self.assertWarns(DeprecationWarning):
2679            parser.feed(self.sample2)
2680        parser.close()
2681        self.assertEqual(_doctype,
2682            ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
2683             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
2684
2685        _doctype = _doctype2 = None
2686        with warnings.catch_warnings():
2687            warnings.simplefilter('error', DeprecationWarning)
2688            class DoctypeParser:
2689                def doctype(self, name, pubid, system):
2690                    nonlocal _doctype2
2691                    _doctype2 = (name, pubid, system)
2692
2693            parser = MyParserWithDoctype(target=DoctypeParser())
2694            parser.feed(self.sample2)
2695            parser.close()
2696            self.assertIsNone(_doctype)
2697            self.assertEqual(_doctype2,
2698                ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
2699                 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
2700
2701    def test_inherited_doctype(self):
2702        '''Ensure that ordinary usage is not deprecated (Issue 19176)'''
2703        with warnings.catch_warnings():
2704            warnings.simplefilter('error', DeprecationWarning)
2705            class MyParserWithoutDoctype(ET.XMLParser):
2706                pass
2707            parser = MyParserWithoutDoctype()
2708            parser.feed(self.sample2)
2709            parser.close()
2710
2711    def test_parse_string(self):
2712        parser = ET.XMLParser(target=ET.TreeBuilder())
2713        parser.feed(self.sample3)
2714        e = parser.close()
2715        self.assertEqual(e.tag, 'money')
2716        self.assertEqual(e.attrib['value'], '$\xa3\u20ac\U0001017b')
2717        self.assertEqual(e.text, '$\xa3\u20ac\U0001017b')
2718
2719
class NamespaceParseTest(unittest.TestCase):
    """Checks that namespaced elements can be located with findall()."""

    def test_find_with_namespace(self):
        nsmap = {'h': 'hello', 'f': 'foo'}
        doc = ET.fromstring(SAMPLE_XML_NS_ELEMS)

        # (path, number of elements the path must match)
        expectations = (
            ('{hello}table', 1),
            ('.//{hello}td', 2),
            ('.//{foo}name', 1),
        )
        for path, count in expectations:
            matches = doc.findall(path, nsmap)
            self.assertEqual(len(matches), count)
2728
2729
class ElementSlicingTest(unittest.TestCase):
    """Indexing, slicing, slice assignment and slice deletion on Element."""

    def _elem_tags(self, elemlist):
        """Return the tags of the elements in *elemlist*, in order."""
        return list(map(operator.attrgetter('tag'), elemlist))

    def _subelem_tags(self, elem):
        """Return the tags of the direct children of *elem*."""
        children = list(elem)
        return self._elem_tags(children)

    def _make_elem_with_children(self, numchildren):
        """Create an Element with a tag 'a', with the given amount of children
           named 'a0', 'a1' ... and so on.

        """
        parent = ET.Element('a')
        for index in range(numchildren):
            ET.SubElement(parent, 'a{}'.format(index))
        return parent

    def _new_elems(self, count):
        """Return *count* fresh, parentless elements tagged 'b0', 'b1', ..."""
        return [ET.Element('b{}'.format(index)) for index in range(count)]

    def test_getslice_single_index(self):
        parent = self._make_elem_with_children(10)

        self.assertEqual(parent[1].tag, 'a1')
        self.assertEqual(parent[-2].tag, 'a8')

        # Out-of-range indexes, positive and negative.
        with self.assertRaises(IndexError):
            parent[12]
        with self.assertRaises(IndexError):
            parent[-12]

    def test_getslice_range(self):
        parent = self._make_elem_with_children(6)
        tail = ['a3', 'a4', 'a5']

        self.assertEqual(self._elem_tags(parent[3:]), tail)
        self.assertEqual(self._elem_tags(parent[3:6]), tail)
        self.assertEqual(self._elem_tags(parent[3:16]), tail)
        self.assertEqual(self._elem_tags(parent[3:5]), ['a3', 'a4'])
        self.assertEqual(self._elem_tags(parent[3:-1]), ['a3', 'a4'])
        self.assertEqual(self._elem_tags(parent[:2]), ['a0', 'a1'])

    def test_getslice_steps(self):
        parent = self._make_elem_with_children(10)
        huge = sys.maxsize

        self.assertEqual(self._elem_tags(parent[8:10:1]), ['a8', 'a9'])
        self.assertEqual(self._elem_tags(parent[::3]),
                         ['a0', 'a3', 'a6', 'a9'])
        self.assertEqual(self._elem_tags(parent[::8]), ['a0', 'a8'])
        self.assertEqual(self._elem_tags(parent[1::8]), ['a1', 'a9'])
        self.assertEqual(self._elem_tags(parent[3::huge]), ['a3'])
        self.assertEqual(self._elem_tags(parent[3::huge << 64]), ['a3'])

    def test_getslice_negative_steps(self):
        parent = self._make_elem_with_children(4)
        huge = sys.maxsize

        self.assertEqual(self._elem_tags(parent[::-1]),
                         ['a3', 'a2', 'a1', 'a0'])
        self.assertEqual(self._elem_tags(parent[::-2]), ['a3', 'a1'])
        self.assertEqual(self._elem_tags(parent[3::-huge]), ['a3'])
        self.assertEqual(self._elem_tags(parent[3::-huge - 1]), ['a3'])
        self.assertEqual(self._elem_tags(parent[3::-huge << 64]), ['a3'])

    def test_delslice(self):
        parent = self._make_elem_with_children(4)
        del parent[0:2]
        self.assertEqual(self._subelem_tags(parent), ['a2', 'a3'])

        parent = self._make_elem_with_children(4)
        del parent[0:]
        self.assertEqual(self._subelem_tags(parent), [])

        parent = self._make_elem_with_children(4)
        del parent[::-1]
        self.assertEqual(self._subelem_tags(parent), [])

        parent = self._make_elem_with_children(4)
        del parent[::-2]
        self.assertEqual(self._subelem_tags(parent), ['a0', 'a2'])

        parent = self._make_elem_with_children(4)
        del parent[1::2]
        self.assertEqual(self._subelem_tags(parent), ['a0', 'a2'])

        parent = self._make_elem_with_children(2)
        del parent[::2]
        self.assertEqual(self._subelem_tags(parent), ['a1'])

    def test_setslice_single_index(self):
        parent = self._make_elem_with_children(4)
        parent[1] = ET.Element('b')
        self.assertEqual(self._subelem_tags(parent), ['a0', 'b', 'a2', 'a3'])

        parent[-2] = ET.Element('c')
        after = ['a0', 'b', 'c', 'a3']
        self.assertEqual(self._subelem_tags(parent), after)

        # Out-of-range assignments raise and leave the children untouched.
        with self.assertRaises(IndexError):
            parent[5] = ET.Element('d')
        with self.assertRaises(IndexError):
            parent[-5] = ET.Element('d')
        self.assertEqual(self._subelem_tags(parent), after)

    def test_setslice_range(self):
        # Same number of replacements as the slice covers.
        parent = self._make_elem_with_children(4)
        parent[1:3] = self._new_elems(2)
        self.assertEqual(self._subelem_tags(parent), ['a0', 'b0', 'b1', 'a3'])

        # Fewer replacements: the element shrinks.
        parent = self._make_elem_with_children(4)
        parent[1:3] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(parent), ['a0', 'b', 'a3'])

        # More replacements: the element grows.
        parent = self._make_elem_with_children(4)
        parent[1:3] = self._new_elems(3)
        self.assertEqual(self._subelem_tags(parent),
                         ['a0', 'b0', 'b1', 'b2', 'a3'])

    def test_setslice_steps(self):
        parent = self._make_elem_with_children(6)
        parent[1:5:2] = self._new_elems(2)
        self.assertEqual(self._subelem_tags(parent),
                         ['a0', 'b0', 'a2', 'b1', 'a4', 'a5'])

        # An extended slice only accepts a sequence of matching length.
        parent = self._make_elem_with_children(6)
        with self.assertRaises(ValueError):
            parent[1:5:2] = [ET.Element('b')]
        with self.assertRaises(ValueError):
            parent[1:5:2] = self._new_elems(3)
        with self.assertRaises(ValueError):
            parent[1:5:2] = []
        self.assertEqual(self._subelem_tags(parent),
                         ['a0', 'a1', 'a2', 'a3', 'a4', 'a5'])

        parent = self._make_elem_with_children(4)
        parent[1::sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(parent), ['a0', 'b', 'a2', 'a3'])
        parent[1::sys.maxsize << 64] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(parent), ['a0', 'c', 'a2', 'a3'])

    def test_setslice_negative_steps(self):
        parent = self._make_elem_with_children(4)
        parent[2:0:-1] = self._new_elems(2)
        self.assertEqual(self._subelem_tags(parent), ['a0', 'b1', 'b0', 'a3'])

        # An extended slice only accepts a sequence of matching length.
        parent = self._make_elem_with_children(4)
        with self.assertRaises(ValueError):
            parent[2:0:-1] = [ET.Element('b')]
        with self.assertRaises(ValueError):
            parent[2:0:-1] = self._new_elems(3)
        with self.assertRaises(ValueError):
            parent[2:0:-1] = []
        self.assertEqual(self._subelem_tags(parent), ['a0', 'a1', 'a2', 'a3'])

        parent = self._make_elem_with_children(4)
        parent[1::-sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(parent), ['a0', 'b', 'a2', 'a3'])
        parent[1::-sys.maxsize - 1] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(parent), ['a0', 'c', 'a2', 'a3'])
        parent[1::-sys.maxsize << 64] = [ET.Element('d')]
        self.assertEqual(self._subelem_tags(parent), ['a0', 'd', 'a2', 'a3'])
2878
2879
2880class IOTest(unittest.TestCase):
2881    def test_encoding(self):
2882        # Test encoding issues.
2883        elem = ET.Element("tag")
2884        elem.text = "abc"
2885        self.assertEqual(serialize(elem), '<tag>abc</tag>')
2886        for enc in ("utf-8", "us-ascii"):
2887            with self.subTest(enc):
2888                self.assertEqual(serialize(elem, encoding=enc),
2889                        b'<tag>abc</tag>')
2890                self.assertEqual(serialize(elem, encoding=enc.upper()),
2891                        b'<tag>abc</tag>')
2892        for enc in ("iso-8859-1", "utf-16", "utf-32"):
2893            with self.subTest(enc):
2894                self.assertEqual(serialize(elem, encoding=enc),
2895                        ("<?xml version='1.0' encoding='%s'?>\n"
2896                         "<tag>abc</tag>" % enc).encode(enc))
2897                upper = enc.upper()
2898                self.assertEqual(serialize(elem, encoding=upper),
2899                        ("<?xml version='1.0' encoding='%s'?>\n"
2900                         "<tag>abc</tag>" % upper).encode(enc))
2901
2902        elem = ET.Element("tag")
2903        elem.text = "<&\"\'>"
2904        self.assertEqual(serialize(elem), '<tag>&lt;&amp;"\'&gt;</tag>')
2905        self.assertEqual(serialize(elem, encoding="utf-8"),
2906                b'<tag>&lt;&amp;"\'&gt;</tag>')
2907        self.assertEqual(serialize(elem, encoding="us-ascii"),
2908                b'<tag>&lt;&amp;"\'&gt;</tag>')
2909        for enc in ("iso-8859-1", "utf-16", "utf-32"):
2910            self.assertEqual(serialize(elem, encoding=enc),
2911                    ("<?xml version='1.0' encoding='%s'?>\n"
2912                     "<tag>&lt;&amp;\"'&gt;</tag>" % enc).encode(enc))
2913
2914        elem = ET.Element("tag")
2915        elem.attrib["key"] = "<&\"\'>"
2916        self.assertEqual(serialize(elem), '<tag key="&lt;&amp;&quot;\'&gt;" />')
2917        self.assertEqual(serialize(elem, encoding="utf-8"),
2918                b'<tag key="&lt;&amp;&quot;\'&gt;" />')
2919        self.assertEqual(serialize(elem, encoding="us-ascii"),
2920                b'<tag key="&lt;&amp;&quot;\'&gt;" />')
2921        for enc in ("iso-8859-1", "utf-16", "utf-32"):
2922            self.assertEqual(serialize(elem, encoding=enc),
2923                    ("<?xml version='1.0' encoding='%s'?>\n"
2924                     "<tag key=\"&lt;&amp;&quot;'&gt;\" />" % enc).encode(enc))
2925
2926        elem = ET.Element("tag")
2927        elem.text = '\xe5\xf6\xf6<>'
2928        self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6&lt;&gt;</tag>')
2929        self.assertEqual(serialize(elem, encoding="utf-8"),
2930                b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>')
2931        self.assertEqual(serialize(elem, encoding="us-ascii"),
2932                b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>')
2933        for enc in ("iso-8859-1", "utf-16", "utf-32"):
2934            self.assertEqual(serialize(elem, encoding=enc),
2935                    ("<?xml version='1.0' encoding='%s'?>\n"
2936                     "<tag>åöö&lt;&gt;</tag>" % enc).encode(enc))
2937
2938        elem = ET.Element("tag")
2939        elem.attrib["key"] = '\xe5\xf6\xf6<>'
2940        self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6&lt;&gt;" />')
2941        self.assertEqual(serialize(elem, encoding="utf-8"),
2942                b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />')
2943        self.assertEqual(serialize(elem, encoding="us-ascii"),
2944                b'<tag key="&#229;&#246;&#246;&lt;&gt;" />')
2945        for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"):
2946            self.assertEqual(serialize(elem, encoding=enc),
2947                    ("<?xml version='1.0' encoding='%s'?>\n"
2948                     "<tag key=\"åöö&lt;&gt;\" />" % enc).encode(enc))
2949
2950    def test_write_to_filename(self):
2951        self.addCleanup(support.unlink, TESTFN)
2952        tree = ET.ElementTree(ET.XML('''<site />'''))
2953        tree.write(TESTFN)
2954        with open(TESTFN, 'rb') as f:
2955            self.assertEqual(f.read(), b'''<site />''')
2956
2957    def test_write_to_text_file(self):
2958        self.addCleanup(support.unlink, TESTFN)
2959        tree = ET.ElementTree(ET.XML('''<site />'''))
2960        with open(TESTFN, 'w', encoding='utf-8') as f:
2961            tree.write(f, encoding='unicode')
2962            self.assertFalse(f.closed)
2963        with open(TESTFN, 'rb') as f:
2964            self.assertEqual(f.read(), b'''<site />''')
2965
2966    def test_write_to_binary_file(self):
2967        self.addCleanup(support.unlink, TESTFN)
2968        tree = ET.ElementTree(ET.XML('''<site />'''))
2969        with open(TESTFN, 'wb') as f:
2970            tree.write(f)
2971            self.assertFalse(f.closed)
2972        with open(TESTFN, 'rb') as f:
2973            self.assertEqual(f.read(), b'''<site />''')
2974
2975    def test_write_to_binary_file_with_bom(self):
2976        self.addCleanup(support.unlink, TESTFN)
2977        tree = ET.ElementTree(ET.XML('''<site />'''))
2978        # test BOM writing to buffered file
2979        with open(TESTFN, 'wb') as f:
2980            tree.write(f, encoding='utf-16')
2981            self.assertFalse(f.closed)
2982        with open(TESTFN, 'rb') as f:
2983            self.assertEqual(f.read(),
2984                    '''<?xml version='1.0' encoding='utf-16'?>\n'''
2985                    '''<site />'''.encode("utf-16"))
2986        # test BOM writing to non-buffered file
2987        with open(TESTFN, 'wb', buffering=0) as f:
2988            tree.write(f, encoding='utf-16')
2989            self.assertFalse(f.closed)
2990        with open(TESTFN, 'rb') as f:
2991            self.assertEqual(f.read(),
2992                    '''<?xml version='1.0' encoding='utf-16'?>\n'''
2993                    '''<site />'''.encode("utf-16"))
2994
2995    def test_read_from_stringio(self):
2996        tree = ET.ElementTree()
2997        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
2998        tree.parse(stream)
2999        self.assertEqual(tree.getroot().tag, 'site')
3000
3001    def test_write_to_stringio(self):
3002        tree = ET.ElementTree(ET.XML('''<site />'''))
3003        stream = io.StringIO()
3004        tree.write(stream, encoding='unicode')
3005        self.assertEqual(stream.getvalue(), '''<site />''')
3006
3007    def test_read_from_bytesio(self):
3008        tree = ET.ElementTree()
3009        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
3010        tree.parse(raw)
3011        self.assertEqual(tree.getroot().tag, 'site')
3012
3013    def test_write_to_bytesio(self):
3014        tree = ET.ElementTree(ET.XML('''<site />'''))
3015        raw = io.BytesIO()
3016        tree.write(raw)
3017        self.assertEqual(raw.getvalue(), b'''<site />''')
3018
3019    class dummy:
3020        pass
3021
3022    def test_read_from_user_text_reader(self):
3023        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
3024        reader = self.dummy()
3025        reader.read = stream.read
3026        tree = ET.ElementTree()
3027        tree.parse(reader)
3028        self.assertEqual(tree.getroot().tag, 'site')
3029
3030    def test_write_to_user_text_writer(self):
3031        tree = ET.ElementTree(ET.XML('''<site />'''))
3032        stream = io.StringIO()
3033        writer = self.dummy()
3034        writer.write = stream.write
3035        tree.write(writer, encoding='unicode')
3036        self.assertEqual(stream.getvalue(), '''<site />''')
3037
3038    def test_read_from_user_binary_reader(self):
3039        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
3040        reader = self.dummy()
3041        reader.read = raw.read
3042        tree = ET.ElementTree()
3043        tree.parse(reader)
3044        self.assertEqual(tree.getroot().tag, 'site')
3045        tree = ET.ElementTree()
3046
3047    def test_write_to_user_binary_writer(self):
3048        tree = ET.ElementTree(ET.XML('''<site />'''))
3049        raw = io.BytesIO()
3050        writer = self.dummy()
3051        writer.write = raw.write
3052        tree.write(writer)
3053        self.assertEqual(raw.getvalue(), b'''<site />''')
3054
3055    def test_write_to_user_binary_writer_with_bom(self):
3056        tree = ET.ElementTree(ET.XML('''<site />'''))
3057        raw = io.BytesIO()
3058        writer = self.dummy()
3059        writer.write = raw.write
3060        writer.seekable = lambda: True
3061        writer.tell = raw.tell
3062        tree.write(writer, encoding="utf-16")
3063        self.assertEqual(raw.getvalue(),
3064                '''<?xml version='1.0' encoding='utf-16'?>\n'''
3065                '''<site />'''.encode("utf-16"))
3066
3067    def test_tostringlist_invariant(self):
3068        root = ET.fromstring('<tag>foo</tag>')
3069        self.assertEqual(
3070            ET.tostring(root, 'unicode'),
3071            ''.join(ET.tostringlist(root, 'unicode')))
3072        self.assertEqual(
3073            ET.tostring(root, 'utf-16'),
3074            b''.join(ET.tostringlist(root, 'utf-16')))
3075
3076    def test_short_empty_elements(self):
3077        root = ET.fromstring('<tag>a<x />b<y></y>c</tag>')
3078        self.assertEqual(
3079            ET.tostring(root, 'unicode'),
3080            '<tag>a<x />b<y />c</tag>')
3081        self.assertEqual(
3082            ET.tostring(root, 'unicode', short_empty_elements=True),
3083            '<tag>a<x />b<y />c</tag>')
3084        self.assertEqual(
3085            ET.tostring(root, 'unicode', short_empty_elements=False),
3086            '<tag>a<x></x>b<y></y>c</tag>')
3087
3088
class ParseErrorTest(unittest.TestCase):
    """Checks on the ParseError raised for malformed documents."""

    def test_subclass(self):
        self.assertIsInstance(ET.ParseError(), SyntaxError)

    def _get_error(self, s):
        """Parse *s* and return the ParseError that parsing raises.

        The test fails if parsing succeeds.  Without that, the helper
        would return None and the caller would die with an AttributeError
        on None that hides the real problem.
        """
        with self.assertRaises(ET.ParseError) as caught:
            ET.fromstring(s)
        return caught.exception

    def test_error_position(self):
        # ParseError.position is compared against (line, column) pairs.
        self.assertEqual(self._get_error('foo').position, (1, 0))
        self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5))
        self.assertEqual(self._get_error('foobar<').position, (1, 6))

    def test_error_code(self):
        import xml.parsers.expat.errors as ERRORS
        self.assertEqual(self._get_error('foo').code,
                ERRORS.codes[ERRORS.XML_ERROR_SYNTAX])
3108
3109
class KeywordArgsTest(unittest.TestCase):
    # Test various issues with keyword arguments passed to ET.Element
    # constructor and methods
    def test_issue14818(self):
        root = ET.XML("<a>foo</a>")

        # Positional and keyword spellings of the lookup methods must agree.
        self.assertEqual(root.find('a', None),
                         root.find(path='a', namespaces=None))
        self.assertEqual(root.findtext('a', None, None),
                         root.findtext(path='a', default=None,
                                       namespaces=None))
        self.assertEqual(root.findall('a', None),
                         root.findall(path='a', namespaces=None))
        positional = list(root.iterfind('a', None))
        by_keyword = list(root.iterfind(path='a', namespaces=None))
        self.assertEqual(positional, by_keyword)

        self.assertEqual(ET.Element('a').attrib, {})

        # Every way of passing the same attributes gives the same element.
        elements = [
            ET.Element('a', dict(href="#", id="foo")),
            ET.Element('a', attrib=dict(href="#", id="foo")),
            ET.Element('a', dict(href="#"), id="foo"),
            ET.Element('a', href="#", id="foo"),
            ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"),
        ]
        wanted_attrib = {'href': "#", 'id': "foo"}
        for elem in elements:
            self.assertEqual(elem.tag, 'a')
            self.assertEqual(elem.attrib, wanted_attrib)

        child = ET.SubElement(elements[0], 'foobar',
                              attrib={'key1': 'value1'})
        self.assertEqual(child.attrib['key1'], 'value1')

        # A non-dict attrib is rejected, positionally or by keyword.
        with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
            ET.Element('a', "I'm not a dict")
        with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
            ET.Element('a', attrib="I'm not a dict")
3143
3144# --------------------------------------------------------------------
3145
class NoAcceleratorTest(unittest.TestCase):
    # Checks that the C accelerator was not imported for pyET.

    def setUp(self):
        # pyET is None until test_main() has loaded it; skip in that case.
        if pyET:
            return
        self.skipTest('only for the Python version')

    def test_correct_import_pyET(self):
        # A method written in Python has type types.FunctionType, whereas
        # a method defined inside _elementtree has type
        # <class 'wrapper_descriptor'>.
        for class_name in ('Element', 'XMLParser'):
            initializer = getattr(pyET, class_name).__init__
            self.assertIsInstance(initializer, types.FunctionType)
3158
3159# --------------------------------------------------------------------
3160
3161
def test_main(module=None):
    """Run the ElementTree test classes against *module*.

    When invoked without a module, runs the Python ET tests by loading pyET
    (xml.etree.ElementTree imported fresh with ``_elementtree`` blocked).
    Otherwise, uses the given module as the ET.

    The module-level ``ET`` and ``pyET`` globals are set for the duration of
    the run and reset to None afterwards.  The namespace map and the
    ElementPath cache are restored to their pre-run contents.
    """
    global ET, pyET
    pyET = import_fresh_module('xml.etree.ElementTree',
                               blocked=['_elementtree'])
    if module is None:
        module = pyET
    ET = module

    test_classes = [
        ModuleTest,
        ElementSlicingTest,
        BasicElementTest,
        BadElementTest,
        BadElementPathTest,
        ElementTreeTest,
        IOTest,
        ParseErrorTest,
        XIncludeTest,
        ElementTreeTypeTest,
        ElementFindTest,
        ElementIterTest,
        TreeBuilderTest,
        XMLParserTest,
        XMLPullParserTest,
        BugsTest,
        # Previously defined in this file but missing from this list, so it
        # was never run through test_main().
        KeywordArgsTest,
        ]

    # NoAcceleratorTest inspects pyET itself and is added only when the
    # module under test is not pyET. We can't use skipUnless here, because
    # pyET is filled in only after the module is loaded.
    if pyET is not ET:
        test_classes.extend([
            NoAcceleratorTest,
            ])

    # Snapshot the default namespace mapping and the path cache so the
    # tests can modify them freely.
    from xml.etree import ElementPath
    nsmap = ET.register_namespace._namespace_map
    # Copy the default namespace mapping; the live dict is restored in place
    # from this copy afterwards.
    nsmap_copy = nsmap.copy()
    # Let the tests work on a copy of the path cache (should be empty) and
    # keep the original object to put back afterwards.
    path_cache = ElementPath._cache
    ElementPath._cache = path_cache.copy()
    try:
        support.run_unittest(*test_classes)
    finally:
        # Restore mapping and path cache
        nsmap.clear()
        nsmap.update(nsmap_copy)
        ElementPath._cache = path_cache
        # don't interfere with subsequent tests
        ET = pyET = None
3219
3220
# When this file is executed as a script, run the suite with no explicit
# module, i.e. against pyET.
if __name__ == '__main__':
    test_main()
3223