• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# IMPORTANT: the same tests are run from "test_xml_etree_c" in order
2# to ensure consistency between the C implementation and the Python
3# implementation.
4#
5# For this purpose, the module-level "ET" symbol is temporarily
6# monkey-patched when running the "test_xml_etree_c" test suite.
7
8import copy
9import functools
10import html
11import io
12import itertools
13import locale
14import operator
15import os
16import pickle
17import sys
18import textwrap
19import types
20import unittest
21import warnings
22import weakref
23
24from functools import partial
25from itertools import product, islice
26from test import support
27from test.support import os_helper
28from test.support import warnings_helper
29from test.support import findfile, gc_collect, swap_attr, swap_item
30from test.support.import_helper import import_fresh_module
31from test.support.os_helper import TESTFN
32
33
34# pyET is the pure-Python implementation.
35#
36# ET is pyET in test_xml_etree and is the C accelerated version in
37# test_xml_etree_c.
# Placeholders at import time.
# NOTE(review): presumably rebound by setup code before any test runs; that
# code is not visible in this part of the file -- confirm.
pyET = None
ET = None

SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
# Skip the whole module when the located path cannot be encoded as UTF-8.
try:
    SIMPLE_XMLFILE.encode("utf-8")
except UnicodeEncodeError:
    raise unittest.SkipTest("filename is not encodable to utf8")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")
48
# Un-namespaced document: <body> with two <tag> children and a <section>
# that wraps a third <tag>.
SAMPLE_XML = """\
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
"""

# Stand-alone <section> fragment with a nested <nextsection>.
SAMPLE_SECTION = """\
<section>
  <tag class='b' id='inner'>subtext</tag>
  <nexttag />
  <nextsection>
    <tag />
  </nextsection>
</section>
"""

# Same shape as SAMPLE_XML but without attributes and with a default
# namespace on the root.  Note the leading newline: the literal does not
# start with a line continuation.
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
"""

# Two sibling <table> elements bound to different prefixes ("h" and "f")
# under an un-namespaced <root>.
SAMPLE_XML_NS_ELEMS = """
<root>
<h:table xmlns:h="hello">
  <h:tr>
    <h:td>Apples</h:td>
    <h:td>Bananas</h:td>
  </h:tr>
</h:table>

<f:table xmlns:f="foo">
  <f:name>African Coffee Table</f:name>
  <f:width>80</f:width>
  <f:length>120</f:length>
</f:table>
</root>
"""

# DOCTYPE that declares and references an external parameter entity; the
# document body references &entity;, which is not declared inline.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""

# DOCTYPE that declares "entity" as an external general entity pointing at
# a file: URL.
EXTERNAL_ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
]>
<document>&entity;</document>
"""

# DOCTYPE with element declarations, a defaulted xml:lang attribute on
# <bar>, and an internal entity "qux" used in the body.
ATTLIST_XML = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Foo [
<!ELEMENT foo (bar*)>
<!ELEMENT bar (#PCDATA)*>
<!ATTLIST bar xml:lang CDATA "eng">
<!ENTITY qux "quux">
]>
<foo>
<bar>&qux;</bar>
</foo>
"""
123
def checkwarnings(*filters, quiet=False):
    """Build a decorator that runs a test inside check_warnings().

    *filters* and *quiet* are forwarded unchanged to
    warnings_helper.check_warnings() on every call of the decorated test.
    The wrapper copies the test's metadata and discards its return value.
    """
    def decorator(test):
        @functools.wraps(test)
        def wrapper(*args, **kwargs):
            with warnings_helper.check_warnings(*filters, quiet=quiet):
                test(*args, **kwargs)
        return wrapper
    return decorator
132
133
class ModuleTest(unittest.TestCase):
    # Module-level checks: importability and the exported-name list.

    def test_sanity(self):
        # Import sanity.  The imported names are deliberately unused; the
        # test passes as long as the imports succeed.

        from xml.etree import ElementTree, ElementInclude, ElementPath

    def test_all(self):
        # Hand ET to support.check__all__, with HTML_EMPTY listed as a name
        # that is not exported.
        module_names = ("xml.etree.ElementTree", "_elementtree")
        unexported = ("HTML_EMPTY",)
        support.check__all__(self, ET, module_names, not_exported=unexported)
145
146
def serialize(elem, to_string=True, encoding='unicode', **options):
    """Write *elem* through ET.ElementTree.write() into an in-memory file.

    A text buffer is used when *encoding* is 'unicode', a bytes buffer
    otherwise.  Extra keyword *options* are forwarded to write().

    Returns the buffer's contents when *to_string* is true; otherwise
    returns the buffer itself, rewound to the start.
    """
    buffer = io.StringIO() if encoding == 'unicode' else io.BytesIO()
    ET.ElementTree(elem).write(buffer, encoding=encoding, **options)
    if not to_string:
        buffer.seek(0)
        return buffer
    return buffer.getvalue()
159
def summarize_list(seq):
    """Return a list holding the ``tag`` attribute of each item in *seq*."""
    return list(map(operator.attrgetter('tag'), seq))
162
163
class ElementTestCase:
    # Shared helpers: pickling across module implementations and recursive
    # element comparison.

    @classmethod
    def setUpClass(cls):
        # A set, so the two names collapse into one entry when they refer
        # to the same module.
        cls.modules = {pyET, ET}

    def pickleRoundTrip(self, obj, name, dumper, loader, proto):
        # Pickle *obj* (protocol *proto*) while sys.modules[name] is
        # *dumper*, unpickle it while sys.modules[name] is *loader*, and
        # return the unpickled object.  A PicklingError is re-raised as
        # support.TestFailed.
        try:
            with swap_item(sys.modules, name, dumper):
                payload = pickle.dumps(obj, proto)
            with swap_item(sys.modules, name, loader):
                restored = pickle.loads(payload)
        except pickle.PicklingError as pe:
            # pyET must be second, because pyET may be (equal to) ET.
            labels = {ET: "cET", pyET: "pyET"}
            message = "Failed to round-trip %r from %r to %r" % (
                obj,
                labels.get(dumper, dumper),
                labels.get(loader, loader))
            raise support.TestFailed(message) from pe
        else:
            return restored

    def assertEqualElements(self, alice, bob):
        # Both arguments must be elements of either implementation, have the
        # same number of children, equal children (checked recursively) and
        # equal tag/tail/text/attrib.
        element_types = (ET.Element, pyET.Element)
        self.assertIsInstance(alice, element_types)
        self.assertIsInstance(bob, element_types)
        self.assertEqual(len(list(alice)), len(list(bob)))
        for left, right in zip(alice, bob):
            self.assertEqualElements(left, right)
        snapshot = operator.attrgetter('tag', 'tail', 'text', 'attrib')
        self.assertEqual(snapshot(alice), snapshot(bob))
192
193# --------------------------------------------------------------------
194# element tree tests
195
196class ElementTreeTest(unittest.TestCase):
197
198    def serialize_check(self, elem, expected):
199        self.assertEqual(serialize(elem), expected)
200
201    def test_interface(self):
202        # Test element tree interface.
203
204        def check_string(string):
205            len(string)
206            for char in string:
207                self.assertEqual(len(char), 1,
208                        msg="expected one-character string, got %r" % char)
209            new_string = string + ""
210            new_string = string + " "
211            string[:0]
212
213        def check_mapping(mapping):
214            len(mapping)
215            keys = mapping.keys()
216            items = mapping.items()
217            for key in keys:
218                item = mapping[key]
219            mapping["key"] = "value"
220            self.assertEqual(mapping["key"], "value",
221                    msg="expected value string, got %r" % mapping["key"])
222
223        def check_element(element):
224            self.assertTrue(ET.iselement(element), msg="not an element")
225            direlem = dir(element)
226            for attr in 'tag', 'attrib', 'text', 'tail':
227                self.assertTrue(hasattr(element, attr),
228                        msg='no %s member' % attr)
229                self.assertIn(attr, direlem,
230                        msg='no %s visible by dir' % attr)
231
232            check_string(element.tag)
233            check_mapping(element.attrib)
234            if element.text is not None:
235                check_string(element.text)
236            if element.tail is not None:
237                check_string(element.tail)
238            for elem in element:
239                check_element(elem)
240
241        element = ET.Element("tag")
242        check_element(element)
243        tree = ET.ElementTree(element)
244        check_element(tree.getroot())
245        element = ET.Element("t\xe4g", key="value")
246        tree = ET.ElementTree(element)
247        self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$")
248        element = ET.Element("tag", key="value")
249
250        # Make sure all standard element methods exist.
251
252        def check_method(method):
253            self.assertTrue(hasattr(method, '__call__'),
254                    msg="%s not callable" % method)
255
256        check_method(element.append)
257        check_method(element.extend)
258        check_method(element.insert)
259        check_method(element.remove)
260        check_method(element.find)
261        check_method(element.iterfind)
262        check_method(element.findall)
263        check_method(element.findtext)
264        check_method(element.clear)
265        check_method(element.get)
266        check_method(element.set)
267        check_method(element.keys)
268        check_method(element.items)
269        check_method(element.iter)
270        check_method(element.itertext)
271
272        # These methods return an iterable. See bug 6472.
273
274        def check_iter(it):
275            check_method(it.__next__)
276
277        check_iter(element.iterfind("tag"))
278        check_iter(element.iterfind("*"))
279        check_iter(tree.iterfind("tag"))
280        check_iter(tree.iterfind("*"))
281
282        # These aliases are provided:
283
284        self.assertEqual(ET.XML, ET.fromstring)
285        self.assertEqual(ET.PI, ET.ProcessingInstruction)
286
287    def test_set_attribute(self):
288        element = ET.Element('tag')
289
290        self.assertEqual(element.tag, 'tag')
291        element.tag = 'Tag'
292        self.assertEqual(element.tag, 'Tag')
293        element.tag = 'TAG'
294        self.assertEqual(element.tag, 'TAG')
295
296        self.assertIsNone(element.text)
297        element.text = 'Text'
298        self.assertEqual(element.text, 'Text')
299        element.text = 'TEXT'
300        self.assertEqual(element.text, 'TEXT')
301
302        self.assertIsNone(element.tail)
303        element.tail = 'Tail'
304        self.assertEqual(element.tail, 'Tail')
305        element.tail = 'TAIL'
306        self.assertEqual(element.tail, 'TAIL')
307
308        self.assertEqual(element.attrib, {})
309        element.attrib = {'a': 'b', 'c': 'd'}
310        self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'})
311        element.attrib = {'A': 'B', 'C': 'D'}
312        self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'})
313
314    def test_simpleops(self):
315        # Basic method sanity checks.
316
317        elem = ET.XML("<body><tag/></body>")
318        self.serialize_check(elem, '<body><tag /></body>')
319        e = ET.Element("tag2")
320        elem.append(e)
321        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
322        elem.remove(e)
323        self.serialize_check(elem, '<body><tag /></body>')
324        elem.insert(0, e)
325        self.serialize_check(elem, '<body><tag2 /><tag /></body>')
326        elem.remove(e)
327        elem.extend([e])
328        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
329        elem.remove(e)
330        elem.extend(iter([e]))
331        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
332        elem.remove(e)
333
334        element = ET.Element("tag", key="value")
335        self.serialize_check(element, '<tag key="value" />') # 1
336        subelement = ET.Element("subtag")
337        element.append(subelement)
338        self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2
339        element.insert(0, subelement)
340        self.serialize_check(element,
341                '<tag key="value"><subtag /><subtag /></tag>') # 3
342        element.remove(subelement)
343        self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4
344        element.remove(subelement)
345        self.serialize_check(element, '<tag key="value" />') # 5
346        with self.assertRaises(ValueError) as cm:
347            element.remove(subelement)
348        self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
349        self.serialize_check(element, '<tag key="value" />') # 6
350        element[0:0] = [subelement, subelement, subelement]
351        self.serialize_check(element[1], '<subtag />')
352        self.assertEqual(element[1:9], [element[1], element[2]])
353        self.assertEqual(element[:9:2], [element[0], element[2]])
354        del element[1:2]
355        self.serialize_check(element,
356                '<tag key="value"><subtag /><subtag /></tag>')
357
358    def test_cdata(self):
359        # Test CDATA handling (etc).
360
361        self.serialize_check(ET.XML("<tag>hello</tag>"),
362                '<tag>hello</tag>')
363        self.serialize_check(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"),
364                '<tag>hello</tag>')
365        self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"),
366                '<tag>hello</tag>')
367
368    def test_file_init(self):
369        stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8"))
370        tree = ET.ElementTree(file=stringfile)
371        self.assertEqual(tree.find("tag").tag, 'tag')
372        self.assertEqual(tree.find("section/tag").tag, 'tag')
373
374        tree = ET.ElementTree(file=SIMPLE_XMLFILE)
375        self.assertEqual(tree.find("element").tag, 'element')
376        self.assertEqual(tree.find("element/../empty-element").tag,
377                'empty-element')
378
379    def test_path_cache(self):
380        # Check that the path cache behaves sanely.
381
382        from xml.etree import ElementPath
383
384        elem = ET.XML(SAMPLE_XML)
385        for i in range(10): ET.ElementTree(elem).find('./'+str(i))
386        cache_len_10 = len(ElementPath._cache)
387        for i in range(10): ET.ElementTree(elem).find('./'+str(i))
388        self.assertEqual(len(ElementPath._cache), cache_len_10)
389        for i in range(20): ET.ElementTree(elem).find('./'+str(i))
390        self.assertGreater(len(ElementPath._cache), cache_len_10)
391        for i in range(600): ET.ElementTree(elem).find('./'+str(i))
392        self.assertLess(len(ElementPath._cache), 500)
393
394    def test_copy(self):
395        # Test copy handling (etc).
396
397        import copy
398        e1 = ET.XML("<tag>hello<foo/></tag>")
399        e2 = copy.copy(e1)
400        e3 = copy.deepcopy(e1)
401        e1.find("foo").tag = "bar"
402        self.serialize_check(e1, '<tag>hello<bar /></tag>')
403        self.serialize_check(e2, '<tag>hello<bar /></tag>')
404        self.serialize_check(e3, '<tag>hello<foo /></tag>')
405
406    def test_attrib(self):
407        # Test attribute handling.
408
409        elem = ET.Element("tag")
410        elem.get("key") # 1.1
411        self.assertEqual(elem.get("key", "default"), 'default') # 1.2
412
413        elem.set("key", "value")
414        self.assertEqual(elem.get("key"), 'value') # 1.3
415
416        elem = ET.Element("tag", key="value")
417        self.assertEqual(elem.get("key"), 'value') # 2.1
418        self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2
419
420        attrib = {"key": "value"}
421        elem = ET.Element("tag", attrib)
422        attrib.clear() # check for aliasing issues
423        self.assertEqual(elem.get("key"), 'value') # 3.1
424        self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2
425
426        attrib = {"key": "value"}
427        elem = ET.Element("tag", **attrib)
428        attrib.clear() # check for aliasing issues
429        self.assertEqual(elem.get("key"), 'value') # 4.1
430        self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2
431
432        elem = ET.Element("tag", {"key": "other"}, key="value")
433        self.assertEqual(elem.get("key"), 'value') # 5.1
434        self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2
435
436        elem = ET.Element('test')
437        elem.text = "aa"
438        elem.set('testa', 'testval')
439        elem.set('testb', 'test2')
440        self.assertEqual(ET.tostring(elem),
441                b'<test testa="testval" testb="test2">aa</test>')
442        self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
443        self.assertEqual(sorted(elem.items()),
444                [('testa', 'testval'), ('testb', 'test2')])
445        self.assertEqual(elem.attrib['testb'], 'test2')
446        elem.attrib['testb'] = 'test1'
447        elem.attrib['testc'] = 'test2'
448        self.assertEqual(ET.tostring(elem),
449                b'<test testa="testval" testb="test1" testc="test2">aa</test>')
450
451        # Test preserving white space chars in attributes
452        elem = ET.Element('test')
453        elem.set('a', '\r')
454        elem.set('b', '\r\n')
455        elem.set('c', '\t\n\r ')
456        elem.set('d', '\n\n\r\r\t\t  ')
457        self.assertEqual(ET.tostring(elem),
458                b'<test a="&#13;" b="&#13;&#10;" c="&#09;&#10;&#13; " d="&#10;&#10;&#13;&#13;&#09;&#09;  " />')
459
460    def test_makeelement(self):
461        # Test makeelement handling.
462
463        elem = ET.Element("tag")
464        attrib = {"key": "value"}
465        subelem = elem.makeelement("subtag", attrib)
466        self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing")
467        elem.append(subelem)
468        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
469
470        elem.clear()
471        self.serialize_check(elem, '<tag />')
472        elem.append(subelem)
473        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
474        elem.extend([subelem, subelem])
475        self.serialize_check(elem,
476            '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>')
477        elem[:] = [subelem]
478        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
479        elem[:] = tuple([subelem])
480        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
481
482    def test_parsefile(self):
483        # Test parsing from file.
484
485        tree = ET.parse(SIMPLE_XMLFILE)
486        stream = io.StringIO()
487        tree.write(stream, encoding='unicode')
488        self.assertEqual(stream.getvalue(),
489                '<root>\n'
490                '   <element key="value">text</element>\n'
491                '   <element>text</element>tail\n'
492                '   <empty-element />\n'
493                '</root>')
494        tree = ET.parse(SIMPLE_NS_XMLFILE)
495        stream = io.StringIO()
496        tree.write(stream, encoding='unicode')
497        self.assertEqual(stream.getvalue(),
498                '<ns0:root xmlns:ns0="namespace">\n'
499                '   <ns0:element key="value">text</ns0:element>\n'
500                '   <ns0:element>text</ns0:element>tail\n'
501                '   <ns0:empty-element />\n'
502                '</ns0:root>')
503
504        with open(SIMPLE_XMLFILE) as f:
505            data = f.read()
506
507        parser = ET.XMLParser()
508        self.assertRegex(parser.version, r'^Expat ')
509        parser.feed(data)
510        self.serialize_check(parser.close(),
511                '<root>\n'
512                '   <element key="value">text</element>\n'
513                '   <element>text</element>tail\n'
514                '   <empty-element />\n'
515                '</root>')
516
517        target = ET.TreeBuilder()
518        parser = ET.XMLParser(target=target)
519        parser.feed(data)
520        self.serialize_check(parser.close(),
521                '<root>\n'
522                '   <element key="value">text</element>\n'
523                '   <element>text</element>tail\n'
524                '   <empty-element />\n'
525                '</root>')
526
527    def test_parseliteral(self):
528        element = ET.XML("<html><body>text</body></html>")
529        self.assertEqual(ET.tostring(element, encoding='unicode'),
530                '<html><body>text</body></html>')
531        element = ET.fromstring("<html><body>text</body></html>")
532        self.assertEqual(ET.tostring(element, encoding='unicode'),
533                '<html><body>text</body></html>')
534        sequence = ["<html><body>", "text</bo", "dy></html>"]
535        element = ET.fromstringlist(sequence)
536        self.assertEqual(ET.tostring(element),
537                b'<html><body>text</body></html>')
538        self.assertEqual(b"".join(ET.tostringlist(element)),
539                b'<html><body>text</body></html>')
540        self.assertEqual(ET.tostring(element, "ascii"),
541                b"<?xml version='1.0' encoding='ascii'?>\n"
542                b"<html><body>text</body></html>")
543        _, ids = ET.XMLID("<html><body>text</body></html>")
544        self.assertEqual(len(ids), 0)
545        _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
546        self.assertEqual(len(ids), 1)
547        self.assertEqual(ids["body"].tag, 'body')
548
    def test_iterparse(self):
        # Test iterparse interface.

        iterparse = ET.iterparse

        # No events argument: only 'end' events are expected, and the
        # exhausted iterator exposes the root element.
        context = iterparse(SIMPLE_XMLFILE)
        action, elem = next(context)
        self.assertEqual((action, elem.tag), ('end', 'element'))
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('end', 'element'),
                ('end', 'empty-element'),
                ('end', 'root'),
            ])
        self.assertEqual(context.root.tag, 'root')

        # Namespaced file: tags are expected in '{namespace}name' form.
        context = iterparse(SIMPLE_NS_XMLFILE)
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('end', '{namespace}element'),
                ('end', '{namespace}element'),
                ('end', '{namespace}empty-element'),
                ('end', '{namespace}root'),
            ])

        # An empty events tuple, passed positionally, yields nothing.
        events = ()
        context = iterparse(SIMPLE_XMLFILE, events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [])

        # Same, passed by keyword.
        events = ()
        context = iterparse(SIMPLE_XMLFILE, events=events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [])

        events = ("start", "end")
        context = iterparse(SIMPLE_XMLFILE, events)
        self.assertEqual([(action, elem.tag) for action, elem in context], [
                ('start', 'root'),
                ('start', 'element'),
                ('end', 'element'),
                ('start', 'element'),
                ('end', 'element'),
                ('start', 'empty-element'),
                ('end', 'empty-element'),
                ('end', 'root'),
            ])

        # For namespace events the second item is not an element, so it is
        # compared as-is instead of through .tag.
        events = ("start", "end", "start-ns", "end-ns")
        context = iterparse(SIMPLE_NS_XMLFILE, events)
        self.assertEqual([(action, elem.tag) if action in ("start", "end")
                                             else (action, elem)
                          for action, elem in context], [
                ('start-ns', ('', 'namespace')),
                ('start', '{namespace}root'),
                ('start', '{namespace}element'),
                ('end', '{namespace}element'),
                ('start', '{namespace}element'),
                ('end', '{namespace}element'),
                ('start', '{namespace}empty-element'),
                ('end', '{namespace}empty-element'),
                ('end', '{namespace}root'),
                ('end-ns', None),
            ])

        # An empty default-namespace declaration still produces both
        # namespace events.
        events = ('start-ns', 'end-ns')
        context = iterparse(io.StringIO(r"<root xmlns=''/>"), events)
        res = [action for action, elem in context]
        self.assertEqual(res, ['start-ns', 'end-ns'])

        # An unknown event name raises ValueError, and a file object
        # supplied by the caller must be left open.
        events = ("start", "end", "bogus")
        with open(SIMPLE_XMLFILE, "rb") as f:
            with self.assertRaises(ValueError) as cm:
                iterparse(f, events)
            self.assertFalse(f.closed)
        self.assertEqual(str(cm.exception), "unknown event 'bogus'")

        # Same failure with a file name: no ResourceWarning may be emitted.
        with warnings_helper.check_no_resource_warning(self):
            with self.assertRaises(ValueError) as cm:
                iterparse(SIMPLE_XMLFILE, events)
            self.assertEqual(str(cm.exception), "unknown event 'bogus'")
            # NOTE(review): presumably drops the reference to the caught
            # exception before the warning check ends -- confirm.
            del cm

        # iso-8859-1 input with a non-ASCII character in a namespace URI
        # (as a character reference) and in a prefix (as a raw byte).
        source = io.BytesIO(
            b"<?xml version='1.0' encoding='iso-8859-1'?>\n"
            b"<body xmlns='http://&#233;ffbot.org/ns'\n"
            b"      xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
        events = ("start-ns",)
        context = iterparse(source, events)
        self.assertEqual([(action, elem) for action, elem in context], [
                ('start-ns', ('', 'http://\xe9ffbot.org/ns')),
                ('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
            ])

        # Trailing junk: the document element is delivered first, and the
        # ParseError surfaces on the following next().
        source = io.StringIO("<document />junk")
        it = iterparse(source)
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'document'))
        with self.assertRaises(ET.ParseError) as cm:
            next(it)
        self.assertEqual(str(cm.exception),
                'junk after document element: line 1, column 12')

        # Same input read from a file name; the failure must not emit a
        # ResourceWarning.
        self.addCleanup(os_helper.unlink, TESTFN)
        with open(TESTFN, "wb") as f:
            f.write(b"<document />junk")
        it = iterparse(TESTFN)
        action, elem = next(it)
        self.assertEqual((action, elem.tag), ('end', 'document'))
        with warnings_helper.check_no_resource_warning(self):
            with self.assertRaises(ET.ParseError) as cm:
                next(it)
            self.assertEqual(str(cm.exception),
                    'junk after document element: line 1, column 12')
            # NOTE(review): presumably releases the exception and the
            # iterator inside the warning check -- confirm.
            del cm, it
660
661    def test_writefile(self):
662        elem = ET.Element("tag")
663        elem.text = "text"
664        self.serialize_check(elem, '<tag>text</tag>')
665        ET.SubElement(elem, "subtag").text = "subtext"
666        self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>')
667
668        # Test tag suppression
669        elem.tag = None
670        self.serialize_check(elem, 'text<subtag>subtext</subtag>')
671        elem.insert(0, ET.Comment("comment"))
672        self.serialize_check(elem,
673                'text<!--comment--><subtag>subtext</subtag>')     # assumes 1.3
674
675        elem[0] = ET.PI("key", "value")
676        self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>')
677
678    def test_custom_builder(self):
679        # Test parser w. custom builder.
680
681        with open(SIMPLE_XMLFILE) as f:
682            data = f.read()
683        class Builder(list):
684            def start(self, tag, attrib):
685                self.append(("start", tag))
686            def end(self, tag):
687                self.append(("end", tag))
688            def data(self, text):
689                pass
690        builder = Builder()
691        parser = ET.XMLParser(target=builder)
692        parser.feed(data)
693        self.assertEqual(builder, [
694                ('start', 'root'),
695                ('start', 'element'),
696                ('end', 'element'),
697                ('start', 'element'),
698                ('end', 'element'),
699                ('start', 'empty-element'),
700                ('end', 'empty-element'),
701                ('end', 'root'),
702            ])
703
704        with open(SIMPLE_NS_XMLFILE) as f:
705            data = f.read()
706        class Builder(list):
707            def start(self, tag, attrib):
708                self.append(("start", tag))
709            def end(self, tag):
710                self.append(("end", tag))
711            def data(self, text):
712                pass
713            def pi(self, target, data):
714                self.append(("pi", target, data))
715            def comment(self, data):
716                self.append(("comment", data))
717            def start_ns(self, prefix, uri):
718                self.append(("start-ns", prefix, uri))
719            def end_ns(self, prefix):
720                self.append(("end-ns", prefix))
721        builder = Builder()
722        parser = ET.XMLParser(target=builder)
723        parser.feed(data)
724        self.assertEqual(builder, [
725                ('pi', 'pi', 'data'),
726                ('comment', ' comment '),
727                ('start-ns', '', 'namespace'),
728                ('start', '{namespace}root'),
729                ('start', '{namespace}element'),
730                ('end', '{namespace}element'),
731                ('start', '{namespace}element'),
732                ('end', '{namespace}element'),
733                ('start', '{namespace}empty-element'),
734                ('end', '{namespace}empty-element'),
735                ('end', '{namespace}root'),
736                ('end-ns', ''),
737            ])
738
739    def test_custom_builder_only_end_ns(self):
740        class Builder(list):
741            def end_ns(self, prefix):
742                self.append(("end-ns", prefix))
743
744        builder = Builder()
745        parser = ET.XMLParser(target=builder)
746        parser.feed(textwrap.dedent("""\
747            <?pi data?>
748            <!-- comment -->
749            <root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'>
750               <a:element key='value'>text</a:element>
751               <p:element>text</p:element>tail
752               <empty-element/>
753            </root>
754            """))
755        self.assertEqual(builder, [
756                ('end-ns', 'a'),
757                ('end-ns', 'p'),
758                ('end-ns', ''),
759            ])
760
761    def test_children(self):
762        # Test Element children iteration
763
764        with open(SIMPLE_XMLFILE, "rb") as f:
765            tree = ET.parse(f)
766        self.assertEqual([summarize_list(elem)
767                          for elem in tree.getroot().iter()], [
768                ['element', 'element', 'empty-element'],
769                [],
770                [],
771                [],
772            ])
773        self.assertEqual([summarize_list(elem)
774                          for elem in tree.iter()], [
775                ['element', 'element', 'empty-element'],
776                [],
777                [],
778                [],
779            ])
780
781        elem = ET.XML(SAMPLE_XML)
782        self.assertEqual(len(list(elem)), 3)
783        self.assertEqual(len(list(elem[2])), 1)
784        self.assertEqual(elem[:], list(elem))
785        child1 = elem[0]
786        child2 = elem[2]
787        del elem[1:2]
788        self.assertEqual(len(list(elem)), 2)
789        self.assertEqual(child1, elem[0])
790        self.assertEqual(child2, elem[1])
791        elem[0:2] = [child2, child1]
792        self.assertEqual(child2, elem[0])
793        self.assertEqual(child1, elem[1])
794        self.assertNotEqual(child1, elem[0])
795        elem.clear()
796        self.assertEqual(list(elem), [])
797
798    def test_writestring(self):
799        elem = ET.XML("<html><body>text</body></html>")
800        self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
801        elem = ET.fromstring("<html><body>text</body></html>")
802        self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
803
804    def test_indent(self):
805        elem = ET.XML("<root></root>")
806        ET.indent(elem)
807        self.assertEqual(ET.tostring(elem), b'<root />')
808
809        elem = ET.XML("<html><body>text</body></html>")
810        ET.indent(elem)
811        self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')
812
813        elem = ET.XML("<html> <body>text</body>  </html>")
814        ET.indent(elem)
815        self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')
816
817        elem = ET.XML("<html><body>text</body>tail</html>")
818        ET.indent(elem)
819        self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>tail</html>')
820
821        elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
822        ET.indent(elem)
823        self.assertEqual(
824            ET.tostring(elem),
825            b'<html>\n'
826            b'  <body>\n'
827            b'    <p>par</p>\n'
828            b'    <p>text</p>\n'
829            b'    <p>\n'
830            b'      <br />\n'
831            b'    </p>\n'
832            b'  </body>\n'
833            b'</html>'
834        )
835
836        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
837        ET.indent(elem)
838        self.assertEqual(
839            ET.tostring(elem),
840            b'<html>\n'
841            b'  <body>\n'
842            b'    <p>pre<br />post</p>\n'
843            b'    <p>text</p>\n'
844            b'  </body>\n'
845            b'</html>'
846        )
847
848    def test_indent_space(self):
849        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
850        ET.indent(elem, space='\t')
851        self.assertEqual(
852            ET.tostring(elem),
853            b'<html>\n'
854            b'\t<body>\n'
855            b'\t\t<p>pre<br />post</p>\n'
856            b'\t\t<p>text</p>\n'
857            b'\t</body>\n'
858            b'</html>'
859        )
860
861        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
862        ET.indent(elem, space='')
863        self.assertEqual(
864            ET.tostring(elem),
865            b'<html>\n'
866            b'<body>\n'
867            b'<p>pre<br />post</p>\n'
868            b'<p>text</p>\n'
869            b'</body>\n'
870            b'</html>'
871        )
872
873    def test_indent_space_caching(self):
874        elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
875        ET.indent(elem)
876        self.assertEqual(
877            {el.tail for el in elem.iter()},
878            {None, "\n", "\n  ", "\n    "}
879        )
880        self.assertEqual(
881            {el.text for el in elem.iter()},
882            {None, "\n  ", "\n    ", "\n      ", "par", "text"}
883        )
884        self.assertEqual(
885            len({el.tail for el in elem.iter()}),
886            len({id(el.tail) for el in elem.iter()}),
887        )
888
889    def test_indent_level(self):
890        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
891        with self.assertRaises(ValueError):
892            ET.indent(elem, level=-1)
893        self.assertEqual(
894            ET.tostring(elem),
895            b"<html><body><p>pre<br />post</p><p>text</p></body></html>"
896        )
897
898        ET.indent(elem, level=2)
899        self.assertEqual(
900            ET.tostring(elem),
901            b'<html>\n'
902            b'      <body>\n'
903            b'        <p>pre<br />post</p>\n'
904            b'        <p>text</p>\n'
905            b'      </body>\n'
906            b'    </html>'
907        )
908
909        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
910        ET.indent(elem, level=1, space=' ')
911        self.assertEqual(
912            ET.tostring(elem),
913            b'<html>\n'
914            b'  <body>\n'
915            b'   <p>pre<br />post</p>\n'
916            b'   <p>text</p>\n'
917            b'  </body>\n'
918            b' </html>'
919        )
920
921    def test_tostring_default_namespace(self):
922        elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
923        self.assertEqual(
924            ET.tostring(elem, encoding='unicode'),
925            '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
926        )
927        self.assertEqual(
928            ET.tostring(elem, encoding='unicode', default_namespace='http://effbot.org/ns'),
929            '<body xmlns="http://effbot.org/ns"><tag /></body>'
930        )
931
932    def test_tostring_default_namespace_different_namespace(self):
933        elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
934        self.assertEqual(
935            ET.tostring(elem, encoding='unicode', default_namespace='foobar'),
936            '<ns1:body xmlns="foobar" xmlns:ns1="http://effbot.org/ns"><ns1:tag /></ns1:body>'
937        )
938
939    def test_tostring_default_namespace_original_no_namespace(self):
940        elem = ET.XML('<body><tag/></body>')
941        EXPECTED_MSG = '^cannot use non-qualified names with default_namespace option$'
942        with self.assertRaisesRegex(ValueError, EXPECTED_MSG):
943            ET.tostring(elem, encoding='unicode', default_namespace='foobar')
944
945    def test_tostring_no_xml_declaration(self):
946        elem = ET.XML('<body><tag/></body>')
947        self.assertEqual(
948            ET.tostring(elem, encoding='unicode'),
949            '<body><tag /></body>'
950        )
951
952    def test_tostring_xml_declaration(self):
953        elem = ET.XML('<body><tag/></body>')
954        self.assertEqual(
955            ET.tostring(elem, encoding='utf8', xml_declaration=True),
956            b"<?xml version='1.0' encoding='utf8'?>\n<body><tag /></body>"
957        )
958
959    def test_tostring_xml_declaration_unicode_encoding(self):
960        elem = ET.XML('<body><tag/></body>')
961        preferredencoding = locale.getpreferredencoding()
962        self.assertEqual(
963            f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>",
964            ET.tostring(elem, encoding='unicode', xml_declaration=True)
965        )
966
967    def test_tostring_xml_declaration_cases(self):
968        elem = ET.XML('<body><tag>ø</tag></body>')
969        preferredencoding = locale.getpreferredencoding()
970        TESTCASES = [
971        #   (expected_retval,                  encoding, xml_declaration)
972            # ... xml_declaration = None
973            (b'<body><tag>&#248;</tag></body>', None, None),
974            (b'<body><tag>\xc3\xb8</tag></body>', 'UTF-8', None),
975            (b'<body><tag>&#248;</tag></body>', 'US-ASCII', None),
976            (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
977             b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', None),
978            ('<body><tag>ø</tag></body>', 'unicode', None),
979
980            # ... xml_declaration = False
981            (b"<body><tag>&#248;</tag></body>", None, False),
982            (b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', False),
983            (b"<body><tag>&#248;</tag></body>", 'US-ASCII', False),
984            (b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', False),
985            ("<body><tag>ø</tag></body>", 'unicode', False),
986
987            # ... xml_declaration = True
988            (b"<?xml version='1.0' encoding='us-ascii'?>\n"
989             b"<body><tag>&#248;</tag></body>", None, True),
990            (b"<?xml version='1.0' encoding='UTF-8'?>\n"
991             b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', True),
992            (b"<?xml version='1.0' encoding='US-ASCII'?>\n"
993             b"<body><tag>&#248;</tag></body>", 'US-ASCII', True),
994            (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
995             b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', True),
996            (f"<?xml version='1.0' encoding='{preferredencoding}'?>\n"
997             "<body><tag>ø</tag></body>", 'unicode', True),
998
999        ]
1000        for expected_retval, encoding, xml_declaration in TESTCASES:
1001            with self.subTest(f'encoding={encoding} '
1002                              f'xml_declaration={xml_declaration}'):
1003                self.assertEqual(
1004                    ET.tostring(
1005                        elem,
1006                        encoding=encoding,
1007                        xml_declaration=xml_declaration
1008                    ),
1009                    expected_retval
1010                )
1011
1012    def test_tostringlist_default_namespace(self):
1013        elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
1014        self.assertEqual(
1015            ''.join(ET.tostringlist(elem, encoding='unicode')),
1016            '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
1017        )
1018        self.assertEqual(
1019            ''.join(ET.tostringlist(elem, encoding='unicode', default_namespace='http://effbot.org/ns')),
1020            '<body xmlns="http://effbot.org/ns"><tag /></body>'
1021        )
1022
1023    def test_tostringlist_xml_declaration(self):
1024        elem = ET.XML('<body><tag/></body>')
1025        self.assertEqual(
1026            ''.join(ET.tostringlist(elem, encoding='unicode')),
1027            '<body><tag /></body>'
1028        )
1029        self.assertEqual(
1030            b''.join(ET.tostringlist(elem, xml_declaration=True)),
1031            b"<?xml version='1.0' encoding='us-ascii'?>\n<body><tag /></body>"
1032        )
1033
1034        preferredencoding = locale.getpreferredencoding()
1035        stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True)
1036        self.assertEqual(
1037            ''.join(stringlist),
1038            f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>"
1039        )
1040        self.assertRegex(stringlist[0], r"^<\?xml version='1.0' encoding='.+'?>")
1041        self.assertEqual(['<body', '>', '<tag', ' />', '</body>'], stringlist[1:])
1042
1043    def test_encoding(self):
1044        def check(encoding, body=''):
1045            xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" %
1046                   (encoding, body))
1047            self.assertEqual(ET.XML(xml.encode(encoding)).text, body)
1048            self.assertEqual(ET.XML(xml).text, body)
1049        check("ascii", 'a')
1050        check("us-ascii", 'a')
1051        check("iso-8859-1", '\xbd')
1052        check("iso-8859-15", '\u20ac')
1053        check("cp437", '\u221a')
1054        check("mac-roman", '\u02da')
1055
1056        def xml(encoding):
1057            return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
1058        def bxml(encoding):
1059            return xml(encoding).encode(encoding)
1060        supported_encodings = [
1061            'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
1062            'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
1063            'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
1064            'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
1065            'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
1066            'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
1067            'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
1068            'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
1069            'cp1256', 'cp1257', 'cp1258',
1070            'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
1071            'mac-roman', 'mac-turkish',
1072            'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
1073            'iso2022-jp-3', 'iso2022-jp-ext',
1074            'koi8-r', 'koi8-t', 'koi8-u', 'kz1048',
1075            'hz', 'ptcp154',
1076        ]
1077        for encoding in supported_encodings:
1078            self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
1079
1080        unsupported_ascii_compatible_encodings = [
1081            'big5', 'big5hkscs',
1082            'cp932', 'cp949', 'cp950',
1083            'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
1084            'gb2312', 'gbk', 'gb18030',
1085            'iso2022-kr', 'johab',
1086            'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
1087            'utf-7',
1088        ]
1089        for encoding in unsupported_ascii_compatible_encodings:
1090            self.assertRaises(ValueError, ET.XML, bxml(encoding))
1091
1092        unsupported_ascii_incompatible_encodings = [
1093            'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
1094            'utf_32', 'utf_32_be', 'utf_32_le',
1095        ]
1096        for encoding in unsupported_ascii_incompatible_encodings:
1097            self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
1098
1099        self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
1100        self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
1101
1102    def test_methods(self):
1103        # Test serialization methods.
1104
1105        e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
1106        e.tail = "\n"
1107        self.assertEqual(serialize(e),
1108                '<html><link /><script>1 &lt; 2</script></html>\n')
1109        self.assertEqual(serialize(e, method=None),
1110                '<html><link /><script>1 &lt; 2</script></html>\n')
1111        self.assertEqual(serialize(e, method="xml"),
1112                '<html><link /><script>1 &lt; 2</script></html>\n')
1113        self.assertEqual(serialize(e, method="html"),
1114                '<html><link><script>1 < 2</script></html>\n')
1115        self.assertEqual(serialize(e, method="text"), '1 < 2\n')
1116
1117    def test_issue18347(self):
1118        e = ET.XML('<html><CamelCase>text</CamelCase></html>')
1119        self.assertEqual(serialize(e),
1120                '<html><CamelCase>text</CamelCase></html>')
1121        self.assertEqual(serialize(e, method="html"),
1122                '<html><CamelCase>text</CamelCase></html>')
1123
1124    def test_entity(self):
1125        # Test entity handling.
1126
1127        # 1) good entities
1128
1129        e = ET.XML("<document title='&#x8230;'>test</document>")
1130        self.assertEqual(serialize(e, encoding="us-ascii"),
1131                b'<document title="&#33328;">test</document>')
1132        self.serialize_check(e, '<document title="\u8230">test</document>')
1133
1134        # 2) bad entities
1135
1136        with self.assertRaises(ET.ParseError) as cm:
1137            ET.XML("<document>&entity;</document>")
1138        self.assertEqual(str(cm.exception),
1139                'undefined entity: line 1, column 10')
1140
1141        with self.assertRaises(ET.ParseError) as cm:
1142            ET.XML(ENTITY_XML)
1143        self.assertEqual(str(cm.exception),
1144                'undefined entity &entity;: line 5, column 10')
1145
1146        # 3) custom entity
1147
1148        parser = ET.XMLParser()
1149        parser.entity["entity"] = "text"
1150        parser.feed(ENTITY_XML)
1151        root = parser.close()
1152        self.serialize_check(root, '<document>text</document>')
1153
1154        # 4) external (SYSTEM) entity
1155
1156        with self.assertRaises(ET.ParseError) as cm:
1157            ET.XML(EXTERNAL_ENTITY_XML)
1158        self.assertEqual(str(cm.exception),
1159                'undefined entity &entity;: line 4, column 10')
1160
1161    def test_namespace(self):
1162        # Test namespace issues.
1163
1164        # 1) xml namespace
1165
1166        elem = ET.XML("<tag xml:lang='en' />")
1167        self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1
1168
1169        # 2) other "well-known" namespaces
1170
1171        elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
1172        self.serialize_check(elem,
1173            '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1
1174
1175        elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
1176        self.serialize_check(elem,
1177            '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2
1178
1179        elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
1180        self.serialize_check(elem,
1181            '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3
1182
1183        # 3) unknown namespaces
1184        elem = ET.XML(SAMPLE_XML_NS)
1185        self.serialize_check(elem,
1186            '<ns0:body xmlns:ns0="http://effbot.org/ns">\n'
1187            '  <ns0:tag>text</ns0:tag>\n'
1188            '  <ns0:tag />\n'
1189            '  <ns0:section>\n'
1190            '    <ns0:tag>subtext</ns0:tag>\n'
1191            '  </ns0:section>\n'
1192            '</ns0:body>')
1193
1194    def test_qname(self):
1195        # Test QName handling.
1196
1197        # 1) decorated tags
1198
1199        elem = ET.Element("{uri}tag")
1200        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1
1201        elem = ET.Element(ET.QName("{uri}tag"))
1202        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2
1203        elem = ET.Element(ET.QName("uri", "tag"))
1204        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3
1205        elem = ET.Element(ET.QName("uri", "tag"))
1206        subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
1207        subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
1208        self.serialize_check(elem,
1209            '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4
1210
1211        # 2) decorated attributes
1212
1213        elem.clear()
1214        elem.attrib["{uri}key"] = "value"
1215        self.serialize_check(elem,
1216            '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1
1217
1218        elem.clear()
1219        elem.attrib[ET.QName("{uri}key")] = "value"
1220        self.serialize_check(elem,
1221            '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2
1222
1223        # 3) decorated values are not converted by default, but the
1224        # QName wrapper can be used for values
1225
1226        elem.clear()
1227        elem.attrib["{uri}key"] = "{uri}value"
1228        self.serialize_check(elem,
1229            '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1
1230
1231        elem.clear()
1232        elem.attrib["{uri}key"] = ET.QName("{uri}value")
1233        self.serialize_check(elem,
1234            '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2
1235
1236        elem.clear()
1237        subelem = ET.Element("tag")
1238        subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
1239        elem.append(subelem)
1240        elem.append(subelem)
1241        self.serialize_check(elem,
1242            '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">'
1243            '<tag ns1:key="ns2:value" />'
1244            '<tag ns1:key="ns2:value" />'
1245            '</ns0:tag>') # 3.3
1246
1247        # 4) Direct QName tests
1248
1249        self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
1250        self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
1251        q1 = ET.QName('ns', 'tag')
1252        q2 = ET.QName('ns', 'tag')
1253        self.assertEqual(q1, q2)
1254        q2 = ET.QName('ns', 'other-tag')
1255        self.assertNotEqual(q1, q2)
1256        self.assertNotEqual(q1, 'ns:tag')
1257        self.assertEqual(q1, '{ns}tag')
1258
1259    def test_doctype_public(self):
1260        # Test PUBLIC doctype.
1261
1262        elem = ET.XML('<!DOCTYPE html PUBLIC'
1263                ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
1264                ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
1265                '<html>text</html>')
1266
1267    def test_xpath_tokenizer(self):
1268        # Test the XPath tokenizer.
1269        from xml.etree import ElementPath
1270        def check(p, expected, namespaces=None):
1271            self.assertEqual([op or tag
1272                              for op, tag in ElementPath.xpath_tokenizer(p, namespaces)],
1273                             expected)
1274
1275        # tests from the xml specification
1276        check("*", ['*'])
1277        check("text()", ['text', '()'])
1278        check("@name", ['@', 'name'])
1279        check("@*", ['@', '*'])
1280        check("para[1]", ['para', '[', '1', ']'])
1281        check("para[last()]", ['para', '[', 'last', '()', ']'])
1282        check("*/para", ['*', '/', 'para'])
1283        check("/doc/chapter[5]/section[2]",
1284              ['/', 'doc', '/', 'chapter', '[', '5', ']',
1285               '/', 'section', '[', '2', ']'])
1286        check("chapter//para", ['chapter', '//', 'para'])
1287        check("//para", ['//', 'para'])
1288        check("//olist/item", ['//', 'olist', '/', 'item'])
1289        check(".", ['.'])
1290        check(".//para", ['.', '//', 'para'])
1291        check("..", ['..'])
1292        check("../@lang", ['..', '/', '@', 'lang'])
1293        check("chapter[title]", ['chapter', '[', 'title', ']'])
1294        check("employee[@secretary and @assistant]", ['employee',
1295              '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
1296
1297        # additional tests
1298        check("@{ns}attr", ['@', '{ns}attr'])
1299        check("{http://spam}egg", ['{http://spam}egg'])
1300        check("./spam.egg", ['.', '/', 'spam.egg'])
1301        check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
1302
1303        # wildcard tags
1304        check("{ns}*", ['{ns}*'])
1305        check("{}*", ['{}*'])
1306        check("{*}tag", ['{*}tag'])
1307        check("{*}*", ['{*}*'])
1308        check(".//{*}tag", ['.', '//', '{*}tag'])
1309
1310        # namespace prefix resolution
1311        check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'],
1312              {'xsd': 'http://www.w3.org/2001/XMLSchema'})
1313        check("type", ['{http://www.w3.org/2001/XMLSchema}type'],
1314              {'': 'http://www.w3.org/2001/XMLSchema'})
1315        check("@xsd:type", ['@', '{http://www.w3.org/2001/XMLSchema}type'],
1316              {'xsd': 'http://www.w3.org/2001/XMLSchema'})
1317        check("@type", ['@', 'type'],
1318              {'': 'http://www.w3.org/2001/XMLSchema'})
1319        check("@{*}type", ['@', '{*}type'],
1320              {'': 'http://www.w3.org/2001/XMLSchema'})
1321        check("@{ns}attr", ['@', '{ns}attr'],
1322              {'': 'http://www.w3.org/2001/XMLSchema',
1323               'ns': 'http://www.w3.org/2001/XMLSchema'})
1324
1325    def test_processinginstruction(self):
1326        # Test ProcessingInstruction directly
1327
1328        self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
1329                b'<?test instruction?>')
1330        self.assertEqual(ET.tostring(ET.PI('test', 'instruction')),
1331                b'<?test instruction?>')
1332
1333        # Issue #2746
1334
1335        self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
1336                b'<?test <testing&>?>')
1337        self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
1338                b"<?xml version='1.0' encoding='latin-1'?>\n"
1339                b"<?test <testing&>\xe3?>")
1340
1341    def test_html_empty_elems_serialization(self):
1342        # issue 15970
1343        # from http://www.w3.org/TR/html401/index/elements.html
1344        for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
1345                        'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']:
1346            for elem in [element, element.lower()]:
1347                expected = '<%s>' % elem
1348                serialized = serialize(ET.XML('<%s />' % elem), method='html')
1349                self.assertEqual(serialized, expected)
1350                serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)),
1351                                       method='html')
1352                self.assertEqual(serialized, expected)
1353
1354    def test_dump_attribute_order(self):
1355        # See BPO 34160
1356        e = ET.Element('cirriculum', status='public', company='example')
1357        with support.captured_stdout() as stdout:
1358            ET.dump(e)
1359        self.assertEqual(stdout.getvalue(),
1360                         '<cirriculum status="public" company="example" />\n')
1361
1362    def test_tree_write_attribute_order(self):
1363        # See BPO 34160
1364        root = ET.Element('cirriculum', status='public', company='example')
1365        self.assertEqual(serialize(root),
1366                         '<cirriculum status="public" company="example" />')
1367        self.assertEqual(serialize(root, method='html'),
1368                '<cirriculum status="public" company="example"></cirriculum>')
1369
1370    def test_attlist_default(self):
1371        # Test default attribute values; See BPO 42151.
1372        root = ET.fromstring(ATTLIST_XML)
1373        self.assertEqual(root[0].attrib,
1374                         {'{http://www.w3.org/XML/1998/namespace}lang': 'eng'})
1375
1376
1377class XMLPullParserTest(unittest.TestCase):
1378
1379    def _feed(self, parser, data, chunk_size=None):
1380        if chunk_size is None:
1381            parser.feed(data)
1382        else:
1383            for i in range(0, len(data), chunk_size):
1384                parser.feed(data[i:i+chunk_size])
1385
1386    def assert_events(self, parser, expected, max_events=None):
1387        self.assertEqual(
1388            [(event, (elem.tag, elem.text))
1389             for event, elem in islice(parser.read_events(), max_events)],
1390            expected)
1391
1392    def assert_event_tuples(self, parser, expected, max_events=None):
1393        self.assertEqual(
1394            list(islice(parser.read_events(), max_events)),
1395            expected)
1396
1397    def assert_event_tags(self, parser, expected, max_events=None):
1398        events = islice(parser.read_events(), max_events)
1399        self.assertEqual([(action, elem.tag) for action, elem in events],
1400                         expected)
1401
1402    def test_simple_xml(self):
1403        for chunk_size in (None, 1, 5):
1404            with self.subTest(chunk_size=chunk_size):
1405                parser = ET.XMLPullParser()
1406                self.assert_event_tags(parser, [])
1407                self._feed(parser, "<!-- comment -->\n", chunk_size)
1408                self.assert_event_tags(parser, [])
1409                self._feed(parser,
1410                           "<root>\n  <element key='value'>text</element",
1411                           chunk_size)
1412                self.assert_event_tags(parser, [])
1413                self._feed(parser, ">\n", chunk_size)
1414                self.assert_event_tags(parser, [('end', 'element')])
1415                self._feed(parser, "<element>text</element>tail\n", chunk_size)
1416                self._feed(parser, "<empty-element/>\n", chunk_size)
1417                self.assert_event_tags(parser, [
1418                    ('end', 'element'),
1419                    ('end', 'empty-element'),
1420                    ])
1421                self._feed(parser, "</root>\n", chunk_size)
1422                self.assert_event_tags(parser, [('end', 'root')])
1423                self.assertIsNone(parser.close())
1424
1425    def test_feed_while_iterating(self):
1426        parser = ET.XMLPullParser()
1427        it = parser.read_events()
1428        self._feed(parser, "<root>\n  <element key='value'>text</element>\n")
1429        action, elem = next(it)
1430        self.assertEqual((action, elem.tag), ('end', 'element'))
1431        self._feed(parser, "</root>\n")
1432        action, elem = next(it)
1433        self.assertEqual((action, elem.tag), ('end', 'root'))
1434        with self.assertRaises(StopIteration):
1435            next(it)
1436
1437    def test_simple_xml_with_ns(self):
1438        parser = ET.XMLPullParser()
1439        self.assert_event_tags(parser, [])
1440        self._feed(parser, "<!-- comment -->\n")
1441        self.assert_event_tags(parser, [])
1442        self._feed(parser, "<root xmlns='namespace'>\n")
1443        self.assert_event_tags(parser, [])
1444        self._feed(parser, "<element key='value'>text</element")
1445        self.assert_event_tags(parser, [])
1446        self._feed(parser, ">\n")
1447        self.assert_event_tags(parser, [('end', '{namespace}element')])
1448        self._feed(parser, "<element>text</element>tail\n")
1449        self._feed(parser, "<empty-element/>\n")
1450        self.assert_event_tags(parser, [
1451            ('end', '{namespace}element'),
1452            ('end', '{namespace}empty-element'),
1453            ])
1454        self._feed(parser, "</root>\n")
1455        self.assert_event_tags(parser, [('end', '{namespace}root')])
1456        self.assertIsNone(parser.close())
1457
1458    def test_ns_events(self):
1459        parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
1460        self._feed(parser, "<!-- comment -->\n")
1461        self._feed(parser, "<root xmlns='namespace'>\n")
1462        self.assertEqual(
1463            list(parser.read_events()),
1464            [('start-ns', ('', 'namespace'))])
1465        self._feed(parser, "<element key='value'>text</element")
1466        self._feed(parser, ">\n")
1467        self._feed(parser, "<element>text</element>tail\n")
1468        self._feed(parser, "<empty-element/>\n")
1469        self._feed(parser, "</root>\n")
1470        self.assertEqual(list(parser.read_events()), [('end-ns', None)])
1471        self.assertIsNone(parser.close())
1472
1473    def test_ns_events_start(self):
1474        parser = ET.XMLPullParser(events=('start-ns', 'start', 'end'))
1475        self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
1476        self.assert_event_tuples(parser, [
1477            ('start-ns', ('', 'abc')),
1478            ('start-ns', ('p', 'xyz')),
1479        ], max_events=2)
1480        self.assert_event_tags(parser, [
1481            ('start', '{abc}tag'),
1482        ], max_events=1)
1483
1484        self._feed(parser, "<child />\n")
1485        self.assert_event_tags(parser, [
1486            ('start', '{abc}child'),
1487            ('end', '{abc}child'),
1488        ])
1489
1490        self._feed(parser, "</tag>\n")
1491        parser.close()
1492        self.assert_event_tags(parser, [
1493            ('end', '{abc}tag'),
1494        ])
1495
1496    def test_ns_events_start_end(self):
1497        parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns'))
1498        self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
1499        self.assert_event_tuples(parser, [
1500            ('start-ns', ('', 'abc')),
1501            ('start-ns', ('p', 'xyz')),
1502        ], max_events=2)
1503        self.assert_event_tags(parser, [
1504            ('start', '{abc}tag'),
1505        ], max_events=1)
1506
1507        self._feed(parser, "<child />\n")
1508        self.assert_event_tags(parser, [
1509            ('start', '{abc}child'),
1510            ('end', '{abc}child'),
1511        ])
1512
1513        self._feed(parser, "</tag>\n")
1514        parser.close()
1515        self.assert_event_tags(parser, [
1516            ('end', '{abc}tag'),
1517        ], max_events=1)
1518        self.assert_event_tuples(parser, [
1519            ('end-ns', None),
1520            ('end-ns', None),
1521        ])
1522
1523    def test_events(self):
1524        parser = ET.XMLPullParser(events=())
1525        self._feed(parser, "<root/>\n")
1526        self.assert_event_tags(parser, [])
1527
1528        parser = ET.XMLPullParser(events=('start', 'end'))
1529        self._feed(parser, "<!-- text here -->\n")
1530        self.assert_events(parser, [])
1531
1532        parser = ET.XMLPullParser(events=('start', 'end'))
1533        self._feed(parser, "<root>\n")
1534        self.assert_event_tags(parser, [('start', 'root')])
1535        self._feed(parser, "<element key='value'>text</element")
1536        self.assert_event_tags(parser, [('start', 'element')])
1537        self._feed(parser, ">\n")
1538        self.assert_event_tags(parser, [('end', 'element')])
1539        self._feed(parser,
1540                   "<element xmlns='foo'>text<empty-element/></element>tail\n")
1541        self.assert_event_tags(parser, [
1542            ('start', '{foo}element'),
1543            ('start', '{foo}empty-element'),
1544            ('end', '{foo}empty-element'),
1545            ('end', '{foo}element'),
1546            ])
1547        self._feed(parser, "</root>")
1548        self.assertIsNone(parser.close())
1549        self.assert_event_tags(parser, [('end', 'root')])
1550
1551        parser = ET.XMLPullParser(events=('start',))
1552        self._feed(parser, "<!-- comment -->\n")
1553        self.assert_event_tags(parser, [])
1554        self._feed(parser, "<root>\n")
1555        self.assert_event_tags(parser, [('start', 'root')])
1556        self._feed(parser, "<element key='value'>text</element")
1557        self.assert_event_tags(parser, [('start', 'element')])
1558        self._feed(parser, ">\n")
1559        self.assert_event_tags(parser, [])
1560        self._feed(parser,
1561                   "<element xmlns='foo'>text<empty-element/></element>tail\n")
1562        self.assert_event_tags(parser, [
1563            ('start', '{foo}element'),
1564            ('start', '{foo}empty-element'),
1565            ])
1566        self._feed(parser, "</root>")
1567        self.assertIsNone(parser.close())
1568
1569    def test_events_comment(self):
1570        parser = ET.XMLPullParser(events=('start', 'comment', 'end'))
1571        self._feed(parser, "<!-- text here -->\n")
1572        self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
1573        self._feed(parser, "<!-- more text here -->\n")
1574        self.assert_events(parser, [('comment', (ET.Comment, ' more text here '))])
1575        self._feed(parser, "<root-tag>text")
1576        self.assert_event_tags(parser, [('start', 'root-tag')])
1577        self._feed(parser, "<!-- inner comment-->\n")
1578        self.assert_events(parser, [('comment', (ET.Comment, ' inner comment'))])
1579        self._feed(parser, "</root-tag>\n")
1580        self.assert_event_tags(parser, [('end', 'root-tag')])
1581        self._feed(parser, "<!-- outer comment -->\n")
1582        self.assert_events(parser, [('comment', (ET.Comment, ' outer comment '))])
1583
1584        parser = ET.XMLPullParser(events=('comment',))
1585        self._feed(parser, "<!-- text here -->\n")
1586        self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
1587
1588    def test_events_pi(self):
1589        parser = ET.XMLPullParser(events=('start', 'pi', 'end'))
1590        self._feed(parser, "<?pitarget?>\n")
1591        self.assert_events(parser, [('pi', (ET.PI, 'pitarget'))])
1592        parser = ET.XMLPullParser(events=('pi',))
1593        self._feed(parser, "<?pitarget some text ?>\n")
1594        self.assert_events(parser, [('pi', (ET.PI, 'pitarget some text '))])
1595
1596    def test_events_sequence(self):
1597        # Test that events can be some sequence that's not just a tuple or list
1598        eventset = {'end', 'start'}
1599        parser = ET.XMLPullParser(events=eventset)
1600        self._feed(parser, "<foo>bar</foo>")
1601        self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
1602
1603        class DummyIter:
1604            def __init__(self):
1605                self.events = iter(['start', 'end', 'start-ns'])
1606            def __iter__(self):
1607                return self
1608            def __next__(self):
1609                return next(self.events)
1610
1611        parser = ET.XMLPullParser(events=DummyIter())
1612        self._feed(parser, "<foo>bar</foo>")
1613        self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
1614
1615    def test_unknown_event(self):
1616        with self.assertRaises(ValueError):
1617            ET.XMLPullParser(events=('start', 'end', 'bogus'))
1618
1619
#
# XInclude test fixtures.
#
# XINCLUDE maps an href to the text of the resource it names, so that
# XIncludeTest.xinclude_loader can serve includes from memory.  The C*.xml
# documents are based on the samples in appendix C of the XInclude
# specification; the remaining entries are the resources they include or
# local variations used by individual tests.

XINCLUDE = {}

# C.1: basic inclusion of another XML document.
XINCLUDE["C1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml"/>
</document>
"""

# Resource included by C1.xml.
XINCLUDE["disclaimer.xml"] = """\
<?xml version='1.0'?>
<disclaimer>
  <p>The opinions represented herein represent those of the individual
  and should not be interpreted as official policy endorsed by this
  organization.</p>
</disclaimer>
"""

# C.2: textual inclusion (parse="text").
XINCLUDE["C2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been accessed
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

# Text resource included by C2.xml and C2b.xml.
XINCLUDE["count.txt"] = "324387"

# Variation of C.2 where the xi:include follows a sibling element (<em>).
XINCLUDE["C2b.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been <em>accessed</em>
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

# C.3: textual inclusion of a resource that happens to be XML.
XINCLUDE["C3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source of the "data.xml" resource:</p>
  <example><xi:include href="data.xml" parse="text"/></example>
</document>
"""

# Resource included (as text) by C3.xml.
XINCLUDE["data.xml"] = """\
<?xml version='1.0'?>
<data>
  <item><![CDATA[Brooks & Shields]]></item>
</data>
"""

# C.5: nested xi:fallback elements.  Neither example.txt nor
# fallback-example.txt is registered in XINCLUDE.
XINCLUDE["C5.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
  <xi:include href="example.txt" parse="text">
    <xi:fallback>
      <xi:include href="fallback-example.txt" parse="text">
        <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
      </xi:include>
    </xi:fallback>
  </xi:include>
</div>
"""

# Includes the on-disk SIMPLE_XMLFILE; the path is escaped (quotes included)
# because it is substituted into an XML attribute value.
XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>Example.</p>
  <xi:include href="{}"/>
</document>
""".format(html.escape(SIMPLE_XMLFILE, True))

# Includes C1.xml four times in a row (repetition, not recursion).
XINCLUDE["include_c1_repeated.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive1.xml:</p>
  <xi:include href="C1.xml"/>
  <xi:include href="C1.xml"/>
  <xi:include href="C1.xml"/>
  <xi:include href="C1.xml"/>
</document>
"""
1706
#
# Malformed XInclude markup.  These documents are parsed directly from
# XINCLUDE_BAD by test_xinclude_failures.

XINCLUDE_BAD = {}

# B1: xi:include with an unsupported value for the "parse" attribute.
XINCLUDE_BAD["B1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
"""

# B2: xi:fallback that is not a child of xi:include.
XINCLUDE_BAD["B2.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
    <xi:fallback></xi:fallback>
</div>
"""

# Include cycle: Recursive1 -> Recursive2 -> Recursive3 -> Recursive1.
# Note that these are registered in XINCLUDE (not XINCLUDE_BAD) so that
# XIncludeTest.xinclude_loader can resolve each step of the cycle.
XINCLUDE["Recursive1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive2.xml:</p>
  <xi:include href="Recursive2.xml"/>
</document>
"""

XINCLUDE["Recursive2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive3.xml:</p>
  <xi:include href="Recursive3.xml"/>
</document>
"""

XINCLUDE["Recursive3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive1.xml:</p>
  <xi:include href="Recursive1.xml"/>
</document>
"""
1750
1751
1752class XIncludeTest(unittest.TestCase):
1753
1754    def xinclude_loader(self, href, parse="xml", encoding=None):
1755        try:
1756            data = XINCLUDE[href]
1757        except KeyError:
1758            raise OSError("resource not found")
1759        if parse == "xml":
1760            data = ET.XML(data)
1761        return data
1762
1763    def none_loader(self, href, parser, encoding=None):
1764        return None
1765
1766    def _my_loader(self, href, parse):
1767        # Used to avoid a test-dependency problem where the default loader
1768        # of ElementInclude uses the pyET parser for cET tests.
1769        if parse == 'xml':
1770            with open(href, 'rb') as f:
1771                return ET.parse(f).getroot()
1772        else:
1773            return None
1774
1775    def test_xinclude_default(self):
1776        from xml.etree import ElementInclude
1777        doc = self.xinclude_loader('default.xml')
1778        ElementInclude.include(doc, self._my_loader)
1779        self.assertEqual(serialize(doc),
1780            '<document>\n'
1781            '  <p>Example.</p>\n'
1782            '  <root>\n'
1783            '   <element key="value">text</element>\n'
1784            '   <element>text</element>tail\n'
1785            '   <empty-element />\n'
1786            '</root>\n'
1787            '</document>')
1788
1789    def test_xinclude(self):
1790        from xml.etree import ElementInclude
1791
1792        # Basic inclusion example (XInclude C.1)
1793        document = self.xinclude_loader("C1.xml")
1794        ElementInclude.include(document, self.xinclude_loader)
1795        self.assertEqual(serialize(document),
1796            '<document>\n'
1797            '  <p>120 Mz is adequate for an average home user.</p>\n'
1798            '  <disclaimer>\n'
1799            '  <p>The opinions represented herein represent those of the individual\n'
1800            '  and should not be interpreted as official policy endorsed by this\n'
1801            '  organization.</p>\n'
1802            '</disclaimer>\n'
1803            '</document>') # C1
1804
1805        # Textual inclusion example (XInclude C.2)
1806        document = self.xinclude_loader("C2.xml")
1807        ElementInclude.include(document, self.xinclude_loader)
1808        self.assertEqual(serialize(document),
1809            '<document>\n'
1810            '  <p>This document has been accessed\n'
1811            '  324387 times.</p>\n'
1812            '</document>') # C2
1813
1814        # Textual inclusion after sibling element (based on modified XInclude C.2)
1815        document = self.xinclude_loader("C2b.xml")
1816        ElementInclude.include(document, self.xinclude_loader)
1817        self.assertEqual(serialize(document),
1818            '<document>\n'
1819            '  <p>This document has been <em>accessed</em>\n'
1820            '  324387 times.</p>\n'
1821            '</document>') # C2b
1822
1823        # Textual inclusion of XML example (XInclude C.3)
1824        document = self.xinclude_loader("C3.xml")
1825        ElementInclude.include(document, self.xinclude_loader)
1826        self.assertEqual(serialize(document),
1827            '<document>\n'
1828            '  <p>The following is the source of the "data.xml" resource:</p>\n'
1829            "  <example>&lt;?xml version='1.0'?&gt;\n"
1830            '&lt;data&gt;\n'
1831            '  &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;\n'
1832            '&lt;/data&gt;\n'
1833            '</example>\n'
1834            '</document>') # C3
1835
1836        # Fallback example (XInclude C.5)
1837        # Note! Fallback support is not yet implemented
1838        document = self.xinclude_loader("C5.xml")
1839        with self.assertRaises(OSError) as cm:
1840            ElementInclude.include(document, self.xinclude_loader)
1841        self.assertEqual(str(cm.exception), 'resource not found')
1842        self.assertEqual(serialize(document),
1843            '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n'
1844            '  <ns0:include href="example.txt" parse="text">\n'
1845            '    <ns0:fallback>\n'
1846            '      <ns0:include href="fallback-example.txt" parse="text">\n'
1847            '        <ns0:fallback><a href="mailto:bob@example.org">Report error</a></ns0:fallback>\n'
1848            '      </ns0:include>\n'
1849            '    </ns0:fallback>\n'
1850            '  </ns0:include>\n'
1851            '</div>') # C5
1852
1853    def test_xinclude_repeated(self):
1854        from xml.etree import ElementInclude
1855
1856        document = self.xinclude_loader("include_c1_repeated.xml")
1857        ElementInclude.include(document, self.xinclude_loader)
1858        self.assertEqual(1+4*2, len(document.findall(".//p")))
1859
1860    def test_xinclude_failures(self):
1861        from xml.etree import ElementInclude
1862
1863        # Test failure to locate included XML file.
1864        document = ET.XML(XINCLUDE["C1.xml"])
1865        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1866            ElementInclude.include(document, loader=self.none_loader)
1867        self.assertEqual(str(cm.exception),
1868                "cannot load 'disclaimer.xml' as 'xml'")
1869
1870        # Test failure to locate included text file.
1871        document = ET.XML(XINCLUDE["C2.xml"])
1872        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1873            ElementInclude.include(document, loader=self.none_loader)
1874        self.assertEqual(str(cm.exception),
1875                "cannot load 'count.txt' as 'text'")
1876
1877        # Test bad parse type.
1878        document = ET.XML(XINCLUDE_BAD["B1.xml"])
1879        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1880            ElementInclude.include(document, loader=self.none_loader)
1881        self.assertEqual(str(cm.exception),
1882                "unknown parse type in xi:include tag ('BAD_TYPE')")
1883
1884        # Test xi:fallback outside xi:include.
1885        document = ET.XML(XINCLUDE_BAD["B2.xml"])
1886        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1887            ElementInclude.include(document, loader=self.none_loader)
1888        self.assertEqual(str(cm.exception),
1889                "xi:fallback tag must be child of xi:include "
1890                "('{http://www.w3.org/2001/XInclude}fallback')")
1891
1892        # Test infinitely recursive includes.
1893        document = self.xinclude_loader("Recursive1.xml")
1894        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1895            ElementInclude.include(document, self.xinclude_loader)
1896        self.assertEqual(str(cm.exception),
1897                "recursive include of Recursive2.xml")
1898
1899        # Test 'max_depth' limitation.
1900        document = self.xinclude_loader("Recursive1.xml")
1901        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1902            ElementInclude.include(document, self.xinclude_loader, max_depth=None)
1903        self.assertEqual(str(cm.exception),
1904                "recursive include of Recursive2.xml")
1905
1906        document = self.xinclude_loader("Recursive1.xml")
1907        with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
1908            ElementInclude.include(document, self.xinclude_loader, max_depth=0)
1909        self.assertEqual(str(cm.exception),
1910                "maximum xinclude depth reached when including file Recursive2.xml")
1911
1912        document = self.xinclude_loader("Recursive1.xml")
1913        with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
1914            ElementInclude.include(document, self.xinclude_loader, max_depth=1)
1915        self.assertEqual(str(cm.exception),
1916                "maximum xinclude depth reached when including file Recursive3.xml")
1917
1918        document = self.xinclude_loader("Recursive1.xml")
1919        with self.assertRaises(ElementInclude.LimitedRecursiveIncludeError) as cm:
1920            ElementInclude.include(document, self.xinclude_loader, max_depth=2)
1921        self.assertEqual(str(cm.exception),
1922                "maximum xinclude depth reached when including file Recursive1.xml")
1923
1924        document = self.xinclude_loader("Recursive1.xml")
1925        with self.assertRaises(ElementInclude.FatalIncludeError) as cm:
1926            ElementInclude.include(document, self.xinclude_loader, max_depth=3)
1927        self.assertEqual(str(cm.exception),
1928                "recursive include of Recursive2.xml")
1929
1930
1931# --------------------------------------------------------------------
1932# reported bugs
1933
class BugsTest(unittest.TestCase):
    """Regression tests for individually reported ElementTree bugs.

    Each test is named after the report it covers (xmltoolkit numbers,
    dated reports, or bugs.python.org issue numbers).
    """

    def test_bug_xmltoolkit21(self):
        # marshaller gives obscure errors for non-string values

        def check(elem):
            # Serializing must fail with a TypeError naming the bad value.
            with self.assertRaises(TypeError) as cm:
                serialize(elem)
            self.assertEqual(str(cm.exception),
                    'cannot serialize 123 (type int)')

        elem = ET.Element(123)
        check(elem) # tag

        elem = ET.Element("elem")
        elem.text = 123
        check(elem) # text

        elem = ET.Element("elem")
        elem.tail = 123
        check(elem) # tail

        elem = ET.Element("elem")
        elem.set(123, "123")
        check(elem) # attribute key

        elem = ET.Element("elem")
        elem.set("123", 123)
        check(elem) # attribute value

    def test_bug_xmltoolkit25(self):
        # typo in ElementTree.findtext

        elem = ET.XML(SAMPLE_XML)
        tree = ET.ElementTree(elem)
        self.assertEqual(tree.findtext("tag"), 'text')
        self.assertEqual(tree.findtext("section/tag"), 'subtext')

    def test_bug_xmltoolkit28(self):
        # .//tag causes exceptions

        tree = ET.XML("<doc><table><tbody/></table></doc>")
        self.assertEqual(summarize_list(tree.findall(".//thead")), [])
        self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody'])

    def test_bug_xmltoolkitX1(self):
        # dump() doesn't flush the output buffer

        tree = ET.XML("<doc><table><tbody/></table></doc>")
        with support.captured_stdout() as stdout:
            ET.dump(tree)
            self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n')

    def test_bug_xmltoolkit39(self):
        # non-ascii element and attribute names don't work

        # Parsed from ISO-8859-1 input: b'\xe4' is U+00E4, which is
        # b'\xc3\xa4' once serialized as UTF-8.
        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
        self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')

        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
                      b"<tag \xe4ttr='v&#228;lue' />")
        self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'})
        self.assertEqual(ET.tostring(tree, "utf-8"),
                b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')

        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
                      b'<t\xe4g>text</t\xe4g>')
        self.assertEqual(ET.tostring(tree, "utf-8"),
                b'<t\xc3\xa4g>text</t\xc3\xa4g>')

        # The same names, built through the API instead of the parser.
        tree = ET.Element("t\u00e4g")
        self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')

        tree = ET.Element("tag")
        tree.set("\u00e4ttr", "v\u00e4lue")
        self.assertEqual(ET.tostring(tree, "utf-8"),
                b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')

    def test_bug_xmltoolkit54(self):
        # problems handling internally defined entities

        e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]>"
                   '<doc>&ldots;</doc>')
        # U+8230 is 33328 in decimal, hence the character reference below.
        self.assertEqual(serialize(e, encoding="us-ascii"),
                b'<doc>&#33328;</doc>')
        self.assertEqual(serialize(e), '<doc>\u8230</doc>')

    def test_bug_xmltoolkit55(self):
        # make sure we're reporting the first error, not the last

        with self.assertRaises(ET.ParseError) as cm:
            ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>"
                   b'<doc>&ldots;&ndots;&rdots;</doc>')
        # Offset 36 is where the first of the three references starts.
        self.assertEqual(str(cm.exception),
                'undefined entity &ldots;: line 1, column 36')

    def test_bug_xmltoolkit60(self):
        # Handle crash in stream source.

        class ExceptionFile:
            def read(self, x):
                raise OSError

        # The error raised by read() must propagate out of ET.parse().
        self.assertRaises(OSError, ET.parse, ExceptionFile())

    def test_bug_xmltoolkit62(self):
        # Don't crash when using custom entities.

        ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'}
        parser = ET.XMLParser()
        # Register the replacements through the parser's 'entity' mapping.
        parser.entity.update(ENTITIES)
        parser.feed("""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
<patent-application-publication>
<subdoc-abstract>
<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
</subdoc-abstract>
</patent-application-publication>""")
        t = parser.close()
        self.assertEqual(t.find('.//paragraph').text,
            'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.')

    @unittest.skipIf(sys.gettrace(), "Skips under coverage.")
    def test_bug_xmltoolkit63(self):
        # Check reference leak.
        def xmltoolkit63():
            tree = ET.TreeBuilder()
            tree.start("tag", {})
            tree.data("text")
            tree.end("tag")

        # Warm-up call, so that one-time effects happen before the baseline
        # refcount is sampled.
        xmltoolkit63()
        count = sys.getrefcount(None)
        for i in range(1000):
            xmltoolkit63()
        # NOTE(review): if None is immortal on the running interpreter, its
        # refcount may never change and this check cannot fail -- confirm.
        self.assertEqual(sys.getrefcount(None), count)

    def test_bug_200708_newline(self):
        # Preserve newlines in attributes.

        e = ET.Element('SomeTag', text="def _f():\n  return 3\n")
        # Newlines are written as &#10; and must survive a round trip.
        self.assertEqual(ET.tostring(e),
                b'<SomeTag text="def _f():&#10;  return 3&#10;" />')
        self.assertEqual(ET.XML(ET.tostring(e)).get("text"),
                'def _f():\n  return 3\n')
        self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))),
                b'<SomeTag text="def _f():&#10;  return 3&#10;" />')

    def test_bug_200708_close(self):
        # Test default builder.
        parser = ET.XMLParser() # default
        parser.feed("<element>some text</element>")
        self.assertEqual(parser.close().tag, 'element')

        # Test custom builder.
        # The target defines only close(); its return value is what
        # parser.close() hands back.
        class EchoTarget:
            def close(self):
                return ET.Element("element") # simulate root
        parser = ET.XMLParser(target=EchoTarget())
        parser.feed("<element>some text</element>")
        self.assertEqual(parser.close().tag, 'element')

    def test_bug_200709_default_namespace(self):
        # NOTE(review): the 's' bindings below are never read; SubElement()
        # is called for its effect on 'e'.
        e = ET.Element("{default}elem")
        s = ET.SubElement(e, "{default}elem")
        self.assertEqual(serialize(e, default_namespace="default"), # 1
                '<elem xmlns="default"><elem /></elem>')

        e = ET.Element("{default}elem")
        s = ET.SubElement(e, "{default}elem")
        s = ET.SubElement(e, "{not-default}elem")
        self.assertEqual(serialize(e, default_namespace="default"), # 2
            '<elem xmlns="default" xmlns:ns1="not-default">'
            '<elem />'
            '<ns1:elem />'
            '</elem>')

        e = ET.Element("{default}elem")
        s = ET.SubElement(e, "{default}elem")
        s = ET.SubElement(e, "elem") # unprefixed name
        with self.assertRaises(ValueError) as cm:
            serialize(e, default_namespace="default") # 3
        self.assertEqual(str(cm.exception),
                'cannot use non-qualified names with default_namespace option')

    def test_bug_200709_register_namespace(self):
        # Before registration the URI gets a generated prefix (ns0) ...
        e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
        self.assertEqual(ET.tostring(e),
            b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />')
        # ... afterwards the registered prefix is used.  The registration is
        # not undone by this test.
        ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
        e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
        self.assertEqual(ET.tostring(e),
            b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />')

        # And the Dublin Core namespace is in the default list:

        e = ET.Element("{http://purl.org/dc/elements/1.1/}title")
        self.assertEqual(ET.tostring(e),
            b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />')

    def test_bug_200709_element_comment(self):
        # Not sure if this can be fixed, really (since the serializer needs
        # ET.Comment, not cET.comment).

        a = ET.Element('a')
        a.append(ET.Comment('foo'))
        self.assertEqual(a[0].tag, ET.Comment)

        a = ET.Element('a')
        a.append(ET.PI('foo'))
        self.assertEqual(a[0].tag, ET.PI)

    def test_bug_200709_element_insert(self):
        a = ET.Element('a')
        b = ET.SubElement(a, 'b')
        c = ET.SubElement(a, 'c')
        d = ET.Element('d')
        a.insert(0, d)
        self.assertEqual(summarize_list(a), ['d', 'b', 'c'])
        # A negative index counts from the end: -1 inserts before the last
        # child.
        a.insert(-1, d)
        self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c'])

    def test_bug_200709_iter_comment(self):
        # iter(ET.Comment) must find a comment nested below a child element.
        a = ET.Element('a')
        b = ET.SubElement(a, 'b')
        comment_b = ET.Comment("TEST-b")
        b.append(comment_b)
        self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment])

    # --------------------------------------------------------------------
    # reported on bugs.python.org

    def test_bug_1534630(self):
        # data() is called before any element has been started; the
        # resulting tree must still serialize as an empty <tag />.
        bob = ET.TreeBuilder()
        e = bob.data("data")
        e = bob.start("tag", {})
        e = bob.end("tag")
        e = bob.close()
        self.assertEqual(serialize(e), '<tag />')

    def test_issue6233(self):
        # Non-ASCII text must be written as a character reference when
        # serializing to ASCII, whatever the encoding of the input was.
        e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>"
                   b'<body>t\xc3\xa3g</body>')
        self.assertEqual(ET.tostring(e, 'ascii'),
                b"<?xml version='1.0' encoding='ascii'?>\n"
                b'<body>t&#227;g</body>')
        e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
                   b'<body>t\xe3g</body>')
        self.assertEqual(ET.tostring(e, 'ascii'),
                b"<?xml version='1.0' encoding='ascii'?>\n"
                b'<body>t&#227;g</body>')

    def test_issue3151(self):
        # A namespace URI that itself contains braces.
        e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
        self.assertEqual(e.tag, '{${stuff}}localname')
        # NOTE(review): 't' is never read; only its construction is exercised.
        t = ET.ElementTree(e)
        self.assertEqual(ET.tostring(e), b'<ns0:localname xmlns:ns0="${stuff}" />')

    def test_issue6565(self):
        # Full-slice assignment replaces all children of an element.
        elem = ET.XML("<body><tag/></body>")
        self.assertEqual(summarize_list(elem), ['tag'])
        newelem = ET.XML(SAMPLE_XML)
        elem[:] = newelem[:]
        self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section'])

    def test_issue10777(self):
        # Registering a namespace twice caused a "dictionary changed size during
        # iteration" bug.

        # No assertion: the test passes if neither call raises.
        ET.register_namespace('test10777', 'http://myuri/')
        ET.register_namespace('test10777', 'http://myuri/')

    def test_lost_text(self):
        # Issue #25902: Borrowed text can disappear
        class Text:
            # Truth-testing this object replaces the element's text.
            def __bool__(self):
                e.text = 'changed'
                return True

        e = ET.Element('tag')
        e.text = Text()
        i = e.itertext()
        t = next(i)
        # The iterator must still yield the original object, while the
        # element now holds the replacement.
        self.assertIsInstance(t, Text)
        self.assertIsInstance(e.text, str)
        self.assertEqual(e.text, 'changed')

    def test_lost_tail(self):
        # Issue #25902: Borrowed tail can disappear
        class Text:
            # Truth-testing this object replaces the child's tail.
            def __bool__(self):
                e[0].tail = 'changed'
                return True

        e = ET.Element('root')
        e.append(ET.Element('tag'))
        e[0].tail = Text()
        i = e.itertext()
        t = next(i)
        self.assertIsInstance(t, Text)
        self.assertIsInstance(e[0].tail, str)
        self.assertEqual(e[0].tail, 'changed')

    def test_lost_elem(self):
        # Issue #25902: Borrowed element can disappear
        class Tag:
            # Comparing this tag replaces the first child and re-enters the
            # iterator that triggered the comparison.
            def __eq__(self, other):
                e[0] = ET.Element('changed')
                next(i)
                return True

        e = ET.Element('root')
        e.append(ET.Element(Tag()))
        e.append(ET.Element('tag'))
        i = e.iter('tag')
        try:
            t = next(i)
        except ValueError:
            # An iterator that refuses re-entrant next() cannot run this
            # scenario at all.
            self.skipTest('generators are not reentrant')
        self.assertIsInstance(t.tag, Tag)
        self.assertIsInstance(e[0].tag, str)
        self.assertEqual(e[0].tag, 'changed')

    def check_expat224_utf8_bug(self, text):
        # Helper: parse 'text' (UTF-8 bytes) as an attribute value and check
        # that it comes back decoded unchanged.
        xml = b'<a b="%s"/>' % text
        root = ET.XML(xml)
        self.assertEqual(root.get('b'), text.decode('utf-8'))

    def test_expat224_utf8_bug(self):
        # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
        # Check that Expat 2.2.4 fixed the bug.
        #
        # Test buffer bounds at odd and even positions.

        # 1024 two-byte characters; the second run is shifted by one byte.
        text = b'\xc3\xa0' * 1024
        self.check_expat224_utf8_bug(text)

        text = b'x' + b'\xc3\xa0' * 1024
        self.check_expat224_utf8_bug(text)

    def test_expat224_utf8_bug_file(self):
        with open(UTF8_BUG_XMLFILE, 'rb') as fp:
            raw = fp.read()
        root = ET.fromstring(raw)
        # NOTE(review): 'xmlattr' is never read; the assertion below calls
        # root.get('b') again.
        xmlattr = root.get('b')

        # "Parse" manually the XML file to extract the value of the 'b'
        # attribute of the <a b='xxx' /> XML element
        text = raw.decode('utf-8').strip()
        # presumably mirrors the parser turning line breaks inside the
        # attribute value into spaces -- confirm against the data file
        text = text.replace('\r\n', ' ')
        # Drop the 6 leading and 4 trailing characters that wrap the value
        # (assuming the <a b='xxx' /> layout described above).
        text = text[6:-4]
        self.assertEqual(root.get('b'), text)

    def test_39495_treebuilder_start(self):
        # start() must reject a missing or None attrs argument.
        self.assertRaises(TypeError, ET.TreeBuilder().start, "tag")
        self.assertRaises(TypeError, ET.TreeBuilder().start, "tag", None)
2290
2291
2292
2293# --------------------------------------------------------------------
2294
2295
class BasicElementTest(ElementTestCase, unittest.TestCase):
    # Core Element behaviour: construction, the three ways of copying,
    # type checking of children, garbage collection, weak references and
    # pickling.

    def _check_shallow_copy(self, original, duplicate):
        # Shared checks for Element.copy() and copy.copy().

        # The copy is a distinct object ...
        self.assertIsNot(duplicate, original)

        # ... whose string attributes compare equal ...
        self.assertEqual(duplicate.tag, original.tag)
        self.assertEqual(duplicate.text, original.text)
        self.assertEqual(duplicate.tail, original.tail)

        # ... that has as many children ...
        self.assertEqual(len(duplicate), len(original))

        # ... which are the very same child objects ...
        for ours, theirs in itertools.zip_longest(original, duplicate):
            self.assertIs(ours, theirs)

        # ... and whose attributes compare equal.
        self.assertEqual(duplicate.attrib, original.attrib)

    def test___init__(self):
        tag = "foo"
        attrib = {"zix": "wyp"}

        elem = ET.Element(tag, attrib)

        # The new object is an Element exposing the four standard attributes.
        self.assertIsInstance(elem, ET.Element)
        for name in ("tag", "attrib", "text", "tail"):
            self.assertIn(name, dir(elem))

        # Only the tag is set; text and tail start out as None.
        self.assertEqual(elem.tag, tag)
        self.assertIsNone(elem.text)
        self.assertIsNone(elem.tail)

        # The element holds its own dict with the same content ...
        self.assertIsNot(elem.attrib, attrib)
        self.assertEqual(elem.attrib, attrib)

        # ... so changing the caller's dict afterwards is not reflected.
        attrib["bar"] = "baz"
        self.assertIsNot(elem.attrib, attrib)
        self.assertNotEqual(elem.attrib, attrib)

    def test_copy(self):
        # Element.copy() may not exist; there is nothing to test then.
        if "copy" not in dir(ET.Element):
            self.skipTest("Element.copy() not present")

        original = ET.Element("foo", {"zix": "wyp"})
        original.append(ET.Element("bar", {"baz": "qix"}))

        # Calling copy() is expected to emit a DeprecationWarning.
        with self.assertWarns(DeprecationWarning):
            duplicate = original.copy()

        self._check_shallow_copy(original, duplicate)

    def test___copy__(self):
        original = ET.Element("foo", {"zix": "wyp"})
        original.append(ET.Element("bar", {"baz": "qix"}))

        duplicate = copy.copy(original)

        self._check_shallow_copy(original, duplicate)

    def test___deepcopy__(self):
        original = ET.Element("foo", {"zix": "wyp"})
        original.append(ET.Element("bar", {"baz": "qix"}))

        duplicate = copy.deepcopy(original)

        # A different object with equal tag, text and tail.
        self.assertIsNot(duplicate, original)
        self.assertEqual(duplicate.tag, original.tag)
        self.assertEqual(duplicate.text, original.text)
        self.assertEqual(duplicate.tail, original.tail)

        # Same number of children, but none of them shared.
        self.assertEqual(len(duplicate), len(original))
        for ours, theirs in itertools.zip_longest(original, duplicate):
            self.assertIsNot(ours, theirs)

        # The attribute dict is a separate, equal dict ...
        self.assertIsNot(duplicate.attrib, original.attrib)
        self.assertEqual(duplicate.attrib, original.attrib)

        # ... so the two diverge once the original is modified.
        original.attrib["bar"] = "baz"
        self.assertIsNot(duplicate.attrib, original.attrib)
        self.assertNotEqual(duplicate.attrib, original.attrib)

    def test_augmentation_type_errors(self):
        parent = ET.Element('joe')

        # Every way of adding a child refuses a non-Element.
        with self.assertRaises(TypeError):
            parent.append('b')
        mixed = [ET.Element('bar'), 'foo']
        with self.assertRaises(TypeError):
            parent.extend(mixed)
        with self.assertRaises(TypeError):
            parent.insert(0, 'foo')

        # The same holds for item and slice assignment.
        parent[:] = [ET.Element('bar')]
        with self.assertRaises(TypeError):
            parent[0] = 'foo'
        with self.assertRaises(TypeError):
            parent[:] = [ET.Element('bar'), 'foo']

        # Where __setstate__ exists, a state whose children list holds a
        # non-Element is refused too.
        if hasattr(parent, '__setstate__'):
            state = dict(
                tag='tag',
                _children=[None],  # non-Element
                attrib='attr',
                tail='tail',
                text='text',
            )
            with self.assertRaises(TypeError):
                parent.__setstate__(state)

        # Where __deepcopy__ exists, a child whose own __deepcopy__ returns
        # a non-Element makes deep-copying the parent fail.
        if hasattr(parent, '__deepcopy__'):
            class E(ET.Element):
                def __deepcopy__(self, memo):
                    return None  # non-Element
            parent[:] = [E('bar')]
            with self.assertRaises(TypeError):
                copy.deepcopy(parent)

    def test_cyclic_gc(self):
        class Dummy:
            pass

        # Shortest cycle: holder -> element -> holder.
        holder = Dummy()
        holder.dummyref = ET.Element('joe', attr=holder)
        probe = weakref.ref(holder)
        del holder
        gc_collect()
        self.assertIsNone(probe())

        # Longer cycle: holder -> root -> child -> holder.
        root = ET.Element('joe')
        holder = Dummy()
        holder.dummyref = root
        probe = weakref.ref(holder)
        child = ET.SubElement(root, 'foo', attr=holder)
        del holder, root, child
        gc_collect()
        self.assertIsNone(probe())

        # Elements that are children of one another:
        # first -> second -> third -> first.
        first = ET.Element('e1')
        second = ET.Element('e2')
        third = ET.Element('e3')
        third.append(first)
        second.append(third)
        first.append(second)
        probe = weakref.ref(first)
        del first, second, third
        gc_collect()
        self.assertIsNone(probe())

    def test_weakref(self):
        fired = False

        def on_collect(_dead_ref):
            nonlocal fired
            fired = True

        elem = ET.Element('e')
        probe = weakref.ref(elem, on_collect)
        # While the element is alive the reference resolves to it.
        self.assertEqual(probe().tag, 'e')
        del elem
        gc_collect()  # For PyPy or other GCs.
        # Once it is gone the callback has run and the reference is dead.
        self.assertEqual(fired, True)
        self.assertEqual(probe(), None)

    def test_get_keyword_args(self):
        # get() accepts its fallback value through the 'default' keyword.
        elem = ET.Element('foo', x=1, y=2, z=3)
        self.assertEqual(elem.get('x', default=7), 1)
        self.assertEqual(elem.get('w', default=7), 7)

    def test_pickle(self):
        # issue #16076: the C implementation wasn't pickleable.
        protocols = range(2, pickle.HIGHEST_PROTOCOL + 1)
        # Every protocol is combined with every (dumper, loader) module pair.
        for proto, dumper, loader in product(protocols,
                                             self.modules, self.modules):
            root = dumper.Element('foo', bar=42)
            root.text = "text goes here"
            root.tail = "opposite of head"
            first_child = dumper.SubElement(root, 'child')
            first_child.append(dumper.Element('grandchild'))
            root.append(dumper.Element('child'))
            grandchild = root.findall('.//grandchild')[0]
            grandchild.set('attr', 'other value')

            restored = self.pickleRoundTrip(root, 'xml.etree.ElementTree',
                                            dumper, loader, proto)

            self.assertEqual(restored.tag, 'foo')
            self.assertEqual(restored.attrib['bar'], 42)
            self.assertEqual(len(restored), 2)
            self.assertEqualElements(root, restored)

    def test_pickle_issue18997(self):
        protocols = range(2, pickle.HIGHEST_PROTOCOL + 1)
        for proto, dumper, loader in product(protocols,
                                             self.modules, self.modules):
            XMLTEXT = """<?xml version="1.0"?>
                    <group><dogs>4</dogs>
                    </group>"""
            parsed = dumper.fromstring(XMLTEXT)
            # When the element exposes its pickle state, the tag is in it.
            if hasattr(parsed, '__getstate__'):
                self.assertEqual(parsed.__getstate__()['tag'], 'group')
            restored = self.pickleRoundTrip(parsed, 'xml.etree.ElementTree',
                                            dumper, loader, proto)
            self.assertEqual(restored.tag, 'group')
            self.assertEqual(restored[0].tag, 'dogs')
2521
2522
# Tests that feed Element and TreeBuilder objects which misbehave while they
# are being used: arguments that mutate a container mid-operation, finalizers
# that read an element, and factories that return the wrong type.  Most of the
# tests assert very little; they pass as long as the call completes (or raises
# the tolerated exception).
# NOTE(review): the "shouldn't crash" remarks below suggest these guard
# against crashes in the C accelerator -- confirm against the cited issues.
class BadElementTest(ElementTestCase, unittest.TestCase):
    def test_extend_mutable_list(self):
        # X claims to be an ET.Element through its __class__ property, and
        # reading that property replaces the contents of L, the very list
        # that is being passed to extend().
        class X:
            @property
            def __class__(self):
                L[:] = [ET.Element('baz')]
                return ET.Element
        L = [X()]
        e = ET.Element('foo')
        # A TypeError is an acceptable outcome here.
        try:
            e.extend(L)
        except TypeError:
            pass

        # Same again with a real Element subclass that inherits the
        # mutating __class__ property; this time extend() must succeed.
        class Y(X, ET.Element):
            pass
        L = [Y('x')]
        e = ET.Element('foo')
        e.extend(L)

    def test_extend_mutable_list2(self):
        # Variant of test_extend_mutable_list in which reading __class__
        # empties L instead of replacing its contents.
        class X:
            @property
            def __class__(self):
                del L[:]
                return ET.Element
        L = [X(), ET.Element('baz')]
        e = ET.Element('foo')
        # A TypeError is an acceptable outcome here.
        try:
            e.extend(L)
        except TypeError:
            pass

        # With a real Element subclass extend() must succeed.
        class Y(X, ET.Element):
            pass
        L = [Y('bar'), ET.Element('baz')]
        e = ET.Element('foo')
        e.extend(L)

    def test_remove_with_mutating(self):
        # X.__eq__ removes all children of whatever `e` is bound to at call
        # time and reports "not equal".
        class X(ET.Element):
            def __eq__(self, o):
                del e[:]
                return False
        # Case 1: the mutating element is the existing child.
        e = ET.Element('foo')
        e.extend([X('bar')])
        self.assertRaises(ValueError, e.remove, ET.Element('baz'))

        # Case 2: the mutating element is the one asked to be removed.
        e = ET.Element('foo')
        e.extend([ET.Element('bar')])
        self.assertRaises(ValueError, e.remove, X('baz'))

    def test_recursive_repr(self):
        # Issue #25455
        # Inside the swap_attr() block the element's tag is the element
        # itself, so repr(e) refers back to e; RuntimeError is expected.
        e = ET.Element('foo')
        with swap_attr(e, 'tag', e):
            with self.assertRaises(RuntimeError):
                repr(e)  # Should not crash

    def test_element_get_text(self):
        # Issue #27863
        # X is a str whose finalizer reads elem.text.  `elem` is only bound
        # after b.close() below, so a NameError from an earlier finalizer
        # run is ignored.
        class X(str):
            def __del__(self):
                try:
                    elem.text
                except NameError:
                    pass

        # Feed three data chunks into one element; the middle one is an X.
        b = ET.TreeBuilder()
        b.start('tag', {})
        b.data('ABCD')
        b.data(X('EFGH'))
        b.data('IJKL')
        b.end('tag')

        # The element's text must be the three chunks joined together.
        elem = b.close()
        self.assertEqual(elem.text, 'ABCDEFGHIJKL')

    def test_element_get_tail(self):
        # Issue #27863
        # Same as test_element_get_text, but the data follows the end of
        # the child element and is therefore checked as that child's tail.
        class X(str):
            def __del__(self):
                try:
                    elem[0].tail
                except NameError:
                    pass

        b = ET.TreeBuilder()
        b.start('root', {})
        b.start('tag', {})
        b.end('tag')
        b.data('ABCD')
        b.data(X('EFGH'))
        b.data('IJKL')
        b.end('root')

        elem = b.close()
        self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL')

    def test_subscr(self):
        # Issue #27863
        # X.__index__ empties the element and then returns 1, so the
        # children disappear while a slice bound is being converted.
        class X:
            def __index__(self):
                del e[:]
                return 1

        e = ET.Element('elem')
        e.append(ET.Element('child'))
        # X used as the slice stop.
        e[:X()]  # shouldn't crash

        e.append(ET.Element('child'))
        # X used as the slice step.
        e[0:10:X()]  # shouldn't crash

    def test_ass_subscr(self):
        # Issue #27863
        # Slice-assignment counterpart of test_subscr: X.__index__ replaces
        # the children with an empty list and then returns 1.
        class X:
            def __index__(self):
                e[:] = []
                return 1

        e = ET.Element('elem')
        for _ in range(10):
            e.insert(0, ET.Element('child'))

        # X used as the step of an extended-slice assignment.
        e[0:10:X()] = []  # shouldn't crash

    def test_treebuilder_start(self):
        # Issue #27863
        # The factory returns a plain list rather than an element.
        def element_factory(x, y):
            return []
        b = ET.TreeBuilder(element_factory=element_factory)

        b.start('tag', {})
        b.data('ABCD')
        # Starting a second element while data is pending must raise
        # AttributeError.
        self.assertRaises(AttributeError, b.start, 'tag2', {})
        # Drop the builder and run a collection before the test returns.
        del b
        gc_collect()

    def test_treebuilder_end(self):
        # Issue #27863
        # Same setup as test_treebuilder_start, but the pending data is
        # followed by end() instead of a second start().
        def element_factory(x, y):
            return []
        b = ET.TreeBuilder(element_factory=element_factory)

        b.start('tag', {})
        b.data('ABCD')
        self.assertRaises(AttributeError, b.end, 'tag')
        # Drop the builder and run a collection before the test returns.
        del b
        gc_collect()
2672
2673
class MutatingElementPath(str):
    # A path string bound to an element.  Comparing the path with anything
    # deletes all items of that element and then reports equality.

    # A class that defines __eq__ loses its inherited __hash__, so str's
    # hashing is reinstated explicitly to keep instances hashable.
    __hash__ = str.__hash__

    def __new__(cls, elem, *args):
        # The remaining arguments build the string value; elem is only
        # remembered for __eq__.
        path = super().__new__(cls, *args)
        path.elem = elem
        return path

    def __eq__(self, o):
        del self.elem[:]
        return True
2683
class BadElementPath(str):
    # A path string whose equality comparison always fails with
    # ZeroDivisionError.

    # A class that defines __eq__ loses its inherited __hash__, so str's
    # hashing is reinstated explicitly to keep instances hashable.
    __hash__ = str.__hash__

    def __eq__(self, o):
        # Raise the error explicitly rather than as a side effect of
        # evaluating ``1/0`` as the operand of ``raise``.
        raise ZeroDivisionError("division by zero")
2688
class BadElementPathTest(ElementTestCase, unittest.TestCase):
    # find(), findtext() and findall() called with path objects whose
    # equality comparison either empties the element or raises.

    def setUp(self):
        super().setUp()
        from xml.etree import ElementPath
        # Keep the module's path cache aside and run the test with an empty
        # one; tearDown() puts the saved cache back.
        self.path_cache, ElementPath._cache = ElementPath._cache, {}

    def tearDown(self):
        from xml.etree import ElementPath
        ElementPath._cache = self.path_cache
        super().tearDown()

    def _make_parent(self):
        # Fixture shared by all tests: <foo> holding a single <bar> child.
        parent = ET.Element('foo')
        parent.extend([ET.Element('bar')])
        return parent

    def test_find_with_mutating(self):
        parent = self._make_parent()
        parent.find(MutatingElementPath(parent, 'x'))

    def test_find_with_error(self):
        parent = self._make_parent()
        # A ZeroDivisionError coming out of the path comparison is tolerated.
        try:
            parent.find(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findtext_with_mutating(self):
        parent = self._make_parent()
        parent.findtext(MutatingElementPath(parent, 'x'))

    def test_findtext_with_error(self):
        parent = self._make_parent()
        # A ZeroDivisionError coming out of the path comparison is tolerated.
        try:
            parent.findtext(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findall_with_mutating(self):
        parent = self._make_parent()
        parent.findall(MutatingElementPath(parent, 'x'))

    def test_findall_with_error(self):
        parent = self._make_parent()
        # A ZeroDivisionError coming out of the path comparison is tolerated.
        try:
            parent.findall(BadElementPath('x'))
        except ZeroDivisionError:
            pass
2739
2740
class ElementTreeTypeTest(unittest.TestCase):
    # The public ElementTree names are real classes, and Element can be
    # subclassed.

    def test_istype(self):
        public_names = ('ParseError', 'QName', 'ElementTree',
                        'Element', 'TreeBuilder', 'XMLParser')
        for name in public_names:
            self.assertIsInstance(getattr(ET, name), type)

    def test_Element_subclass_trivial(self):
        class MyElement(ET.Element):
            pass

        elem = MyElement('foo')
        self.assertIsInstance(elem, ET.Element)
        self.assertIsInstance(elem, MyElement)
        self.assertEqual(elem.tag, 'foo')

        # test that attribute assignment works (issue 14849)
        elem.text = "joe"
        self.assertEqual(elem.text, "joe")

    def test_Element_subclass_constructor(self):
        # A subclass may rewrite the tag before delegating to Element, and
        # positional and keyword attributes both reach the base class.
        class MyElement(ET.Element):
            def __init__(self, tag, attrib={}, **extra):
                super().__init__(tag + '__', attrib, **extra)

        elem = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4)
        self.assertEqual(elem.tag, 'foo__')
        expected_items = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
        self.assertEqual(sorted(elem.items()), expected_items)

    def test_Element_subclass_new_method(self):
        class MyElement(ET.Element):
            def newmethod(self):
                return self.tag

        elem = MyElement('joe')
        self.assertEqual(elem.newmethod(), 'joe')

    def test_Element_subclass_find(self):
        class MyElement(ET.Element):
            pass

        # A subclass instance appended to a plain Element is found by all
        # three lookup methods.
        parent = ET.Element('foo')
        parent.text = 'text'
        child = MyElement('bar')
        child.text = 'subtext'
        parent.append(child)

        self.assertEqual(parent.findtext('bar'), 'subtext')
        self.assertEqual(parent.find('bar').tag, 'bar')
        found = list(parent.findall('bar'))
        self.assertEqual(len(found), 1, found)
        self.assertEqual(found[0].tag, 'bar')
2795
2796
class ElementFindTest(unittest.TestCase):
    # Path lookups through find(), findtext() and findall().

    def _assert_findall(self, elem, cases):
        # For each (path, expected) pair, in order, compare the summary of
        # elem.findall(path) with expected.
        for path, expected in cases:
            self.assertEqual(summarize_list(elem.findall(path)), expected)

    def test_find_simple(self):
        e = ET.XML(SAMPLE_XML)
        for path in ('tag', 'section/tag', './tag'):
            self.assertEqual(e.find(path).tag, 'tag')

        # Swap the third child for the richer sample section.
        e[2] = ET.XML(SAMPLE_SECTION)
        self.assertEqual(e.find('section/nexttag').tag, 'nexttag')

        self.assertEqual(e.findtext('./tag'), 'text')
        self.assertEqual(e.findtext('section/tag'), 'subtext')

        # section/nexttag is found but has no text
        self.assertEqual(e.findtext('section/nexttag'), '')
        self.assertEqual(e.findtext('section/nexttag', 'default'), '')

        # tog doesn't exist and 'default' kicks in
        self.assertIsNone(e.findtext('tog'))
        self.assertEqual(e.findtext('tog', 'default'), 'default')

        # Issue #16922
        self.assertEqual(ET.XML('<tag><empty /></tag>').findtext('empty'), '')

    def test_find_xpath(self):
        LINEAR_XML = '''
        <body>
            <tag class='a'/>
            <tag class='b'/>
            <tag class='c'/>
            <tag class='d'/>
        </body>'''
        e = ET.XML(LINEAR_XML)

        # Test for numeric indexing and last()
        positional = (
            ('./tag[1]', 'a'),
            ('./tag[2]', 'b'),
            ('./tag[last()]', 'd'),
            ('./tag[last()-1]', 'c'),
            ('./tag[last()-2]', 'b'),
        )
        for path, expected_class in positional:
            self.assertEqual(e.find(path).attrib['class'], expected_class)

        # These positions are rejected with an XPath syntax error.
        rejected = ('./tag[0]', './tag[-1]',
                    './tag[last()-0]', './tag[last()+1]')
        for path in rejected:
            self.assertRaisesRegex(SyntaxError, 'XPath', e.find, path)

    def test_findall(self):
        e = ET.XML(SAMPLE_XML)
        e[2] = ET.XML(SAMPLE_SECTION)

        section_children = ['tag', 'nexttag', 'nextsection']
        section_descendants = ['tag', 'nexttag', 'nextsection', 'tag']

        # Plain location paths.
        self._assert_findall(e, [
            ('.', ['body']),
            ('tag', ['tag', 'tag']),
            ('tog', []),
            ('tog/foo', []),
            ('*', ['tag', 'tag', 'section']),
            ('.//tag', ['tag'] * 4),
            ('section/tag', ['tag']),
            ('section//tag', ['tag'] * 2),
            ('section/*', section_children),
            ('section//*', section_descendants),
            ('section/.//*', section_descendants),
            ('*/*', section_children),
            ('*//*', section_descendants),
            ('*/tag', ['tag']),
            ('*/./tag', ['tag']),
            ('./tag', ['tag'] * 2),
            ('././tag', ['tag'] * 2),
        ])

        # Attribute and child predicates, and parent steps.
        self._assert_findall(e, [
            ('.//tag[@class]', ['tag'] * 3),
            ('.//tag[@class="a"]', ['tag']),
            ('.//tag[@class!="a"]', ['tag'] * 2),
            ('.//tag[@class="b"]', ['tag'] * 2),
            ('.//tag[@class!="b"]', ['tag']),
            ('.//tag[@id]', ['tag']),
            ('.//section[tag]', ['section']),
            ('.//section[element]', []),
            ('../tag', []),
            ('section/../tag', ['tag'] * 2),
        ])
        self.assertEqual(e.findall('section//'), e.findall('section//*'))

        # Child-text predicates with every whitespace placement.
        self._assert_findall(e, [
            (".//section[tag='subtext']", ['section']),
            (".//section[tag ='subtext']", ['section']),
            (".//section[tag= 'subtext']", ['section']),
            (".//section[tag = 'subtext']", ['section']),
            (".//section[ tag = 'subtext' ]", ['section']),
        ])

        # Negations of above tests. They match nothing because the sole section
        # tag has subtext.
        self._assert_findall(e, [
            (".//section[tag!='subtext']", []),
            (".//section[tag !='subtext']", []),
            (".//section[tag!= 'subtext']", []),
            (".//section[tag != 'subtext']", []),
            (".//section[ tag != 'subtext' ]", []),
        ])

        # Own-text predicates; whitespace inside the quotes is significant.
        self._assert_findall(e, [
            (".//tag[.='subtext']", ['tag']),
            (".//tag[. ='subtext']", ['tag']),
            ('.//tag[.= "subtext"]', ['tag']),
            ('.//tag[ . = "subtext" ]', ['tag']),
            (".//tag[. = 'subtext']", ['tag']),
            (".//tag[. = 'subtext ']", []),
            (".//tag[.= ' subtext']", []),
        ])

        # Negations of above tests.
        #   Matches everything but the tag containing subtext
        self._assert_findall(e, [
            (".//tag[.!='subtext']", ['tag'] * 3),
            (".//tag[. !='subtext']", ['tag'] * 3),
            ('.//tag[.!= "subtext"]', ['tag'] * 3),
            ('.//tag[ . != "subtext" ]', ['tag'] * 3),
            (".//tag[. != 'subtext']", ['tag'] * 3),
        ])
        # Matches all tags.
        self._assert_findall(e, [
            (".//tag[. != 'subtext ']", ['tag'] * 4),
            (".//tag[.!= ' subtext']", ['tag'] * 4),
        ])

        # duplicate section => 2x tag matches
        e[1] = e[2]
        self._assert_findall(e, [
            (".//section[tag = 'subtext']", ['section', 'section']),
            (".//tag[. = 'subtext']", ['tag', 'tag']),
        ])

    def test_test_find_with_ns(self):
        e = ET.XML(SAMPLE_XML_NS)
        # The unqualified name matches nothing; the qualified one matches
        # two children and three descendants.
        self._assert_findall(e, [
            ('tag', []),
            ("{http://effbot.org/ns}tag", ['{http://effbot.org/ns}tag'] * 2),
            (".//{http://effbot.org/ns}tag",
             ['{http://effbot.org/ns}tag'] * 3),
        ])

    def test_findall_different_nsmaps(self):
        root = ET.XML('''
            <a xmlns:x="X" xmlns:y="Y">
                <x:b><c/></x:b>
                <b/>
                <c><x:b/><b/></c><y:b/>
            </a>''')
        # Each row: namespace map, number of matches for ".//xx:b", number
        # of matches for ".//b".
        expectations = (
            ({'xx': 'X'}, 2, 2),
            ({'xx': 'Y'}, 1, 2),
            ({'xx': 'X', '': 'Y'}, 2, 1),
        )
        for nsmap, prefixed_count, plain_count in expectations:
            self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)),
                             prefixed_count)
            self.assertEqual(len(root.findall(".//b", namespaces=nsmap)),
                             plain_count)

    def test_findall_wildcard(self):
        root = ET.XML('''
            <a xmlns:x="X" xmlns:y="Y">
                <x:b><c/></x:b>
                <b/>
                <c><x:b/><b/></c><y:b/>
            </a>''')
        root.append(ET.Comment('test'))

        # Direct children.
        self._assert_findall(root, [
            ("{*}b", ['{X}b', 'b', '{Y}b']),
            ("{*}c", ['c']),
            ("{X}*", ['{X}b']),
            ("{Y}*", ['{Y}b']),
            ("{}*", ['b', 'c']),
            ("{}b", ['b']),  # only for consistency
        ])
        self.assertEqual(summarize_list(root.findall("{}b")),
                         summarize_list(root.findall("b")))
        self._assert_findall(root, [
            ("{*}*", ['{X}b', 'b', 'c', '{Y}b']),
        ])
        # This is an unfortunate difference, but that's how find('*') works.
        self.assertEqual(summarize_list(root.findall("{*}*") + [root[-1]]),
                         summarize_list(root.findall("*")))

        # All descendants.
        self._assert_findall(root, [
            (".//{*}b", ['{X}b', 'b', '{X}b', 'b', '{Y}b']),
            (".//{*}c", ['c', 'c']),
            (".//{X}*", ['{X}b', '{X}b']),
            (".//{Y}*", ['{Y}b']),
            (".//{}*", ['c', 'b', 'c', 'b']),
            (".//{}b", ['b', 'b']),  # only for consistency
        ])
        self.assertEqual(summarize_list(root.findall(".//{}b")),
                         summarize_list(root.findall(".//b")))

    def test_bad_find(self):
        e = ET.XML(SAMPLE_XML)
        with self.assertRaisesRegex(SyntaxError, 'cannot use absolute path'):
            e.findall('/tag')

    def test_find_through_ElementTree(self):
        e = ET.XML(SAMPLE_XML)
        # A fresh ElementTree wrapper is built for every lookup.
        self.assertEqual(ET.ElementTree(e).find('tag').tag, 'tag')
        self.assertEqual(ET.ElementTree(e).findtext('tag'), 'text')
        self.assertEqual(summarize_list(ET.ElementTree(e).findall('tag')),
                         ['tag'] * 2)
        # this produces a warning
        msg = ("This search is broken in 1.3 and earlier, and will be fixed "
               "in a future version.  If you rely on the current behaviour, "
               "change it to '.+'")
        with self.assertWarnsRegex(FutureWarning, msg):
            it = ET.ElementTree(e).findall('//tag')
        self.assertEqual(summarize_list(it), ['tag'] * 3)
3044
3045
class ElementIterTest(unittest.TestCase):
    # Element.iter() and Element.itertext(), plus the iterator returned by
    # iterparse().

    def _ilist(self, elem, tag=None):
        # Summarise whatever elem.iter(tag) yields.
        matches = elem.iter(tag)
        return summarize_list(matches)

    def test_basic(self):
        doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
        body = doc.find('body')

        # iter() yields the element itself followed by its descendants.
        self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
        self.assertEqual(self._ilist(body), ['body', 'i'])
        self.assertEqual(next(doc.iter()).tag, 'html')

        # itertext() yields the text pieces.
        self.assertEqual(''.join(doc.itertext()), 'this is a paragraph...')
        self.assertEqual(''.join(body.itertext()), 'this is a paragraph.')
        self.assertEqual(next(doc.itertext()), 'this is a ')

        # iterparse should return an iterator
        sourcefile = serialize(doc, to_string=False)
        first_event = next(ET.iterparse(sourcefile))
        self.assertEqual(first_event[0], 'end')

        # With an explicit parser too (issue #9708)
        sourcefile = serialize(doc, to_string=False)
        parser = ET.XMLParser(target=ET.TreeBuilder())
        first_event = next(ET.iterparse(sourcefile, parser=parser))
        self.assertEqual(first_event[0], 'end')

        # A tree built without a root element cannot be iterated.
        tree = ET.ElementTree(None)
        with self.assertRaises(AttributeError):
            tree.iter()

        # Issue #16913
        doc = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
        self.assertEqual(''.join(doc.itertext()), 'a&b&c&')

    def test_corners(self):
        # single root, no subelements
        root = ET.Element('a')
        self.assertEqual(self._ilist(root), ['a'])

        # one child
        first = ET.SubElement(root, 'b')
        self.assertEqual(self._ilist(root), ['a', 'b'])

        # one child and one grandchild
        ET.SubElement(first, 'c')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c'])

        # two children, only first with grandchild
        ET.SubElement(root, 'd')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c', 'd'])

        # replace first child by second
        root[0] = root[1]
        del root[1]
        self.assertEqual(self._ilist(root), ['a', 'd'])

    def test_iter_by_tag(self):
        doc = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        for tag, count in (('room', 3), ('house', 2)):
            self.assertEqual(self._ilist(doc, tag), [tag] * count)

        # test that iter also accepts 'tag' as a keyword arg
        by_keyword = doc.iter(tag='room')
        self.assertEqual(summarize_list(by_keyword), ['room'] * 3)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(summarize_list(doc.iter()), all_tags)
        self.assertEqual(self._ilist(doc), all_tags)
        self.assertEqual(self._ilist(doc, '*'), all_tags)

    def test_copy(self):
        # The iterator returned by iter() cannot be copied.
        iterator = ET.Element('a').iter()
        with self.assertRaises(TypeError):
            copy.copy(iterator)

    def test_pickle(self):
        # The iterator returned by iter() cannot be pickled with any protocol.
        iterator = ET.Element('a').iter()
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            with self.assertRaises((TypeError, pickle.PicklingError)):
                pickle.dumps(iterator, proto)
3140
3141
class TreeBuilderTest(unittest.TestCase):
    # Tests for ET.TreeBuilder and for arbitrary target objects handed to
    # ET.XMLParser(target=...).
    #
    # Test methods are described with comments rather than docstrings so
    # that unittest's verbose output keeps showing the method names.

    # Document with a DOCTYPE, root text, one child element and a tail.
    sample1 = ('<!DOCTYPE html PUBLIC'
        ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        '<html>text<div>subtext</div>tail</html>')

    # Single element containing only text.
    sample2 = '''<toplevel>sometext</toplevel>'''

    def _check_sample1_element(self, e):
        # Assert that *e* is the tree expected from parsing sample1: an
        # attribute-less <html> root with text 'text' and exactly one <div>
        # child carrying text 'subtext' and tail 'tail'.
        self.assertEqual(e.tag, 'html')
        self.assertEqual(e.text, 'text')
        self.assertEqual(e.tail, None)
        self.assertEqual(e.attrib, {})
        children = list(e)
        self.assertEqual(len(children), 1)
        child = children[0]
        self.assertEqual(child.tag, 'div')
        self.assertEqual(child.text, 'subtext')
        self.assertEqual(child.tail, 'tail')
        self.assertEqual(child.attrib, {})

    def test_dummy_builder(self):
        # Targets may implement any subset of the handler methods;
        # parser.close() hands back whatever the target's close() returns,
        # or None when the target has no close() at all.
        class BaseDummyBuilder:
            def close(self):
                return 42

        class DummyBuilder(BaseDummyBuilder):
            data = start = end = lambda *a: None

        parser = ET.XMLParser(target=DummyBuilder())
        parser.feed(self.sample1)
        self.assertEqual(parser.close(), 42)

        parser = ET.XMLParser(target=BaseDummyBuilder())
        parser.feed(self.sample1)
        self.assertEqual(parser.close(), 42)

        parser = ET.XMLParser(target=object())
        parser.feed(self.sample1)
        self.assertIsNone(parser.close())

    def test_treebuilder_comment(self):
        # TreeBuilder.comment() with the default factory, an explicit
        # ET.Comment factory and an unrelated callable.
        b = ET.TreeBuilder()
        self.assertEqual(b.comment('ctext').tag, ET.Comment)
        self.assertEqual(b.comment('ctext').text, 'ctext')

        b = ET.TreeBuilder(comment_factory=ET.Comment)
        self.assertEqual(b.comment('ctext').tag, ET.Comment)
        self.assertEqual(b.comment('ctext').text, 'ctext')

        # The factory's return value is passed through unchanged.
        b = ET.TreeBuilder(comment_factory=len)
        self.assertEqual(b.comment('ctext'), len('ctext'))

    def test_treebuilder_pi(self):
        # TreeBuilder.pi() with the default factory, an explicit ET.PI
        # factory and a custom two-argument callable.
        b = ET.TreeBuilder()
        self.assertEqual(b.pi('target', None).tag, ET.PI)
        self.assertEqual(b.pi('target', None).text, 'target')

        b = ET.TreeBuilder(pi_factory=ET.PI)
        self.assertEqual(b.pi('target').tag, ET.PI)
        self.assertEqual(b.pi('target').text, "target")
        self.assertEqual(b.pi('pitarget', ' text ').tag, ET.PI)
        self.assertEqual(b.pi('pitarget', ' text ').text, "pitarget  text ")

        # The custom factory receives (target, text); text is None when the
        # caller omits it.
        b = ET.TreeBuilder(pi_factory=lambda target, text: (len(target), text))
        self.assertEqual(b.pi('target'), (len('target'), None))
        self.assertEqual(b.pi('pitarget', ' text '), (len('pitarget'), ' text '))

    def test_late_tail(self):
        # Issue #37399: The tail of an ignored comment could overwrite the text before it.
        class TreeBuilderSubclass(ET.TreeBuilder):
            pass

        # The same expectation holds for an ignored comment and an ignored
        # processing instruction, with the default builder as well as with
        # a TreeBuilder subclass.
        documents = (
            "<a>text<!-- comment -->tail</a>",
            "<a>text<?pi data?>tail</a>",
        )
        for xml in documents:
            with self.subTest(xml=xml):
                a = ET.fromstring(xml)
                self.assertEqual(a.text, "texttail")

                parser = ET.XMLParser(target=TreeBuilderSubclass())
                parser.feed(xml)
                a = parser.close()
                self.assertEqual(a.text, "texttail")

    def test_late_tail_mix_pi_comments(self):
        # Issue #37399: The tail of an ignored comment could overwrite the text before it.
        # Test appending tails to comments/pis.
        class TreeBuilderSubclass(ET.TreeBuilder):
            pass

        builder_classes = (ET.TreeBuilder, TreeBuilderSubclass)

        # Comments are inserted into the tree, processing instructions are
        # ignored.
        xml = "<a>text<?pi1?> <!-- comment -->\n<?pi2?>tail</a>"
        for builder_cls in builder_classes:
            with self.subTest(builder=builder_cls.__name__, inserted='comments'):
                parser = ET.XMLParser(target=builder_cls(insert_comments=True))
                parser.feed(xml)
                a = parser.close()
                self.assertEqual(a[0].text, ' comment ')
                self.assertEqual(a[0].tail, '\ntail')
                self.assertEqual(a.text, "text ")

        # Processing instructions are inserted, comments are ignored.
        xml = "<a>text<!-- comment -->\n<?pi data?>tail</a>"
        for builder_cls in builder_classes:
            with self.subTest(builder=builder_cls.__name__, inserted='pis'):
                parser = ET.XMLParser(target=builder_cls(insert_pis=True))
                parser.feed(xml)
                a = parser.close()
                self.assertEqual(a[0].text, 'pi data')
                self.assertEqual(a[0].tail, 'tail')
                self.assertEqual(a.text, "text\n")

    def test_treebuilder_elementfactory_none(self):
        # element_factory=None must yield the same tree as the default.
        parser = ET.XMLParser(target=ET.TreeBuilder(element_factory=None))
        parser.feed(self.sample1)
        e = parser.close()
        self._check_sample1_element(e)

    def test_subclass(self):
        # A TreeBuilder subclass with an extra method still works as a
        # parser target.
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        tb = MyTreeBuilder()
        self.assertEqual(tb.foobar(10), 20)

        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample1)

        e = parser.close()
        self._check_sample1_element(e)

    def test_subclass_comment_pi(self):
        # Same as test_subclass, but with comment/PI factories configured
        # and a comment plus a PI fed after the root element was closed.
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        tb = MyTreeBuilder(comment_factory=ET.Comment, pi_factory=ET.PI)
        self.assertEqual(tb.foobar(10), 20)

        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample1)
        parser.feed('<!-- a comment--><?and a pi?>')

        e = parser.close()
        self._check_sample1_element(e)

    def test_element_factory(self):
        # A plain function used as element_factory is called once per
        # element with (tag, attrib).
        lst = []
        def myfactory(tag, attrib):
            # The list is only mutated, never rebound, so no 'nonlocal'
            # declaration is needed.
            lst.append(tag)
            return ET.Element(tag, attrib)

        tb = ET.TreeBuilder(element_factory=myfactory)
        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample2)
        parser.close()

        self.assertEqual(lst, ['toplevel'])

    def _check_element_factory_class(self, cls):
        # Parse sample1 with *cls* as element factory and check that the
        # resulting root is an instance of *cls* with the expected content.
        tb = ET.TreeBuilder(element_factory=cls)

        parser = ET.XMLParser(target=tb)
        parser.feed(self.sample1)
        e = parser.close()
        self.assertIsInstance(e, cls)
        self._check_sample1_element(e)

    def test_element_factory_subclass(self):
        class MyElement(ET.Element):
            pass
        self._check_element_factory_class(MyElement)

    def test_element_factory_pure_python_subclass(self):
        # Mimic SimpleTAL's behaviour (issue #16089): both versions of
        # TreeBuilder should be able to cope with a subclass of the
        # pure Python Element class.
        base = ET._Element_Py
        # Not from a C extension
        self.assertEqual(base.__module__, 'xml.etree.ElementTree')
        # Force some multiple inheritance with a C class to make things
        # more interesting.
        class MyElement(base, ValueError):
            pass
        self._check_element_factory_class(MyElement)

    def test_doctype(self):
        # A target defining doctype() receives (name, pubid, system) from
        # the DOCTYPE declaration of sample1.
        class DoctypeParser:
            _doctype = None

            def doctype(self, name, pubid, system):
                self._doctype = (name, pubid, system)

            def close(self):
                return self._doctype

        parser = ET.XMLParser(target=DoctypeParser())
        parser.feed(self.sample1)

        self.assertEqual(parser.close(),
            ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))

    def test_builder_lookup_errors(self):
        class RaisingBuilder:
            # Target whose attribute lookup raises *what* for the single
            # name *raise_in*; any other missing attribute resolves to a
            # handler that accepts positional arguments and does nothing.
            def __init__(self, raise_in=None, what=ValueError):
                self.raise_in = raise_in
                self.what = what

            def __getattr__(self, name):
                if name == self.raise_in:
                    raise self.what(self.raise_in)
                def handle(*args):
                    pass
                return handle

        events = ('start', 'data', 'end', 'comment', 'pi')

        ET.XMLParser(target=RaisingBuilder())
        # cET also checks for 'close' and 'doctype', PyET does it only at need
        for event in events:
            with self.subTest(event=event, raised='ValueError'):
                with self.assertRaisesRegex(ValueError, event):
                    ET.XMLParser(target=RaisingBuilder(event))

        # An AttributeError raised during lookup must not escape: the
        # parser can still be built, fed and closed.
        ET.XMLParser(target=RaisingBuilder(what=AttributeError))
        for event in events:
            with self.subTest(event=event, raised='AttributeError'):
                parser = ET.XMLParser(target=RaisingBuilder(event, what=AttributeError))
                parser.feed(self.sample1)
                self.assertIsNone(parser.close())
3387
3388
class XMLParserTest(unittest.TestCase):
    # Tests for ET.XMLParser itself: constructor arguments, subclassing,
    # doctype handling and parsing of str input.

    sample1 = b'<file><line>22</line></file>'
    sample2 = (b'<!DOCTYPE html PUBLIC'
        b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        b'<html>text</html>')
    sample3 = ('<?xml version="1.0" encoding="iso-8859-1"?>\n'
        '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>')

    def _check_sample_element(self, e):
        # *e* must be the <file> root of sample1 whose first child is
        # <line>22</line>.
        self.assertEqual(e.tag, 'file')
        line = e[0]
        self.assertEqual(line.tag, 'line')
        self.assertEqual(line.text, '22')

    def test_constructor_args(self):
        # Both 'target' and 'encoding' are accepted as keyword arguments.
        builder = ET.TreeBuilder()
        parser = ET.XMLParser(target=builder, encoding='utf-8')
        parser.feed(self.sample1)
        root = parser.close()
        self._check_sample_element(root)

    def test_subclass(self):
        # A trivial subclass parses exactly like XMLParser.
        class MyParser(ET.XMLParser):
            pass

        parser = MyParser()
        parser.feed(self.sample1)
        root = parser.close()
        self._check_sample_element(root)

    def test_doctype_warning(self):
        # Parsing a document with a DOCTYPE through a plain XMLParser must
        # not emit a DeprecationWarning (it would be raised as an error).
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            plain_parser = ET.XMLParser()
            plain_parser.feed(self.sample2)
            plain_parser.close()

    def test_subclass_doctype(self):
        seen_by_parser = None

        class MyParserWithDoctype(ET.XMLParser):
            def doctype(self, *args, **kwargs):
                nonlocal seen_by_parser
                seen_by_parser = (args, kwargs)

        # Without a doctype-aware target: feeding warns, and the
        # subclass's doctype() is never invoked.
        parser = MyParserWithDoctype()
        with self.assertWarnsRegex(RuntimeWarning, 'doctype'):
            parser.feed(self.sample2)
        parser.close()
        self.assertIsNone(seen_by_parser)

        # With a target that defines doctype(): no warning at all (both
        # categories are turned into errors), the target receives the
        # doctype and the parser subclass still does not.
        seen_by_parser = seen_by_target = None
        expected_doctype = (
            'html',
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd',
        )
        with warnings.catch_warnings():
            for category in (DeprecationWarning, RuntimeWarning):
                warnings.simplefilter('error', category)

            class DoctypeParser:
                def doctype(self, name, pubid, system):
                    nonlocal seen_by_target
                    seen_by_target = (name, pubid, system)

            parser = MyParserWithDoctype(target=DoctypeParser())
            parser.feed(self.sample2)
            parser.close()
            self.assertIsNone(seen_by_parser)
            self.assertEqual(seen_by_target, expected_doctype)

    def test_inherited_doctype(self):
        '''Ensure that ordinary usage is not deprecated (Issue 19176)'''
        class MyParserWithoutDoctype(ET.XMLParser):
            pass

        with warnings.catch_warnings():
            for category in (DeprecationWarning, RuntimeWarning):
                warnings.simplefilter('error', category)
            parser = MyParserWithoutDoctype()
            parser.feed(self.sample2)
            parser.close()

    def test_parse_string(self):
        # sample3 is fed as str; attribute value and text must come back
        # with all non-ASCII characters intact.
        money = '$\xa3\u20ac\U0001017b'
        parser = ET.XMLParser(target=ET.TreeBuilder())
        parser.feed(self.sample3)
        root = parser.close()
        self.assertEqual(root.tag, 'money')
        self.assertEqual(root.attrib['value'], money)
        self.assertEqual(root.text, money)
3471
3472
class NamespaceParseTest(unittest.TestCase):
    # findall() on a namespaced document using '{uri}tag' paths together
    # with a prefix map.
    def test_find_with_namespace(self):
        doc = ET.fromstring(SAMPLE_XML_NS_ELEMS)
        nsmap = {'h': 'hello', 'f': 'foo'}

        # (path, number of matching elements)
        expected_hits = (
            ('{hello}table', 1),
            ('.//{hello}td', 2),
            ('.//{foo}name', 1),
        )
        for path, hits in expected_hits:
            self.assertEqual(len(doc.findall(path, nsmap)), hits)
3481
3482
class ElementSlicingTest(unittest.TestCase):
    # Indexing, slicing, slice deletion and slice assignment on the
    # children of an Element.

    def _elem_tags(self, elemlist):
        # Tags of the given elements, in iteration order.
        return [item.tag for item in elemlist]

    def _subelem_tags(self, elem):
        # Tags of the direct children of *elem*.
        children = list(elem)
        return self._elem_tags(children)

    def _make_elem_with_children(self, numchildren):
        """Build an Element tagged 'a' that holds *numchildren* children.

        The children are tagged 'a0', 'a1', ... in insertion order.
        """
        parent = ET.Element('a')
        for index in range(numchildren):
            ET.SubElement(parent, f'a{index}')
        return parent

    def test_getslice_single_index(self):
        elem = self._make_elem_with_children(10)

        for index, tag in ((1, 'a1'), (-2, 'a8')):
            self.assertEqual(elem[index].tag, tag)

        # Indices beyond either end are rejected.
        for out_of_range in (12, -12):
            with self.assertRaises(IndexError):
                elem[out_of_range]

    def test_getslice_range(self):
        elem = self._make_elem_with_children(6)

        expectations = (
            (slice(3, None), ['a3', 'a4', 'a5']),
            (slice(3, 6), ['a3', 'a4', 'a5']),
            (slice(3, 16), ['a3', 'a4', 'a5']),
            (slice(3, 5), ['a3', 'a4']),
            (slice(3, -1), ['a3', 'a4']),
            (slice(None, 2), ['a0', 'a1']),
        )
        for where, tags in expectations:
            self.assertEqual(self._elem_tags(elem[where]), tags)

    def test_getslice_steps(self):
        elem = self._make_elem_with_children(10)

        # The last two steps are far larger than the number of children.
        expectations = (
            (slice(8, 10, 1), ['a8', 'a9']),
            (slice(None, None, 3), ['a0', 'a3', 'a6', 'a9']),
            (slice(None, None, 8), ['a0', 'a8']),
            (slice(1, None, 8), ['a1', 'a9']),
            (slice(3, None, sys.maxsize), ['a3']),
            (slice(3, None, sys.maxsize<<64), ['a3']),
        )
        for where, tags in expectations:
            self.assertEqual(self._elem_tags(elem[where]), tags)

    def test_getslice_negative_steps(self):
        elem = self._make_elem_with_children(4)

        expectations = (
            (slice(None, None, -1), ['a3', 'a2', 'a1', 'a0']),
            (slice(None, None, -2), ['a3', 'a1']),
            (slice(3, None, -sys.maxsize), ['a3']),
            (slice(3, None, -sys.maxsize-1), ['a3']),
            (slice(3, None, -sys.maxsize<<64), ['a3']),
        )
        for where, tags in expectations:
            self.assertEqual(self._elem_tags(elem[where]), tags)

    def test_delslice(self):
        # (number of children, slice to delete, tags left afterwards);
        # every case starts from a freshly built element.
        cases = (
            (4, slice(0, 2), ['a2', 'a3']),
            (4, slice(0, None), []),
            (4, slice(None, None, -1), []),
            (4, slice(None, None, -2), ['a0', 'a2']),
            (4, slice(1, None, 2), ['a0', 'a2']),
            (2, slice(None, None, 2), ['a1']),
        )
        for count, where, remaining in cases:
            elem = self._make_elem_with_children(count)
            del elem[where]
            self.assertEqual(self._subelem_tags(elem), remaining)

    def test_setslice_single_index(self):
        elem = self._make_elem_with_children(4)

        elem[1] = ET.Element('b')
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'a2', 'a3'])

        elem[-2] = ET.Element('c')
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'c', 'a3'])

        # Out-of-range assignments fail and leave the children untouched.
        for bad_index in (5, -5):
            with self.assertRaises(IndexError):
                elem[bad_index] = ET.Element('d')
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'c', 'a3'])

    def test_setslice_range(self):
        # Replace children 1..2 by as many, fewer and more new elements.
        cases = (
            (['b0', 'b1'], ['a0', 'b0', 'b1', 'a3']),
            (['b'], ['a0', 'b', 'a3']),
            (['b0', 'b1', 'b2'], ['a0', 'b0', 'b1', 'b2', 'a3']),
        )
        for new_tags, result in cases:
            elem = self._make_elem_with_children(4)
            elem[1:3] = [ET.Element(tag) for tag in new_tags]
            self.assertEqual(self._subelem_tags(elem), result)

    def test_setslice_steps(self):
        elem = self._make_elem_with_children(6)
        elem[1:5:2] = [ET.Element('b%s' % i) for i in range(2)]
        self.assertEqual(self._subelem_tags(elem),
                         ['a0', 'b0', 'a2', 'b1', 'a4', 'a5'])

        # An extended slice only accepts a replacement of matching length;
        # a failed assignment must not modify the children.
        elem = self._make_elem_with_children(6)
        wrong_sized = (
            [ET.Element('b')],
            [ET.Element('b%s' % i) for i in range(3)],
            [],
        )
        for replacement in wrong_sized:
            with self.assertRaises(ValueError):
                elem[1:5:2] = replacement
        self.assertEqual(self._subelem_tags(elem),
                         ['a0', 'a1', 'a2', 'a3', 'a4', 'a5'])

        # Huge steps select just the start element.
        elem = self._make_elem_with_children(4)
        elem[1::sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'a2', 'a3'])
        elem[1::sys.maxsize<<64] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'c', 'a2', 'a3'])

    def test_setslice_negative_steps(self):
        elem = self._make_elem_with_children(4)
        elem[2:0:-1] = [ET.Element('b%s' % i) for i in range(2)]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b1', 'b0', 'a3'])

        # Length mismatches are rejected without touching the children.
        elem = self._make_elem_with_children(4)
        wrong_sized = (
            [ET.Element('b')],
            [ET.Element('b%s' % i) for i in range(3)],
            [],
        )
        for replacement in wrong_sized:
            with self.assertRaises(ValueError):
                elem[2:0:-1] = replacement
        self.assertEqual(self._subelem_tags(elem), ['a0', 'a1', 'a2', 'a3'])

        # Huge negative steps select just the start element.
        elem = self._make_elem_with_children(4)
        elem[1::-sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'a2', 'a3'])
        elem[1::-sys.maxsize-1] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'c', 'a2', 'a3'])
        elem[1::-sys.maxsize<<64] = [ET.Element('d')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'd', 'a2', 'a3'])
3631
3632
class IOTest(unittest.TestCase):
    # Serialization and parsing through the different kinds of input and
    # output objects: encodings, file names, text/binary files, in-memory
    # streams and minimal user-defined reader/writer objects.
    #
    # serialize(), TESTFN and os_helper come from the enclosing module.

    def test_encoding(self):
        # Test encoding issues.
        # Every per-encoding loop runs in a subTest so that a failure
        # names the offending encoding and the other encodings still run.
        elem = ET.Element("tag")
        elem.text = "abc"
        self.assertEqual(serialize(elem), '<tag>abc</tag>')
        for enc in ("utf-8", "us-ascii"):
            with self.subTest(enc):
                self.assertEqual(serialize(elem, encoding=enc),
                        b'<tag>abc</tag>')
                self.assertEqual(serialize(elem, encoding=enc.upper()),
                        b'<tag>abc</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            with self.subTest(enc):
                self.assertEqual(serialize(elem, encoding=enc),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag>abc</tag>" % enc).encode(enc))
                upper = enc.upper()
                self.assertEqual(serialize(elem, encoding=upper),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag>abc</tag>" % upper).encode(enc))

        # Markup characters in text.
        elem = ET.Element("tag")
        elem.text = "<&\"\'>"
        self.assertEqual(serialize(elem), '<tag>&lt;&amp;"\'&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag>&lt;&amp;"\'&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag>&lt;&amp;"\'&gt;</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            with self.subTest(enc):
                self.assertEqual(serialize(elem, encoding=enc),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag>&lt;&amp;\"'&gt;</tag>" % enc).encode(enc))

        # Markup characters in an attribute value.
        elem = ET.Element("tag")
        elem.attrib["key"] = "<&\"\'>"
        self.assertEqual(serialize(elem), '<tag key="&lt;&amp;&quot;\'&gt;" />')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag key="&lt;&amp;&quot;\'&gt;" />')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag key="&lt;&amp;&quot;\'&gt;" />')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            with self.subTest(enc):
                self.assertEqual(serialize(elem, encoding=enc),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag key=\"&lt;&amp;&quot;'&gt;\" />" % enc).encode(enc))

        # Non-ASCII characters in text.
        elem = ET.Element("tag")
        elem.text = '\xe5\xf6\xf6<>'
        self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6&lt;&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>')
        for enc in ("iso-8859-1", "utf-16", "utf-32"):
            with self.subTest(enc):
                self.assertEqual(serialize(elem, encoding=enc),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag>åöö&lt;&gt;</tag>" % enc).encode(enc))

        # Non-ASCII characters in an attribute value.
        elem = ET.Element("tag")
        elem.attrib["key"] = '\xe5\xf6\xf6<>'
        self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6&lt;&gt;" />')
        self.assertEqual(serialize(elem, encoding="utf-8"),
                b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />')
        self.assertEqual(serialize(elem, encoding="us-ascii"),
                b'<tag key="&#229;&#246;&#246;&lt;&gt;" />')
        for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"):
            with self.subTest(enc):
                self.assertEqual(serialize(elem, encoding=enc),
                        ("<?xml version='1.0' encoding='%s'?>\n"
                         "<tag key=\"åöö&lt;&gt;\" />" % enc).encode(enc))

    def test_write_to_filename(self):
        # write() accepts a file name.
        self.addCleanup(os_helper.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        tree.write(TESTFN)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_text_file(self):
        # write() to an open text file must leave the file open.
        self.addCleanup(os_helper.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        with open(TESTFN, 'w', encoding='utf-8') as f:
            tree.write(f, encoding='unicode')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_binary_file(self):
        # write() to an open binary file must leave the file open.
        self.addCleanup(os_helper.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        with open(TESTFN, 'wb') as f:
            tree.write(f)
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(), b'''<site />''')

    def test_write_to_binary_file_with_bom(self):
        self.addCleanup(os_helper.unlink, TESTFN)
        tree = ET.ElementTree(ET.XML('''<site />'''))
        # test BOM writing to buffered file
        with open(TESTFN, 'wb') as f:
            tree.write(f, encoding='utf-16')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(),
                    '''<?xml version='1.0' encoding='utf-16'?>\n'''
                    '''<site />'''.encode("utf-16"))
        # test BOM writing to non-buffered file
        with open(TESTFN, 'wb', buffering=0) as f:
            tree.write(f, encoding='utf-16')
            self.assertFalse(f.closed)
        with open(TESTFN, 'rb') as f:
            self.assertEqual(f.read(),
                    '''<?xml version='1.0' encoding='utf-16'?>\n'''
                    '''<site />'''.encode("utf-16"))

    def test_read_from_stringio(self):
        tree = ET.ElementTree()
        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
        tree.parse(stream)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_stringio(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        stream = io.StringIO()
        tree.write(stream, encoding='unicode')
        self.assertEqual(stream.getvalue(), '''<site />''')

    def test_read_from_bytesio(self):
        tree = ET.ElementTree()
        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
        tree.parse(raw)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_bytesio(self):
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        tree.write(raw)
        self.assertEqual(raw.getvalue(), b'''<site />''')

    # Attribute holder: the tests below attach just the I/O methods they
    # want to expose (read, write, seekable, tell) to an instance.
    class dummy:
        pass

    def test_read_from_user_text_reader(self):
        # An object exposing only read() (returning str) can be parsed.
        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = stream.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_user_text_writer(self):
        # An object exposing only write() (accepting str) can be written to.
        tree = ET.ElementTree(ET.XML('''<site />'''))
        stream = io.StringIO()
        writer = self.dummy()
        writer.write = stream.write
        tree.write(writer, encoding='unicode')
        self.assertEqual(stream.getvalue(), '''<site />''')

    def test_read_from_user_binary_reader(self):
        # An object exposing only read() (returning bytes) can be parsed.
        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = raw.read
        tree = ET.ElementTree()
        tree.parse(reader)
        self.assertEqual(tree.getroot().tag, 'site')

    def test_write_to_user_binary_writer(self):
        # An object exposing only write() (accepting bytes) can be written to.
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        writer = self.dummy()
        writer.write = raw.write
        tree.write(writer)
        self.assertEqual(raw.getvalue(), b'''<site />''')

    def test_write_to_user_binary_writer_with_bom(self):
        # Same as above for utf-16 output, with the writer additionally
        # exposing seekable() and tell().
        tree = ET.ElementTree(ET.XML('''<site />'''))
        raw = io.BytesIO()
        writer = self.dummy()
        writer.write = raw.write
        writer.seekable = lambda: True
        writer.tell = raw.tell
        tree.write(writer, encoding="utf-16")
        self.assertEqual(raw.getvalue(),
                '''<?xml version='1.0' encoding='utf-16'?>\n'''
                '''<site />'''.encode("utf-16"))

    def test_tostringlist_invariant(self):
        # Joining tostringlist() output must equal tostring() output.
        root = ET.fromstring('<tag>foo</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode'),
            ''.join(ET.tostringlist(root, 'unicode')))
        self.assertEqual(
            ET.tostring(root, 'utf-16'),
            b''.join(ET.tostringlist(root, 'utf-16')))

    def test_short_empty_elements(self):
        # Empty elements are written in short form by default and when
        # short_empty_elements=True, and as start/end tag pairs when False.
        root = ET.fromstring('<tag>a<x />b<y></y>c</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode'),
            '<tag>a<x />b<y />c</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode', short_empty_elements=True),
            '<tag>a<x />b<y />c</tag>')
        self.assertEqual(
            ET.tostring(root, 'unicode', short_empty_elements=False),
            '<tag>a<x></x>b<y></y>c</tag>')
3840
3841
class ParseErrorTest(unittest.TestCase):
    # Tests for ET.ParseError, the exception raised for malformed XML.

    def test_subclass(self):
        self.assertIsInstance(ET.ParseError(), SyntaxError)

    def _get_error(self, s):
        # Parse the malformed document *s* and return the ET.ParseError it
        # raises.  assertRaises makes the test fail right here, with a
        # clear message, if *s* unexpectedly parses, instead of returning
        # None and letting the caller die with an unrelated AttributeError.
        with self.assertRaises(ET.ParseError) as caught:
            ET.fromstring(s)
        return caught.exception

    def test_error_position(self):
        # ParseError.position is checked against the expected two-item
        # tuples for each malformed input.
        self.assertEqual(self._get_error('foo').position, (1, 0))
        self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5))
        self.assertEqual(self._get_error('foobar<').position, (1, 6))

    def test_error_code(self):
        # ParseError.code is the numeric expat error code.
        import xml.parsers.expat.errors as ERRORS
        self.assertEqual(self._get_error('foo').code,
                ERRORS.codes[ERRORS.XML_ERROR_SYNTAX])
3861
3862
class KeywordArgsTest(unittest.TestCase):
    # Keyword-argument handling of the ET.Element constructor and of the
    # Element search methods.
    def test_issue14818(self):
        root = ET.XML("<a>foo</a>")

        # Positional and keyword spellings of the search methods agree.
        positional = root.find('a', None)
        keyword = root.find(path='a', namespaces=None)
        self.assertEqual(positional, keyword)

        positional = root.findtext('a', None, None)
        keyword = root.findtext(path='a', default=None, namespaces=None)
        self.assertEqual(positional, keyword)

        positional = root.findall('a', None)
        keyword = root.findall(path='a', namespaces=None)
        self.assertEqual(positional, keyword)

        positional = list(root.iterfind('a', None))
        keyword = list(root.iterfind(path='a', namespaces=None))
        self.assertEqual(positional, keyword)

        # Without attributes, attrib is an empty dict.
        self.assertEqual(ET.Element('a').attrib, {})

        # Every way of passing the same two attributes yields the same
        # element.
        wanted_attrib = dict(href="#", id="foo")
        elements = [
            ET.Element('a', dict(href="#", id="foo")),
            ET.Element('a', attrib=dict(href="#", id="foo")),
            ET.Element('a', dict(href="#"), id="foo"),
            ET.Element('a', href="#", id="foo"),
            ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"),
        ]
        for built in elements:
            self.assertEqual(built.tag, 'a')
            self.assertEqual(built.attrib, wanted_attrib)

        # SubElement accepts 'attrib' as a keyword too.
        sub = ET.SubElement(elements[0], 'foobar', attrib={'key1': 'value1'})
        self.assertEqual(sub.attrib['key1'], 'value1')

        # A non-dict attrib is rejected, positionally and by keyword.
        with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
            ET.Element('a', "I'm not a dict")
        with self.assertRaisesRegex(TypeError, 'must be dict, not str'):
            ET.Element('a', attrib="I'm not a dict")
3896
3897# --------------------------------------------------------------------
3898
class NoAcceleratorTest(unittest.TestCase):
    """Checks that pyET is implemented in Python rather than _elementtree."""

    def setUp(self):
        # pyET stays None (falsy) until test_main() has loaded the module.
        if pyET:
            return
        raise unittest.SkipTest('only for the Python version')

    # Test that the C accelerator was not imported for pyET
    def test_correct_import_pyET(self):
        # Methods written in Python are types.FunctionType objects, whereas
        # methods defined inside _elementtree are of type
        # <class 'wrapper_descriptor'>.
        for cls in (pyET.Element, pyET.XMLParser):
            self.assertIsInstance(cls.__init__, types.FunctionType)
3911
3912
3913# --------------------------------------------------------------------
3914
def c14n_roundtrip(xml, **options):
    """Return the canonical form of *xml* as produced by pyET.

    All keyword *options* are forwarded unchanged to pyET.canonicalize().
    """
    canonical_text = pyET.canonicalize(xml, **options)
    return canonical_text
3917
3918
class C14NTest(unittest.TestCase):
    """Tests for canonical XML (C14N) output of canonicalize()."""

    maxDiff = None

    #
    # simple roundtrip tests (from c14n.py)

    def test_simple_roundtrip(self):
        # Each entry is (input document, expected canonical form).
        basics = [
            ("<doc/>", '<doc></doc>'),
            ("<doc xmlns='uri'/>",  # FIXME
             '<doc xmlns="uri"></doc>'),
            ("<prefix:doc xmlns:prefix='uri'/>",
             '<prefix:doc xmlns:prefix="uri"></prefix:doc>'),
            ("<doc xmlns:prefix='uri'><prefix:bar/></doc>",
             '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>'),
            ("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />",
             '<elem></elem>'),
        ]

        # C14N spec
        from_spec = [
            ("<doc>Hello, world!<!-- Comment 1 --></doc>",
             '<doc>Hello, world!</doc>'),
            ("<value>&#x32;</value>",
             '<value>2</value>'),
            ('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>',
             '<compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>'),
            ('''<compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>''',
             '<compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>'),
            ("<norm attr=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>",
             '<norm attr=" \'    &#xD;&#xA;&#x9;   \' "></norm>'),
            ("<normNames attr='   A   &#x20;&#13;&#xa;&#9;   B   '/>",
             '<normNames attr="   A    &#xD;&#xA;&#x9;   B   "></normNames>'),
            ("<normId id=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>",
             '<normId id=" \'    &#xD;&#xA;&#x9;   \' "></normId>'),
        ]

        for document, canonical in basics + from_spec:
            self.assertEqual(c14n_roundtrip(document), canonical)

        # fragments from PJ's tests
        #self.assertEqual(c14n_roundtrip("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>"),
        #'<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>')

        # Namespace issues: these documents are expected to come back
        # unchanged.
        already_canonical = [
            '<X xmlns="http://nps/a"><Y targets="abc,xyz"></Y></X>',
            '<X xmlns="http://nps/a"><Y xmlns="http://nsp/b" targets="abc,xyz"></Y></X>',
            '<X xmlns="http://nps/a"><Y xmlns:b="http://nsp/b" b:targets="abc,xyz"></Y></X>',
        ]
        for xml in already_canonical:
            self.assertEqual(c14n_roundtrip(xml), xml)

    def test_c14n_exclusion(self):
        xml = textwrap.dedent("""\
        <root xmlns:x="http://example.com/x">
            <a x:attr="attrx">
                <b>abtext</b>
            </a>
            <b>btext</b>
            <c>
                <x:d>dtext</x:d>
            </c>
        </root>
        """)
        x_attr = '{http://example.com/x}attr'
        x_d = '{http://example.com/x}d'

        # Each entry is (canonicalize options, expected output).
        cases = [
            (dict(strip_text=True),
             '<root>'
             '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
             '<b>btext</b>'
             '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
             '</root>'),
            (dict(strip_text=True, exclude_attrs=[x_attr]),
             '<root>'
             '<a><b>abtext</b></a>'
             '<b>btext</b>'
             '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
             '</root>'),
            (dict(strip_text=True, exclude_tags=[x_d]),
             '<root>'
             '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
             '<b>btext</b>'
             '<c></c>'
             '</root>'),
            (dict(strip_text=True, exclude_attrs=[x_attr],
                  exclude_tags=[x_d]),
             '<root>'
             '<a><b>abtext</b></a>'
             '<b>btext</b>'
             '<c></c>'
             '</root>'),
            (dict(strip_text=True, exclude_tags=['a', 'b']),
             '<root>'
             '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
             '</root>'),
            (dict(exclude_tags=['a', 'b']),
             '<root>\n'
             '    \n'
             '    \n'
             '    <c>\n'
             '        <x:d xmlns:x="http://example.com/x">dtext</x:d>\n'
             '    </c>\n'
             '</root>'),
            (dict(strip_text=True, exclude_tags=[x_d, 'b']),
             '<root>'
             '<a xmlns:x="http://example.com/x" x:attr="attrx"></a>'
             '<c></c>'
             '</root>'),
            (dict(exclude_tags=[x_d, 'b']),
             '<root>\n'
             '    <a xmlns:x="http://example.com/x" x:attr="attrx">\n'
             '        \n'
             '    </a>\n'
             '    \n'
             '    <c>\n'
             '        \n'
             '    </c>\n'
             '</root>'),
        ]
        for options, expected in cases:
            self.assertEqual(c14n_roundtrip(xml, **options), expected)

    #
    # basic method=c14n tests from the c14n 2.0 specification.  uses
    # test files under xmltestdata/c14n-20.

    # note that this uses generated C14N versions of the standard ET.write
    # output, not roundtripped C14N (see above).

    def test_xml_c14n2(self):
        data_dir = findfile("c14n-20", subdir="xmltestdata")

        def full_path(name):
            return os.path.join(data_dir, name)

        # File names without their ".xml" suffix, in sorted order.
        stems = [entry[:-4] for entry in sorted(os.listdir(data_dir))
                 if entry.endswith('.xml')]
        input_files = [stem for stem in stems if stem.startswith('in')]

        # Map each "c14n*" config file to {option name: (text, element)}.
        configs = {}
        for stem in stems:
            if not stem.startswith('c14n'):
                continue
            options = {}
            # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
            for option in ET.parse(full_path(stem) + ".xml").getroot():
                local_name = option.tag.split('}')[-1]
                options[local_name] = ((option.text or '').strip(), option)
            configs[stem] = options

        # Pair every input file with its "out_<input>_<config>" files.
        tests = {}
        for input_file in input_files:
            prefix = f'out_{input_file}_'
            expected_outputs = []
            for stem in stems:
                config_name = stem.rsplit('_', 1)[-1]
                if stem.startswith(prefix) and config_name in configs:
                    expected_outputs.append((stem, configs[config_name]))
            tests[input_file] = expected_outputs

        # Make sure we found all test cases.
        found = sum(len(output_files) for output_files in tests.values())
        self.assertEqual(30, found)

        def get_option(config, option_name, default=None):
            value, _element = config.get(option_name, (default, ()))
            return value

        def qualified_names(container, path):
            # Build "{namespace}name" strings from the NS/Name attributes.
            return [
                f"{{{el.get('NS')}}}{el.get('Name')}"
                for el in container.findall(path)
            ]

        for input_file, output_files in tests.items():
            for output_file, config in output_files:
                ignore_comments = get_option(config, 'IgnoreComments')
                keep_comments = ignore_comments == 'true'  # no, it's right :)
                strip_text = get_option(config, 'TrimTextNodes') == 'true'
                prefix_rewrite = get_option(config, 'PrefixRewrite')
                rewrite_prefixes = prefix_rewrite == 'sequential'

                qname_aware = config.get('QNameAware')
                if qname_aware is not None:
                    aware_element = qname_aware[1]
                    qattrs = qualified_names(
                        aware_element,
                        '{http://www.w3.org/2010/xml-c14n2}QualifiedAttr')
                    qtags = qualified_names(
                        aware_element,
                        '{http://www.w3.org/2010/xml-c14n2}Element')
                else:
                    qattrs = None
                    qtags = None

                # Build subtest description from config.
                descr_parts = []
                for name, (value, children) in sorted(config.items()):
                    shown = value or ','.join(
                        c.tag.split('}')[-1] for c in children)
                    descr_parts.append(f"{name}={shown}")
                config_descr = ','.join(descr_parts)

                with self.subTest(f"{output_file}({config_descr})"):
                    if (input_file in ('inNsRedecl', 'inNsSuperfluous')
                            and not rewrite_prefixes):
                        self.skipTest(
                            f"Redeclared namespace handling is not supported in {output_file}")
                    if qname_aware is not None and qname_aware[1].find(
                            '{http://www.w3.org/2010/xml-c14n2}XPathElement') is not None:
                        self.skipTest(
                            f"QName rewriting in XPath text is not supported in {output_file}")

                    source = full_path(input_file + ".xml")
                    if input_file == 'inC14N5':
                        # Hack: avoid setting up external entity resolution in the parser.
                        with open(full_path('world.txt'), 'rb') as entity_file, \
                                open(source, 'rb') as xml_file:
                            raw = xml_file.read()
                            source = io.BytesIO(
                                raw.replace(b'&ent2;', entity_file.read()))

                    text = ET.canonicalize(
                        from_file=source,
                        with_comments=keep_comments,
                        strip_text=strip_text,
                        rewrite_prefixes=rewrite_prefixes,
                        qname_aware_tags=qtags, qname_aware_attrs=qattrs)

                    expected_path = full_path(output_file + ".xml")
                    with open(expected_path, 'r', encoding='utf8') as expected_file:
                        expected = expected_file.read()
                    if input_file == 'inC14N3':
                        # FIXME: cET resolves default attributes but ET does not!
                        expected = expected.replace(' attr="default"', '')
                        text = text.replace(' attr="default"', '')
                    self.assertEqual(expected, text)
4144
4145# --------------------------------------------------------------------
4146
4147
def test_main(module=None):
    """Run the ElementTree test classes against *module*.

    When invoked without a module, the pure-Python implementation (pyET) is
    tested; otherwise the given module is installed as the module-level ET.
    pyET is always loaded with the _elementtree accelerator blocked.

    The namespace map, the ElementPath cache and the Comment/PI factories
    are saved before the run and restored afterwards, and the ET and pyET
    globals are reset to None so that subsequent tests are not affected.
    """
    global ET, pyET

    pyET = import_fresh_module('xml.etree.ElementTree',
                               blocked=['_elementtree'])
    ET = pyET if module is None else module

    test_classes = [
        ModuleTest,
        ElementSlicingTest,
        BasicElementTest,
        BadElementTest,
        BadElementPathTest,
        ElementTreeTest,
        IOTest,
        ParseErrorTest,
        XIncludeTest,
        ElementTreeTypeTest,
        ElementFindTest,
        ElementIterTest,
        TreeBuilderTest,
        XMLParserTest,
        XMLPullParserTest,
        BugsTest,
        KeywordArgsTest,
        C14NTest,
        ]

    # These tests will only run for the pure-Python version that doesn't import
    # _elementtree. We can't use skipUnless here, because pyET is filled in only
    # after the module is loaded.
    # The guard used to be "pyET is not ET", which selected the accelerated
    # run instead and contradicted the intent stated above.
    if ET is pyET:
        test_classes.append(NoAcceleratorTest)

    # Provide default namespace mapping and path cache.
    from xml.etree import ElementPath
    nsmap = ET.register_namespace._namespace_map
    # Copy the default namespace mapping
    nsmap_copy = nsmap.copy()
    # Copy the path cache (should be empty)
    path_cache = ElementPath._cache
    ElementPath._cache = path_cache.copy()
    # Align the Comment/PI factories.
    if hasattr(ET, '_set_factories'):
        old_factories = ET._set_factories(ET.Comment, ET.PI)
    else:
        old_factories = None

    try:
        support.run_unittest(*test_classes)
    finally:
        # Restore mapping and path cache
        nsmap.clear()
        nsmap.update(nsmap_copy)
        ElementPath._cache = path_cache
        if old_factories is not None:
            ET._set_factories(*old_factories)
        # don't interfere with subsequent tests
        ET = pyET = None
4215
4216
# Executed as a script: call test_main() with its default module=None.
if __name__ == '__main__':
    test_main()
4219