• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# IMPORTANT: the same tests are run from "test_xml_etree_c" in order
# to ensure consistency between the C implementation and the Python
# implementation.
#
# For this purpose, the module-level "ET" symbol is temporarily
# monkey-patched when running the "test_xml_etree_c" test suite.
7
8import copy
9import functools
10import html
11import io
12import itertools
13import operator
14import os
15import pickle
16import pyexpat
17import sys
18import textwrap
19import types
20import unittest
21import warnings
22import weakref
23
24from functools import partial
25from itertools import product, islice
26from test import support
27from test.support import os_helper
28from test.support import warnings_helper
29from test.support import findfile, gc_collect, swap_attr, swap_item
30from test.support.import_helper import import_fresh_module
31from test.support.os_helper import TESTFN
32
33
# pyET is the pure-Python implementation.
#
# ET is pyET in test_xml_etree and is the C accelerated version in
# test_xml_etree_c.
#
# Both names start out as None placeholders; nothing in this part of the
# file assigns the real modules.
pyET = None
ET = None

# Paths of the XML fixture files, located with findfile() inside the
# "xmltestdata" subdirectory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
try:
    # The whole module is skipped when the fixture path cannot be encoded
    # as UTF-8.
    SIMPLE_XMLFILE.encode("utf-8")
except UnicodeEncodeError:
    raise unittest.SkipTest("filename is not encodable to utf8")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")

# Small document: <body> with two <tag> children and a <section> that holds
# one more <tag>.
SAMPLE_XML = """\
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
"""

# Stand-alone <section> document with nested <nextsection>.
SAMPLE_SECTION = """\
<section>
  <tag class='b' id='inner'>subtext</tag>
  <nexttag />
  <nextsection>
    <tag />
  </nextsection>
</section>
"""

# Same shape as SAMPLE_XML (minus the attributes) but with a default
# namespace declared on <body>.  Note the literal starts with a newline.
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
"""

# Document mixing two prefixed namespaces ("hello" bound to h:, "foo" bound
# to f:) under an un-namespaced <root>.  Note the literal starts with a
# newline.
SAMPLE_XML_NS_ELEMS = """
<root>
<h:table xmlns:h="hello">
  <h:tr>
    <h:td>Apples</h:td>
    <h:td>Bananas</h:td>
  </h:tr>
</h:table>

<f:table xmlns:f="foo">
  <f:name>African Coffee Table</f:name>
  <f:width>80</f:width>
  <f:length>120</f:length>
</f:table>
</root>
"""

# Document whose DTD declares and references an external parameter entity
# and whose body references &entity;.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""

# Document whose DTD declares &entity; as an external (SYSTEM) entity
# pointing at a file:// URL.
EXTERNAL_ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY entity SYSTEM "file:///non-existing-file.xml">
]>
<document>&entity;</document>
"""

# Document with an XML declaration and a DTD that supplies an ATTLIST
# default for xml:lang on <bar> plus the internal entity &qux;.
ATTLIST_XML = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE Foo [
<!ELEMENT foo (bar*)>
<!ELEMENT bar (#PCDATA)*>
<!ATTLIST bar xml:lang CDATA "eng">
<!ENTITY qux "quux">
]>
<foo>
<bar>&qux;</bar>
</foo>
"""
123
def checkwarnings(*filters, quiet=False):
    """Return a decorator that runs a test under check_warnings().

    The decorated callable is invoked inside
    ``warnings_helper.check_warnings(*filters, quiet=quiet)``.  Its return
    value is discarded, so the wrapper always returns None.
    """
    def decorator(test):
        @functools.wraps(test)
        def newtest(*args, **kwargs):
            with warnings_helper.check_warnings(*filters, quiet=quiet):
                test(*args, **kwargs)
        return newtest
    return decorator
132
def convlinesep(data):
    """Return bytes *data* with each LF byte replaced by os.linesep (encoded)."""
    native_eol = os.linesep.encode()
    return data.replace(b'\n', native_eol)
135
136
137class ModuleTest(unittest.TestCase):
138    def test_sanity(self):
139        # Import sanity.
140
141        from xml.etree import ElementTree
142        from xml.etree import ElementInclude
143        from xml.etree import ElementPath
144
145    def test_all(self):
146        names = ("xml.etree.ElementTree", "_elementtree")
147        support.check__all__(self, ET, names, not_exported=("HTML_EMPTY",))
148
149
def serialize(elem, to_string=True, encoding='unicode', **options):
    """Write *elem* through ET.ElementTree(elem).write() into a memory buffer.

    A StringIO buffer is used when *encoding* is 'unicode', a BytesIO buffer
    otherwise.  Extra keyword *options* are forwarded to write().

    Returns the buffer contents when *to_string* is true; otherwise returns
    the buffer itself, rewound to position 0.
    """
    stream = io.StringIO() if encoding == 'unicode' else io.BytesIO()
    ET.ElementTree(elem).write(stream, encoding=encoding, **options)
    if not to_string:
        stream.seek(0)
        return stream
    return stream.getvalue()
162
def summarize_list(seq):
    """Return a list with the ``tag`` attribute of every item in *seq*, in order."""
    tag_of = operator.attrgetter('tag')
    return list(map(tag_of, seq))
165
166
class ElementTestCase:
    """Mixin providing pickling and element-comparison helpers.

    assertEqualElements() relies on assert* methods, so the mixin is meant to
    be combined with unittest.TestCase.
    """

    @classmethod
    def setUpClass(cls):
        # A set: the two entries collapse into one when ET is pyET.
        cls.modules = {pyET, ET}

    def pickleRoundTrip(self, obj, name, dumper, loader, proto):
        # Pickle obj while sys.modules[name] is `dumper`, then unpickle the
        # result while sys.modules[name] is `loader`.  A PicklingError is
        # re-raised as support.TestFailed with readable module labels.
        try:
            with swap_item(sys.modules, name, dumper):
                pickled = pickle.dumps(obj, proto)
            with swap_item(sys.modules, name, loader):
                return pickle.loads(pickled)
        except pickle.PicklingError as pe:
            # pyET must be second, because pyET may be (equal to) ET.
            labels = {ET: "cET", pyET: "pyET"}
            source_label = labels.get(dumper, dumper)
            target_label = labels.get(loader, loader)
            raise support.TestFailed("Failed to round-trip %r from %r to %r"
                                     % (obj, source_label, target_label)) from pe

    def assertEqualElements(self, alice, bob):
        # Recursively compare two elements: same number of children, equal
        # children pairwise, then equal tag/tail/text/attrib.
        element_types = (ET.Element, pyET.Element)
        self.assertIsInstance(alice, element_types)
        self.assertIsInstance(bob, element_types)
        self.assertEqual(len(list(alice)), len(list(bob)))
        for left, right in zip(alice, bob):
            self.assertEqualElements(left, right)
        fields = operator.attrgetter('tag', 'tail', 'text', 'attrib')
        self.assertEqual(fields(alice), fields(bob))
195
# --------------------------------------------------------------------
# element tree tests
198
199class ElementTreeTest(unittest.TestCase):
200
201    def serialize_check(self, elem, expected):
202        self.assertEqual(serialize(elem), expected)
203
204    def test_interface(self):
205        # Test element tree interface.
206
207        def check_element(element):
208            self.assertTrue(ET.iselement(element), msg="not an element")
209            direlem = dir(element)
210            for attr in 'tag', 'attrib', 'text', 'tail':
211                self.assertTrue(hasattr(element, attr),
212                        msg='no %s member' % attr)
213                self.assertIn(attr, direlem,
214                        msg='no %s visible by dir' % attr)
215
216            self.assertIsInstance(element.tag, str)
217            self.assertIsInstance(element.attrib, dict)
218            if element.text is not None:
219                self.assertIsInstance(element.text, str)
220            if element.tail is not None:
221                self.assertIsInstance(element.tail, str)
222            for elem in element:
223                check_element(elem)
224
225        element = ET.Element("tag")
226        check_element(element)
227        tree = ET.ElementTree(element)
228        check_element(tree.getroot())
229        element = ET.Element("t\xe4g", key="value")
230        tree = ET.ElementTree(element)
231        self.assertRegex(repr(element), r"^<Element 't\xe4g' at 0x.*>$")
232        element = ET.Element("tag", key="value")
233
234        # Make sure all standard element methods exist.
235
236        def check_method(method):
237            self.assertTrue(hasattr(method, '__call__'),
238                    msg="%s not callable" % method)
239
240        check_method(element.append)
241        check_method(element.extend)
242        check_method(element.insert)
243        check_method(element.remove)
244        check_method(element.find)
245        check_method(element.iterfind)
246        check_method(element.findall)
247        check_method(element.findtext)
248        check_method(element.clear)
249        check_method(element.get)
250        check_method(element.set)
251        check_method(element.keys)
252        check_method(element.items)
253        check_method(element.iter)
254        check_method(element.itertext)
255
256        # These methods return an iterable. See bug 6472.
257
258        def check_iter(it):
259            check_method(it.__next__)
260
261        check_iter(element.iterfind("tag"))
262        check_iter(element.iterfind("*"))
263        check_iter(tree.iterfind("tag"))
264        check_iter(tree.iterfind("*"))
265
266        # These aliases are provided:
267
268        self.assertEqual(ET.XML, ET.fromstring)
269        self.assertEqual(ET.PI, ET.ProcessingInstruction)
270
271    def test_set_attribute(self):
272        element = ET.Element('tag')
273
274        self.assertEqual(element.tag, 'tag')
275        element.tag = 'Tag'
276        self.assertEqual(element.tag, 'Tag')
277        element.tag = 'TAG'
278        self.assertEqual(element.tag, 'TAG')
279
280        self.assertIsNone(element.text)
281        element.text = 'Text'
282        self.assertEqual(element.text, 'Text')
283        element.text = 'TEXT'
284        self.assertEqual(element.text, 'TEXT')
285
286        self.assertIsNone(element.tail)
287        element.tail = 'Tail'
288        self.assertEqual(element.tail, 'Tail')
289        element.tail = 'TAIL'
290        self.assertEqual(element.tail, 'TAIL')
291
292        self.assertEqual(element.attrib, {})
293        element.attrib = {'a': 'b', 'c': 'd'}
294        self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'})
295        element.attrib = {'A': 'B', 'C': 'D'}
296        self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'})
297
298    def test_simpleops(self):
299        # Basic method sanity checks.
300
301        elem = ET.XML("<body><tag/></body>")
302        self.serialize_check(elem, '<body><tag /></body>')
303        e = ET.Element("tag2")
304        elem.append(e)
305        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
306        elem.remove(e)
307        self.serialize_check(elem, '<body><tag /></body>')
308        elem.insert(0, e)
309        self.serialize_check(elem, '<body><tag2 /><tag /></body>')
310        elem.remove(e)
311        elem.extend([e])
312        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
313        elem.remove(e)
314        elem.extend(iter([e]))
315        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
316        elem.remove(e)
317
318        element = ET.Element("tag", key="value")
319        self.serialize_check(element, '<tag key="value" />') # 1
320        subelement = ET.Element("subtag")
321        element.append(subelement)
322        self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2
323        element.insert(0, subelement)
324        self.serialize_check(element,
325                '<tag key="value"><subtag /><subtag /></tag>') # 3
326        element.remove(subelement)
327        self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4
328        element.remove(subelement)
329        self.serialize_check(element, '<tag key="value" />') # 5
330        with self.assertRaises(ValueError) as cm:
331            element.remove(subelement)
332        self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
333        self.serialize_check(element, '<tag key="value" />') # 6
334        element[0:0] = [subelement, subelement, subelement]
335        self.serialize_check(element[1], '<subtag />')
336        self.assertEqual(element[1:9], [element[1], element[2]])
337        self.assertEqual(element[:9:2], [element[0], element[2]])
338        del element[1:2]
339        self.serialize_check(element,
340                '<tag key="value"><subtag /><subtag /></tag>')
341
342    def test_cdata(self):
343        # Test CDATA handling (etc).
344
345        self.serialize_check(ET.XML("<tag>hello</tag>"),
346                '<tag>hello</tag>')
347        self.serialize_check(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"),
348                '<tag>hello</tag>')
349        self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"),
350                '<tag>hello</tag>')
351
352    def test_file_init(self):
353        stringfile = io.BytesIO(SAMPLE_XML.encode("utf-8"))
354        tree = ET.ElementTree(file=stringfile)
355        self.assertEqual(tree.find("tag").tag, 'tag')
356        self.assertEqual(tree.find("section/tag").tag, 'tag')
357
358        tree = ET.ElementTree(file=SIMPLE_XMLFILE)
359        self.assertEqual(tree.find("element").tag, 'element')
360        self.assertEqual(tree.find("element/../empty-element").tag,
361                'empty-element')
362
363    def test_path_cache(self):
364        # Check that the path cache behaves sanely.
365
366        from xml.etree import ElementPath
367
368        elem = ET.XML(SAMPLE_XML)
369        ElementPath._cache.clear()
370        for i in range(10): ET.ElementTree(elem).find('./'+str(i))
371        cache_len_10 = len(ElementPath._cache)
372        for i in range(10): ET.ElementTree(elem).find('./'+str(i))
373        self.assertEqual(len(ElementPath._cache), cache_len_10)
374        for i in range(20): ET.ElementTree(elem).find('./'+str(i))
375        self.assertGreater(len(ElementPath._cache), cache_len_10)
376        for i in range(600): ET.ElementTree(elem).find('./'+str(i))
377        self.assertLess(len(ElementPath._cache), 500)
378
379    def test_copy(self):
380        # Test copy handling (etc).
381
382        import copy
383        e1 = ET.XML("<tag>hello<foo/></tag>")
384        e2 = copy.copy(e1)
385        e3 = copy.deepcopy(e1)
386        e1.find("foo").tag = "bar"
387        self.serialize_check(e1, '<tag>hello<bar /></tag>')
388        self.serialize_check(e2, '<tag>hello<bar /></tag>')
389        self.serialize_check(e3, '<tag>hello<foo /></tag>')
390
391    def test_attrib(self):
392        # Test attribute handling.
393
394        elem = ET.Element("tag")
395        elem.get("key") # 1.1
396        self.assertEqual(elem.get("key", "default"), 'default') # 1.2
397
398        elem.set("key", "value")
399        self.assertEqual(elem.get("key"), 'value') # 1.3
400
401        elem = ET.Element("tag", key="value")
402        self.assertEqual(elem.get("key"), 'value') # 2.1
403        self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2
404
405        attrib = {"key": "value"}
406        elem = ET.Element("tag", attrib)
407        attrib.clear() # check for aliasing issues
408        self.assertEqual(elem.get("key"), 'value') # 3.1
409        self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2
410
411        attrib = {"key": "value"}
412        elem = ET.Element("tag", **attrib)
413        attrib.clear() # check for aliasing issues
414        self.assertEqual(elem.get("key"), 'value') # 4.1
415        self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2
416
417        elem = ET.Element("tag", {"key": "other"}, key="value")
418        self.assertEqual(elem.get("key"), 'value') # 5.1
419        self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2
420
421        elem = ET.Element('test')
422        elem.text = "aa"
423        elem.set('testa', 'testval')
424        elem.set('testb', 'test2')
425        self.assertEqual(ET.tostring(elem),
426                b'<test testa="testval" testb="test2">aa</test>')
427        self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
428        self.assertEqual(sorted(elem.items()),
429                [('testa', 'testval'), ('testb', 'test2')])
430        self.assertEqual(elem.attrib['testb'], 'test2')
431        elem.attrib['testb'] = 'test1'
432        elem.attrib['testc'] = 'test2'
433        self.assertEqual(ET.tostring(elem),
434                b'<test testa="testval" testb="test1" testc="test2">aa</test>')
435
436        # Test preserving white space chars in attributes
437        elem = ET.Element('test')
438        elem.set('a', '\r')
439        elem.set('b', '\r\n')
440        elem.set('c', '\t\n\r ')
441        elem.set('d', '\n\n\r\r\t\t  ')
442        self.assertEqual(ET.tostring(elem),
443                b'<test a="&#13;" b="&#13;&#10;" c="&#09;&#10;&#13; " d="&#10;&#10;&#13;&#13;&#09;&#09;  " />')
444
445    def test_makeelement(self):
446        # Test makeelement handling.
447
448        elem = ET.Element("tag")
449        attrib = {"key": "value"}
450        subelem = elem.makeelement("subtag", attrib)
451        self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing")
452        elem.append(subelem)
453        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
454
455        elem.clear()
456        self.serialize_check(elem, '<tag />')
457        elem.append(subelem)
458        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
459        elem.extend([subelem, subelem])
460        self.serialize_check(elem,
461            '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>')
462        elem[:] = [subelem]
463        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
464        elem[:] = tuple([subelem])
465        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
466
467    def test_parsefile(self):
468        # Test parsing from file.
469
470        tree = ET.parse(SIMPLE_XMLFILE)
471        stream = io.StringIO()
472        tree.write(stream, encoding='unicode')
473        self.assertEqual(stream.getvalue(),
474                '<root>\n'
475                '   <element key="value">text</element>\n'
476                '   <element>text</element>tail\n'
477                '   <empty-element />\n'
478                '</root>')
479        tree = ET.parse(SIMPLE_NS_XMLFILE)
480        stream = io.StringIO()
481        tree.write(stream, encoding='unicode')
482        self.assertEqual(stream.getvalue(),
483                '<ns0:root xmlns:ns0="namespace">\n'
484                '   <ns0:element key="value">text</ns0:element>\n'
485                '   <ns0:element>text</ns0:element>tail\n'
486                '   <ns0:empty-element />\n'
487                '</ns0:root>')
488
489        with open(SIMPLE_XMLFILE) as f:
490            data = f.read()
491
492        parser = ET.XMLParser()
493        self.assertRegex(parser.version, r'^Expat ')
494        parser.feed(data)
495        self.serialize_check(parser.close(),
496                '<root>\n'
497                '   <element key="value">text</element>\n'
498                '   <element>text</element>tail\n'
499                '   <empty-element />\n'
500                '</root>')
501
502        target = ET.TreeBuilder()
503        parser = ET.XMLParser(target=target)
504        parser.feed(data)
505        self.serialize_check(parser.close(),
506                '<root>\n'
507                '   <element key="value">text</element>\n'
508                '   <element>text</element>tail\n'
509                '   <empty-element />\n'
510                '</root>')
511
512    def test_parseliteral(self):
513        element = ET.XML("<html><body>text</body></html>")
514        self.assertEqual(ET.tostring(element, encoding='unicode'),
515                '<html><body>text</body></html>')
516        element = ET.fromstring("<html><body>text</body></html>")
517        self.assertEqual(ET.tostring(element, encoding='unicode'),
518                '<html><body>text</body></html>')
519        sequence = ["<html><body>", "text</bo", "dy></html>"]
520        element = ET.fromstringlist(sequence)
521        self.assertEqual(ET.tostring(element),
522                b'<html><body>text</body></html>')
523        self.assertEqual(b"".join(ET.tostringlist(element)),
524                b'<html><body>text</body></html>')
525        self.assertEqual(ET.tostring(element, "ascii"),
526                b"<?xml version='1.0' encoding='ascii'?>\n"
527                b"<html><body>text</body></html>")
528        _, ids = ET.XMLID("<html><body>text</body></html>")
529        self.assertEqual(len(ids), 0)
530        _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
531        self.assertEqual(len(ids), 1)
532        self.assertEqual(ids["body"].tag, 'body')
533
534    def test_iterparse(self):
535        # Test iterparse interface.
536
537        iterparse = ET.iterparse
538
539        context = iterparse(SIMPLE_XMLFILE)
540        self.assertIsNone(context.root)
541        action, elem = next(context)
542        self.assertIsNone(context.root)
543        self.assertEqual((action, elem.tag), ('end', 'element'))
544        self.assertEqual([(action, elem.tag) for action, elem in context], [
545                ('end', 'element'),
546                ('end', 'empty-element'),
547                ('end', 'root'),
548            ])
549        self.assertEqual(context.root.tag, 'root')
550
551        context = iterparse(SIMPLE_NS_XMLFILE)
552        self.assertEqual([(action, elem.tag) for action, elem in context], [
553                ('end', '{namespace}element'),
554                ('end', '{namespace}element'),
555                ('end', '{namespace}empty-element'),
556                ('end', '{namespace}root'),
557            ])
558
559        with open(SIMPLE_XMLFILE, 'rb') as source:
560            context = iterparse(source)
561            action, elem = next(context)
562            self.assertEqual((action, elem.tag), ('end', 'element'))
563            self.assertEqual([(action, elem.tag) for action, elem in context], [
564                    ('end', 'element'),
565                    ('end', 'empty-element'),
566                    ('end', 'root'),
567                ])
568            self.assertEqual(context.root.tag, 'root')
569
570        events = ()
571        context = iterparse(SIMPLE_XMLFILE, events)
572        self.assertEqual([(action, elem.tag) for action, elem in context], [])
573
574        events = ()
575        context = iterparse(SIMPLE_XMLFILE, events=events)
576        self.assertEqual([(action, elem.tag) for action, elem in context], [])
577
578        events = ("start", "end")
579        context = iterparse(SIMPLE_XMLFILE, events)
580        self.assertEqual([(action, elem.tag) for action, elem in context], [
581                ('start', 'root'),
582                ('start', 'element'),
583                ('end', 'element'),
584                ('start', 'element'),
585                ('end', 'element'),
586                ('start', 'empty-element'),
587                ('end', 'empty-element'),
588                ('end', 'root'),
589            ])
590
591        events = ("start", "end", "start-ns", "end-ns")
592        context = iterparse(SIMPLE_NS_XMLFILE, events)
593        self.assertEqual([(action, elem.tag) if action in ("start", "end")
594                                             else (action, elem)
595                          for action, elem in context], [
596                ('start-ns', ('', 'namespace')),
597                ('start', '{namespace}root'),
598                ('start', '{namespace}element'),
599                ('end', '{namespace}element'),
600                ('start', '{namespace}element'),
601                ('end', '{namespace}element'),
602                ('start', '{namespace}empty-element'),
603                ('end', '{namespace}empty-element'),
604                ('end', '{namespace}root'),
605                ('end-ns', None),
606            ])
607
608        events = ('start-ns', 'end-ns')
609        context = iterparse(io.StringIO(r"<root xmlns=''/>"), events)
610        res = [action for action, elem in context]
611        self.assertEqual(res, ['start-ns', 'end-ns'])
612
613        events = ("start", "end", "bogus")
614        with open(SIMPLE_XMLFILE, "rb") as f:
615            with self.assertRaises(ValueError) as cm:
616                iterparse(f, events)
617            self.assertFalse(f.closed)
618        self.assertEqual(str(cm.exception), "unknown event 'bogus'")
619
620        with warnings_helper.check_no_resource_warning(self):
621            with self.assertRaises(ValueError) as cm:
622                iterparse(SIMPLE_XMLFILE, events)
623            self.assertEqual(str(cm.exception), "unknown event 'bogus'")
624            del cm
625
626        source = io.BytesIO(
627            b"<?xml version='1.0' encoding='iso-8859-1'?>\n"
628            b"<body xmlns='http://&#233;ffbot.org/ns'\n"
629            b"      xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
630        events = ("start-ns",)
631        context = iterparse(source, events)
632        self.assertEqual([(action, elem) for action, elem in context], [
633                ('start-ns', ('', 'http://\xe9ffbot.org/ns')),
634                ('start-ns', ('cl\xe9', 'http://effbot.org/ns')),
635            ])
636
637        source = io.StringIO("<document />junk")
638        it = iterparse(source)
639        action, elem = next(it)
640        self.assertEqual((action, elem.tag), ('end', 'document'))
641        with self.assertRaises(ET.ParseError) as cm:
642            next(it)
643        self.assertEqual(str(cm.exception),
644                'junk after document element: line 1, column 12')
645
646        self.addCleanup(os_helper.unlink, TESTFN)
647        with open(TESTFN, "wb") as f:
648            f.write(b"<document />junk")
649        it = iterparse(TESTFN)
650        action, elem = next(it)
651        self.assertEqual((action, elem.tag), ('end', 'document'))
652        with warnings_helper.check_no_resource_warning(self):
653            with self.assertRaises(ET.ParseError) as cm:
654                next(it)
655            self.assertEqual(str(cm.exception),
656                    'junk after document element: line 1, column 12')
657            del cm, it
658
659        # Not exhausting the iterator still closes the resource (bpo-43292)
660        with warnings_helper.check_no_resource_warning(self):
661            it = iterparse(SIMPLE_XMLFILE)
662            del it
663
664        with warnings_helper.check_no_resource_warning(self):
665            it = iterparse(SIMPLE_XMLFILE)
666            it.close()
667            del it
668
669        with warnings_helper.check_no_resource_warning(self):
670            it = iterparse(SIMPLE_XMLFILE)
671            action, elem = next(it)
672            self.assertEqual((action, elem.tag), ('end', 'element'))
673            del it, elem
674
675        with warnings_helper.check_no_resource_warning(self):
676            it = iterparse(SIMPLE_XMLFILE)
677            action, elem = next(it)
678            it.close()
679            self.assertEqual((action, elem.tag), ('end', 'element'))
680            del it, elem
681
682        with self.assertRaises(FileNotFoundError):
683            iterparse("nonexistent")
684
685    def test_iterparse_close(self):
686        iterparse = ET.iterparse
687
688        it = iterparse(SIMPLE_XMLFILE)
689        it.close()
690        with self.assertRaises(StopIteration):
691            next(it)
692        it.close()  # idempotent
693
694        with open(SIMPLE_XMLFILE, 'rb') as source:
695            it = iterparse(source)
696            it.close()
697            self.assertFalse(source.closed)
698            with self.assertRaises(StopIteration):
699                next(it)
700            it.close()  # idempotent
701
702        it = iterparse(SIMPLE_XMLFILE)
703        action, elem = next(it)
704        self.assertEqual((action, elem.tag), ('end', 'element'))
705        it.close()
706        with self.assertRaises(StopIteration):
707            next(it)
708        it.close()  # idempotent
709
710        with open(SIMPLE_XMLFILE, 'rb') as source:
711            it = iterparse(source)
712            action, elem = next(it)
713            self.assertEqual((action, elem.tag), ('end', 'element'))
714            it.close()
715            self.assertFalse(source.closed)
716            with self.assertRaises(StopIteration):
717                next(it)
718            it.close()  # idempotent
719
720        it = iterparse(SIMPLE_XMLFILE)
721        list(it)
722        it.close()
723        with self.assertRaises(StopIteration):
724            next(it)
725        it.close()  # idempotent
726
727        with open(SIMPLE_XMLFILE, 'rb') as source:
728            it = iterparse(source)
729            list(it)
730            it.close()
731            self.assertFalse(source.closed)
732            with self.assertRaises(StopIteration):
733                next(it)
734            it.close()  # idempotent
735
736    def test_writefile(self):
737        elem = ET.Element("tag")
738        elem.text = "text"
739        self.serialize_check(elem, '<tag>text</tag>')
740        ET.SubElement(elem, "subtag").text = "subtext"
741        self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>')
742
743        # Test tag suppression
744        elem.tag = None
745        self.serialize_check(elem, 'text<subtag>subtext</subtag>')
746        elem.insert(0, ET.Comment("comment"))
747        self.serialize_check(elem,
748                'text<!--comment--><subtag>subtext</subtag>')     # assumes 1.3
749
750        elem[0] = ET.PI("key", "value")
751        self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>')
752
753    def test_custom_builder(self):
754        # Test parser w. custom builder.
755
756        with open(SIMPLE_XMLFILE) as f:
757            data = f.read()
758        class Builder(list):
759            def start(self, tag, attrib):
760                self.append(("start", tag))
761            def end(self, tag):
762                self.append(("end", tag))
763            def data(self, text):
764                pass
765        builder = Builder()
766        parser = ET.XMLParser(target=builder)
767        parser.feed(data)
768        self.assertEqual(builder, [
769                ('start', 'root'),
770                ('start', 'element'),
771                ('end', 'element'),
772                ('start', 'element'),
773                ('end', 'element'),
774                ('start', 'empty-element'),
775                ('end', 'empty-element'),
776                ('end', 'root'),
777            ])
778
779        with open(SIMPLE_NS_XMLFILE) as f:
780            data = f.read()
781        class Builder(list):
782            def start(self, tag, attrib):
783                self.append(("start", tag))
784            def end(self, tag):
785                self.append(("end", tag))
786            def data(self, text):
787                pass
788            def pi(self, target, data):
789                self.append(("pi", target, data))
790            def comment(self, data):
791                self.append(("comment", data))
792            def start_ns(self, prefix, uri):
793                self.append(("start-ns", prefix, uri))
794            def end_ns(self, prefix):
795                self.append(("end-ns", prefix))
796        builder = Builder()
797        parser = ET.XMLParser(target=builder)
798        parser.feed(data)
799        self.assertEqual(builder, [
800                ('pi', 'pi', 'data'),
801                ('comment', ' comment '),
802                ('start-ns', '', 'namespace'),
803                ('start', '{namespace}root'),
804                ('start', '{namespace}element'),
805                ('end', '{namespace}element'),
806                ('start', '{namespace}element'),
807                ('end', '{namespace}element'),
808                ('start', '{namespace}empty-element'),
809                ('end', '{namespace}empty-element'),
810                ('end', '{namespace}root'),
811                ('end-ns', ''),
812            ])
813
814    def test_custom_builder_only_end_ns(self):
815        class Builder(list):
816            def end_ns(self, prefix):
817                self.append(("end-ns", prefix))
818
819        builder = Builder()
820        parser = ET.XMLParser(target=builder)
821        parser.feed(textwrap.dedent("""\
822            <?pi data?>
823            <!-- comment -->
824            <root xmlns='namespace' xmlns:p='pns' xmlns:a='ans'>
825               <a:element key='value'>text</a:element>
826               <p:element>text</p:element>tail
827               <empty-element/>
828            </root>
829            """))
830        self.assertEqual(builder, [
831                ('end-ns', 'a'),
832                ('end-ns', 'p'),
833                ('end-ns', ''),
834            ])
835
836    def test_initialize_parser_without_target(self):
837        # Explicit None
838        parser = ET.XMLParser(target=None)
839        self.assertIsInstance(parser.target, ET.TreeBuilder)
840
841        # Implicit None
842        parser2 = ET.XMLParser()
843        self.assertIsInstance(parser2.target, ET.TreeBuilder)
844
845    def test_children(self):
846        # Test Element children iteration
847
848        with open(SIMPLE_XMLFILE, "rb") as f:
849            tree = ET.parse(f)
850        self.assertEqual([summarize_list(elem)
851                          for elem in tree.getroot().iter()], [
852                ['element', 'element', 'empty-element'],
853                [],
854                [],
855                [],
856            ])
857        self.assertEqual([summarize_list(elem)
858                          for elem in tree.iter()], [
859                ['element', 'element', 'empty-element'],
860                [],
861                [],
862                [],
863            ])
864
865        elem = ET.XML(SAMPLE_XML)
866        self.assertEqual(len(list(elem)), 3)
867        self.assertEqual(len(list(elem[2])), 1)
868        self.assertEqual(elem[:], list(elem))
869        child1 = elem[0]
870        child2 = elem[2]
871        del elem[1:2]
872        self.assertEqual(len(list(elem)), 2)
873        self.assertEqual(child1, elem[0])
874        self.assertEqual(child2, elem[1])
875        elem[0:2] = [child2, child1]
876        self.assertEqual(child2, elem[0])
877        self.assertEqual(child1, elem[1])
878        self.assertNotEqual(child1, elem[0])
879        elem.clear()
880        self.assertEqual(list(elem), [])
881
882    def test_writestring(self):
883        elem = ET.XML("<html><body>text</body></html>")
884        self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
885        elem = ET.fromstring("<html><body>text</body></html>")
886        self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
887
888    def test_indent(self):
889        elem = ET.XML("<root></root>")
890        ET.indent(elem)
891        self.assertEqual(ET.tostring(elem), b'<root />')
892
893        elem = ET.XML("<html><body>text</body></html>")
894        ET.indent(elem)
895        self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')
896
897        elem = ET.XML("<html> <body>text</body>  </html>")
898        ET.indent(elem)
899        self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')
900
901        elem = ET.XML("<html><body>text</body>tail</html>")
902        ET.indent(elem)
903        self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>tail</html>')
904
905        elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
906        ET.indent(elem)
907        self.assertEqual(
908            ET.tostring(elem),
909            b'<html>\n'
910            b'  <body>\n'
911            b'    <p>par</p>\n'
912            b'    <p>text</p>\n'
913            b'    <p>\n'
914            b'      <br />\n'
915            b'    </p>\n'
916            b'  </body>\n'
917            b'</html>'
918        )
919
920        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
921        ET.indent(elem)
922        self.assertEqual(
923            ET.tostring(elem),
924            b'<html>\n'
925            b'  <body>\n'
926            b'    <p>pre<br />post</p>\n'
927            b'    <p>text</p>\n'
928            b'  </body>\n'
929            b'</html>'
930        )
931
932    def test_indent_space(self):
933        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
934        ET.indent(elem, space='\t')
935        self.assertEqual(
936            ET.tostring(elem),
937            b'<html>\n'
938            b'\t<body>\n'
939            b'\t\t<p>pre<br />post</p>\n'
940            b'\t\t<p>text</p>\n'
941            b'\t</body>\n'
942            b'</html>'
943        )
944
945        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
946        ET.indent(elem, space='')
947        self.assertEqual(
948            ET.tostring(elem),
949            b'<html>\n'
950            b'<body>\n'
951            b'<p>pre<br />post</p>\n'
952            b'<p>text</p>\n'
953            b'</body>\n'
954            b'</html>'
955        )
956
957    def test_indent_space_caching(self):
958        elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
959        ET.indent(elem)
960        self.assertEqual(
961            {el.tail for el in elem.iter()},
962            {None, "\n", "\n  ", "\n    "}
963        )
964        self.assertEqual(
965            {el.text for el in elem.iter()},
966            {None, "\n  ", "\n    ", "\n      ", "par", "text"}
967        )
968        self.assertEqual(
969            len({el.tail for el in elem.iter()}),
970            len({id(el.tail) for el in elem.iter()}),
971        )
972
973    def test_indent_level(self):
974        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
975        with self.assertRaises(ValueError):
976            ET.indent(elem, level=-1)
977        self.assertEqual(
978            ET.tostring(elem),
979            b"<html><body><p>pre<br />post</p><p>text</p></body></html>"
980        )
981
982        ET.indent(elem, level=2)
983        self.assertEqual(
984            ET.tostring(elem),
985            b'<html>\n'
986            b'      <body>\n'
987            b'        <p>pre<br />post</p>\n'
988            b'        <p>text</p>\n'
989            b'      </body>\n'
990            b'    </html>'
991        )
992
993        elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
994        ET.indent(elem, level=1, space=' ')
995        self.assertEqual(
996            ET.tostring(elem),
997            b'<html>\n'
998            b'  <body>\n'
999            b'   <p>pre<br />post</p>\n'
1000            b'   <p>text</p>\n'
1001            b'  </body>\n'
1002            b' </html>'
1003        )
1004
1005    def test_tostring_default_namespace(self):
1006        elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
1007        self.assertEqual(
1008            ET.tostring(elem, encoding='unicode'),
1009            '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
1010        )
1011        self.assertEqual(
1012            ET.tostring(elem, encoding='unicode', default_namespace='http://effbot.org/ns'),
1013            '<body xmlns="http://effbot.org/ns"><tag /></body>'
1014        )
1015
1016    def test_tostring_default_namespace_different_namespace(self):
1017        elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
1018        self.assertEqual(
1019            ET.tostring(elem, encoding='unicode', default_namespace='foobar'),
1020            '<ns1:body xmlns="foobar" xmlns:ns1="http://effbot.org/ns"><ns1:tag /></ns1:body>'
1021        )
1022
1023    def test_tostring_default_namespace_original_no_namespace(self):
1024        elem = ET.XML('<body><tag/></body>')
1025        EXPECTED_MSG = '^cannot use non-qualified names with default_namespace option$'
1026        with self.assertRaisesRegex(ValueError, EXPECTED_MSG):
1027            ET.tostring(elem, encoding='unicode', default_namespace='foobar')
1028
1029    def test_tostring_no_xml_declaration(self):
1030        elem = ET.XML('<body><tag/></body>')
1031        self.assertEqual(
1032            ET.tostring(elem, encoding='unicode'),
1033            '<body><tag /></body>'
1034        )
1035
1036    def test_tostring_xml_declaration(self):
1037        elem = ET.XML('<body><tag/></body>')
1038        self.assertEqual(
1039            ET.tostring(elem, encoding='utf8', xml_declaration=True),
1040            b"<?xml version='1.0' encoding='utf8'?>\n<body><tag /></body>"
1041        )
1042
1043    def test_tostring_xml_declaration_unicode_encoding(self):
1044        elem = ET.XML('<body><tag/></body>')
1045        self.assertEqual(
1046            ET.tostring(elem, encoding='unicode', xml_declaration=True),
1047            "<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>"
1048        )
1049
1050    def test_tostring_xml_declaration_cases(self):
1051        elem = ET.XML('<body><tag>ø</tag></body>')
1052        TESTCASES = [
1053        #   (expected_retval,                  encoding, xml_declaration)
1054            # ... xml_declaration = None
1055            (b'<body><tag>&#248;</tag></body>', None, None),
1056            (b'<body><tag>\xc3\xb8</tag></body>', 'UTF-8', None),
1057            (b'<body><tag>&#248;</tag></body>', 'US-ASCII', None),
1058            (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
1059             b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', None),
1060            ('<body><tag>ø</tag></body>', 'unicode', None),
1061
1062            # ... xml_declaration = False
1063            (b"<body><tag>&#248;</tag></body>", None, False),
1064            (b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', False),
1065            (b"<body><tag>&#248;</tag></body>", 'US-ASCII', False),
1066            (b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', False),
1067            ("<body><tag>ø</tag></body>", 'unicode', False),
1068
1069            # ... xml_declaration = True
1070            (b"<?xml version='1.0' encoding='us-ascii'?>\n"
1071             b"<body><tag>&#248;</tag></body>", None, True),
1072            (b"<?xml version='1.0' encoding='UTF-8'?>\n"
1073             b"<body><tag>\xc3\xb8</tag></body>", 'UTF-8', True),
1074            (b"<?xml version='1.0' encoding='US-ASCII'?>\n"
1075             b"<body><tag>&#248;</tag></body>", 'US-ASCII', True),
1076            (b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
1077             b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', True),
1078            ("<?xml version='1.0' encoding='utf-8'?>\n"
1079             "<body><tag>ø</tag></body>", 'unicode', True),
1080
1081        ]
1082        for expected_retval, encoding, xml_declaration in TESTCASES:
1083            with self.subTest(f'encoding={encoding} '
1084                              f'xml_declaration={xml_declaration}'):
1085                self.assertEqual(
1086                    ET.tostring(
1087                        elem,
1088                        encoding=encoding,
1089                        xml_declaration=xml_declaration
1090                    ),
1091                    expected_retval
1092                )
1093
1094    def test_tostringlist_default_namespace(self):
1095        elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
1096        self.assertEqual(
1097            ''.join(ET.tostringlist(elem, encoding='unicode')),
1098            '<ns0:body xmlns:ns0="http://effbot.org/ns"><ns0:tag /></ns0:body>'
1099        )
1100        self.assertEqual(
1101            ''.join(ET.tostringlist(elem, encoding='unicode', default_namespace='http://effbot.org/ns')),
1102            '<body xmlns="http://effbot.org/ns"><tag /></body>'
1103        )
1104
1105    def test_tostringlist_xml_declaration(self):
1106        elem = ET.XML('<body><tag/></body>')
1107        self.assertEqual(
1108            ''.join(ET.tostringlist(elem, encoding='unicode')),
1109            '<body><tag /></body>'
1110        )
1111        self.assertEqual(
1112            b''.join(ET.tostringlist(elem, xml_declaration=True)),
1113            b"<?xml version='1.0' encoding='us-ascii'?>\n<body><tag /></body>"
1114        )
1115
1116        stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True)
1117        self.assertEqual(
1118            ''.join(stringlist),
1119            "<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>"
1120        )
1121        self.assertRegex(stringlist[0], r"^<\?xml version='1.0' encoding='.+'?>")
1122        self.assertEqual(['<body', '>', '<tag', ' />', '</body>'], stringlist[1:])
1123
1124    def test_encoding(self):
1125        def check(encoding, body=''):
1126            xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" %
1127                   (encoding, body))
1128            self.assertEqual(ET.XML(xml.encode(encoding)).text, body)
1129            self.assertEqual(ET.XML(xml).text, body)
1130        check("ascii", 'a')
1131        check("us-ascii", 'a')
1132        check("iso-8859-1", '\xbd')
1133        check("iso-8859-15", '\u20ac')
1134        check("cp437", '\u221a')
1135        check("mac-roman", '\u02da')
1136
1137        def xml(encoding):
1138            return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
1139        def bxml(encoding):
1140            return xml(encoding).encode(encoding)
1141        supported_encodings = [
1142            'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
1143            'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
1144            'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
1145            'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
1146            'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
1147            'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
1148            'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006', 'cp1125',
1149            'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
1150            'cp1256', 'cp1257', 'cp1258',
1151            'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
1152            'mac-roman', 'mac-turkish',
1153            'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
1154            'iso2022-jp-3', 'iso2022-jp-ext',
1155            'koi8-r', 'koi8-t', 'koi8-u', 'kz1048',
1156            'hz', 'ptcp154',
1157        ]
1158        for encoding in supported_encodings:
1159            self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
1160
1161        unsupported_ascii_compatible_encodings = [
1162            'big5', 'big5hkscs',
1163            'cp932', 'cp949', 'cp950',
1164            'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
1165            'gb2312', 'gbk', 'gb18030',
1166            'iso2022-kr', 'johab',
1167            'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
1168            'utf-7',
1169        ]
1170        for encoding in unsupported_ascii_compatible_encodings:
1171            self.assertRaises(ValueError, ET.XML, bxml(encoding))
1172
1173        unsupported_ascii_incompatible_encodings = [
1174            'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
1175            'utf_32', 'utf_32_be', 'utf_32_le',
1176        ]
1177        for encoding in unsupported_ascii_incompatible_encodings:
1178            self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
1179
1180        self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
1181        self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
1182
1183    def test_methods(self):
1184        # Test serialization methods.
1185
1186        e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
1187        e.tail = "\n"
1188        self.assertEqual(serialize(e),
1189                '<html><link /><script>1 &lt; 2</script></html>\n')
1190        self.assertEqual(serialize(e, method=None),
1191                '<html><link /><script>1 &lt; 2</script></html>\n')
1192        self.assertEqual(serialize(e, method="xml"),
1193                '<html><link /><script>1 &lt; 2</script></html>\n')
1194        self.assertEqual(serialize(e, method="html"),
1195                '<html><link><script>1 < 2</script></html>\n')
1196        self.assertEqual(serialize(e, method="text"), '1 < 2\n')
1197
1198    def test_issue18347(self):
1199        e = ET.XML('<html><CamelCase>text</CamelCase></html>')
1200        self.assertEqual(serialize(e),
1201                '<html><CamelCase>text</CamelCase></html>')
1202        self.assertEqual(serialize(e, method="html"),
1203                '<html><CamelCase>text</CamelCase></html>')
1204
1205    def test_entity(self):
1206        # Test entity handling.
1207
1208        # 1) good entities
1209
1210        e = ET.XML("<document title='&#x8230;'>test</document>")
1211        self.assertEqual(serialize(e, encoding="us-ascii"),
1212                b'<document title="&#33328;">test</document>')
1213        self.serialize_check(e, '<document title="\u8230">test</document>')
1214
1215        # 2) bad entities
1216
1217        with self.assertRaises(ET.ParseError) as cm:
1218            ET.XML("<document>&entity;</document>")
1219        self.assertEqual(str(cm.exception),
1220                'undefined entity: line 1, column 10')
1221
1222        with self.assertRaises(ET.ParseError) as cm:
1223            ET.XML(ENTITY_XML)
1224        self.assertEqual(str(cm.exception),
1225                'undefined entity &entity;: line 5, column 10')
1226
1227        # 3) custom entity
1228
1229        parser = ET.XMLParser()
1230        parser.entity["entity"] = "text"
1231        parser.feed(ENTITY_XML)
1232        root = parser.close()
1233        self.serialize_check(root, '<document>text</document>')
1234
1235        # 4) external (SYSTEM) entity
1236
1237        with self.assertRaises(ET.ParseError) as cm:
1238            ET.XML(EXTERNAL_ENTITY_XML)
1239        self.assertEqual(str(cm.exception),
1240                'undefined entity &entity;: line 4, column 10')
1241
1242    def test_namespace(self):
1243        # Test namespace issues.
1244
1245        # 1) xml namespace
1246
1247        elem = ET.XML("<tag xml:lang='en' />")
1248        self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1
1249
1250        # 2) other "well-known" namespaces
1251
1252        elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
1253        self.serialize_check(elem,
1254            '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1
1255
1256        elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
1257        self.serialize_check(elem,
1258            '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2
1259
1260        elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
1261        self.serialize_check(elem,
1262            '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3
1263
1264        # 3) unknown namespaces
1265        elem = ET.XML(SAMPLE_XML_NS)
1266        self.serialize_check(elem,
1267            '<ns0:body xmlns:ns0="http://effbot.org/ns">\n'
1268            '  <ns0:tag>text</ns0:tag>\n'
1269            '  <ns0:tag />\n'
1270            '  <ns0:section>\n'
1271            '    <ns0:tag>subtext</ns0:tag>\n'
1272            '  </ns0:section>\n'
1273            '</ns0:body>')
1274
1275    def test_qname(self):
1276        # Test QName handling.
1277
1278        # 1) decorated tags
1279
1280        elem = ET.Element("{uri}tag")
1281        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1
1282        elem = ET.Element(ET.QName("{uri}tag"))
1283        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2
1284        elem = ET.Element(ET.QName("uri", "tag"))
1285        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3
1286        elem = ET.Element(ET.QName("uri", "tag"))
1287        subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
1288        subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
1289        self.serialize_check(elem,
1290            '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4
1291
1292        # 2) decorated attributes
1293
1294        elem.clear()
1295        elem.attrib["{uri}key"] = "value"
1296        self.serialize_check(elem,
1297            '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1
1298
1299        elem.clear()
1300        elem.attrib[ET.QName("{uri}key")] = "value"
1301        self.serialize_check(elem,
1302            '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2
1303
1304        # 3) decorated values are not converted by default, but the
1305        # QName wrapper can be used for values
1306
1307        elem.clear()
1308        elem.attrib["{uri}key"] = "{uri}value"
1309        self.serialize_check(elem,
1310            '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1
1311
1312        elem.clear()
1313        elem.attrib["{uri}key"] = ET.QName("{uri}value")
1314        self.serialize_check(elem,
1315            '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2
1316
1317        elem.clear()
1318        subelem = ET.Element("tag")
1319        subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
1320        elem.append(subelem)
1321        elem.append(subelem)
1322        self.serialize_check(elem,
1323            '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">'
1324            '<tag ns1:key="ns2:value" />'
1325            '<tag ns1:key="ns2:value" />'
1326            '</ns0:tag>') # 3.3
1327
1328        # 4) Direct QName tests
1329
1330        self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
1331        self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
1332        q1 = ET.QName('ns', 'tag')
1333        q2 = ET.QName('ns', 'tag')
1334        self.assertEqual(q1, q2)
1335        q2 = ET.QName('ns', 'other-tag')
1336        self.assertNotEqual(q1, q2)
1337        self.assertNotEqual(q1, 'ns:tag')
1338        self.assertEqual(q1, '{ns}tag')
1339
1340    def test_doctype_public(self):
1341        # Test PUBLIC doctype.
1342
1343        elem = ET.XML('<!DOCTYPE html PUBLIC'
1344                ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
1345                ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
1346                '<html>text</html>')
1347
1348    def test_xpath_tokenizer(self):
1349        # Test the XPath tokenizer.
1350        from xml.etree import ElementPath
1351        def check(p, expected, namespaces=None):
1352            self.assertEqual([op or tag
1353                              for op, tag in ElementPath.xpath_tokenizer(p, namespaces)],
1354                             expected)
1355
1356        # tests from the xml specification
1357        check("*", ['*'])
1358        check("text()", ['text', '()'])
1359        check("@name", ['@', 'name'])
1360        check("@*", ['@', '*'])
1361        check("para[1]", ['para', '[', '1', ']'])
1362        check("para[last()]", ['para', '[', 'last', '()', ']'])
1363        check("*/para", ['*', '/', 'para'])
1364        check("/doc/chapter[5]/section[2]",
1365              ['/', 'doc', '/', 'chapter', '[', '5', ']',
1366               '/', 'section', '[', '2', ']'])
1367        check("chapter//para", ['chapter', '//', 'para'])
1368        check("//para", ['//', 'para'])
1369        check("//olist/item", ['//', 'olist', '/', 'item'])
1370        check(".", ['.'])
1371        check(".//para", ['.', '//', 'para'])
1372        check("..", ['..'])
1373        check("../@lang", ['..', '/', '@', 'lang'])
1374        check("chapter[title]", ['chapter', '[', 'title', ']'])
1375        check("employee[@secretary and @assistant]", ['employee',
1376              '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
1377
1378        # additional tests
1379        check("@{ns}attr", ['@', '{ns}attr'])
1380        check("{http://spam}egg", ['{http://spam}egg'])
1381        check("./spam.egg", ['.', '/', 'spam.egg'])
1382        check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
1383
1384        # wildcard tags
1385        check("{ns}*", ['{ns}*'])
1386        check("{}*", ['{}*'])
1387        check("{*}tag", ['{*}tag'])
1388        check("{*}*", ['{*}*'])
1389        check(".//{*}tag", ['.', '//', '{*}tag'])
1390
1391        # namespace prefix resolution
1392        check("./xsd:type", ['.', '/', '{http://www.w3.org/2001/XMLSchema}type'],
1393              {'xsd': 'http://www.w3.org/2001/XMLSchema'})
1394        check("type", ['{http://www.w3.org/2001/XMLSchema}type'],
1395              {'': 'http://www.w3.org/2001/XMLSchema'})
1396        check("@xsd:type", ['@', '{http://www.w3.org/2001/XMLSchema}type'],
1397              {'xsd': 'http://www.w3.org/2001/XMLSchema'})
1398        check("@type", ['@', 'type'],
1399              {'': 'http://www.w3.org/2001/XMLSchema'})
1400        check("@{*}type", ['@', '{*}type'],
1401              {'': 'http://www.w3.org/2001/XMLSchema'})
1402        check("@{ns}attr", ['@', '{ns}attr'],
1403              {'': 'http://www.w3.org/2001/XMLSchema',
1404               'ns': 'http://www.w3.org/2001/XMLSchema'})
1405
1406    def test_processinginstruction(self):
1407        # Test ProcessingInstruction directly
1408
1409        self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
1410                b'<?test instruction?>')
1411        self.assertEqual(ET.tostring(ET.PI('test', 'instruction')),
1412                b'<?test instruction?>')
1413
1414        # Issue #2746
1415
1416        self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
1417                b'<?test <testing&>?>')
1418        self.assertEqual(ET.tostring(ET.PI('test', '<testing&>\xe3'), 'latin-1'),
1419                b"<?xml version='1.0' encoding='latin-1'?>\n"
1420                b"<?test <testing&>\xe3?>")
1421
1422    def test_html_empty_elems_serialization(self):
1423        # issue 15970
1424        # from http://www.w3.org/TR/html401/index/elements.html
1425        for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'EMBED', 'FRAME',
1426                        'HR', 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM',
1427                        'SOURCE', 'TRACK', 'WBR']:
1428            for elem in [element, element.lower()]:
1429                expected = '<%s>' % elem
1430                serialized = serialize(ET.XML('<%s />' % elem), method='html')
1431                self.assertEqual(serialized, expected)
1432                serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)),
1433                                       method='html')
1434                self.assertEqual(serialized, expected)
1435
1436    def test_dump_attribute_order(self):
1437        # See BPO 34160
1438        e = ET.Element('cirriculum', status='public', company='example')
1439        with support.captured_stdout() as stdout:
1440            ET.dump(e)
1441        self.assertEqual(stdout.getvalue(),
1442                         '<cirriculum status="public" company="example" />\n')
1443
1444    def test_tree_write_attribute_order(self):
1445        # See BPO 34160
1446        root = ET.Element('cirriculum', status='public', company='example')
1447        self.assertEqual(serialize(root),
1448                         '<cirriculum status="public" company="example" />')
1449        self.assertEqual(serialize(root, method='html'),
1450                '<cirriculum status="public" company="example"></cirriculum>')
1451
1452    def test_attlist_default(self):
1453        # Test default attribute values; See BPO 42151.
1454        root = ET.fromstring(ATTLIST_XML)
1455        self.assertEqual(root[0].attrib,
1456                         {'{http://www.w3.org/XML/1998/namespace}lang': 'eng'})
1457
1458
1459class XMLPullParserTest(unittest.TestCase):
1460
1461    def _feed(self, parser, data, chunk_size=None, flush=False):
1462        if chunk_size is None:
1463            parser.feed(data)
1464        else:
1465            for i in range(0, len(data), chunk_size):
1466                parser.feed(data[i:i+chunk_size])
1467        if flush:
1468            parser.flush()
1469
1470    def assert_events(self, parser, expected, max_events=None):
1471        self.assertEqual(
1472            [(event, (elem.tag, elem.text))
1473             for event, elem in islice(parser.read_events(), max_events)],
1474            expected)
1475
1476    def assert_event_tuples(self, parser, expected, max_events=None):
1477        self.assertEqual(
1478            list(islice(parser.read_events(), max_events)),
1479            expected)
1480
1481    def assert_event_tags(self, parser, expected, max_events=None):
1482        events = islice(parser.read_events(), max_events)
1483        self.assertEqual([(action, elem.tag) for action, elem in events],
1484                         expected)
1485
1486    def test_simple_xml(self, chunk_size=None, flush=False):
1487        parser = ET.XMLPullParser()
1488        self.assert_event_tags(parser, [])
1489        self._feed(parser, "<!-- comment -->\n", chunk_size, flush)
1490        self.assert_event_tags(parser, [])
1491        self._feed(parser,
1492                   "<root>\n  <element key='value'>text</element",
1493                   chunk_size, flush)
1494        self.assert_event_tags(parser, [])
1495        self._feed(parser, ">\n", chunk_size, flush)
1496        self.assert_event_tags(parser, [('end', 'element')])
1497        self._feed(parser, "<element>text</element>tail\n", chunk_size, flush)
1498        self._feed(parser, "<empty-element/>\n", chunk_size, flush)
1499        self.assert_event_tags(parser, [
1500            ('end', 'element'),
1501            ('end', 'empty-element'),
1502            ])
1503        self._feed(parser, "</root>\n", chunk_size, flush)
1504        self.assert_event_tags(parser, [('end', 'root')])
1505        self.assertIsNone(parser.close())
1506
1507    def test_simple_xml_chunk_1(self):
1508        self.test_simple_xml(chunk_size=1, flush=True)
1509
1510    def test_simple_xml_chunk_5(self):
1511        self.test_simple_xml(chunk_size=5, flush=True)
1512
1513    def test_simple_xml_chunk_22(self):
1514        self.test_simple_xml(chunk_size=22)
1515
1516    def test_feed_while_iterating(self):
1517        parser = ET.XMLPullParser()
1518        it = parser.read_events()
1519        self._feed(parser, "<root>\n  <element key='value'>text</element>\n")
1520        action, elem = next(it)
1521        self.assertEqual((action, elem.tag), ('end', 'element'))
1522        self._feed(parser, "</root>\n")
1523        action, elem = next(it)
1524        self.assertEqual((action, elem.tag), ('end', 'root'))
1525        with self.assertRaises(StopIteration):
1526            next(it)
1527
1528    def test_simple_xml_with_ns(self):
1529        parser = ET.XMLPullParser()
1530        self.assert_event_tags(parser, [])
1531        self._feed(parser, "<!-- comment -->\n")
1532        self.assert_event_tags(parser, [])
1533        self._feed(parser, "<root xmlns='namespace'>\n")
1534        self.assert_event_tags(parser, [])
1535        self._feed(parser, "<element key='value'>text</element")
1536        self.assert_event_tags(parser, [])
1537        self._feed(parser, ">\n")
1538        self.assert_event_tags(parser, [('end', '{namespace}element')])
1539        self._feed(parser, "<element>text</element>tail\n")
1540        self._feed(parser, "<empty-element/>\n")
1541        self.assert_event_tags(parser, [
1542            ('end', '{namespace}element'),
1543            ('end', '{namespace}empty-element'),
1544            ])
1545        self._feed(parser, "</root>\n")
1546        self.assert_event_tags(parser, [('end', '{namespace}root')])
1547        self.assertIsNone(parser.close())
1548
1549    def test_ns_events(self):
1550        parser = ET.XMLPullParser(events=('start-ns', 'end-ns'))
1551        self._feed(parser, "<!-- comment -->\n")
1552        self._feed(parser, "<root xmlns='namespace'>\n")
1553        self.assertEqual(
1554            list(parser.read_events()),
1555            [('start-ns', ('', 'namespace'))])
1556        self._feed(parser, "<element key='value'>text</element")
1557        self._feed(parser, ">\n")
1558        self._feed(parser, "<element>text</element>tail\n")
1559        self._feed(parser, "<empty-element/>\n")
1560        self._feed(parser, "</root>\n")
1561        self.assertEqual(list(parser.read_events()), [('end-ns', None)])
1562        self.assertIsNone(parser.close())
1563
1564    def test_ns_events_start(self):
1565        parser = ET.XMLPullParser(events=('start-ns', 'start', 'end'))
1566        self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
1567        self.assert_event_tuples(parser, [
1568            ('start-ns', ('', 'abc')),
1569            ('start-ns', ('p', 'xyz')),
1570        ], max_events=2)
1571        self.assert_event_tags(parser, [
1572            ('start', '{abc}tag'),
1573        ], max_events=1)
1574
1575        self._feed(parser, "<child />\n")
1576        self.assert_event_tags(parser, [
1577            ('start', '{abc}child'),
1578            ('end', '{abc}child'),
1579        ])
1580
1581        self._feed(parser, "</tag>\n")
1582        parser.close()
1583        self.assert_event_tags(parser, [
1584            ('end', '{abc}tag'),
1585        ])
1586
1587    def test_ns_events_start_end(self):
1588        parser = ET.XMLPullParser(events=('start-ns', 'start', 'end', 'end-ns'))
1589        self._feed(parser, "<tag xmlns='abc' xmlns:p='xyz'>\n")
1590        self.assert_event_tuples(parser, [
1591            ('start-ns', ('', 'abc')),
1592            ('start-ns', ('p', 'xyz')),
1593        ], max_events=2)
1594        self.assert_event_tags(parser, [
1595            ('start', '{abc}tag'),
1596        ], max_events=1)
1597
1598        self._feed(parser, "<child />\n")
1599        self.assert_event_tags(parser, [
1600            ('start', '{abc}child'),
1601            ('end', '{abc}child'),
1602        ])
1603
1604        self._feed(parser, "</tag>\n")
1605        parser.close()
1606        self.assert_event_tags(parser, [
1607            ('end', '{abc}tag'),
1608        ], max_events=1)
1609        self.assert_event_tuples(parser, [
1610            ('end-ns', None),
1611            ('end-ns', None),
1612        ])
1613
1614    def test_events(self):
1615        parser = ET.XMLPullParser(events=())
1616        self._feed(parser, "<root/>\n")
1617        self.assert_event_tags(parser, [])
1618
1619        parser = ET.XMLPullParser(events=('start', 'end'))
1620        self._feed(parser, "<!-- text here -->\n")
1621        self.assert_events(parser, [])
1622
1623        parser = ET.XMLPullParser(events=('start', 'end'))
1624        self._feed(parser, "<root>\n")
1625        self.assert_event_tags(parser, [('start', 'root')])
1626        self._feed(parser, "<element key='value'>text</element")
1627        self.assert_event_tags(parser, [('start', 'element')])
1628        self._feed(parser, ">\n")
1629        self.assert_event_tags(parser, [('end', 'element')])
1630        self._feed(parser,
1631                   "<element xmlns='foo'>text<empty-element/></element>tail\n")
1632        self.assert_event_tags(parser, [
1633            ('start', '{foo}element'),
1634            ('start', '{foo}empty-element'),
1635            ('end', '{foo}empty-element'),
1636            ('end', '{foo}element'),
1637            ])
1638        self._feed(parser, "</root>")
1639        self.assertIsNone(parser.close())
1640        self.assert_event_tags(parser, [('end', 'root')])
1641
1642        parser = ET.XMLPullParser(events=('start',))
1643        self._feed(parser, "<!-- comment -->\n")
1644        self.assert_event_tags(parser, [])
1645        self._feed(parser, "<root>\n")
1646        self.assert_event_tags(parser, [('start', 'root')])
1647        self._feed(parser, "<element key='value'>text</element")
1648        self.assert_event_tags(parser, [('start', 'element')])
1649        self._feed(parser, ">\n")
1650        self.assert_event_tags(parser, [])
1651        self._feed(parser,
1652                   "<element xmlns='foo'>text<empty-element/></element>tail\n")
1653        self.assert_event_tags(parser, [
1654            ('start', '{foo}element'),
1655            ('start', '{foo}empty-element'),
1656            ])
1657        self._feed(parser, "</root>")
1658        self.assertIsNone(parser.close())
1659
1660    def test_events_comment(self):
1661        parser = ET.XMLPullParser(events=('start', 'comment', 'end'))
1662        self._feed(parser, "<!-- text here -->\n")
1663        self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
1664        self._feed(parser, "<!-- more text here -->\n")
1665        self.assert_events(parser, [('comment', (ET.Comment, ' more text here '))])
1666        self._feed(parser, "<root-tag>text")
1667        self.assert_event_tags(parser, [('start', 'root-tag')])
1668        self._feed(parser, "<!-- inner comment-->\n")
1669        self.assert_events(parser, [('comment', (ET.Comment, ' inner comment'))])
1670        self._feed(parser, "</root-tag>\n")
1671        self.assert_event_tags(parser, [('end', 'root-tag')])
1672        self._feed(parser, "<!-- outer comment -->\n")
1673        self.assert_events(parser, [('comment', (ET.Comment, ' outer comment '))])
1674
1675        parser = ET.XMLPullParser(events=('comment',))
1676        self._feed(parser, "<!-- text here -->\n")
1677        self.assert_events(parser, [('comment', (ET.Comment, ' text here '))])
1678
1679    def test_events_pi(self):
1680        parser = ET.XMLPullParser(events=('start', 'pi', 'end'))
1681        self._feed(parser, "<?pitarget?>\n")
1682        self.assert_events(parser, [('pi', (ET.PI, 'pitarget'))])
1683        parser = ET.XMLPullParser(events=('pi',))
1684        self._feed(parser, "<?pitarget some text ?>\n")
1685        self.assert_events(parser, [('pi', (ET.PI, 'pitarget some text '))])
1686
1687    def test_events_sequence(self):
1688        # Test that events can be some sequence that's not just a tuple or list
1689        eventset = {'end', 'start'}
1690        parser = ET.XMLPullParser(events=eventset)
1691        self._feed(parser, "<foo>bar</foo>")
1692        self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
1693
1694        class DummyIter:
1695            def __init__(self):
1696                self.events = iter(['start', 'end', 'start-ns'])
1697            def __iter__(self):
1698                return self
1699            def __next__(self):
1700                return next(self.events)
1701
1702        parser = ET.XMLPullParser(events=DummyIter())
1703        self._feed(parser, "<foo>bar</foo>")
1704        self.assert_event_tags(parser, [('start', 'foo'), ('end', 'foo')])
1705
1706    def test_unknown_event(self):
1707        with self.assertRaises(ValueError):
1708            ET.XMLPullParser(events=('start', 'end', 'bogus'))
1709
1710    @unittest.skipIf(pyexpat.version_info < (2, 6, 0),
1711                     f'Expat {pyexpat.version_info} does not '
1712                     'support reparse deferral')
1713    def test_flush_reparse_deferral_enabled(self):
1714        parser = ET.XMLPullParser(events=('start', 'end'))
1715
1716        for chunk in ("<doc", ">"):
1717            parser.feed(chunk)
1718
1719        self.assert_event_tags(parser, [])  # i.e. no elements started
1720        if ET is pyET:
1721            self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled())
1722
1723        parser.flush()
1724
1725        self.assert_event_tags(parser, [('start', 'doc')])
1726        if ET is pyET:
1727            self.assertTrue(parser._parser._parser.GetReparseDeferralEnabled())
1728
1729        parser.feed("</doc>")
1730        parser.close()
1731
1732        self.assert_event_tags(parser, [('end', 'doc')])
1733
1734    def test_flush_reparse_deferral_disabled(self):
1735        parser = ET.XMLPullParser(events=('start', 'end'))
1736
1737        for chunk in ("<doc", ">"):
1738            parser.feed(chunk)
1739
1740        if pyexpat.version_info >= (2, 6, 0):
1741            if not ET is pyET:
1742                self.skipTest(f'XMLParser.(Get|Set)ReparseDeferralEnabled '
1743                              'methods not available in C')
1744            parser._parser._parser.SetReparseDeferralEnabled(False)
1745            self.assert_event_tags(parser, [])  # i.e. no elements started
1746
1747        if ET is pyET:
1748            self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled())
1749
1750        parser.flush()
1751
1752        self.assert_event_tags(parser, [('start', 'doc')])
1753        if ET is pyET:
1754            self.assertFalse(parser._parser._parser.GetReparseDeferralEnabled())
1755
1756        parser.feed("</doc>")
1757        parser.close()
1758
1759        self.assert_event_tags(parser, [('end', 'doc')])
1760
#
# xinclude tests (samples from appendix C of the xinclude specification)
#
# XINCLUDE maps a resource name to its content.

XINCLUDE = {
    "C1.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml"/>
</document>
""",

    "disclaimer.xml": """\
<?xml version='1.0'?>
<disclaimer>
  <p>The opinions represented herein represent those of the individual
  and should not be interpreted as official policy endorsed by this
  organization.</p>
</disclaimer>
""",

    "C2.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been accessed
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
""",

    "count.txt": "324387",

    "C2b.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been <em>accessed</em>
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
""",

    "C3.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source of the "data.xml" resource:</p>
  <example><xi:include href="data.xml" parse="text"/></example>
</document>
""",

    "data.xml": """\
<?xml version='1.0'?>
<data>
  <item><![CDATA[Brooks & Shields]]></item>
</data>
""",

    "C5.xml": """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
  <xi:include href="example.txt" parse="text">
    <xi:fallback>
      <xi:include href="fallback-example.txt" parse="text">
        <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
      </xi:include>
    </xi:fallback>
  </xi:include>
</div>
""",

    # The href of this document is the (escaped) path of SIMPLE_XMLFILE.
    "default.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>Example.</p>
  <xi:include href="{}"/>
</document>
""".format(html.escape(SIMPLE_XMLFILE, True)),

    # Includes C1.xml four times.
    "include_c1_repeated.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive1.xml:</p>
  <xi:include href="C1.xml"/>
  <xi:include href="C1.xml"/>
  <xi:include href="C1.xml"/>
  <xi:include href="C1.xml"/>
</document>
""",

    # Three documents including each other in a cycle:
    # Recursive1 -> Recursive2 -> Recursive3 -> Recursive1.
    "Recursive1.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive2.xml:</p>
  <xi:include href="Recursive2.xml"/>
</document>
""",

    "Recursive2.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive3.xml:</p>
  <xi:include href="Recursive3.xml"/>
</document>
""",

    "Recursive3.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source code of Recursive1.xml:</p>
  <xi:include href="Recursive1.xml"/>
</document>
""",
}

#
# badly formatted xi:include tags

XINCLUDE_BAD = {
    # Unknown value for the parse attribute.
    "B1.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
""",

    # xi:fallback that is not a child of xi:include.
    "B2.xml": """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
    <xi:fallback></xi:fallback>
</div>
""",
}
1891
1892
class XIncludeTest(unittest.TestCase):

    def xinclude_loader(self, href, parse="xml", encoding=None):
        # Serve resources from the in-memory XINCLUDE table.  Unknown
        # names are reported as OSError; a resource requested as "xml" is
        # parsed into an element, anything else is returned unchanged.
        try:
            resource = XINCLUDE[href]
        except KeyError:
            raise OSError("resource not found")
        return ET.XML(resource) if parse == "xml" else resource

    def none_loader(self, href, parser, encoding=None):
        # Loader that returns None for every request.
        return None

    def _my_loader(self, href, parse):
        # Stand-in for the default loader of ElementInclude, which would
        # use the pyET parser even for cET tests.  Only 'xml' resources
        # are loaded; any other parse type yields None.
        if parse != 'xml':
            return None
        with open(href, 'rb') as stream:
            return ET.parse(stream).getroot()

    def test_xinclude_default(self):
        from xml.etree import ElementInclude

        expected = (
            '<document>\n'
            '  <p>Example.</p>\n'
            '  <root>\n'
            '   <element key="value">text</element>\n'
            '   <element>text</element>tail\n'
            '   <empty-element />\n'
            '</root>\n'
            '</document>')

        doc = self.xinclude_loader('default.xml')
        ElementInclude.include(doc, self._my_loader)
        self.assertEqual(serialize(doc), expected)

    def test_xinclude(self):
        from xml.etree import ElementInclude

        # Each row names a resource and the serialization expected after
        # its includes have been expanded.
        successful = [
            # Basic inclusion example (XInclude C.1)
            ("C1.xml",
             '<document>\n'
             '  <p>120 Mz is adequate for an average home user.</p>\n'
             '  <disclaimer>\n'
             '  <p>The opinions represented herein represent those of the individual\n'
             '  and should not be interpreted as official policy endorsed by this\n'
             '  organization.</p>\n'
             '</disclaimer>\n'
             '</document>'),
            # Textual inclusion example (XInclude C.2)
            ("C2.xml",
             '<document>\n'
             '  <p>This document has been accessed\n'
             '  324387 times.</p>\n'
             '</document>'),
            # Textual inclusion after sibling element (based on modified
            # XInclude C.2)
            ("C2b.xml",
             '<document>\n'
             '  <p>This document has been <em>accessed</em>\n'
             '  324387 times.</p>\n'
             '</document>'),
            # Textual inclusion of XML example (XInclude C.3)
            ("C3.xml",
             '<document>\n'
             '  <p>The following is the source of the "data.xml" resource:</p>\n'
             "  <example>&lt;?xml version='1.0'?&gt;\n"
             '&lt;data&gt;\n'
             '  &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;\n'
             '&lt;/data&gt;\n'
             '</example>\n'
             '</document>'),
        ]
        for resource, expected in successful:
            document = self.xinclude_loader(resource)
            ElementInclude.include(document, self.xinclude_loader)
            self.assertEqual(serialize(document), expected)

        # Fallback example (XInclude C.5)
        # Note! Fallback support is not yet implemented: the missing
        # resource surfaces as the loader's OSError and the document is
        # expected to serialize with its include elements still in place.
        document = self.xinclude_loader("C5.xml")
        with self.assertRaises(OSError) as cm:
            ElementInclude.include(document, self.xinclude_loader)
        self.assertEqual(str(cm.exception), 'resource not found')
        untouched = (
            '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n'
            '  <ns0:include href="example.txt" parse="text">\n'
            '    <ns0:fallback>\n'
            '      <ns0:include href="fallback-example.txt" parse="text">\n'
            '        <ns0:fallback><a href="mailto:bob@example.org">Report error</a></ns0:fallback>\n'
            '      </ns0:include>\n'
            '    </ns0:fallback>\n'
            '  </ns0:include>\n'
            '</div>')
        self.assertEqual(serialize(document), untouched)

    def test_xinclude_repeated(self):
        from xml.etree import ElementInclude

        document = self.xinclude_loader("include_c1_repeated.xml")
        ElementInclude.include(document, self.xinclude_loader)
        # One <p> of the outer document, plus two for each of the four
        # included copies of C1.xml (its own and the disclaimer's).
        paragraphs = document.findall(".//p")
        self.assertEqual(1+4*2, len(paragraphs))

    def test_xinclude_failures(self):
        from xml.etree import ElementInclude

        fatal = ElementInclude.FatalIncludeError
        depth_limited = ElementInclude.LimitedRecursiveIncludeError

        # Documents that must be rejected when processed with a loader
        # that always returns None.
        rejected = [
            # Test failure to locate included XML file.
            (XINCLUDE["C1.xml"],
             "cannot load 'disclaimer.xml' as 'xml'"),
            # Test failure to locate included text file.
            (XINCLUDE["C2.xml"],
             "cannot load 'count.txt' as 'text'"),
            # Test bad parse type.
            (XINCLUDE_BAD["B1.xml"],
             "unknown parse type in xi:include tag ('BAD_TYPE')"),
            # Test xi:fallback outside xi:include.
            (XINCLUDE_BAD["B2.xml"],
             "xi:fallback tag must be child of xi:include "
             "('{http://www.w3.org/2001/XInclude}fallback')"),
        ]
        for source, message in rejected:
            document = ET.XML(source)
            with self.assertRaises(fatal) as cm:
                ElementInclude.include(document, loader=self.none_loader)
            self.assertEqual(str(cm.exception), message)

        # Infinitely recursive includes, first without a max_depth
        # argument and then with different 'max_depth' limitations.
        # Each row: extra keyword arguments, expected exception class
        # and expected message.
        recursive = [
            ({}, fatal,
             "recursive include of Recursive2.xml"),
            ({'max_depth': None}, fatal,
             "recursive include of Recursive2.xml"),
            ({'max_depth': 0}, depth_limited,
             "maximum xinclude depth reached when including file Recursive2.xml"),
            ({'max_depth': 1}, depth_limited,
             "maximum xinclude depth reached when including file Recursive3.xml"),
            ({'max_depth': 2}, depth_limited,
             "maximum xinclude depth reached when including file Recursive1.xml"),
            ({'max_depth': 3}, fatal,
             "recursive include of Recursive2.xml"),
        ]
        for options, error, message in recursive:
            document = self.xinclude_loader("Recursive1.xml")
            with self.assertRaises(error) as cm:
                ElementInclude.include(document, self.xinclude_loader,
                                       **options)
            self.assertEqual(str(cm.exception), message)
2070
2071
2072# --------------------------------------------------------------------
2073# reported bugs
2074
2075class BugsTest(unittest.TestCase):
2076
2077    def test_bug_xmltoolkit21(self):
2078        # marshaller gives obscure errors for non-string values
2079
2080        def check(elem):
2081            with self.assertRaises(TypeError) as cm:
2082                serialize(elem)
2083            self.assertEqual(str(cm.exception),
2084                    'cannot serialize 123 (type int)')
2085
2086        elem = ET.Element(123)
2087        check(elem) # tag
2088
2089        elem = ET.Element("elem")
2090        elem.text = 123
2091        check(elem) # text
2092
2093        elem = ET.Element("elem")
2094        elem.tail = 123
2095        check(elem) # tail
2096
2097        elem = ET.Element("elem")
2098        elem.set(123, "123")
2099        check(elem) # attribute key
2100
2101        elem = ET.Element("elem")
2102        elem.set("123", 123)
2103        check(elem) # attribute value
2104
2105    def test_bug_xmltoolkit25(self):
2106        # typo in ElementTree.findtext
2107
2108        elem = ET.XML(SAMPLE_XML)
2109        tree = ET.ElementTree(elem)
2110        self.assertEqual(tree.findtext("tag"), 'text')
2111        self.assertEqual(tree.findtext("section/tag"), 'subtext')
2112
2113    def test_bug_xmltoolkit28(self):
2114        # .//tag causes exceptions
2115
2116        tree = ET.XML("<doc><table><tbody/></table></doc>")
2117        self.assertEqual(summarize_list(tree.findall(".//thead")), [])
2118        self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody'])
2119
2120    def test_bug_xmltoolkitX1(self):
2121        # dump() doesn't flush the output buffer
2122
2123        tree = ET.XML("<doc><table><tbody/></table></doc>")
2124        with support.captured_stdout() as stdout:
2125            ET.dump(tree)
2126            self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n')
2127
2128    def test_bug_xmltoolkit39(self):
2129        # non-ascii element and attribute names doesn't work
2130
2131        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
2132        self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')
2133
2134        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
2135                      b"<tag \xe4ttr='v&#228;lue' />")
2136        self.assertEqual(tree.attrib, {'\xe4ttr': 'v\xe4lue'})
2137        self.assertEqual(ET.tostring(tree, "utf-8"),
2138                b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')
2139
2140        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
2141                      b'<t\xe4g>text</t\xe4g>')
2142        self.assertEqual(ET.tostring(tree, "utf-8"),
2143                b'<t\xc3\xa4g>text</t\xc3\xa4g>')
2144
2145        tree = ET.Element("t\u00e4g")
2146        self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')
2147
2148        tree = ET.Element("tag")
2149        tree.set("\u00e4ttr", "v\u00e4lue")
2150        self.assertEqual(ET.tostring(tree, "utf-8"),
2151                b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')
2152
2153    def test_bug_xmltoolkit54(self):
2154        # problems handling internally defined entities
2155
2156        e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]>"
2157                   '<doc>&ldots;</doc>')
2158        self.assertEqual(serialize(e, encoding="us-ascii"),
2159                b'<doc>&#33328;</doc>')
2160        self.assertEqual(serialize(e), '<doc>\u8230</doc>')
2161
2162    def test_bug_xmltoolkit55(self):
2163        # make sure we're reporting the first error, not the last
2164
2165        with self.assertRaises(ET.ParseError) as cm:
2166            ET.XML(b"<!DOCTYPE doc SYSTEM 'doc.dtd'>"
2167                   b'<doc>&ldots;&ndots;&rdots;</doc>')
2168        self.assertEqual(str(cm.exception),
2169                'undefined entity &ldots;: line 1, column 36')
2170
2171    def test_bug_xmltoolkit60(self):
2172        # Handle crash in stream source.
2173
2174        class ExceptionFile:
2175            def read(self, x):
2176                raise OSError
2177
2178        self.assertRaises(OSError, ET.parse, ExceptionFile())
2179
2180    def test_bug_xmltoolkit62(self):
2181        # Don't crash when using custom entities.
2182
2183        ENTITIES = {'rsquo': '\u2019', 'lsquo': '\u2018'}
2184        parser = ET.XMLParser()
2185        parser.entity.update(ENTITIES)
2186        parser.feed("""<?xml version="1.0" encoding="UTF-8"?>
2187<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
2188<patent-application-publication>
2189<subdoc-abstract>
2190<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
2191</subdoc-abstract>
2192</patent-application-publication>""")
2193        t = parser.close()
2194        self.assertEqual(t.find('.//paragraph').text,
2195            'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.')
2196
2197    @unittest.skipIf(sys.gettrace(), "Skips under coverage.")
2198    def test_bug_xmltoolkit63(self):
2199        # Check reference leak.
2200        def xmltoolkit63():
2201            tree = ET.TreeBuilder()
2202            tree.start("tag", {})
2203            tree.data("text")
2204            tree.end("tag")
2205
2206        xmltoolkit63()
2207        count = sys.getrefcount(None)
2208        for i in range(1000):
2209            xmltoolkit63()
2210        self.assertEqual(sys.getrefcount(None), count)
2211
2212    def test_bug_200708_newline(self):
2213        # Preserve newlines in attributes.
2214
2215        e = ET.Element('SomeTag', text="def _f():\n  return 3\n")
2216        self.assertEqual(ET.tostring(e),
2217                b'<SomeTag text="def _f():&#10;  return 3&#10;" />')
2218        self.assertEqual(ET.XML(ET.tostring(e)).get("text"),
2219                'def _f():\n  return 3\n')
2220        self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))),
2221                b'<SomeTag text="def _f():&#10;  return 3&#10;" />')
2222
2223    def test_bug_200708_close(self):
2224        # Test default builder.
2225        parser = ET.XMLParser() # default
2226        parser.feed("<element>some text</element>")
2227        self.assertEqual(parser.close().tag, 'element')
2228
2229        # Test custom builder.
2230        class EchoTarget:
2231            def close(self):
2232                return ET.Element("element") # simulate root
2233        parser = ET.XMLParser(target=EchoTarget())
2234        parser.feed("<element>some text</element>")
2235        self.assertEqual(parser.close().tag, 'element')
2236
2237    def test_bug_200709_default_namespace(self):
2238        e = ET.Element("{default}elem")
2239        s = ET.SubElement(e, "{default}elem")
2240        self.assertEqual(serialize(e, default_namespace="default"), # 1
2241                '<elem xmlns="default"><elem /></elem>')
2242
2243        e = ET.Element("{default}elem")
2244        s = ET.SubElement(e, "{default}elem")
2245        s = ET.SubElement(e, "{not-default}elem")
2246        self.assertEqual(serialize(e, default_namespace="default"), # 2
2247            '<elem xmlns="default" xmlns:ns1="not-default">'
2248            '<elem />'
2249            '<ns1:elem />'
2250            '</elem>')
2251
2252        e = ET.Element("{default}elem")
2253        s = ET.SubElement(e, "{default}elem")
2254        s = ET.SubElement(e, "elem") # unprefixed name
2255        with self.assertRaises(ValueError) as cm:
2256            serialize(e, default_namespace="default") # 3
2257        self.assertEqual(str(cm.exception),
2258                'cannot use non-qualified names with default_namespace option')
2259
2260    def test_bug_200709_register_namespace(self):
2261        e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
2262        self.assertEqual(ET.tostring(e),
2263            b'<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />')
2264        ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
2265        e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
2266        self.assertEqual(ET.tostring(e),
2267            b'<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />')
2268
2269        # And the Dublin Core namespace is in the default list:
2270
2271        e = ET.Element("{http://purl.org/dc/elements/1.1/}title")
2272        self.assertEqual(ET.tostring(e),
2273            b'<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />')
2274
2275    def test_bug_200709_element_comment(self):
2276        # Not sure if this can be fixed, really (since the serializer needs
2277        # ET.Comment, not cET.comment).
2278
2279        a = ET.Element('a')
2280        a.append(ET.Comment('foo'))
2281        self.assertEqual(a[0].tag, ET.Comment)
2282
2283        a = ET.Element('a')
2284        a.append(ET.PI('foo'))
2285        self.assertEqual(a[0].tag, ET.PI)
2286
2287    def test_bug_200709_element_insert(self):
2288        a = ET.Element('a')
2289        b = ET.SubElement(a, 'b')
2290        c = ET.SubElement(a, 'c')
2291        d = ET.Element('d')
2292        a.insert(0, d)
2293        self.assertEqual(summarize_list(a), ['d', 'b', 'c'])
2294        a.insert(-1, d)
2295        self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c'])
2296
2297    def test_bug_200709_iter_comment(self):
2298        a = ET.Element('a')
2299        b = ET.SubElement(a, 'b')
2300        comment_b = ET.Comment("TEST-b")
2301        b.append(comment_b)
2302        self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment])
2303
2304    # --------------------------------------------------------------------
2305    # reported on bugs.python.org
2306
2307    def test_bug_1534630(self):
2308        bob = ET.TreeBuilder()
2309        e = bob.data("data")
2310        e = bob.start("tag", {})
2311        e = bob.end("tag")
2312        e = bob.close()
2313        self.assertEqual(serialize(e), '<tag />')
2314
2315    def test_issue6233(self):
2316        e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>"
2317                   b'<body>t\xc3\xa3g</body>')
2318        self.assertEqual(ET.tostring(e, 'ascii'),
2319                b"<?xml version='1.0' encoding='ascii'?>\n"
2320                b'<body>t&#227;g</body>')
2321        e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
2322                   b'<body>t\xe3g</body>')
2323        self.assertEqual(ET.tostring(e, 'ascii'),
2324                b"<?xml version='1.0' encoding='ascii'?>\n"
2325                b'<body>t&#227;g</body>')
2326
2327    def test_issue6565(self):
2328        elem = ET.XML("<body><tag/></body>")
2329        self.assertEqual(summarize_list(elem), ['tag'])
2330        newelem = ET.XML(SAMPLE_XML)
2331        elem[:] = newelem[:]
2332        self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section'])
2333
2334    def test_issue10777(self):
2335        # Registering a namespace twice caused a "dictionary changed size during
2336        # iteration" bug.
2337
2338        ET.register_namespace('test10777', 'http://myuri/')
2339        ET.register_namespace('test10777', 'http://myuri/')
2340
2341    def test_lost_text(self):
2342        # Issue #25902: Borrowed text can disappear
2343        class Text:
2344            def __bool__(self):
2345                e.text = 'changed'
2346                return True
2347
2348        e = ET.Element('tag')
2349        e.text = Text()
2350        i = e.itertext()
2351        t = next(i)
2352        self.assertIsInstance(t, Text)
2353        self.assertIsInstance(e.text, str)
2354        self.assertEqual(e.text, 'changed')
2355
2356    def test_lost_tail(self):
2357        # Issue #25902: Borrowed tail can disappear
2358        class Text:
2359            def __bool__(self):
2360                e[0].tail = 'changed'
2361                return True
2362
2363        e = ET.Element('root')
2364        e.append(ET.Element('tag'))
2365        e[0].tail = Text()
2366        i = e.itertext()
2367        t = next(i)
2368        self.assertIsInstance(t, Text)
2369        self.assertIsInstance(e[0].tail, str)
2370        self.assertEqual(e[0].tail, 'changed')
2371
    def test_lost_elem(self):
        # Issue #25902: Borrowed element can disappear
        class Tag:
            # Comparing this tag replaces the first child of ``e`` and then
            # advances the iterator ``i`` again from inside the comparison.
            def __eq__(self, other):
                e[0] = ET.Element('changed')
                next(i)
                return True

        e = ET.Element('root')
        e.append(ET.Element(Tag()))
        e.append(ET.Element('tag'))
        i = e.iter('tag')
        try:
            t = next(i)
        except ValueError:
            # The nested next(i) was rejected, so the scenario cannot be
            # exercised with this iterator; skip instead of failing.
            self.skipTest('generators are not reentrant')
        # The element handed out is the original one (its tag is still a
        # Tag instance), while the tree now holds the replacement child.
        self.assertIsInstance(t.tag, Tag)
        self.assertIsInstance(e[0].tag, str)
        self.assertEqual(e[0].tag, 'changed')
2391
2392    def check_expat224_utf8_bug(self, text):
2393        xml = b'<a b="%s"/>' % text
2394        root = ET.XML(xml)
2395        self.assertEqual(root.get('b'), text.decode('utf-8'))
2396
2397    def test_expat224_utf8_bug(self):
2398        # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
2399        # Check that Expat 2.2.4 fixed the bug.
2400        #
2401        # Test buffer bounds at odd and even positions.
2402
2403        text = b'\xc3\xa0' * 1024
2404        self.check_expat224_utf8_bug(text)
2405
2406        text = b'x' + b'\xc3\xa0' * 1024
2407        self.check_expat224_utf8_bug(text)
2408
2409    def test_expat224_utf8_bug_file(self):
2410        with open(UTF8_BUG_XMLFILE, 'rb') as fp:
2411            raw = fp.read()
2412        root = ET.fromstring(raw)
2413        xmlattr = root.get('b')
2414
2415        # "Parse" manually the XML file to extract the value of the 'b'
2416        # attribute of the <a b='xxx' /> XML element
2417        text = raw.decode('utf-8').strip()
2418        text = text.replace('\r\n', ' ')
2419        text = text[6:-4]
2420        self.assertEqual(root.get('b'), text)
2421
2422    def test_39495_treebuilder_start(self):
2423        self.assertRaises(TypeError, ET.TreeBuilder().start, "tag")
2424        self.assertRaises(TypeError, ET.TreeBuilder().start, "tag", None)
2425
2426    def test_issue123213_correct_extend_exception(self):
2427        # Does not hide the internal exception when extending the element
2428        self.assertRaises(ZeroDivisionError, ET.Element('tag').extend,
2429                          (1/0 for i in range(2)))
2430
2431        # Still raises the TypeError when extending with a non-iterable
2432        self.assertRaises(TypeError, ET.Element('tag').extend, None)
2433
2434        # Preserves the TypeError message when extending with a generator
2435        def f():
2436            raise TypeError("mymessage")
2437
2438        self.assertRaisesRegex(
2439            TypeError, 'mymessage',
2440            ET.Element('tag').extend, (f() for i in range(2)))
2441
2442
2443
2444# --------------------------------------------------------------------
2445
2446
class BasicElementTest(ElementTestCase, unittest.TestCase):
    # Construction, copying, type checking of children, garbage
    # collection, weak references and pickling of Element objects.

    def test___init__(self):
        name = "foo"
        given_attrib = {"zix": "wyp"}
        elem = ET.Element(name, given_attrib)

        # traits of an element
        self.assertIsInstance(elem, ET.Element)
        for trait in ("tag", "attrib", "text", "tail"):
            self.assertIn(trait, dir(elem))

        # the tag is kept as given; text and tail start out as None
        self.assertEqual(elem.tag, name)
        self.assertIsNone(elem.text)
        self.assertIsNone(elem.tail)

        # the element holds an equal but separate attribute mapping ...
        self.assertIsNot(elem.attrib, given_attrib)
        self.assertEqual(elem.attrib, given_attrib)

        # ... which does not follow later changes to the caller's dict
        given_attrib["bar"] = "baz"
        self.assertIsNot(elem.attrib, given_attrib)
        self.assertNotEqual(elem.attrib, given_attrib)

    def test___copy__(self):
        original = ET.Element("foo", {"zix": "wyp"})
        original.append(ET.Element("bar", {"baz": "qix"}))

        shallow = copy.copy(original)

        # the copy is a new object
        self.assertIsNot(shallow, original)

        # tag, text and tail carry over
        self.assertEqual(shallow.tag, original.tag)
        self.assertEqual(shallow.text, original.text)
        self.assertEqual(shallow.tail, original.tail)

        # same number of children, and they are the very same objects
        self.assertEqual(len(shallow), len(original))
        child_pairs = itertools.zip_longest(original, shallow)
        for theirs, ours in child_pairs:
            self.assertIs(theirs, ours)

        # the attributes compare equal
        self.assertEqual(shallow.attrib, original.attrib)

    def test___deepcopy__(self):
        original = ET.Element("foo", {"zix": "wyp"})
        original.append(ET.Element("bar", {"baz": "qix"}))

        deep = copy.deepcopy(original)

        # the copy is a new object
        self.assertIsNot(deep, original)

        # tag, text and tail carry over
        self.assertEqual(deep.tag, original.tag)
        self.assertEqual(deep.text, original.text)
        self.assertEqual(deep.tail, original.tail)

        # same number of children, but each child was copied as well
        self.assertEqual(len(deep), len(original))
        child_pairs = itertools.zip_longest(original, deep)
        for theirs, ours in child_pairs:
            self.assertIsNot(theirs, ours)

        # the attribute mapping is an equal, separate object ...
        self.assertIsNot(deep.attrib, original.attrib)
        self.assertEqual(deep.attrib, original.attrib)

        # ... so changing the original no longer affects the copy
        original.attrib["bar"] = "baz"
        self.assertIsNot(deep.attrib, original.attrib)
        self.assertNotEqual(deep.attrib, original.attrib)

    def test_augmentation_type_errors(self):
        elem = ET.Element('joe')

        # Every way of adding a child must reject a non-Element value.
        with self.assertRaises(TypeError):
            elem.append('b')
        mixed_children = [ET.Element('bar'), 'foo']
        with self.assertRaises(TypeError):
            elem.extend(mixed_children)
        with self.assertRaises(TypeError):
            elem.insert(0, 'foo')
        elem[:] = [ET.Element('bar')]
        with self.assertRaises(TypeError):
            elem[0] = 'foo'
        with self.assertRaises(TypeError):
            elem[:] = [ET.Element('bar'), 'foo']

        # Only checked when the element type provides __setstate__.
        if hasattr(elem, '__setstate__'):
            bad_state = {
                'tag': 'tag',
                '_children': [None],  # non-Element
                'attrib': 'attr',
                'tail': 'tail',
                'text': 'text',
            }
            with self.assertRaises(TypeError):
                elem.__setstate__(bad_state)

        # Only checked when the element type provides __deepcopy__.
        if hasattr(elem, '__deepcopy__'):
            class NoneCopying(ET.Element):
                def __deepcopy__(self, memo):
                    return None  # non-Element
            elem[:] = [NoneCopying('bar')]
            with self.assertRaises(TypeError):
                copy.deepcopy(elem)

    def test_cyclic_gc(self):
        class Holder:
            pass

        # Test the shortest cycle: holder->element->holder
        holder = Holder()
        holder.dummyref = ET.Element('joe', attr=holder)
        holder_ref = weakref.ref(holder)
        del holder
        gc_collect()
        self.assertIsNone(holder_ref())

        # A longer cycle: holder->parent->child->holder
        parent = ET.Element('joe')
        holder = Holder()
        holder.dummyref = parent
        holder_ref = weakref.ref(holder)
        child = ET.SubElement(parent, 'foo', attr=holder)
        del holder, parent, child
        gc_collect()
        self.assertIsNone(holder_ref())

        # A cycle between Element objects as children of one another
        # first->second->third->first
        first = ET.Element('e1')
        second = ET.Element('e2')
        third = ET.Element('e3')
        third.append(first)
        second.append(third)
        first.append(second)
        first_ref = weakref.ref(first)
        del first, second, third
        gc_collect()
        self.assertIsNone(first_ref())

    def test_weakref(self):
        callback_ran = False

        def on_finalize(ref):
            nonlocal callback_ran
            callback_ran = True

        elem = ET.Element('e')
        elem_ref = weakref.ref(elem, on_finalize)
        self.assertEqual(elem_ref().tag, 'e')
        del elem
        gc_collect()  # For PyPy or other GCs.
        self.assertIs(callback_ran, True)
        self.assertIsNone(elem_ref())

    def test_get_keyword_args(self):
        # get() accepts its fallback value under the keyword "default".
        elem = ET.Element('foo', x=1, y=2, z=3)
        self.assertEqual(elem.get('x', default=7), 1)
        self.assertEqual(elem.get('w', default=7), 7)

    def test_pickle(self):
        # issue #16076: the C implementation wasn't pickleable.
        protocols = range(2, pickle.HIGHEST_PROTOCOL + 1)
        module_pairs = list(product(self.modules, repeat=2))
        for proto in protocols:
            for dumper, loader in module_pairs:
                root = dumper.Element('foo', bar=42)
                root.text = "text goes here"
                root.tail = "opposite of head"
                first_child = dumper.SubElement(root, 'child')
                first_child.append(dumper.Element('grandchild'))
                root.append(dumper.Element('child'))
                grandchild = root.findall('.//grandchild')[0]
                grandchild.set('attr', 'other value')

                restored = self.pickleRoundTrip(
                    root, 'xml.etree.ElementTree', dumper, loader, proto)

                self.assertEqual(restored.tag, 'foo')
                self.assertEqual(restored.attrib['bar'], 42)
                self.assertEqual(len(restored), 2)
                self.assertEqualElements(root, restored)

    def test_pickle_issue18997(self):
        # An element produced by the parser must expose its state and
        # survive a pickle round trip for every protocol/module pairing.
        document = """<?xml version="1.0"?>
                    <group><dogs>4</dogs>
                    </group>"""
        protocols = range(2, pickle.HIGHEST_PROTOCOL + 1)
        module_pairs = list(product(self.modules, repeat=2))
        for proto in protocols:
            for dumper, loader in module_pairs:
                parsed = dumper.fromstring(document)
                self.assertEqual(parsed.__getstate__()['tag'], 'group')
                restored = self.pickleRoundTrip(
                    parsed, 'xml.etree.ElementTree', dumper, loader, proto)
                self.assertEqual(restored.tag, 'group')
                self.assertEqual(restored[0].tag, 'dogs')
2642
2643
class BadElementTest(ElementTestCase, unittest.TestCase):
    # Each test hands Element or TreeBuilder an object whose special
    # methods misbehave: they mutate a container in the middle of an
    # operation, or return a value of the wrong type.  Several tests make
    # no assertion beyond the call returning or raising normally.

    def test_extend_mutable_list(self):
        # X reports ET.Element as its __class__ through a property whose
        # getter also replaces the contents of the list handed to extend().
        class X:
            @property
            def __class__(self):
                L[:] = [ET.Element('baz')]
                return ET.Element
        L = [X()]
        e = ET.Element('foo')
        try:
            e.extend(L)
        except TypeError:
            # A TypeError is an accepted outcome here.
            pass

        # Same property, but Y also inherits from ET.Element; this
        # extend() call is not guarded, so it must not raise.
        class Y(X, ET.Element):
            pass
        L = [Y('x')]
        e = ET.Element('foo')
        e.extend(L)

    def test_extend_mutable_list2(self):
        # Like test_extend_mutable_list, except the __class__ getter
        # empties the list instead of replacing its contents.
        class X:
            @property
            def __class__(self):
                del L[:]
                return ET.Element
        L = [X(), ET.Element('baz')]
        e = ET.Element('foo')
        try:
            e.extend(L)
        except TypeError:
            # A TypeError is an accepted outcome here.
            pass

        # Y also inherits from ET.Element; this call must not raise.
        class Y(X, ET.Element):
            pass
        L = [Y('bar'), ET.Element('baz')]
        e = ET.Element('foo')
        e.extend(L)

    def test_remove_with_mutating(self):
        # X.__eq__ removes every child of ``e`` and reports "not equal".
        class X(ET.Element):
            def __eq__(self, o):
                del e[:]
                return False
        # The mutating object is the child already in the tree.
        e = ET.Element('foo')
        e.extend([X('bar')])
        self.assertRaises(ValueError, e.remove, ET.Element('baz'))

        # The mutating object is the argument passed to remove().
        e = ET.Element('foo')
        e.extend([ET.Element('bar')])
        self.assertRaises(ValueError, e.remove, X('baz'))

    @support.infinite_recursion(25)
    def test_recursive_repr(self):
        # Issue #25455
        e = ET.Element('foo')
        # While the block runs, the element's tag is the element itself.
        with swap_attr(e, 'tag', e):
            with self.assertRaises(RuntimeError):
                repr(e)  # Should not crash

    def test_element_get_text(self):
        # Issue #27863
        class X(str):
            # Reads the element's text while this string is being
            # destroyed.  ``elem`` is only bound after b.close() below, so
            # an earlier finalization is tolerated via NameError.
            def __del__(self):
                try:
                    elem.text
                except NameError:
                    pass

        b = ET.TreeBuilder()
        b.start('tag', {})
        # Three data() calls, the middle one carrying the X instance.
        b.data('ABCD')
        b.data(X('EFGH'))
        b.data('IJKL')
        b.end('tag')

        elem = b.close()
        self.assertEqual(elem.text, 'ABCDEFGHIJKL')

    def test_element_get_tail(self):
        # Issue #27863
        class X(str):
            # Reads the child's tail while this string is being destroyed.
            # ``elem`` is only bound after b.close() below, so an earlier
            # finalization is tolerated via NameError.
            def __del__(self):
                try:
                    elem[0].tail
                except NameError:
                    pass

        b = ET.TreeBuilder()
        b.start('root', {})
        b.start('tag', {})
        b.end('tag')
        # Data fed after the child is closed ends up as that child's tail.
        b.data('ABCD')
        b.data(X('EFGH'))
        b.data('IJKL')
        b.end('root')

        elem = b.close()
        self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL')

    def test_subscr(self):
        # Issue #27863
        class X:
            # Used as a slice bound or step; converting it to an index
            # removes every child of ``e``.
            def __index__(self):
                del e[:]
                return 1

        e = ET.Element('elem')
        e.append(ET.Element('child'))
        e[:X()]  # shouldn't crash

        e.append(ET.Element('child'))
        e[0:10:X()]  # shouldn't crash

    def test_ass_subscr(self):
        # Issue #27863
        class X:
            # Used as a slice step; converting it to an index replaces the
            # children of ``e`` with an empty list.
            def __index__(self):
                e[:] = []
                return 1

        e = ET.Element('elem')
        for _ in range(10):
            e.insert(0, ET.Element('child'))

        e[0:10:X()] = []  # shouldn't crash

    def test_treebuilder_start(self):
        # Issue #27863
        # The factory returns a plain list instead of an element.
        def element_factory(x, y):
            return []
        b = ET.TreeBuilder(element_factory=element_factory)

        b.start('tag', {})
        b.data('ABCD')
        # Opening a second element with data pending must fail with
        # AttributeError.
        self.assertRaises(AttributeError, b.start, 'tag2', {})
        del b
        gc_collect()

    def test_treebuilder_end(self):
        # Issue #27863
        # The factory returns a plain list instead of an element.
        def element_factory(x, y):
            return []
        b = ET.TreeBuilder(element_factory=element_factory)

        b.start('tag', {})
        b.data('ABCD')
        # Closing the element with data pending must fail with
        # AttributeError.
        self.assertRaises(AttributeError, b.end, 'tag')
        del b
        gc_collect()
2794
2795
class MutatingElementPath(str):
    # A path string whose equality check deletes every item of the
    # container it was created with and then reports a match.

    # Defining __eq__ would otherwise leave the class unhashable.
    __hash__ = str.__hash__

    def __new__(cls, elem, *args):
        path = str.__new__(cls, *args)
        path.elem = elem
        return path

    def __eq__(self, o):
        del self.elem[:]
        return True
2805
class BadElementPath(str):
    # A path string whose equality check always fails with
    # ZeroDivisionError.

    # Defining __eq__ would otherwise leave the class unhashable.
    __hash__ = str.__hash__

    def __eq__(self, o):
        # The division raises before ``raise`` ever gets its operand.
        raise 1/0
2810
class BadElementPathTest(ElementTestCase, unittest.TestCase):
    # find(), findtext() and findall() driven by path objects with a
    # misbehaving __eq__, plus findtext() on falsey and None text values.

    def setUp(self):
        super().setUp()
        from xml.etree import ElementPath
        # Swap in an empty path cache; tearDown() puts the saved one back.
        self.path_cache, ElementPath._cache = ElementPath._cache, {}

    def tearDown(self):
        from xml.etree import ElementPath
        ElementPath._cache = self.path_cache
        super().tearDown()

    def _parent_with_child(self):
        # Build <foo> holding a single <bar> child.
        parent = ET.Element('foo')
        parent.extend([ET.Element('bar')])
        return parent

    def test_find_with_mutating(self):
        parent = self._parent_with_child()
        parent.find(MutatingElementPath(parent, 'x'))

    def test_find_with_error(self):
        parent = self._parent_with_child()
        try:
            parent.find(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findtext_with_mutating(self):
        parent = self._parent_with_child()
        parent.findtext(MutatingElementPath(parent, 'x'))

    def test_findtext_with_error(self):
        parent = self._parent_with_child()
        try:
            parent.findtext(BadElementPath('x'))
        except ZeroDivisionError:
            pass

    def test_findtext_with_falsey_text_attribute(self):
        # A falsey text value is returned as is by findtext().
        root = ET.Element('foo')
        child = ET.SubElement(root, 'bar')
        for falsey in ("", 0, False, [], (), {}):
            child.text = falsey
            self.assertEqual(root.findtext('./bar'), falsey)

    def test_findtext_with_none_text_attribute(self):
        # A text of None is reported as the empty string.
        root = ET.Element('foo')
        child = ET.SubElement(root, 'bar')
        child.text = None
        self.assertEqual(root.findtext('./bar'), '')

    def test_findall_with_mutating(self):
        parent = self._parent_with_child()
        parent.findall(MutatingElementPath(parent, 'x'))

    def test_findall_with_error(self):
        parent = self._parent_with_child()
        try:
            parent.findall(BadElementPath('x'))
        except ZeroDivisionError:
            pass
2875
2876
class ElementTreeTypeTest(unittest.TestCase):
    # The public ElementTree names are real classes, and Element can be
    # subclassed.

    def test_istype(self):
        public_classes = (
            ET.ParseError,
            ET.QName,
            ET.ElementTree,
            ET.Element,
            ET.TreeBuilder,
            ET.XMLParser,
        )
        for public_class in public_classes:
            self.assertIsInstance(public_class, type)

    def test_Element_subclass_trivial(self):
        class MyElement(ET.Element):
            pass

        instance = MyElement('foo')
        for expected_type in (ET.Element, MyElement):
            self.assertIsInstance(instance, expected_type)
        self.assertEqual(instance.tag, 'foo')

        # test that attribute assignment works (issue 14849)
        instance.text = "joe"
        self.assertEqual(instance.text, "joe")

    def test_Element_subclass_constructor(self):
        # A subclass may alter the tag before delegating to Element.
        class MyElement(ET.Element):
            def __init__(self, tag, attrib={}, **extra):
                super().__init__(tag + '__', attrib, **extra)

        instance = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4)
        self.assertEqual(instance.tag, 'foo__')
        expected_items = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
        self.assertEqual(sorted(instance.items()), expected_items)

    def test_Element_subclass_new_method(self):
        class MyElement(ET.Element):
            def newmethod(self):
                return self.tag

        instance = MyElement('joe')
        self.assertEqual(instance.newmethod(), 'joe')

    def test_Element_subclass_find(self):
        # The find methods of a plain Element must locate a child that is
        # an instance of a subclass.
        class MyElement(ET.Element):
            pass

        parent = ET.Element('foo')
        parent.text = 'text'
        child = MyElement('bar')
        child.text = 'subtext'
        parent.append(child)

        self.assertEqual(parent.findtext('bar'), 'subtext')
        self.assertEqual(parent.find('bar').tag, 'bar')
        matches = list(parent.findall('bar'))
        self.assertEqual(len(matches), 1, matches)
        self.assertEqual(matches[0].tag, 'bar')
2931
2932
2933class ElementFindTest(unittest.TestCase):
2934    def test_find_simple(self):
2935        e = ET.XML(SAMPLE_XML)
2936        self.assertEqual(e.find('tag').tag, 'tag')
2937        self.assertEqual(e.find('section/tag').tag, 'tag')
2938        self.assertEqual(e.find('./tag').tag, 'tag')
2939
2940        e[2] = ET.XML(SAMPLE_SECTION)
2941        self.assertEqual(e.find('section/nexttag').tag, 'nexttag')
2942
2943        self.assertEqual(e.findtext('./tag'), 'text')
2944        self.assertEqual(e.findtext('section/tag'), 'subtext')
2945
2946        # section/nexttag is found but has no text
2947        self.assertEqual(e.findtext('section/nexttag'), '')
2948        self.assertEqual(e.findtext('section/nexttag', 'default'), '')
2949
2950        # tog doesn't exist and 'default' kicks in
2951        self.assertIsNone(e.findtext('tog'))
2952        self.assertEqual(e.findtext('tog', 'default'), 'default')
2953
2954        # Issue #16922
2955        self.assertEqual(ET.XML('<tag><empty /></tag>').findtext('empty'), '')
2956
2957    def test_find_xpath(self):
2958        LINEAR_XML = '''
2959        <body>
2960            <tag class='a'/>
2961            <tag class='b'/>
2962            <tag class='c'/>
2963            <tag class='d'/>
2964        </body>'''
2965        e = ET.XML(LINEAR_XML)
2966
2967        # Test for numeric indexing and last()
2968        self.assertEqual(e.find('./tag[1]').attrib['class'], 'a')
2969        self.assertEqual(e.find('./tag[2]').attrib['class'], 'b')
2970        self.assertEqual(e.find('./tag[last()]').attrib['class'], 'd')
2971        self.assertEqual(e.find('./tag[last()-1]').attrib['class'], 'c')
2972        self.assertEqual(e.find('./tag[last()-2]').attrib['class'], 'b')
2973
2974        self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[0]')
2975        self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[-1]')
2976        self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]')
2977        self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]')
2978
2979    def test_findall(self):
2980        e = ET.XML(SAMPLE_XML)
2981        e[2] = ET.XML(SAMPLE_SECTION)
2982        self.assertEqual(summarize_list(e.findall('.')), ['body'])
2983        self.assertEqual(summarize_list(e.findall('tag')), ['tag', 'tag'])
2984        self.assertEqual(summarize_list(e.findall('tog')), [])
2985        self.assertEqual(summarize_list(e.findall('tog/foo')), [])
2986        self.assertEqual(summarize_list(e.findall('*')),
2987            ['tag', 'tag', 'section'])
2988        self.assertEqual(summarize_list(e.findall('.//tag')),
2989            ['tag'] * 4)
2990        self.assertEqual(summarize_list(e.findall('section/tag')), ['tag'])
2991        self.assertEqual(summarize_list(e.findall('section//tag')), ['tag'] * 2)
2992        self.assertEqual(summarize_list(e.findall('section/*')),
2993            ['tag', 'nexttag', 'nextsection'])
2994        self.assertEqual(summarize_list(e.findall('section//*')),
2995            ['tag', 'nexttag', 'nextsection', 'tag'])
2996        self.assertEqual(summarize_list(e.findall('section/.//*')),
2997            ['tag', 'nexttag', 'nextsection', 'tag'])
2998        self.assertEqual(summarize_list(e.findall('*/*')),
2999            ['tag', 'nexttag', 'nextsection'])
3000        self.assertEqual(summarize_list(e.findall('*//*')),
3001            ['tag', 'nexttag', 'nextsection', 'tag'])
3002        self.assertEqual(summarize_list(e.findall('*/tag')), ['tag'])
3003        self.assertEqual(summarize_list(e.findall('*/./tag')), ['tag'])
3004        self.assertEqual(summarize_list(e.findall('./tag')), ['tag'] * 2)
3005        self.assertEqual(summarize_list(e.findall('././tag')), ['tag'] * 2)
3006
3007        self.assertEqual(summarize_list(e.findall('.//tag[@class]')),
3008            ['tag'] * 3)
3009        self.assertEqual(summarize_list(e.findall('.//tag[@class="a"]')),
3010            ['tag'])
3011        self.assertEqual(summarize_list(e.findall('.//tag[@class!="a"]')),
3012            ['tag'] * 2)
3013        self.assertEqual(summarize_list(e.findall('.//tag[@class="b"]')),
3014            ['tag'] * 2)
3015        self.assertEqual(summarize_list(e.findall('.//tag[@class!="b"]')),
3016            ['tag'])
3017        self.assertEqual(summarize_list(e.findall('.//tag[@id]')),
3018            ['tag'])
3019        self.assertEqual(summarize_list(e.findall('.//section[tag]')),
3020            ['section'])
3021        self.assertEqual(summarize_list(e.findall('.//section[element]')), [])
3022        self.assertEqual(summarize_list(e.findall('../tag')), [])
3023        self.assertEqual(summarize_list(e.findall('section/../tag')),
3024            ['tag'] * 2)
3025        self.assertEqual(e.findall('section//'), e.findall('section//*'))
3026
3027        self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")),
3028            ['section'])
3029        self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")),
3030            ['section'])
3031        self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")),
3032            ['section'])
3033        self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
3034            ['section'])
3035        self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")),
3036            ['section'])
3037
3038        # Negations of above tests. They match nothing because the sole section
3039        # tag has subtext.
3040        self.assertEqual(summarize_list(e.findall(".//section[tag!='subtext']")),
3041            [])
3042        self.assertEqual(summarize_list(e.findall(".//section[tag !='subtext']")),
3043            [])
3044        self.assertEqual(summarize_list(e.findall(".//section[tag!= 'subtext']")),
3045            [])
3046        self.assertEqual(summarize_list(e.findall(".//section[tag != 'subtext']")),
3047            [])
3048        self.assertEqual(summarize_list(e.findall(".//section[ tag != 'subtext' ]")),
3049            [])
3050
3051        self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")),
3052                         ['tag'])
3053        self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")),
3054                         ['tag'])
3055        self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')),
3056                         ['tag'])
3057        self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')),
3058                         ['tag'])
3059        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
3060                         ['tag'])
3061        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext ']")),
3062                         [])
3063        self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")),
3064                         [])
3065
3066        # Negations of above tests.
3067        #   Matches everything but the tag containing subtext
3068        self.assertEqual(summarize_list(e.findall(".//tag[.!='subtext']")),
3069                         ['tag'] * 3)
3070        self.assertEqual(summarize_list(e.findall(".//tag[. !='subtext']")),
3071                         ['tag'] * 3)
3072        self.assertEqual(summarize_list(e.findall('.//tag[.!= "subtext"]')),
3073                         ['tag'] * 3)
3074        self.assertEqual(summarize_list(e.findall('.//tag[ . != "subtext" ]')),
3075                         ['tag'] * 3)
3076        self.assertEqual(summarize_list(e.findall(".//tag[. != 'subtext']")),
3077                         ['tag'] * 3)
3078        # Matches all tags.
3079        self.assertEqual(summarize_list(e.findall(".//tag[. != 'subtext ']")),
3080                         ['tag'] * 4)
3081        self.assertEqual(summarize_list(e.findall(".//tag[.!= ' subtext']")),
3082                         ['tag'] * 4)
3083
3084        # duplicate section => 2x tag matches
3085        e[1] = e[2]
3086        self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
3087                         ['section', 'section'])
3088        self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
3089                         ['tag', 'tag'])
3090
3091    def test_test_find_with_ns(self):
3092        e = ET.XML(SAMPLE_XML_NS)
3093        self.assertEqual(summarize_list(e.findall('tag')), [])
3094        self.assertEqual(
3095            summarize_list(e.findall("{http://effbot.org/ns}tag")),
3096            ['{http://effbot.org/ns}tag'] * 2)
3097        self.assertEqual(
3098            summarize_list(e.findall(".//{http://effbot.org/ns}tag")),
3099            ['{http://effbot.org/ns}tag'] * 3)
3100
3101    def test_findall_different_nsmaps(self):
3102        root = ET.XML('''
3103            <a xmlns:x="X" xmlns:y="Y">
3104                <x:b><c/></x:b>
3105                <b/>
3106                <c><x:b/><b/></c><y:b/>
3107            </a>''')
3108        nsmap = {'xx': 'X'}
3109        self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3110        self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3111        nsmap = {'xx': 'Y'}
3112        self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1)
3113        self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2)
3114        nsmap = {'xx': 'X', '': 'Y'}
3115        self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
3116        self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
3117
def test_findall_wildcard(self):
    # findall() with "{namespace}tag" patterns in which either part may
    # be the "*" wildcard: first on direct children, then through ".//".
    tree_root = ET.XML('''
            <a xmlns:x="X" xmlns:y="Y">
                <x:b><c/></x:b>
                <b/>
                <c><x:b/><b/></c><y:b/>
            </a>''')
    # Trailing comment child; the expectations below show that "*" returns
    # it while "{*}*" does not.
    tree_root.append(ET.Comment('test'))

    def found(path):
        # Summary of everything findall(path) returns from the root.
        return summarize_list(tree_root.findall(path))

    # Direct children only.
    self.assertEqual(found("{*}b"), ['{X}b', 'b', '{Y}b'])
    self.assertEqual(found("{*}c"), ['c'])
    self.assertEqual(found("{X}*"), ['{X}b'])
    self.assertEqual(found("{Y}*"), ['{Y}b'])
    self.assertEqual(found("{}*"), ['b', 'c'])
    # only for consistency
    self.assertEqual(found("{}b"), ['b'])
    self.assertEqual(found("{}b"), found("b"))
    self.assertEqual(found("{*}*"), ['{X}b', 'b', 'c', '{Y}b'])
    # This is an unfortunate difference, but that's how find('*') works.
    elements_plus_comment = tree_root.findall("{*}*") + [tree_root[-1]]
    self.assertEqual(summarize_list(elements_plus_comment), found("*"))

    # Same patterns applied to all descendants.
    self.assertEqual(found(".//{*}b"),
                     ['{X}b', 'b', '{X}b', 'b', '{Y}b'])
    self.assertEqual(found(".//{*}c"), ['c', 'c'])
    self.assertEqual(found(".//{X}*"), ['{X}b', '{X}b'])
    self.assertEqual(found(".//{Y}*"), ['{Y}b'])
    self.assertEqual(found(".//{}*"), ['c', 'b', 'c', 'b'])
    # only for consistency
    self.assertEqual(found(".//{}b"), ['b', 'b'])
    self.assertEqual(found(".//{}b"), found(".//b"))
3161
def test_bad_find(self):
    # Searching an element with an absolute path must be rejected with a
    # SyntaxError whose message mentions the absolute path.
    sample = ET.XML(SAMPLE_XML)
    self.assertRaisesRegex(SyntaxError, 'cannot use absolute path',
                           sample.findall, '/tag')
3166
def test_find_through_ElementTree(self):
    # find(), findtext() and findall() called on an ElementTree wrapper
    # instead of directly on the root element.
    body = ET.XML(SAMPLE_XML)

    def wrapped():
        # A fresh ElementTree around the same root for every query.
        return ET.ElementTree(body)

    first_match = wrapped().find('tag')
    self.assertEqual(first_match.tag, 'tag')
    self.assertEqual(wrapped().findtext('tag'), 'text')
    direct_matches = wrapped().findall('tag')
    self.assertEqual(summarize_list(direct_matches), ['tag'] * 2)

    # this produces a warning
    msg = ("This search is broken in 1.3 and earlier, and will be fixed "
           "in a future version.  If you rely on the current behaviour, "
           "change it to '.+'")
    with self.assertWarnsRegex(FutureWarning, msg):
        it = wrapped().findall('//tag')
    self.assertEqual(summarize_list(it), ['tag'] * 3)
3180
3181
class ElementIterTest(unittest.TestCase):
    # Tests for Element.iter(), Element.itertext() and the iterators they
    # return (plus the iterator returned by iterparse()).

    def _ilist(self, elem, tag=None):
        # Summarize everything elem.iter(tag) yields, in iteration order.
        walker = elem.iter(tag)
        return summarize_list(walker)

    def test_basic(self):
        html_doc = ET.XML(
            "<html><body>this is a <i>paragraph</i>.</body>..</html>")
        self.assertEqual(self._ilist(html_doc), ['html', 'body', 'i'])
        body = html_doc.find('body')
        self.assertEqual(self._ilist(body), ['body', 'i'])
        first_elem = next(html_doc.iter())
        self.assertEqual(first_elem.tag, 'html')
        self.assertEqual(''.join(html_doc.itertext()),
                         'this is a paragraph...')
        self.assertEqual(''.join(html_doc.find('body').itertext()),
                         'this is a paragraph.')
        first_text = next(html_doc.itertext())
        self.assertEqual(first_text, 'this is a ')

        # iterparse should return an iterator
        sourcefile = serialize(html_doc, to_string=False)
        first_item = next(ET.iterparse(sourcefile))
        self.assertEqual(first_item[0], 'end')

        # With an explicit parser too (issue #9708)
        sourcefile = serialize(html_doc, to_string=False)
        parser = ET.XMLParser(target=ET.TreeBuilder())
        first_item = next(ET.iterparse(sourcefile, parser=parser))
        self.assertEqual(first_item[0], 'end')

        # A tree without a root cannot be iterated.
        rootless = ET.ElementTree(None)
        with self.assertRaises(AttributeError):
            rootless.iter()

        # Issue #16913
        entity_doc = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
        self.assertEqual(''.join(entity_doc.itertext()), 'a&b&c&')

    def test_corners(self):
        # single root, no subelements
        root = ET.Element('a')
        self.assertEqual(self._ilist(root), ['a'])

        # one child
        first_child = ET.SubElement(root, 'b')
        self.assertEqual(self._ilist(root), ['a', 'b'])

        # one child and one grandchild
        ET.SubElement(first_child, 'c')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c'])

        # two children, only first with grandchild
        ET.SubElement(root, 'd')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c', 'd'])

        # replace first child by second
        root[0] = root[1]
        del root[1]
        self.assertEqual(self._ilist(root), ['a', 'd'])

    def test_iter_by_tag(self):
        document = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        self.assertEqual(self._ilist(document, 'room'), ['room'] * 3)
        self.assertEqual(self._ilist(document, 'house'), ['house'] * 2)

        # test that iter also accepts 'tag' as a keyword arg
        rooms = document.iter(tag='room')
        self.assertEqual(summarize_list(rooms), ['room'] * 3)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(summarize_list(document.iter()), all_tags)
        self.assertEqual(self._ilist(document), all_tags)
        self.assertEqual(self._ilist(document, '*'), all_tags)

    def test_copy(self):
        # Element iterators cannot be shallow-copied.
        walker = ET.Element('a').iter()
        self.assertRaises(TypeError, copy.copy, walker)

    def test_pickle(self):
        # Element iterators cannot be pickled with any protocol.
        walker = ET.Element('a').iter()
        unpicklable = (TypeError, pickle.PicklingError)
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            self.assertRaises(unpicklable, pickle.dumps, walker, proto)
3275
3276
class TreeBuilderTest(unittest.TestCase):
    # Tests for TreeBuilder and for XMLParser driving arbitrary target
    # objects (dummy builders, subclasses, custom factories).

    # Document with a DOCTYPE, text, one child and a tail after the child.
    sample1 = ('<!DOCTYPE html PUBLIC'
        ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        '<html>text<div>subtext</div>tail</html>')

    # Minimal single-element document.
    sample2 = '''<toplevel>sometext</toplevel>'''

    def _check_sample1_element(self, e):
        # Verify that *e* is the <html> tree described by sample1: text
        # 'text', no tail, no attributes and exactly one <div> child whose
        # text is 'subtext' and whose tail is 'tail'.
        self.assertEqual(e.tag, 'html')
        self.assertEqual(e.text, 'text')
        self.assertEqual(e.tail, None)
        self.assertEqual(e.attrib, {})
        children = list(e)
        self.assertEqual(len(children), 1)
        only_child = children[0]
        self.assertEqual(only_child.tag, 'div')
        self.assertEqual(only_child.text, 'subtext')
        self.assertEqual(only_child.tail, 'tail')
        self.assertEqual(only_child.attrib, {})

    def test_dummy_builder(self):
        # parser.close() returns whatever the target's close() returns,
        # and None when the target has no close() at all.
        class BaseDummyBuilder:
            def close(self):
                return 42

        class DummyBuilder(BaseDummyBuilder):
            data = start = end = lambda *a: None

        for target_type in (DummyBuilder, BaseDummyBuilder):
            parser = ET.XMLParser(target=target_type())
            parser.feed(self.sample1)
            self.assertEqual(parser.close(), 42)

        parser = ET.XMLParser(target=object())
        parser.feed(self.sample1)
        self.assertIsNone(parser.close())

    def test_treebuilder_comment(self):
        # Default comment factory.
        builder = ET.TreeBuilder()
        made = builder.comment('ctext')
        self.assertEqual(made.tag, ET.Comment)
        made = builder.comment('ctext')
        self.assertEqual(made.text, 'ctext')

        # Explicit ET.Comment factory.
        builder = ET.TreeBuilder(comment_factory=ET.Comment)
        made = builder.comment('ctext')
        self.assertEqual(made.tag, ET.Comment)
        made = builder.comment('ctext')
        self.assertEqual(made.text, 'ctext')

        # Arbitrary callable: its return value is passed straight through.
        builder = ET.TreeBuilder(comment_factory=len)
        self.assertEqual(builder.comment('ctext'), len('ctext'))

    def test_treebuilder_pi(self):
        # Default processing-instruction factory.
        builder = ET.TreeBuilder()
        made = builder.pi('target', None)
        self.assertEqual(made.tag, ET.PI)
        made = builder.pi('target', None)
        self.assertEqual(made.text, 'target')

        # Explicit ET.PI factory, with and without PI text.
        builder = ET.TreeBuilder(pi_factory=ET.PI)
        made = builder.pi('target')
        self.assertEqual(made.tag, ET.PI)
        made = builder.pi('target')
        self.assertEqual(made.text, "target")
        made = builder.pi('pitarget', ' text ')
        self.assertEqual(made.tag, ET.PI)
        made = builder.pi('pitarget', ' text ')
        self.assertEqual(made.text, "pitarget  text ")

        # Arbitrary callable: receives (target, text) and its return value
        # is passed straight through.
        builder = ET.TreeBuilder(
            pi_factory=lambda target, text: (len(target), text))
        self.assertEqual(builder.pi('target'), (len('target'), None))
        self.assertEqual(builder.pi('pitarget', ' text '),
                         (len('pitarget'), ' text '))

    def test_late_tail(self):
        # Issue #37399: The tail of an ignored comment could overwrite the text before it.
        class TreeBuilderSubclass(ET.TreeBuilder):
            pass

        documents = ("<a>text<!-- comment -->tail</a>",
                     "<a>text<?pi data?>tail</a>")
        for xml in documents:
            # Default parser and builder.
            a = ET.fromstring(xml)
            self.assertEqual(a.text, "texttail")

            # Same document through a TreeBuilder subclass.
            parser = ET.XMLParser(target=TreeBuilderSubclass())
            parser.feed(xml)
            a = parser.close()
            self.assertEqual(a.text, "texttail")

    def test_late_tail_mix_pi_comments(self):
        # Issue #37399: The tail of an ignored comment could overwrite the text before it.
        # Test appending tails to comments/pis.
        class TreeBuilderSubclass(ET.TreeBuilder):
            pass

        builder_types = (ET.TreeBuilder, TreeBuilderSubclass)

        # Comments are inserted, processing instructions are ignored.
        xml = "<a>text<?pi1?> <!-- comment -->\n<?pi2?>tail</a>"
        for builder_type in builder_types:
            parser = ET.XMLParser(target=builder_type(insert_comments=True))
            parser.feed(xml)
            a = parser.close()
            self.assertEqual(a[0].text, ' comment ')
            self.assertEqual(a[0].tail, '\ntail')
            self.assertEqual(a.text, "text ")

        # Processing instructions are inserted, comments are ignored.
        xml = "<a>text<!-- comment -->\n<?pi data?>tail</a>"
        for builder_type in builder_types:
            parser = ET.XMLParser(target=builder_type(insert_pis=True))
            parser.feed(xml)
            a = parser.close()
            self.assertEqual(a[0].text, 'pi data')
            self.assertEqual(a[0].tail, 'tail')
            self.assertEqual(a.text, "text\n")

    def test_treebuilder_elementfactory_none(self):
        # element_factory=None must behave like the default factory.
        builder = ET.TreeBuilder(element_factory=None)
        parser = ET.XMLParser(target=builder)
        parser.feed(self.sample1)
        self._check_sample1_element(parser.close())

    def test_subclass(self):
        # A TreeBuilder subclass keeps its own methods and still builds
        # the expected tree.
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        builder = MyTreeBuilder()
        self.assertEqual(builder.foobar(10), 20)

        parser = ET.XMLParser(target=builder)
        parser.feed(self.sample1)

        root = parser.close()
        self._check_sample1_element(root)

    def test_subclass_comment_pi(self):
        # Same as test_subclass, with comment/PI factories configured and
        # a comment plus a PI fed after the root element.
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        builder = MyTreeBuilder(comment_factory=ET.Comment,
                                pi_factory=ET.PI)
        self.assertEqual(builder.foobar(10), 20)

        parser = ET.XMLParser(target=builder)
        parser.feed(self.sample1)
        parser.feed('<!-- a comment--><?and a pi?>')

        root = parser.close()
        self._check_sample1_element(root)

    def test_element_factory(self):
        # The custom element factory is called once per parsed element.
        seen_tags = []

        def myfactory(tag, attrib):
            seen_tags.append(tag)
            return ET.Element(tag, attrib)

        builder = ET.TreeBuilder(element_factory=myfactory)
        parser = ET.XMLParser(target=builder)
        parser.feed(self.sample2)
        parser.close()

        self.assertEqual(seen_tags, ['toplevel'])

    def _check_element_factory_class(self, cls):
        # Parse sample1 with *cls* as the element factory and verify that
        # the root is an instance of *cls* with the expected content.
        builder = ET.TreeBuilder(element_factory=cls)

        parser = ET.XMLParser(target=builder)
        parser.feed(self.sample1)
        root = parser.close()
        self.assertIsInstance(root, cls)
        self._check_sample1_element(root)

    def test_element_factory_subclass(self):
        class MyElement(ET.Element):
            pass

        self._check_element_factory_class(MyElement)

    def test_element_factory_pure_python_subclass(self):
        # Mimic SimpleTAL's behaviour (issue #16089): both versions of
        # TreeBuilder should be able to cope with a subclass of the
        # pure Python Element class.
        pure_python_element = ET._Element_Py
        # Not from a C extension
        self.assertEqual(pure_python_element.__module__,
                         'xml.etree.ElementTree')

        # Force some multiple inheritance with a C class to make things
        # more interesting.
        class MyElement(pure_python_element, ValueError):
            pass

        self._check_element_factory_class(MyElement)

    def test_doctype(self):
        # The target's doctype() receives (name, pubid, system) taken from
        # the DOCTYPE declaration of sample1.
        class DoctypeParser:
            _doctype = None

            def doctype(self, name, pubid, system):
                self._doctype = (name, pubid, system)

            def close(self):
                return self._doctype

        parser = ET.XMLParser(target=DoctypeParser())
        parser.feed(self.sample1)

        expected = (
            'html',
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd',
        )
        self.assertEqual(parser.close(), expected)

    def test_builder_lookup_errors(self):
        class RaisingBuilder:
            # Target whose attribute lookup raises *what* for the handler
            # named *raise_in* and returns a no-op handler for any other
            # missing attribute.
            def __init__(self, raise_in=None, what=ValueError):
                self.raise_in = raise_in
                self.what = what

            def __getattr__(self, name):
                if name == self.raise_in:
                    raise self.what(self.raise_in)

                def handle(*args):
                    pass
                return handle

        # cET also checks for 'close' and 'doctype', PyET does it only at need
        events = ('start', 'data', 'end', 'comment', 'pi')

        # Any non-AttributeError raised during handler lookup propagates
        # out of the XMLParser constructor.
        ET.XMLParser(target=RaisingBuilder())
        for event in events:
            with self.assertRaisesRegex(ValueError, event):
                ET.XMLParser(target=RaisingBuilder(event))

        # An AttributeError means "handler not provided": construction and
        # parsing both succeed.
        ET.XMLParser(target=RaisingBuilder(what=AttributeError))
        for event in events:
            target = RaisingBuilder(event, what=AttributeError)
            parser = ET.XMLParser(target=target)
            parser.feed(self.sample1)
            self.assertIsNone(parser.close())
3522
3523
class XMLParserTest(unittest.TestCase):
    # Tests for XMLParser construction, subclassing and doctype handling.

    # Small bytes document checked by _check_sample_element().
    sample1 = b'<file><line>22</line></file>'
    # Bytes document that carries a DOCTYPE declaration.
    sample2 = (b'<!DOCTYPE html PUBLIC'
        b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        b'<html>text</html>')
    # str document with non-ASCII characters in an attribute and in text.
    sample3 = ('<?xml version="1.0" encoding="iso-8859-1"?>\n'
        '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>')

    def _check_sample_element(self, e):
        # Verify that *e* is the <file> tree parsed from sample1.
        self.assertEqual(e.tag, 'file')
        first_child = e[0]
        self.assertEqual(first_child.tag, 'line')
        self.assertEqual(e[0].text, '22')

    def test_constructor_args(self):
        # Both keyword arguments are accepted together.
        builder = ET.TreeBuilder()
        parser2 = ET.XMLParser(encoding='utf-8', target=builder)
        parser2.feed(self.sample1)
        root = parser2.close()
        self._check_sample_element(root)

    def test_subclass(self):
        # A trivial XMLParser subclass parses like the base class.
        class MyParser(ET.XMLParser):
            pass

        parser = MyParser()
        parser.feed(self.sample1)
        root = parser.close()
        self._check_sample_element(root)

    def test_doctype_warning(self):
        # Parsing a document with a DOCTYPE must not emit a
        # DeprecationWarning (it would be raised as an error here).
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            parser = ET.XMLParser()
            parser.feed(self.sample2)
            parser.close()

    def test_subclass_doctype(self):
        seen_by_parser = None

        class MyParserWithDoctype(ET.XMLParser):
            def doctype(self, *args, **kwargs):
                nonlocal seen_by_parser
                seen_by_parser = (args, kwargs)

        # A doctype() method defined on the parser subclass is not called;
        # feeding a DOCTYPE emits a RuntimeWarning mentioning 'doctype'.
        parser = MyParserWithDoctype()
        with self.assertWarnsRegex(RuntimeWarning, 'doctype'):
            parser.feed(self.sample2)
        parser.close()
        self.assertIsNone(seen_by_parser)

        # With a target that implements doctype(), the target receives the
        # declaration and no warning is emitted.
        seen_by_parser = seen_by_target = None
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)
            warnings.simplefilter('error', RuntimeWarning)

            class DoctypeParser:
                def doctype(self, name, pubid, system):
                    nonlocal seen_by_target
                    seen_by_target = (name, pubid, system)

            parser = MyParserWithDoctype(target=DoctypeParser())
            parser.feed(self.sample2)
            parser.close()
            self.assertIsNone(seen_by_parser)
            expected = (
                'html',
                '-//W3C//DTD XHTML 1.0 Transitional//EN',
                'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd',
            )
            self.assertEqual(seen_by_target, expected)

    def test_inherited_doctype(self):
        '''Ensure that ordinary usage is not deprecated (Issue 19176)'''
        with warnings.catch_warnings():
            for category in (DeprecationWarning, RuntimeWarning):
                warnings.simplefilter('error', category)

            class MyParserWithoutDoctype(ET.XMLParser):
                pass

            parser = MyParserWithoutDoctype()
            parser.feed(self.sample2)
            parser.close()

    def test_parse_string(self):
        # Feeding a str (not bytes) document keeps non-ASCII characters
        # intact in both the attribute value and the element text.
        parser = ET.XMLParser(target=ET.TreeBuilder())
        parser.feed(self.sample3)
        money = parser.close()
        self.assertEqual(money.tag, 'money')
        self.assertEqual(money.attrib['value'], '$\xa3\u20ac\U0001017b')
        self.assertEqual(money.text, '$\xa3\u20ac\U0001017b')
3606
3607
class NamespaceParseTest(unittest.TestCase):
    # findall() with fully qualified "{uri}tag" names and a prefix map.

    def test_find_with_namespace(self):
        prefixes = {'h': 'hello', 'f': 'foo'}
        root = ET.fromstring(SAMPLE_XML_NS_ELEMS)

        tables = root.findall('{hello}table', prefixes)
        self.assertEqual(len(tables), 1)
        cells = root.findall('.//{hello}td', prefixes)
        self.assertEqual(len(cells), 2)
        names = root.findall('.//{foo}name', prefixes)
        self.assertEqual(len(names), 1)
3616
3617
class ElementSlicingTest(unittest.TestCase):
    # Indexing, slicing, slice deletion and slice assignment on Element.

    def _elem_tags(self, elemlist):
        # Tags of the given elements, in order.
        return [item.tag for item in elemlist]

    def _subelem_tags(self, elem):
        # Tags of the direct children of *elem*, in order.
        children = list(elem)
        return self._elem_tags(children)

    def _make_elem_with_children(self, numchildren):
        """Return an Element tagged 'a' that has *numchildren* children
           tagged 'a0', 'a1', ... in that order.

        """
        parent = ET.Element('a')
        for index in range(numchildren):
            ET.SubElement(parent, 'a%s' % index)
        return parent

    def test_getslice_single_index(self):
        elem = self._make_elem_with_children(10)

        self.assertEqual(elem[1].tag, 'a1')
        self.assertEqual(elem[-2].tag, 'a8')

        # Out-of-range indexes in either direction.
        with self.assertRaises(IndexError):
            elem[12]
        with self.assertRaises(IndexError):
            elem[-12]

    def test_getslice_range(self):
        elem = self._make_elem_with_children(6)
        tags = self._elem_tags

        self.assertEqual(tags(elem[3:]), ['a3', 'a4', 'a5'])
        self.assertEqual(tags(elem[3:6]), ['a3', 'a4', 'a5'])
        self.assertEqual(tags(elem[3:16]), ['a3', 'a4', 'a5'])
        self.assertEqual(tags(elem[3:5]), ['a3', 'a4'])
        self.assertEqual(tags(elem[3:-1]), ['a3', 'a4'])
        self.assertEqual(tags(elem[:2]), ['a0', 'a1'])

    def test_getslice_steps(self):
        elem = self._make_elem_with_children(10)
        tags = self._elem_tags

        self.assertEqual(tags(elem[8:10:1]), ['a8', 'a9'])
        self.assertEqual(tags(elem[::3]), ['a0', 'a3', 'a6', 'a9'])
        self.assertEqual(tags(elem[::8]), ['a0', 'a8'])
        self.assertEqual(tags(elem[1::8]), ['a1', 'a9'])
        # Very large steps select only the start element.
        self.assertEqual(tags(elem[3::sys.maxsize]), ['a3'])
        self.assertEqual(tags(elem[3::sys.maxsize << 64]), ['a3'])

    def test_getslice_negative_steps(self):
        elem = self._make_elem_with_children(4)
        tags = self._elem_tags

        self.assertEqual(tags(elem[::-1]), ['a3', 'a2', 'a1', 'a0'])
        self.assertEqual(tags(elem[::-2]), ['a3', 'a1'])
        # Very large negative steps select only the start element.
        self.assertEqual(tags(elem[3::-sys.maxsize]), ['a3'])
        self.assertEqual(tags(elem[3::-sys.maxsize - 1]), ['a3'])
        self.assertEqual(tags(elem[3::-sys.maxsize << 64]), ['a3'])

    def test_delslice(self):
        # Each case starts from a freshly built element.
        elem = self._make_elem_with_children(4)
        del elem[0:2]
        self.assertEqual(self._subelem_tags(elem), ['a2', 'a3'])

        elem = self._make_elem_with_children(4)
        del elem[0:]
        self.assertEqual(self._subelem_tags(elem), [])

        elem = self._make_elem_with_children(4)
        del elem[::-1]
        self.assertEqual(self._subelem_tags(elem), [])

        elem = self._make_elem_with_children(4)
        del elem[::-2]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'a2'])

        elem = self._make_elem_with_children(4)
        del elem[1::2]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'a2'])

        elem = self._make_elem_with_children(2)
        del elem[::2]
        self.assertEqual(self._subelem_tags(elem), ['a1'])

    def test_setslice_single_index(self):
        elem = self._make_elem_with_children(4)
        elem[1] = ET.Element('b')
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'a2', 'a3'])

        elem[-2] = ET.Element('c')
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'c', 'a3'])

        # Out-of-range assignments fail and leave the children untouched.
        with self.assertRaises(IndexError):
            elem[5] = ET.Element('d')
        with self.assertRaises(IndexError):
            elem[-5] = ET.Element('d')
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'c', 'a3'])

    def test_setslice_range(self):
        # Same number of replacements as the slice covers.
        elem = self._make_elem_with_children(4)
        elem[1:3] = [ET.Element('b%s' % i) for i in range(2)]
        self.assertEqual(self._subelem_tags(elem),
                         ['a0', 'b0', 'b1', 'a3'])

        # Fewer replacements: the element shrinks.
        elem = self._make_elem_with_children(4)
        elem[1:3] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'a3'])

        # More replacements: the element grows.
        elem = self._make_elem_with_children(4)
        elem[1:3] = [ET.Element('b%s' % i) for i in range(3)]
        self.assertEqual(self._subelem_tags(elem),
                         ['a0', 'b0', 'b1', 'b2', 'a3'])

    def test_setslice_steps(self):
        elem = self._make_elem_with_children(6)
        elem[1:5:2] = [ET.Element('b%s' % i) for i in range(2)]
        self.assertEqual(self._subelem_tags(elem),
                         ['a0', 'b0', 'a2', 'b1', 'a4', 'a5'])

        # An extended slice needs exactly as many replacements as it
        # selects; otherwise ValueError and no modification.
        elem = self._make_elem_with_children(6)
        with self.assertRaises(ValueError):
            elem[1:5:2] = [ET.Element('b')]
        with self.assertRaises(ValueError):
            elem[1:5:2] = [ET.Element('b%s' % i) for i in range(3)]
        with self.assertRaises(ValueError):
            elem[1:5:2] = []
        self.assertEqual(self._subelem_tags(elem),
                         ['a0', 'a1', 'a2', 'a3', 'a4', 'a5'])

        # Very large steps replace only the start element.
        elem = self._make_elem_with_children(4)
        elem[1::sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'a2', 'a3'])
        elem[1::sys.maxsize << 64] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'c', 'a2', 'a3'])

    def test_setslice_negative_steps(self):
        elem = self._make_elem_with_children(4)
        elem[2:0:-1] = [ET.Element('b%s' % i) for i in range(2)]
        self.assertEqual(self._subelem_tags(elem),
                         ['a0', 'b1', 'b0', 'a3'])

        # Length mismatch with a negative step: ValueError and no
        # modification.
        elem = self._make_elem_with_children(4)
        with self.assertRaises(ValueError):
            elem[2:0:-1] = [ET.Element('b')]
        with self.assertRaises(ValueError):
            elem[2:0:-1] = [ET.Element('b%s' % i) for i in range(3)]
        with self.assertRaises(ValueError):
            elem[2:0:-1] = []
        self.assertEqual(self._subelem_tags(elem),
                         ['a0', 'a1', 'a2', 'a3'])

        # Very large negative steps replace only the start element.
        elem = self._make_elem_with_children(4)
        elem[1::-sys.maxsize] = [ET.Element('b')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'b', 'a2', 'a3'])
        elem[1::-sys.maxsize - 1] = [ET.Element('c')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'c', 'a2', 'a3'])
        elem[1::-sys.maxsize << 64] = [ET.Element('d')]
        self.assertEqual(self._subelem_tags(elem), ['a0', 'd', 'a2', 'a3'])

    def test_issue123213_setslice_exception(self):
        elem = ET.Element('tag')
        # Does not hide the internal exception when assigning to the element
        with self.assertRaises(ZeroDivisionError):
            elem[:1] = (1/0 for i in range(2))

        # Still raises the TypeError when assigning with a non-iterable
        with self.assertRaises(TypeError):
            elem[:1] = None

        # Preserve the original TypeError message when assigning.
        def fail_with_message():
            raise TypeError("mymessage")

        with self.assertRaisesRegex(TypeError, 'mymessage'):
            elem[:1] = (fail_with_message() for i in range(2))
3783
3784class IOTest(unittest.TestCase):
def test_encoding(self):
    # Test encoding issues.

    def with_declaration(markup, codec, label=None):
        # Expected bytes for encodings whose output carries an XML
        # declaration: the declaration names *label* (defaults to
        # *codec*) and the whole document is encoded with *codec*.
        if label is None:
            label = codec
        prolog = "<?xml version='1.0' encoding='%s'?>\n" % label
        return (prolog + markup).encode(codec)

    declared = ("iso-8859-1", "utf-16", "utf-32")

    # Plain ASCII text, lower- and upper-case encoding names.
    elem = ET.Element("tag")
    elem.text = "abc"
    self.assertEqual(serialize(elem), '<tag>abc</tag>')
    for enc in ("utf-8", "us-ascii"):
        with self.subTest(enc):
            for spelling in (enc, enc.upper()):
                self.assertEqual(serialize(elem, encoding=spelling),
                                 b'<tag>abc</tag>')
    for enc in declared:
        with self.subTest(enc):
            self.assertEqual(serialize(elem, encoding=enc),
                             with_declaration("<tag>abc</tag>", enc))
            upper = enc.upper()
            self.assertEqual(serialize(elem, encoding=upper),
                             with_declaration("<tag>abc</tag>", enc, upper))

    # Markup-significant characters in text.
    elem = ET.Element("tag")
    elem.text = "<&\"\'>"
    self.assertEqual(serialize(elem), '<tag>&lt;&amp;"\'&gt;</tag>')
    self.assertEqual(serialize(elem, encoding="utf-8"),
                     b'<tag>&lt;&amp;"\'&gt;</tag>')
    self.assertEqual(serialize(elem, encoding="us-ascii"),
                     b'<tag>&lt;&amp;"\'&gt;</tag>')
    for enc in declared:
        self.assertEqual(
            serialize(elem, encoding=enc),
            with_declaration("<tag>&lt;&amp;\"'&gt;</tag>", enc))

    # Markup-significant characters in an attribute value.
    elem = ET.Element("tag")
    elem.attrib["key"] = "<&\"\'>"
    self.assertEqual(serialize(elem),
                     '<tag key="&lt;&amp;&quot;\'&gt;" />')
    self.assertEqual(serialize(elem, encoding="utf-8"),
                     b'<tag key="&lt;&amp;&quot;\'&gt;" />')
    self.assertEqual(serialize(elem, encoding="us-ascii"),
                     b'<tag key="&lt;&amp;&quot;\'&gt;" />')
    for enc in declared:
        self.assertEqual(
            serialize(elem, encoding=enc),
            with_declaration("<tag key=\"&lt;&amp;&quot;'&gt;\" />", enc))

    # Non-ASCII characters in text.
    elem = ET.Element("tag")
    elem.text = '\xe5\xf6\xf6<>'
    self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6&lt;&gt;</tag>')
    self.assertEqual(serialize(elem, encoding="utf-8"),
                     b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>')
    self.assertEqual(serialize(elem, encoding="us-ascii"),
                     b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>')
    for enc in declared:
        self.assertEqual(
            serialize(elem, encoding=enc),
            with_declaration("<tag>åöö&lt;&gt;</tag>", enc))

    # Non-ASCII characters in an attribute value.
    elem = ET.Element("tag")
    elem.attrib["key"] = '\xe5\xf6\xf6<>'
    self.assertEqual(serialize(elem),
                     '<tag key="\xe5\xf6\xf6&lt;&gt;" />')
    self.assertEqual(serialize(elem, encoding="utf-8"),
                     b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />')
    self.assertEqual(serialize(elem, encoding="us-ascii"),
                     b'<tag key="&#229;&#246;&#246;&lt;&gt;" />')
    for enc in ("iso-8859-1", "utf-16", "utf-16le", "utf-16be", "utf-32"):
        self.assertEqual(
            serialize(elem, encoding=enc),
            with_declaration("<tag key=\"åöö&lt;&gt;\" />", enc))
3853
def test_write_to_filename(self):
    # write() to a file name with no explicit encoding: the non-ASCII
    # character is expected as a numeric character reference.
    self.addCleanup(os_helper.unlink, TESTFN)
    site = ET.XML('''<site>\xf8</site>''')
    ET.ElementTree(site).write(TESTFN)
    with open(TESTFN, 'rb') as written:
        content = written.read()
        self.assertEqual(content, b'''<site>&#248;</site>''')
3860
def test_write_to_filename_with_encoding(self):
    # write() to a file name with an explicit byte encoding.
    self.addCleanup(os_helper.unlink, TESTFN)
    tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))

    # UTF-8: raw encoded character, no XML declaration.
    tree.write(TESTFN, encoding='utf-8')
    with open(TESTFN, 'rb') as written:
        self.assertEqual(written.read(), b'''<site>\xc3\xb8</site>''')

    # ISO-8859-1: an XML declaration precedes the document.
    tree.write(TESTFN, encoding='ISO-8859-1')
    with open(TESTFN, 'rb') as written:
        expected = convlinesep(
            b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n'''
            b'''<site>\xf8</site>''')
        self.assertEqual(written.read(), expected)
3873
def test_write_to_filename_as_unicode(self):
    # write() to a file name with encoding='unicode'.
    #
    # The expected bytes are the UTF-8 encoding of U+00F8 and do not depend
    # on the default text encoding, so there is no need to probe that
    # default by opening TESTFN in text mode first. The previous probe
    # stored f.encoding in a local that was never read.
    self.addCleanup(os_helper.unlink, TESTFN)

    tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
    tree.write(TESTFN, encoding='unicode')
    with open(TESTFN, 'rb') as f:
        self.assertEqual(f.read(), b"<site>\xc3\xb8</site>")
3884
def test_write_to_text_file(self):
    """Write with encoding='unicode' to text files opened in several encodings.

    The bytes on disk are expected to follow the encoding (and error
    handler) of the text file that receives the output.
    """
    self.addCleanup(os_helper.unlink, TESTFN)
    tree = ET.ElementTree(ET.XML('<site>\xf8</site>'))
    cases = (
        (dict(encoding='utf-8'), b'<site>\xc3\xb8</site>'),
        (dict(encoding='ascii', errors='xmlcharrefreplace'),
         b'<site>&#248;</site>'),
        (dict(encoding='ISO-8859-1'), b'<site>\xf8</site>'),
    )
    for open_kwargs, expected in cases:
        with open(TESTFN, 'w', **open_kwargs) as sink:
            tree.write(sink, encoding='unicode')
            # write() must not close a file object supplied by the caller.
            self.assertFalse(sink.closed)
        with open(TESTFN, 'rb') as stream:
            self.assertEqual(stream.read(), expected)
3905
def test_write_to_binary_file(self):
    """Write a tree to an open binary file using the default encoding."""
    self.addCleanup(os_helper.unlink, TESTFN)
    root = ET.XML('<site>\xf8</site>')
    with open(TESTFN, 'wb') as sink:
        ET.ElementTree(root).write(sink)
        # write() must not close a file object supplied by the caller.
        self.assertFalse(sink.closed)
    with open(TESTFN, 'rb') as stream:
        written = stream.read()
    self.assertEqual(written, b'<site>&#248;</site>')
3914
def test_write_to_binary_file_with_encoding(self):
    """Write a tree to an open binary file with an explicit encoding."""
    self.addCleanup(os_helper.unlink, TESTFN)
    tree = ET.ElementTree(ET.XML('<site>\xf8</site>'))
    cases = (
        # UTF-8: no XML declaration expected.
        ('utf-8', b'<site>\xc3\xb8</site>'),
        # ISO-8859-1: the declaration names the encoding.
        ('ISO-8859-1',
         b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
         b"<site>\xf8</site>"),
    )
    for encoding, expected in cases:
        with open(TESTFN, 'wb') as sink:
            tree.write(sink, encoding=encoding)
            self.assertFalse(sink.closed)
        with open(TESTFN, 'rb') as stream:
            self.assertEqual(stream.read(), expected)
3931
def test_write_to_binary_file_with_bom(self):
    """Write UTF-16 output to buffered and non-buffered binary files.

    Both files are expected to hold exactly what ``str.encode("utf-16")``
    produces for the declaration plus document.
    """
    self.addCleanup(os_helper.unlink, TESTFN)
    tree = ET.ElementTree(ET.XML('<site>\xf8</site>'))
    expected = ("<?xml version='1.0' encoding='utf-16'?>\n"
                "<site>\xf8</site>").encode("utf-16")
    # test BOM writing to a buffered file first, then to a non-buffered one
    for open_kwargs in ({}, {'buffering': 0}):
        with open(TESTFN, 'wb', **open_kwargs) as sink:
            tree.write(sink, encoding='utf-16')
            self.assertFalse(sink.closed)
        with open(TESTFN, 'rb') as stream:
            self.assertEqual(stream.read(), expected)
3951
def test_read_from_stringio(self):
    """Parse a document from an in-memory text stream."""
    source = io.StringIO('<?xml version="1.0"?><site></site>')
    document = ET.ElementTree()
    document.parse(source)
    root = document.getroot()
    self.assertEqual(root.tag, 'site')
3957
def test_write_to_stringio(self):
    """Write with encoding='unicode' to an in-memory text stream."""
    sink = io.StringIO()
    document = ET.ElementTree(ET.XML('<site>\xf8</site>'))
    document.write(sink, encoding='unicode')
    # The character is expected verbatim, not as a character reference.
    self.assertEqual(sink.getvalue(), '<site>\xf8</site>')
3963
def test_read_from_bytesio(self):
    """Parse a document from an in-memory binary stream."""
    source = io.BytesIO(b'<?xml version="1.0"?><site></site>')
    document = ET.ElementTree()
    document.parse(source)
    root = document.getroot()
    self.assertEqual(root.tag, 'site')
3969
def test_write_to_bytesio(self):
    """Write with the default encoding to an in-memory binary stream."""
    sink = io.BytesIO()
    document = ET.ElementTree(ET.XML('<site>\xf8</site>'))
    document.write(sink)
    # The non-ASCII character is expected as a character reference.
    self.assertEqual(sink.getvalue(), b'<site>&#248;</site>')
3975
class dummy:
    """Empty attribute holder; tests attach read/write callables to instances."""
3978
def test_read_from_user_text_reader(self):
    """Parse from an object that exposes only a text ``read`` attribute."""
    backing = io.StringIO('<?xml version="1.0"?><site></site>')
    reader = self.dummy()
    reader.read = backing.read
    document = ET.ElementTree()
    document.parse(reader)
    root = document.getroot()
    self.assertEqual(root.tag, 'site')
3986
def test_write_to_user_text_writer(self):
    """Write to an object that exposes only a text ``write`` attribute."""
    backing = io.StringIO()
    writer = self.dummy()
    writer.write = backing.write
    document = ET.ElementTree(ET.XML('<site>\xf8</site>'))
    document.write(writer, encoding='unicode')
    self.assertEqual(backing.getvalue(), '<site>\xf8</site>')
3994
def test_read_from_user_binary_reader(self):
    """Parse from an object that exposes only a binary ``read`` attribute."""
    raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
    reader = self.dummy()
    # Only the bound read() is copied over, so the parser sees nothing else.
    reader.read = raw.read
    tree = ET.ElementTree()
    tree.parse(reader)
    self.assertEqual(tree.getroot().tag, 'site')
4003
def test_write_to_user_binary_writer(self):
    """Write to an object that exposes only a binary ``write`` attribute."""
    backing = io.BytesIO()
    writer = self.dummy()
    writer.write = backing.write
    document = ET.ElementTree(ET.XML('<site>\xf8</site>'))
    document.write(writer)
    self.assertEqual(backing.getvalue(), b'<site>&#248;</site>')
4011
def test_write_to_user_binary_writer_with_bom(self):
    """Write UTF-16 output to a user object offering write/seekable/tell."""
    backing = io.BytesIO()
    writer = self.dummy()
    writer.write = backing.write
    writer.seekable = lambda: True
    writer.tell = backing.tell

    document = ET.ElementTree(ET.XML('<site />'))
    document.write(writer, encoding="utf-16")

    expected = ("<?xml version='1.0' encoding='utf-16'?>\n"
                "<site />").encode("utf-16")
    self.assertEqual(backing.getvalue(), expected)
4023
def test_tostringlist_invariant(self):
    """Joining tostringlist() output must reproduce tostring() output."""
    root = ET.fromstring('<tag>foo</tag>')
    # Pair each encoding with the empty value of its result type.
    for encoding, empty in (('unicode', ''), ('utf-16', b'')):
        whole = ET.tostring(root, encoding)
        pieces = ET.tostringlist(root, encoding)
        self.assertEqual(whole, empty.join(pieces))
4032
def test_short_empty_elements(self):
    """Check how empty elements are serialized with short_empty_elements."""
    root = ET.fromstring('<tag>a<x />b<y></y>c</tag>')
    collapsed = '<tag>a<x />b<y />c</tag>'
    expanded = '<tag>a<x></x>b<y></y>c</tag>'

    # The default is expected to match short_empty_elements=True.
    self.assertEqual(ET.tostring(root, 'unicode'), collapsed)
    self.assertEqual(
        ET.tostring(root, 'unicode', short_empty_elements=True),
        collapsed)
    self.assertEqual(
        ET.tostring(root, 'unicode', short_empty_elements=False),
        expanded)
4044
4045
class ParseErrorTest(unittest.TestCase):
    """Checks on the ParseError raised for malformed documents."""

    def test_subclass(self):
        # ParseError instances must be usable wherever SyntaxError is caught.
        self.assertIsInstance(ET.ParseError(), SyntaxError)

    def _get_error(self, s):
        """Parse *s* and return the ParseError it raises.

        The test fails right here if parsing unexpectedly succeeds, instead
        of returning None and letting the caller hit an unrelated
        AttributeError on ``None.position`` / ``None.code``.
        """
        with self.assertRaises(ET.ParseError) as cm:
            ET.fromstring(s)
        return cm.exception

    def test_error_position(self):
        # position is compared against (line, column) pairs.
        self.assertEqual(self._get_error('foo').position, (1, 0))
        self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5))
        self.assertEqual(self._get_error('foobar<').position, (1, 6))

    def test_error_code(self):
        # Imported locally: only this test needs the expat error tables.
        import xml.parsers.expat.errors as ERRORS
        self.assertEqual(self._get_error('foo').code,
                ERRORS.codes[ERRORS.XML_ERROR_SYNTAX])
4065
4066
class KeywordArgsTest(unittest.TestCase):
    # Test various issues with keyword arguments passed to ET.Element
    # constructor and methods
    def test_issue14818(self):
        """Positional and keyword call spellings must give equal results."""
        root = ET.XML("<a>foo</a>")
        self.assertEqual(root.find('a', None),
                         root.find(path='a', namespaces=None))
        self.assertEqual(root.findtext('a', None, None),
                         root.findtext(path='a', default=None, namespaces=None))
        self.assertEqual(root.findall('a', None),
                         root.findall(path='a', namespaces=None))
        self.assertEqual(list(root.iterfind('a', None)),
                         list(root.iterfind(path='a', namespaces=None)))

        # Without attributes, attrib is an empty dict.
        self.assertEqual(ET.Element('a').attrib, {})

        # Every way of passing the same attributes must give the same element.
        expected_attrib = {'href': '#', 'id': 'foo'}
        variants = [
            ET.Element('a', {'href': '#', 'id': 'foo'}),
            ET.Element('a', attrib={'href': '#', 'id': 'foo'}),
            ET.Element('a', {'href': '#'}, id="foo"),
            ET.Element('a', href="#", id="foo"),
            ET.Element('a', {'href': '#', 'id': 'foo'}, href="#", id="foo"),
        ]
        for variant in variants:
            self.assertEqual(variant.tag, 'a')
            self.assertEqual(variant.attrib, expected_attrib)

        child = ET.SubElement(variants[0], 'foobar', attrib={'key1': 'value1'})
        self.assertEqual(child.attrib['key1'], 'value1')

        # A non-dict attrib is rejected, positionally and by keyword.
        self.assertRaisesRegex(TypeError, 'must be dict, not str',
                               ET.Element, 'a', "I'm not a dict")
        self.assertRaisesRegex(TypeError, 'must be dict, not str',
                               ET.Element, 'a', attrib="I'm not a dict")
4100
4101# --------------------------------------------------------------------
4102
class NoAcceleratorTest(unittest.TestCase):
    """Checks that only make sense when ET is the pure-Python module."""

    @classmethod
    def setUpClass(cls):
        if ET is pyET:
            return
        raise unittest.SkipTest('only for the Python version')

    # Test that the C accelerator was not imported for pyET
    def test_correct_import_pyET(self):
        # The type of methods defined in Python code is types.FunctionType,
        # while the type of methods defined inside _elementtree is
        # <class 'wrapper_descriptor'>
        for klass in (pyET.Element, pyET.XMLParser):
            self.assertIsInstance(klass.__init__, types.FunctionType)
4116
4117# --------------------------------------------------------------------
4118
class BoolTest(unittest.TestCase):
    """Truth-testing an Element warns but keeps its length-based result."""

    def test_warning(self):
        expected_warning = (
            r"Testing an element's truth value will always return True in "
            r"future versions.  "
            r"Use specific 'len\(elem\)' or 'elem is not None' test instead.")
        elem = ET.fromstring('<a style="new"></a>')

        # Element without children: emulate prior behavior for now.
        with self.assertWarnsRegex(DeprecationWarning, expected_warning):
            childless_truth = bool(elem)
        self.assertIs(childless_truth, False)

        # Element with children
        ET.SubElement(elem, 'b')
        with self.assertWarnsRegex(DeprecationWarning, expected_warning):
            parent_truth = bool(elem)
        self.assertIs(parent_truth, True)
4136
4137# --------------------------------------------------------------------
4138
def c14n_roundtrip(xml, **options):
    """Return ``pyET.canonicalize()`` output for the XML text *xml*.

    Keyword *options* are forwarded unchanged.
    """
    canonical_text = pyET.canonicalize(xml, **options)
    return canonical_text
4141
4142
class C14NTest(unittest.TestCase):
    """Canonicalization (C14N) tests."""

    maxDiff = None

    #
    # simple roundtrip tests (from c14n.py)

    def test_simple_roundtrip(self):
        """Canonicalize small documents and compare with the expected text."""
        cases = [
            # Basics
            ("<doc/>", '<doc></doc>'),
            ("<doc xmlns='uri'/>",  # FIXME
             '<doc xmlns="uri"></doc>'),
            ("<prefix:doc xmlns:prefix='uri'/>",
             '<prefix:doc xmlns:prefix="uri"></prefix:doc>'),
            ("<doc xmlns:prefix='uri'><prefix:bar/></doc>",
             '<doc><prefix:bar xmlns:prefix="uri"></prefix:bar></doc>'),
            ("<elem xmlns:wsu='http://docs.oasis-open.org/wss/2004/01/oasis-200401-wss-wssecurity-utility-1.0.xsd' xmlns:SOAP-ENV='http://schemas.xmlsoap.org/soap/envelope/' />",
             '<elem></elem>'),

            # C14N spec
            ("<doc>Hello, world!<!-- Comment 1 --></doc>",
             '<doc>Hello, world!</doc>'),
            ("<value>&#x32;</value>",
             '<value>2</value>'),
            ('<compute><![CDATA[value>"0" && value<"10" ?"valid":"error"]]></compute>',
             '<compute>value&gt;"0" &amp;&amp; value&lt;"10" ?"valid":"error"</compute>'),
            ('''<compute expr='value>"0" &amp;&amp; value&lt;"10" ?"valid":"error"'>valid</compute>''',
             '<compute expr="value>&quot;0&quot; &amp;&amp; value&lt;&quot;10&quot; ?&quot;valid&quot;:&quot;error&quot;">valid</compute>'),
            ("<norm attr=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>",
             '<norm attr=" \'    &#xD;&#xA;&#x9;   \' "></norm>'),
            ("<normNames attr='   A   &#x20;&#13;&#xa;&#9;   B   '/>",
             '<normNames attr="   A    &#xD;&#xA;&#x9;   B   "></normNames>'),
            ("<normId id=' &apos;   &#x20;&#13;&#xa;&#9;   &apos; '/>",
             '<normId id=" \'    &#xD;&#xA;&#x9;   \' "></normId>'),

            # fragments from PJ's tests
            #("<doc xmlns:x='http://example.com/x' xmlns='http://example.com/default'><b y:a1='1' xmlns='http://example.com/default' a3='3' xmlns:y='http://example.com/y' y:a2='2'/></doc>",
            # '<doc xmlns:x="http://example.com/x"><b xmlns:y="http://example.com/y" a3="3" y:a1="1" y:a2="2"></b></doc>'),
        ]
        for source, expected in cases:
            self.assertEqual(c14n_roundtrip(source), expected)

        # Namespace issues: these documents must come back unchanged.
        unchanged_documents = [
            '<X xmlns="http://nps/a"><Y targets="abc,xyz"></Y></X>',
            '<X xmlns="http://nps/a"><Y xmlns="http://nsp/b" targets="abc,xyz"></Y></X>',
            '<X xmlns="http://nps/a"><Y xmlns:b="http://nsp/b" b:targets="abc,xyz"></Y></X>',
        ]
        for xml in unchanged_documents:
            self.assertEqual(c14n_roundtrip(xml), xml)

    def test_c14n_exclusion(self):
        """Check the exclude_attrs / exclude_tags options, with and without strip_text."""
        xml = textwrap.dedent("""\
        <root xmlns:x="http://example.com/x">
            <a x:attr="attrx">
                <b>abtext</b>
            </a>
            <b>btext</b>
            <c>
                <x:d>dtext</x:d>
            </c>
        </root>
        """)
        x_attr = '{http://example.com/x}attr'
        x_d = '{http://example.com/x}d'
        cases = [
            (dict(strip_text=True),
             '<root>'
             '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
             '<b>btext</b>'
             '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
             '</root>'),
            (dict(strip_text=True, exclude_attrs=[x_attr]),
             '<root>'
             '<a><b>abtext</b></a>'
             '<b>btext</b>'
             '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
             '</root>'),
            (dict(strip_text=True, exclude_tags=[x_d]),
             '<root>'
             '<a xmlns:x="http://example.com/x" x:attr="attrx"><b>abtext</b></a>'
             '<b>btext</b>'
             '<c></c>'
             '</root>'),
            (dict(strip_text=True, exclude_attrs=[x_attr], exclude_tags=[x_d]),
             '<root>'
             '<a><b>abtext</b></a>'
             '<b>btext</b>'
             '<c></c>'
             '</root>'),
            (dict(strip_text=True, exclude_tags=['a', 'b']),
             '<root>'
             '<c><x:d xmlns:x="http://example.com/x">dtext</x:d></c>'
             '</root>'),
            (dict(exclude_tags=['a', 'b']),
             '<root>\n'
             '    \n'
             '    \n'
             '    <c>\n'
             '        <x:d xmlns:x="http://example.com/x">dtext</x:d>\n'
             '    </c>\n'
             '</root>'),
            (dict(strip_text=True, exclude_tags=[x_d, 'b']),
             '<root>'
             '<a xmlns:x="http://example.com/x" x:attr="attrx"></a>'
             '<c></c>'
             '</root>'),
            (dict(exclude_tags=[x_d, 'b']),
             '<root>\n'
             '    <a xmlns:x="http://example.com/x" x:attr="attrx">\n'
             '        \n'
             '    </a>\n'
             '    \n'
             '    <c>\n'
             '        \n'
             '    </c>\n'
             '</root>'),
        ]
        for options, expected in cases:
            self.assertEqual(c14n_roundtrip(xml, **options), expected)

    #
    # basic method=c14n tests from the c14n 2.0 specification.  uses
    # test files under xmltestdata/c14n-20.

    # note that this uses generated C14N versions of the standard ET.write
    # output, not roundtripped C14N (see above).

    def test_xml_c14n2(self):
        """Run the input/config/output file triples found in the c14n-20 data dir."""
        datadir = findfile("c14n-20", subdir="xmltestdata")
        full_path = partial(os.path.join, datadir)

        # Base names (".xml" suffix removed) of all XML files, sorted.
        files = [name[:-4] for name in sorted(os.listdir(datadir))
                 if name.endswith('.xml')]
        input_files = [name for name in files if name.startswith('in')]

        # Config name -> {option name: (stripped text, option element)}.
        configs = {}
        for name in files:
            if not name.startswith('c14n'):
                continue
            config_root = ET.parse(full_path(name) + ".xml").getroot()
            configs[name] = {
                # <c14n2:PrefixRewrite>sequential</c14n2:PrefixRewrite>
                option.tag.split('}')[-1]: ((option.text or '').strip(), option)
                for option in config_root
            }

        # Input name -> [(output name, config)], matched via the part of the
        # output file name after its last underscore.
        tests = {}
        for input_file in input_files:
            output_prefix = f'out_{input_file}_'
            matched = []
            for name in files:
                config_name = name.rsplit('_', 1)[-1]
                if name.startswith(output_prefix) and config_name in configs:
                    matched.append((name, configs[config_name]))
            tests[input_file] = matched

        # Make sure we found all test cases.
        self.assertEqual(30, sum(len(matched) for matched in tests.values()))

        def get_option(config, option_name, default=None):
            return config.get(option_name, (default, ()))[0]

        c14n2_ns = '{http://www.w3.org/2010/xml-c14n2}'

        for input_file, output_files in tests.items():
            for output_file, config in output_files:
                keep_comments = get_option(
                    config, 'IgnoreComments') == 'true'  # no, it's right :)
                strip_text = get_option(
                    config, 'TrimTextNodes') == 'true'
                rewrite_prefixes = get_option(
                    config, 'PrefixRewrite') == 'sequential'
                if 'QNameAware' in config:
                    qname_aware = config['QNameAware'][1]
                    qattrs = [
                        f"{{{el.get('NS')}}}{el.get('Name')}"
                        for el in qname_aware.findall(c14n2_ns + 'QualifiedAttr')
                    ]
                    qtags = [
                        f"{{{el.get('NS')}}}{el.get('Name')}"
                        for el in qname_aware.findall(c14n2_ns + 'Element')
                    ]
                else:
                    qtags = qattrs = None

                # Build subtest description from config.
                config_descr = ','.join(
                    f"{name}={value or ','.join(c.tag.split('}')[-1] for c in children)}"
                    for name, (value, children) in sorted(config.items())
                )

                with self.subTest(f"{output_file}({config_descr})"):
                    if (input_file in ('inNsRedecl', 'inNsSuperfluous')
                            and not rewrite_prefixes):
                        self.skipTest(
                            f"Redeclared namespace handling is not supported in {output_file}")
                    if 'QNameAware' in config and config['QNameAware'][1].find(
                            c14n2_ns + 'XPathElement') is not None:
                        self.skipTest(
                            f"QName rewriting in XPath text is not supported in {output_file}")

                    source = full_path(input_file + ".xml")
                    if input_file == 'inC14N5':
                        # Hack: avoid setting up external entity resolution in the parser.
                        with open(full_path('world.txt'), 'rb') as entity_file:
                            with open(source, 'rb') as raw_input:
                                source = io.BytesIO(raw_input.read().replace(
                                    b'&ent2;', entity_file.read()))

                    text = ET.canonicalize(
                        from_file=source,
                        with_comments=keep_comments,
                        strip_text=strip_text,
                        rewrite_prefixes=rewrite_prefixes,
                        qname_aware_tags=qtags, qname_aware_attrs=qattrs)

                    with open(full_path(output_file + ".xml"), 'r', encoding='utf8') as expected_file:
                        expected = expected_file.read()
                    if input_file == 'inC14N3':
                        # FIXME: cET resolves default attributes but ET does not!
                        expected = expected.replace(' attr="default"', '')
                        text = text.replace(' attr="default"', '')
                    self.assertEqual(expected, text)
4368
4369# --------------------------------------------------------------------
4370
def setUpModule(module=None):
    """Bind the module globals ``pyET`` and ``ET`` and register their cleanup.

    When invoked without a module, runs the Python ET tests by loading pyET.
    Otherwise, uses the given module as the ET.
    """
    global pyET, ET
    pyET = import_fresh_module('xml.etree.ElementTree',
                               blocked=['_elementtree'])
    ET = pyET if module is None else module

    # don't interfere with subsequent tests
    def reset_globals():
        global ET, pyET
        ET = pyET = None
    unittest.addModuleCleanup(reset_globals)

    # Provide default namespace mapping and path cache.
    from xml.etree import ElementPath
    namespace_map = ET.register_namespace._namespace_map
    # Copy the default namespace mapping.  Module cleanups run in reverse
    # order of registration, so the map is cleared before it is refilled.
    saved_namespaces = namespace_map.copy()
    unittest.addModuleCleanup(namespace_map.update, saved_namespaces)
    unittest.addModuleCleanup(namespace_map.clear)

    # Copy the path cache (should be empty)
    original_cache = ElementPath._cache
    unittest.addModuleCleanup(setattr, ElementPath, "_cache", original_cache)
    ElementPath._cache = original_cache.copy()

    # Align the Comment/PI factories.
    if hasattr(ET, '_set_factories'):
        previous_factories = ET._set_factories(ET.Comment, ET.PI)
        unittest.addModuleCleanup(ET._set_factories, *previous_factories)
4406
4407
if __name__ == "__main__":
    # Run the tests when this file is executed directly as a script.
    unittest.main()
4410