• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# -*- coding: utf-8 -*-
2# IMPORTANT: the same tests are run from "test_xml_etree_c" in order
3# to ensure consistency between the C implementation and the Python
4# implementation.
5#
6# For this purpose, the module-level "ET" symbol is temporarily
7# monkey-patched when running the "test_xml_etree_c" test suite.
8
9import cgi
10import copy
11import functools
12import io
13import pickle
14import StringIO
15import sys
16import types
17import unittest
18import warnings
19import weakref
20
21from test import test_support as support
22from test.test_support import TESTFN, findfile, gc_collect, swap_attr
23
# pyET is the pure-Python implementation.
#
# ET is pyET in test_xml_etree and is the C accelerated version in
# test_xml_etree_c.
from xml.etree import ElementTree as pyET
# Placeholder only: every test reads the module-level ET at call time, so it
# must be rebound to a real implementation before any test runs.
# NOTE(review): the code that performs that rebinding is not visible in this
# part of the file -- confirm where it happens.
ET = None
30
# Paths of the XML fixture files used below, each located with findfile()
# in the "xmltestdata" subdirectory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata")
34
# Un-namespaced sample: a <body> holding two <tag> children and a <section>
# that wraps one more <tag>.
SAMPLE_XML = """\
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
"""

# A standalone <section> with a <tag>, an empty <nexttag> and a nested
# <nextsection>.
SAMPLE_SECTION = """\
<section>
  <tag class='b' id='inner'>subtext</tag>
  <nexttag />
  <nextsection>
    <tag />
  </nextsection>
</section>
"""

# Same element layout as SAMPLE_XML (without attributes) placed in the
# default namespace "http://effbot.org/ns".  Unlike SAMPLE_XML, this literal
# starts with a newline.
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
"""

# Two sibling <table> elements bound to different prefixed namespaces
# ("hello" via h:, "foo" via f:) under an un-namespaced <root>.
SAMPLE_XML_NS_ELEMS = """
<root>
<h:table xmlns:h="hello">
  <h:tr>
    <h:td>Apples</h:td>
    <h:td>Bananas</h:td>
  </h:tr>
</h:table>

<f:table xmlns:f="foo">
  <f:name>African Coffee Table</f:name>
  <f:width>80</f:width>
  <f:length>120</f:length>
</f:table>
</root>
"""

# Document whose DOCTYPE pulls in an external parameter entity and whose
# body references &entity;, which the internal subset does not declare.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""
89
90
def checkwarnings(*filters):
    """Build a decorator that runs a test inside support.check_warnings().

    *filters* are passed straight through to ``support.check_warnings``.
    The returned wrapper carries the wrapped test's metadata and discards
    the test's return value.
    """
    def decorator(test):
        @functools.wraps(test)
        def newtest(*args, **kwargs):
            with support.check_warnings(*filters):
                test(*args, **kwargs)
        return newtest
    return decorator
99
100
class ModuleTest(unittest.TestCase):
    # TODO: this should be removed once we get rid of the global module vars

    def test_sanity(self):
        # Import sanity: the three xml.etree submodules must be importable.

        from xml.etree import ElementTree, ElementInclude, ElementPath
110
111
def serialize(elem, to_string=True, **options):
    """Write *elem* into an in-memory buffer via ``ET.ElementTree.write``.

    Extra keyword *options* are forwarded to ``write()``.  Returns the
    buffer's contents when *to_string* is true; otherwise returns the
    buffer object itself, rewound to position 0.
    """
    buf = StringIO.StringIO()
    ET.ElementTree(elem).write(buf, **options)
    if not to_string:
        buf.seek(0)
        return buf
    return buf.getvalue()
121
def summarize_list(seq):
    """Return a list with the ``tag`` attribute of every item in *seq*."""
    tags = []
    for item in seq:
        tags.append(item.tag)
    return tags
124
def normalize_crlf(tree):
    """Convert CRLF to LF in the text and tail of every element of *tree*.

    Elements are visited through ``tree.iter()`` and modified in place;
    empty or missing text/tail values are left untouched.
    """
    for node in tree.iter():
        for attr in ("text", "tail"):
            value = getattr(node, attr)
            if value:
                setattr(node, attr, value.replace("\r\n", "\n"))
131
def python_only(test):
    """Decorator: run *test* only against the pure-Python implementation.

    The check is made each time the wrapper is called (the module-level
    ET is looked up at call time), raising ``unittest.SkipTest`` when ET is
    not pyET.  Otherwise the wrapped test's result is returned.

    The wrapper keeps the wrapped test's name and docstring and forwards
    keyword arguments, consistent with ``checkwarnings``.
    """
    @functools.wraps(test)
    def wrapper(*args, **kwargs):
        if ET is not pyET:
            raise unittest.SkipTest('only for the Python version')
        return test(*args, **kwargs)
    return wrapper
138
def cet_only(test):
    """Decorator: run *test* only against the C accelerated implementation.

    The check is made each time the wrapper is called (the module-level
    ET is looked up at call time), raising ``unittest.SkipTest`` when ET is
    pyET.  Otherwise the wrapped test's result is returned.

    The wrapper keeps the wrapped test's name and docstring and forwards
    keyword arguments, consistent with ``checkwarnings``.
    """
    @functools.wraps(test)
    def wrapper(*args, **kwargs):
        if ET is pyET:
            raise unittest.SkipTest('only for the C version')
        return test(*args, **kwargs)
    return wrapper
145
146# --------------------------------------------------------------------
147# element tree tests
148
149class ElementTreeTest(unittest.TestCase):
150
151    def serialize_check(self, elem, expected):
152        self.assertEqual(serialize(elem), expected)
153
154    def test_interface(self):
155        # Test element tree interface.
156
157        def check_string(string):
158            len(string)
159            for char in string:
160                self.assertEqual(len(char), 1,
161                        msg="expected one-character string, got %r" % char)
162            new_string = string + ""
163            new_string = string + " "
164            string[:0]
165
166        def check_mapping(mapping):
167            len(mapping)
168            keys = mapping.keys()
169            items = mapping.items()
170            for key in keys:
171                item = mapping[key]
172            mapping["key"] = "value"
173            self.assertEqual(mapping["key"], "value",
174                    msg="expected value string, got %r" % mapping["key"])
175
176        def check_element(element):
177            self.assertTrue(ET.iselement(element), msg="not an element")
178            self.assertTrue(hasattr(element, "tag"), msg="no tag member")
179            self.assertTrue(hasattr(element, "attrib"), msg="no attrib member")
180            self.assertTrue(hasattr(element, "text"), msg="no text member")
181            self.assertTrue(hasattr(element, "tail"), msg="no tail member")
182
183            check_string(element.tag)
184            check_mapping(element.attrib)
185            if element.text is not None:
186                check_string(element.text)
187            if element.tail is not None:
188                check_string(element.tail)
189            for elem in element:
190                check_element(elem)
191
192        element = ET.Element("tag")
193        check_element(element)
194        tree = ET.ElementTree(element)
195        check_element(tree.getroot())
196        element = ET.Element("t\xe4g", key="value")
197        tree = ET.ElementTree(element)
198        self.assertRegexpMatches(repr(element), r"^<Element 't\\xe4g' at 0x.*>$")
199        element = ET.Element("tag", key="value")
200
201        # Make sure all standard element methods exist.
202
203        def check_method(method):
204            self.assertTrue(hasattr(method, '__call__'),
205                    msg="%s not callable" % method)
206
207        check_method(element.append)
208        check_method(element.extend)
209        check_method(element.insert)
210        check_method(element.remove)
211        check_method(element.getchildren)
212        check_method(element.find)
213        check_method(element.iterfind)
214        check_method(element.findall)
215        check_method(element.findtext)
216        check_method(element.clear)
217        check_method(element.get)
218        check_method(element.set)
219        check_method(element.keys)
220        check_method(element.items)
221        check_method(element.iter)
222        check_method(element.itertext)
223        check_method(element.getiterator)
224
225        # These methods return an iterable. See bug 6472.
226
227        def check_iter(it):
228            check_method(it.next)
229
230        check_iter(element.iter("tag"))
231        check_iter(element.iterfind("tag"))
232        check_iter(element.iterfind("*"))
233        check_iter(tree.iter("tag"))
234        check_iter(tree.iterfind("tag"))
235        check_iter(tree.iterfind("*"))
236
237        # These aliases are provided:
238
239        self.assertEqual(ET.XML, ET.fromstring)
240        self.assertEqual(ET.PI, ET.ProcessingInstruction)
241        self.assertEqual(ET.XMLParser, ET.XMLTreeBuilder)
242
243    def test_set_attribute(self):
244        element = ET.Element('tag')
245
246        self.assertEqual(element.tag, 'tag')
247        element.tag = 'Tag'
248        self.assertEqual(element.tag, 'Tag')
249        element.tag = 'TAG'
250        self.assertEqual(element.tag, 'TAG')
251
252        self.assertIsNone(element.text)
253        element.text = 'Text'
254        self.assertEqual(element.text, 'Text')
255        element.text = 'TEXT'
256        self.assertEqual(element.text, 'TEXT')
257
258        self.assertIsNone(element.tail)
259        element.tail = 'Tail'
260        self.assertEqual(element.tail, 'Tail')
261        element.tail = 'TAIL'
262        self.assertEqual(element.tail, 'TAIL')
263
264        self.assertEqual(element.attrib, {})
265        element.attrib = {'a': 'b', 'c': 'd'}
266        self.assertEqual(element.attrib, {'a': 'b', 'c': 'd'})
267        element.attrib = {'A': 'B', 'C': 'D'}
268        self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'})
269
270    def test_simpleops(self):
271        # Basic method sanity checks.
272
273        elem = ET.XML("<body><tag/></body>")
274        self.serialize_check(elem, '<body><tag /></body>')
275        e = ET.Element("tag2")
276        elem.append(e)
277        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
278        elem.remove(e)
279        self.serialize_check(elem, '<body><tag /></body>')
280        elem.insert(0, e)
281        self.serialize_check(elem, '<body><tag2 /><tag /></body>')
282        elem.remove(e)
283        elem.extend([e])
284        self.serialize_check(elem, '<body><tag /><tag2 /></body>')
285        elem.remove(e)
286
287        element = ET.Element("tag", key="value")
288        self.serialize_check(element, '<tag key="value" />') # 1
289        subelement = ET.Element("subtag")
290        element.append(subelement)
291        self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 2
292        element.insert(0, subelement)
293        self.serialize_check(element,
294                '<tag key="value"><subtag /><subtag /></tag>') # 3
295        element.remove(subelement)
296        self.serialize_check(element, '<tag key="value"><subtag /></tag>') # 4
297        element.remove(subelement)
298        self.serialize_check(element, '<tag key="value" />') # 5
299        with self.assertRaises(ValueError) as cm:
300            element.remove(subelement)
301        self.assertEqual(str(cm.exception), 'list.remove(x): x not in list')
302        self.serialize_check(element, '<tag key="value" />') # 6
303        element[0:0] = [subelement, subelement, subelement]
304        self.serialize_check(element[1], '<subtag />')
305        self.assertEqual(element[1:9], [element[1], element[2]])
306        self.assertEqual(element[:9:2], [element[0], element[2]])
307        del element[1:2]
308        self.serialize_check(element,
309                '<tag key="value"><subtag /><subtag /></tag>')
310
311    def test_cdata(self):
312        # Test CDATA handling (etc).
313
314        self.serialize_check(ET.XML("<tag>hello</tag>"),
315                '<tag>hello</tag>')
316        self.serialize_check(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"),
317                '<tag>hello</tag>')
318        self.serialize_check(ET.XML("<tag><![CDATA[hello]]></tag>"),
319                '<tag>hello</tag>')
320
321    def test_file_init(self):
322        stringfile = StringIO.StringIO(SAMPLE_XML.encode("utf-8"))
323        tree = ET.ElementTree(file=stringfile)
324        self.assertEqual(tree.find("tag").tag, 'tag')
325        self.assertEqual(tree.find("section/tag").tag, 'tag')
326
327        tree = ET.ElementTree(file=SIMPLE_XMLFILE)
328        self.assertEqual(tree.find("element").tag, 'element')
329        self.assertEqual(tree.find("element/../empty-element").tag,
330                'empty-element')
331
332    def test_path_cache(self):
333        # Check that the path cache behaves sanely.
334
335        from xml.etree import ElementPath
336
337        elem = ET.XML(SAMPLE_XML)
338        for i in range(10): ET.ElementTree(elem).find('./'+str(i))
339        cache_len_10 = len(ElementPath._cache)
340        for i in range(10): ET.ElementTree(elem).find('./'+str(i))
341        self.assertEqual(len(ElementPath._cache), cache_len_10)
342        for i in range(20): ET.ElementTree(elem).find('./'+str(i))
343        self.assertGreater(len(ElementPath._cache), cache_len_10)
344        for i in range(600): ET.ElementTree(elem).find('./'+str(i))
345        self.assertLess(len(ElementPath._cache), 500)
346
347    def test_copy(self):
348        # Test copy handling (etc).
349
350        import copy
351        e1 = ET.XML("<tag>hello<foo/></tag>")
352        e2 = copy.copy(e1)
353        e3 = copy.deepcopy(e1)
354        e1.find("foo").tag = "bar"
355        self.serialize_check(e1, '<tag>hello<bar /></tag>')
356        self.serialize_check(e2, '<tag>hello<bar /></tag>')
357        self.serialize_check(e3, '<tag>hello<foo /></tag>')
358
359    def test_attrib(self):
360        # Test attribute handling.
361
362        elem = ET.Element("tag")
363        elem.get("key") # 1.1
364        self.assertEqual(elem.get("key", "default"), 'default') # 1.2
365
366        elem.set("key", "value")
367        self.assertEqual(elem.get("key"), 'value') # 1.3
368
369        elem = ET.Element("tag", key="value")
370        self.assertEqual(elem.get("key"), 'value') # 2.1
371        self.assertEqual(elem.attrib, {'key': 'value'}) # 2.2
372
373        attrib = {"key": "value"}
374        elem = ET.Element("tag", attrib)
375        attrib.clear() # check for aliasing issues
376        self.assertEqual(elem.get("key"), 'value') # 3.1
377        self.assertEqual(elem.attrib, {'key': 'value'}) # 3.2
378
379        attrib = {"key": "value"}
380        elem = ET.Element("tag", **attrib)
381        attrib.clear() # check for aliasing issues
382        self.assertEqual(elem.get("key"), 'value') # 4.1
383        self.assertEqual(elem.attrib, {'key': 'value'}) # 4.2
384
385        elem = ET.Element("tag", {"key": "other"}, key="value")
386        self.assertEqual(elem.get("key"), 'value') # 5.1
387        self.assertEqual(elem.attrib, {'key': 'value'}) # 5.2
388
389        elem = ET.Element('test')
390        elem.text = "aa"
391        elem.set('testa', 'testval')
392        elem.set('testb', 'test2')
393        self.assertEqual(ET.tostring(elem),
394                b'<test testa="testval" testb="test2">aa</test>')
395        self.assertEqual(sorted(elem.keys()), ['testa', 'testb'])
396        self.assertEqual(sorted(elem.items()),
397                [('testa', 'testval'), ('testb', 'test2')])
398        self.assertEqual(elem.attrib['testb'], 'test2')
399        elem.attrib['testb'] = 'test1'
400        elem.attrib['testc'] = 'test2'
401        self.assertEqual(ET.tostring(elem),
402                b'<test testa="testval" testb="test1" testc="test2">aa</test>')
403
404        elem = ET.Element('test')
405        elem.set('a', '\r')
406        elem.set('b', '\r\n')
407        elem.set('c', '\t\n\r ')
408        elem.set('d', '\n\n')
409        self.assertEqual(ET.tostring(elem),
410                b'<test a="\r" b="\r&#10;" c="\t&#10;\r " d="&#10;&#10;" />')
411
412    def test_makeelement(self):
413        # Test makeelement handling.
414
415        elem = ET.Element("tag")
416        attrib = {"key": "value"}
417        subelem = elem.makeelement("subtag", attrib)
418        self.assertIsNot(subelem.attrib, attrib, msg="attrib aliasing")
419        elem.append(subelem)
420        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
421
422        elem.clear()
423        self.serialize_check(elem, '<tag />')
424        elem.append(subelem)
425        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
426        elem.extend([subelem, subelem])
427        self.serialize_check(elem,
428            '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>')
429        elem[:] = [subelem]
430        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
431        elem[:] = tuple([subelem])
432        self.serialize_check(elem, '<tag><subtag key="value" /></tag>')
433
434    def test_parsefile(self):
435        # Test parsing from file.
436
437        tree = ET.parse(SIMPLE_XMLFILE)
438        normalize_crlf(tree)
439        stream = StringIO.StringIO()
440        tree.write(stream)
441        self.assertEqual(stream.getvalue(),
442                '<root>\n'
443                '   <element key="value">text</element>\n'
444                '   <element>text</element>tail\n'
445                '   <empty-element />\n'
446                '</root>')
447        tree = ET.parse(SIMPLE_NS_XMLFILE)
448        normalize_crlf(tree)
449        stream = StringIO.StringIO()
450        tree.write(stream)
451        self.assertEqual(stream.getvalue(),
452                '<ns0:root xmlns:ns0="namespace">\n'
453                '   <ns0:element key="value">text</ns0:element>\n'
454                '   <ns0:element>text</ns0:element>tail\n'
455                '   <ns0:empty-element />\n'
456                '</ns0:root>')
457
458        with open(SIMPLE_XMLFILE) as f:
459            data = f.read()
460
461        parser = ET.XMLParser()
462        self.assertRegexpMatches(parser.version, r'^Expat ')
463        parser.feed(data)
464        self.serialize_check(parser.close(),
465                '<root>\n'
466                '   <element key="value">text</element>\n'
467                '   <element>text</element>tail\n'
468                '   <empty-element />\n'
469                '</root>')
470
471        parser = ET.XMLTreeBuilder() # 1.2 compatibility
472        parser.feed(data)
473        self.serialize_check(parser.close(),
474                '<root>\n'
475                '   <element key="value">text</element>\n'
476                '   <element>text</element>tail\n'
477                '   <empty-element />\n'
478                '</root>')
479
480        target = ET.TreeBuilder()
481        parser = ET.XMLParser(target=target)
482        parser.feed(data)
483        self.serialize_check(parser.close(),
484                '<root>\n'
485                '   <element key="value">text</element>\n'
486                '   <element>text</element>tail\n'
487                '   <empty-element />\n'
488                '</root>')
489
490    def test_parseliteral(self):
491        element = ET.XML("<html><body>text</body></html>")
492        self.assertEqual(ET.tostring(element),
493                '<html><body>text</body></html>')
494        element = ET.fromstring("<html><body>text</body></html>")
495        self.assertEqual(ET.tostring(element),
496                '<html><body>text</body></html>')
497        sequence = ["<html><body>", "text</bo", "dy></html>"]
498        element = ET.fromstringlist(sequence)
499        self.assertEqual(ET.tostring(element),
500                '<html><body>text</body></html>')
501        self.assertEqual("".join(ET.tostringlist(element)),
502                '<html><body>text</body></html>')
503        self.assertEqual(ET.tostring(element, "ascii"),
504                "<?xml version='1.0' encoding='ascii'?>\n"
505                "<html><body>text</body></html>")
506        _, ids = ET.XMLID("<html><body>text</body></html>")
507        self.assertEqual(len(ids), 0)
508        _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
509        self.assertEqual(len(ids), 1)
510        self.assertEqual(ids["body"].tag, 'body')
511
512    def test_iterparse(self):
513        # Test iterparse interface.
514
515        iterparse = ET.iterparse
516
517        context = iterparse(SIMPLE_XMLFILE)
518        action, elem = next(context)
519        self.assertEqual((action, elem.tag), ('end', 'element'))
520        self.assertEqual([(action, elem.tag) for action, elem in context], [
521                ('end', 'element'),
522                ('end', 'empty-element'),
523                ('end', 'root'),
524            ])
525        self.assertEqual(context.root.tag, 'root')
526
527        context = iterparse(SIMPLE_NS_XMLFILE)
528        self.assertEqual([(action, elem.tag) for action, elem in context], [
529                ('end', '{namespace}element'),
530                ('end', '{namespace}element'),
531                ('end', '{namespace}empty-element'),
532                ('end', '{namespace}root'),
533            ])
534
535        events = ()
536        context = iterparse(SIMPLE_XMLFILE, events)
537        self.assertEqual([(action, elem.tag) for action, elem in context], [])
538
539        events = ()
540        context = iterparse(SIMPLE_XMLFILE, events=events)
541        self.assertEqual([(action, elem.tag) for action, elem in context], [])
542
543        events = ("start", "end")
544        context = iterparse(SIMPLE_XMLFILE, events)
545        self.assertEqual([(action, elem.tag) for action, elem in context], [
546                ('start', 'root'),
547                ('start', 'element'),
548                ('end', 'element'),
549                ('start', 'element'),
550                ('end', 'element'),
551                ('start', 'empty-element'),
552                ('end', 'empty-element'),
553                ('end', 'root'),
554            ])
555
556        events = ("start", "end", "start-ns", "end-ns")
557        context = iterparse(SIMPLE_NS_XMLFILE, events)
558        self.assertEqual([(action, elem.tag) if action in ("start", "end")
559                                             else (action, elem)
560                          for action, elem in context], [
561                ('start-ns', ('', 'namespace')),
562                ('start', '{namespace}root'),
563                ('start', '{namespace}element'),
564                ('end', '{namespace}element'),
565                ('start', '{namespace}element'),
566                ('end', '{namespace}element'),
567                ('start', '{namespace}empty-element'),
568                ('end', '{namespace}empty-element'),
569                ('end', '{namespace}root'),
570                ('end-ns', None),
571            ])
572
573        events = ('start-ns', 'end-ns')
574        context = iterparse(StringIO.StringIO(r"<root xmlns=''/>"), events)
575        res = [(action, elem) for action, elem in context]
576        self.assertEqual(res, [('start-ns', ('', '')), ('end-ns', None)])
577
578        events = ("start", "end", "bogus")
579        with open(SIMPLE_XMLFILE, "rb") as f:
580            with self.assertRaises(ValueError) as cm:
581                iterparse(f, events)
582            self.assertFalse(f.closed)
583        self.assertEqual(str(cm.exception), "unknown event 'bogus'")
584
585        source = StringIO.StringIO(
586            "<?xml version='1.0' encoding='iso-8859-1'?>\n"
587            "<body xmlns='http://&#233;ffbot.org/ns'\n"
588            "      xmlns:cl\xe9='http://effbot.org/ns'>text</body>\n")
589        events = ("start-ns",)
590        context = iterparse(source, events)
591        self.assertEqual([(action, elem) for action, elem in context], [
592                ('start-ns', ('', u'http://\xe9ffbot.org/ns')),
593                ('start-ns', (u'cl\xe9', 'http://effbot.org/ns')),
594            ])
595
596        source = StringIO.StringIO("<document />junk")
597        it = iterparse(source)
598        action, elem = next(it)
599        self.assertEqual((action, elem.tag), ('end', 'document'))
600        with self.assertRaises(ET.ParseError) as cm:
601            next(it)
602        self.assertEqual(str(cm.exception),
603                'junk after document element: line 1, column 12')
604
605    def test_writefile(self):
606        elem = ET.Element("tag")
607        elem.text = "text"
608        self.serialize_check(elem, '<tag>text</tag>')
609        ET.SubElement(elem, "subtag").text = "subtext"
610        self.serialize_check(elem, '<tag>text<subtag>subtext</subtag></tag>')
611
612        # Test tag suppression
613        elem.tag = None
614        self.serialize_check(elem, 'text<subtag>subtext</subtag>')
615        elem.insert(0, ET.Comment("comment"))
616        self.serialize_check(elem,
617                'text<!--comment--><subtag>subtext</subtag>')     # assumes 1.3
618
619        elem[0] = ET.PI("key", "value")
620        self.serialize_check(elem, 'text<?key value?><subtag>subtext</subtag>')
621
622    def test_custom_builder(self):
623        # Test parser w. custom builder.
624
625        with open(SIMPLE_XMLFILE) as f:
626            data = f.read()
627        class Builder(list):
628            def start(self, tag, attrib):
629                self.append(("start", tag))
630            def end(self, tag):
631                self.append(("end", tag))
632            def data(self, text):
633                pass
634        builder = Builder()
635        parser = ET.XMLParser(target=builder)
636        parser.feed(data)
637        self.assertEqual(builder, [
638                ('start', 'root'),
639                ('start', 'element'),
640                ('end', 'element'),
641                ('start', 'element'),
642                ('end', 'element'),
643                ('start', 'empty-element'),
644                ('end', 'empty-element'),
645                ('end', 'root'),
646            ])
647
648        with open(SIMPLE_NS_XMLFILE) as f:
649            data = f.read()
650        class Builder(list):
651            def start(self, tag, attrib):
652                self.append(("start", tag))
653            def end(self, tag):
654                self.append(("end", tag))
655            def data(self, text):
656                pass
657            def pi(self, target, data):
658                self.append(("pi", target, data))
659            def comment(self, data):
660                self.append(("comment", data))
661        builder = Builder()
662        parser = ET.XMLParser(target=builder)
663        parser.feed(data)
664        self.assertEqual(builder, [
665                ('pi', 'pi', 'data'),
666                ('comment', ' comment '),
667                ('start', '{namespace}root'),
668                ('start', '{namespace}element'),
669                ('end', '{namespace}element'),
670                ('start', '{namespace}element'),
671                ('end', '{namespace}element'),
672                ('start', '{namespace}empty-element'),
673                ('end', '{namespace}empty-element'),
674                ('end', '{namespace}root'),
675            ])
676
677
678    # Element.getchildren() and ElementTree.getiterator() are deprecated.
679    @checkwarnings(("This method will be removed in future versions.  "
680                    "Use .+ instead.",
681                    (DeprecationWarning, PendingDeprecationWarning)))
682    def test_getchildren(self):
683        # Test Element.getchildren()
684
685        with open(SIMPLE_XMLFILE, "r") as f:
686            tree = ET.parse(f)
687        self.assertEqual([summarize_list(elem.getchildren())
688                          for elem in tree.getroot().iter()], [
689                ['element', 'element', 'empty-element'],
690                [],
691                [],
692                [],
693            ])
694        self.assertEqual([summarize_list(elem.getchildren())
695                          for elem in tree.getiterator()], [
696                ['element', 'element', 'empty-element'],
697                [],
698                [],
699                [],
700            ])
701
702        elem = ET.XML(SAMPLE_XML)
703        self.assertEqual(len(elem.getchildren()), 3)
704        self.assertEqual(len(elem[2].getchildren()), 1)
705        self.assertEqual(elem[:], elem.getchildren())
706        child1 = elem[0]
707        child2 = elem[2]
708        del elem[1:2]
709        self.assertEqual(len(elem.getchildren()), 2)
710        self.assertEqual(child1, elem[0])
711        self.assertEqual(child2, elem[1])
712        elem[0:2] = [child2, child1]
713        self.assertEqual(child2, elem[0])
714        self.assertEqual(child1, elem[1])
715        self.assertNotEqual(child1, elem[0])
716        elem.clear()
717        self.assertEqual(elem.getchildren(), [])
718
719    def test_writestring(self):
720        elem = ET.XML("<html><body>text</body></html>")
721        self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
722        elem = ET.fromstring("<html><body>text</body></html>")
723        self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
724
725    def test_encoding(self):
726        def check(encoding, body=''):
727            xml = ("<?xml version='1.0' encoding='%s'?><xml>%s</xml>" %
728                   (encoding, body))
729            self.assertEqual(ET.XML(xml.encode(encoding)).text, body)
730        check("ascii", 'a')
731        check("us-ascii", 'a')
732        check("iso-8859-1", u'\xbd')
733        check("iso-8859-15", u'\u20ac')
734        check("cp437", u'\u221a')
735        check("mac-roman", u'\u02da')
736
737        def xml(encoding):
738            return "<?xml version='1.0' encoding='%s'?><xml />" % encoding
739        def bxml(encoding):
740            return xml(encoding).encode(encoding)
741        supported_encodings = [
742            'ascii', 'utf-8', 'utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le',
743            'iso8859-1', 'iso8859-2', 'iso8859-3', 'iso8859-4', 'iso8859-5',
744            'iso8859-6', 'iso8859-7', 'iso8859-8', 'iso8859-9', 'iso8859-10',
745            'iso8859-13', 'iso8859-14', 'iso8859-15', 'iso8859-16',
746            'cp437', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852',
747            'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862',
748            'cp863', 'cp865', 'cp866', 'cp869', 'cp874', 'cp1006',
749            'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255',
750            'cp1256', 'cp1257', 'cp1258',
751            'mac-cyrillic', 'mac-greek', 'mac-iceland', 'mac-latin2',
752            'mac-roman', 'mac-turkish',
753            'iso2022-jp', 'iso2022-jp-1', 'iso2022-jp-2', 'iso2022-jp-2004',
754            'iso2022-jp-3', 'iso2022-jp-ext',
755            'koi8-r', 'koi8-u',
756            'ptcp154',
757        ]
758        for encoding in supported_encodings:
759            self.assertEqual(ET.tostring(ET.XML(bxml(encoding))), b'<xml />')
760
761        unsupported_ascii_compatible_encodings = [
762            'big5', 'big5hkscs',
763            'cp932', 'cp949', 'cp950',
764            'euc-jp', 'euc-jis-2004', 'euc-jisx0213', 'euc-kr',
765            'gb2312', 'gbk', 'gb18030',
766            'iso2022-kr', 'johab', 'hz',
767            'shift-jis', 'shift-jis-2004', 'shift-jisx0213',
768            'utf-7',
769        ]
770        for encoding in unsupported_ascii_compatible_encodings:
771            self.assertRaises(ValueError, ET.XML, bxml(encoding))
772
773        unsupported_ascii_incompatible_encodings = [
774            'cp037', 'cp424', 'cp500', 'cp864', 'cp875', 'cp1026', 'cp1140',
775            'utf_32', 'utf_32_be', 'utf_32_le',
776        ]
777        for encoding in unsupported_ascii_incompatible_encodings:
778            self.assertRaises(ET.ParseError, ET.XML, bxml(encoding))
779
780        self.assertRaises(ValueError, ET.XML, xml('undefined').encode('ascii'))
781        self.assertRaises(LookupError, ET.XML, xml('xxx').encode('ascii'))
782
783    def test_methods(self):
784        # Test serialization methods.
785
786        e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
787        e.tail = "\n"
788        self.assertEqual(serialize(e),
789                '<html><link /><script>1 &lt; 2</script></html>\n')
790        self.assertEqual(serialize(e, method=None),
791                '<html><link /><script>1 &lt; 2</script></html>\n')
792        self.assertEqual(serialize(e, method="xml"),
793                '<html><link /><script>1 &lt; 2</script></html>\n')
794        self.assertEqual(serialize(e, method="html"),
795                '<html><link><script>1 < 2</script></html>\n')
796        self.assertEqual(serialize(e, method="text"), '1 < 2\n')
797
798    def test_issue18347(self):
799        e = ET.XML('<html><CamelCase>text</CamelCase></html>')
800        self.assertEqual(serialize(e),
801                '<html><CamelCase>text</CamelCase></html>')
802        self.assertEqual(serialize(e, method="html"),
803                '<html><CamelCase>text</CamelCase></html>')
804
805    def test_entity(self):
806        # Test entity handling.
807
808        # 1) good entities
809
810        e = ET.XML("<document title='&#x8230;'>test</document>")
811        self.assertEqual(serialize(e, encoding="us-ascii"),
812                '<document title="&#33328;">test</document>')
813        self.serialize_check(e, '<document title="&#33328;">test</document>')
814
815        # 2) bad entities
816
817        with self.assertRaises(ET.ParseError) as cm:
818            ET.XML("<document>&entity;</document>")
819        self.assertEqual(str(cm.exception),
820                'undefined entity: line 1, column 10')
821
822        with self.assertRaises(ET.ParseError) as cm:
823            ET.XML(ENTITY_XML)
824        self.assertEqual(str(cm.exception),
825                'undefined entity &entity;: line 5, column 10')
826
827        # 3) custom entity
828
829        parser = ET.XMLParser()
830        parser.entity["entity"] = "text"
831        parser.feed(ENTITY_XML)
832        root = parser.close()
833        self.serialize_check(root, '<document>text</document>')
834
835    def test_namespace(self):
836        # Test namespace issues.
837
838        # 1) xml namespace
839
840        elem = ET.XML("<tag xml:lang='en' />")
841        self.serialize_check(elem, '<tag xml:lang="en" />') # 1.1
842
843        # 2) other "well-known" namespaces
844
845        elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
846        self.serialize_check(elem,
847            '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />') # 2.1
848
849        elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
850        self.serialize_check(elem,
851            '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />') # 2.2
852
853        elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
854        self.serialize_check(elem,
855            '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />') # 2.3
856
857        # 3) unknown namespaces
858        elem = ET.XML(SAMPLE_XML_NS)
859        self.serialize_check(elem,
860            '<ns0:body xmlns:ns0="http://effbot.org/ns">\n'
861            '  <ns0:tag>text</ns0:tag>\n'
862            '  <ns0:tag />\n'
863            '  <ns0:section>\n'
864            '    <ns0:tag>subtext</ns0:tag>\n'
865            '  </ns0:section>\n'
866            '</ns0:body>')
867
868    def test_qname(self):
869        # Test QName handling.
870
871        # 1) decorated tags
872
873        elem = ET.Element("{uri}tag")
874        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.1
875        elem = ET.Element(ET.QName("{uri}tag"))
876        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.2
877        elem = ET.Element(ET.QName("uri", "tag"))
878        self.serialize_check(elem, '<ns0:tag xmlns:ns0="uri" />') # 1.3
879        elem = ET.Element(ET.QName("uri", "tag"))
880        subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
881        subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
882        self.serialize_check(elem,
883            '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>') # 1.4
884
885        # 2) decorated attributes
886
887        elem.clear()
888        elem.attrib["{uri}key"] = "value"
889        self.serialize_check(elem,
890            '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.1
891
892        elem.clear()
893        elem.attrib[ET.QName("{uri}key")] = "value"
894        self.serialize_check(elem,
895            '<ns0:tag xmlns:ns0="uri" ns0:key="value" />') # 2.2
896
897        # 3) decorated values are not converted by default, but the
898        # QName wrapper can be used for values
899
900        elem.clear()
901        elem.attrib["{uri}key"] = "{uri}value"
902        self.serialize_check(elem,
903            '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />') # 3.1
904
905        elem.clear()
906        elem.attrib["{uri}key"] = ET.QName("{uri}value")
907        self.serialize_check(elem,
908            '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />') # 3.2
909
910        elem.clear()
911        subelem = ET.Element("tag")
912        subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
913        elem.append(subelem)
914        elem.append(subelem)
915        self.serialize_check(elem,
916            '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2">'
917            '<tag ns1:key="ns2:value" />'
918            '<tag ns1:key="ns2:value" />'
919            '</ns0:tag>') # 3.3
920
921        # 4) Direct QName tests
922
923        self.assertEqual(str(ET.QName('ns', 'tag')), '{ns}tag')
924        self.assertEqual(str(ET.QName('{ns}tag')), '{ns}tag')
925        q1 = ET.QName('ns', 'tag')
926        q2 = ET.QName('ns', 'tag')
927        self.assertEqual(q1, q2)
928        q2 = ET.QName('ns', 'other-tag')
929        self.assertNotEqual(q1, q2)
930        self.assertNotEqual(q1, 'ns:tag')
931        self.assertEqual(q1, '{ns}tag')
932
933    def test_doctype_public(self):
934        # Test PUBLIC doctype.
935
936        elem = ET.XML('<!DOCTYPE html PUBLIC'
937                ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
938                ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
939                '<html>text</html>')
940
941    def test_xpath_tokenizer(self):
942        # Test the XPath tokenizer.
943        from xml.etree import ElementPath
944        def check(p, expected):
945            self.assertEqual([op or tag
946                              for op, tag in ElementPath.xpath_tokenizer(p)],
947                             expected)
948
949        # tests from the xml specification
950        check("*", ['*'])
951        check("text()", ['text', '()'])
952        check("@name", ['@', 'name'])
953        check("@*", ['@', '*'])
954        check("para[1]", ['para', '[', '1', ']'])
955        check("para[last()]", ['para', '[', 'last', '()', ']'])
956        check("*/para", ['*', '/', 'para'])
957        check("/doc/chapter[5]/section[2]",
958              ['/', 'doc', '/', 'chapter', '[', '5', ']',
959               '/', 'section', '[', '2', ']'])
960        check("chapter//para", ['chapter', '//', 'para'])
961        check("//para", ['//', 'para'])
962        check("//olist/item", ['//', 'olist', '/', 'item'])
963        check(".", ['.'])
964        check(".//para", ['.', '//', 'para'])
965        check("..", ['..'])
966        check("../@lang", ['..', '/', '@', 'lang'])
967        check("chapter[title]", ['chapter', '[', 'title', ']'])
968        check("employee[@secretary and @assistant]", ['employee',
969              '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']'])
970
971        # additional tests
972        check("{http://spam}egg", ['{http://spam}egg'])
973        check("./spam.egg", ['.', '/', 'spam.egg'])
974        check(".//{http://spam}egg", ['.', '//', '{http://spam}egg'])
975
976    def test_processinginstruction(self):
977        # Test ProcessingInstruction directly
978
979        self.assertEqual(ET.tostring(ET.ProcessingInstruction('test', 'instruction')),
980                '<?test instruction?>')
981        self.assertEqual(ET.tostring(ET.PI('test', 'instruction')),
982                '<?test instruction?>')
983
984        # Issue #2746
985
986        self.assertEqual(ET.tostring(ET.PI('test', '<testing&>')),
987                '<?test <testing&>?>')
988        self.assertEqual(ET.tostring(ET.PI('test', u'<testing&>\xe3'), 'latin1'),
989                "<?xml version='1.0' encoding='latin1'?>\n"
990                "<?test <testing&>\xe3?>")
991
992    def test_html_empty_elems_serialization(self):
993        # issue 15970
994        # from http://www.w3.org/TR/html401/index/elements.html
995        for element in ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
996                        'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']:
997            for elem in [element, element.lower()]:
998                expected = '<%s>' % elem
999                serialized = serialize(ET.XML('<%s />' % elem), method='html')
1000                self.assertEqual(serialized, expected)
1001                serialized = serialize(ET.XML('<%s></%s>' % (elem,elem)),
1002                                       method='html')
1003                self.assertEqual(serialized, expected)
1004
1005
#
# xinclude tests (samples from appendix C of the xinclude specification)

# Maps each href used by the XInclude tests to the text of that resource.
XINCLUDE = {
    "C1.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml"/>
</document>
""",

    "disclaimer.xml": """\
<?xml version='1.0'?>
<disclaimer>
  <p>The opinions represented herein represent those of the individual
  and should not be interpreted as official policy endorsed by this
  organization.</p>
</disclaimer>
""",

    "C2.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been accessed
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
""",

    "count.txt": "324387",

    "C2b.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been <em>accessed</em>
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
""",

    "C3.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source of the "data.xml" resource:</p>
  <example><xi:include href="data.xml" parse="text"/></example>
</document>
""",

    "data.xml": """\
<?xml version='1.0'?>
<data>
  <item><![CDATA[Brooks & Shields]]></item>
</data>
""",

    "C5.xml": """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
  <xi:include href="example.txt" parse="text">
    <xi:fallback>
      <xi:include href="fallback-example.txt" parse="text">
        <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
      </xi:include>
    </xi:fallback>
  </xi:include>
</div>
""",

    # The href is the path of the simple.xml test data file, escaped
    # (quotes included) so it is safe inside the attribute value.
    "default.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>Example.</p>
  <xi:include href="{}"/>
</document>
""".format(cgi.escape(SIMPLE_XMLFILE, True)),
}
1081
#
# badly formatted xi:include tags

# Maps a document name to XML text that XInclude processing must reject.
XINCLUDE_BAD = {
    # xi:include carrying a parse type other than "xml" or "text".
    "B1.xml": """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
""",

    # xi:fallback that is not the child of an xi:include.
    "B2.xml": """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
    <xi:fallback></xi:fallback>
</div>
""",
}
1101
class XIncludeTest(unittest.TestCase):
    # Tests for xml.etree.ElementInclude, driven by the in-memory
    # XINCLUDE / XINCLUDE_BAD fixture tables.

    def xinclude_loader(self, href, parse="xml", encoding=None):
        # Loader that serves resources from the XINCLUDE table.  An
        # unknown href raises IOError; "xml" resources are returned
        # parsed, anything else as the stored text.  `encoding` is
        # accepted but not used.
        if href not in XINCLUDE:
            raise IOError("resource not found")
        resource = XINCLUDE[href]
        return ET.XML(resource) if parse == "xml" else resource

    def none_loader(self, href, parser, encoding=None):
        # Loader that never finds anything.
        return None

    def test_xinclude_default(self):
        # Expand an include without passing a loader; the href in
        # default.xml names the simple.xml test data file.
        from xml.etree import ElementInclude
        tree = self.xinclude_loader('default.xml')
        ElementInclude.include(tree)
        expected = (
            '<document>\n'
            '  <p>Example.</p>\n'
            '  <root>\n'
            '   <element key="value">text</element>\n'
            '   <element>text</element>tail\n'
            '   <empty-element />\n'
            '</root>\n'
            '</document>')
        self.assertEqual(serialize(tree), expected)

    def test_xinclude(self):
        from xml.etree import ElementInclude

        # Each pair is (fixture name, serialization after expansion).
        expansions = [
            # Basic inclusion example (XInclude C.1)
            ("C1.xml",
             '<document>\n'
             '  <p>120 Mz is adequate for an average home user.</p>\n'
             '  <disclaimer>\n'
             '  <p>The opinions represented herein represent those of the individual\n'
             '  and should not be interpreted as official policy endorsed by this\n'
             '  organization.</p>\n'
             '</disclaimer>\n'
             '</document>'), # C1

            # Textual inclusion example (XInclude C.2)
            ("C2.xml",
             '<document>\n'
             '  <p>This document has been accessed\n'
             '  324387 times.</p>\n'
             '</document>'), # C2

            # Textual inclusion after sibling element (based on modified XInclude C.2)
            ("C2b.xml",
             '<document>\n'
             '  <p>This document has been <em>accessed</em>\n'
             '  324387 times.</p>\n'
             '</document>'), # C2b

            # Textual inclusion of XML example (XInclude C.3)
            ("C3.xml",
             '<document>\n'
             '  <p>The following is the source of the "data.xml" resource:</p>\n'
             "  <example>&lt;?xml version='1.0'?&gt;\n"
             '&lt;data&gt;\n'
             '  &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;\n'
             '&lt;/data&gt;\n'
             '</example>\n'
             '</document>'), # C3
        ]
        for name, expected in expansions:
            tree = self.xinclude_loader(name)
            ElementInclude.include(tree, self.xinclude_loader)
            self.assertEqual(serialize(tree), expected)

        # Fallback example (XInclude C.5)
        # Note! Fallback support is not yet implemented
        # The missing resource surfaces as the loader's IOError and the
        # document is expected to be left as it was parsed.
        tree = self.xinclude_loader("C5.xml")
        with self.assertRaises(IOError) as caught:
            ElementInclude.include(tree, self.xinclude_loader)
        self.assertEqual(str(caught.exception), 'resource not found')
        self.assertEqual(serialize(tree),
            '<div xmlns:ns0="http://www.w3.org/2001/XInclude">\n'
            '  <ns0:include href="example.txt" parse="text">\n'
            '    <ns0:fallback>\n'
            '      <ns0:include href="fallback-example.txt" parse="text">\n'
            '        <ns0:fallback><a href="mailto:bob@example.org">Report error</a></ns0:fallback>\n'
            '      </ns0:include>\n'
            '    </ns0:fallback>\n'
            '  </ns0:include>\n'
            '</div>') # C5

    def test_xinclude_failures(self):
        from xml.etree import ElementInclude

        # Each pair is (document text, FatalIncludeError message that
        # expanding it with a loader returning None must produce).
        failures = [
            # Test failure to locate included XML file.
            (XINCLUDE["C1.xml"],
             "cannot load 'disclaimer.xml' as 'xml'"),

            # Test failure to locate included text file.
            (XINCLUDE["C2.xml"],
             "cannot load 'count.txt' as 'text'"),

            # Test bad parse type.
            (XINCLUDE_BAD["B1.xml"],
             "unknown parse type in xi:include tag ('BAD_TYPE')"),

            # Test xi:fallback outside xi:include.
            (XINCLUDE_BAD["B2.xml"],
             "xi:fallback tag must be child of xi:include "
             "('{http://www.w3.org/2001/XInclude}fallback')"),
        ]
        for text, message in failures:
            tree = ET.XML(text)
            with self.assertRaises(ElementInclude.FatalIncludeError) as caught:
                ElementInclude.include(tree, loader=self.none_loader)
            self.assertEqual(str(caught.exception), message)
1225
1226# --------------------------------------------------------------------
1227# reported bugs
1228
class BugsTest(unittest.TestCase):
    # Regression tests for individually reported ElementTree bugs.

    def test_bug_xmltoolkit21(self):
        # marshaller gives obscure errors for non-string values

        def check(elem):
            # Serializing must fail with a TypeError naming the value.
            with self.assertRaises(TypeError) as cm:
                serialize(elem)
            self.assertEqual(str(cm.exception),
                    'cannot serialize 123 (type int)')

        elem = ET.Element(123)
        check(elem) # tag

        elem = ET.Element("elem")
        elem.text = 123
        check(elem) # text

        elem = ET.Element("elem")
        elem.tail = 123
        check(elem) # tail

        elem = ET.Element("elem")
        elem.set(123, "123")
        check(elem) # attribute key

        elem = ET.Element("elem")
        elem.set("123", 123)
        check(elem) # attribute value

    def test_bug_xmltoolkit25(self):
        # typo in ElementTree.findtext

        elem = ET.XML(SAMPLE_XML)
        tree = ET.ElementTree(elem)
        self.assertEqual(tree.findtext("tag"), 'text')
        self.assertEqual(tree.findtext("section/tag"), 'subtext')

    def test_bug_xmltoolkit28(self):
        # .//tag causes exceptions

        tree = ET.XML("<doc><table><tbody/></table></doc>")
        # A tag that is absent gives an empty result, not an exception.
        self.assertEqual(summarize_list(tree.findall(".//thead")), [])
        self.assertEqual(summarize_list(tree.findall(".//tbody")), ['tbody'])

    def test_bug_xmltoolkitX1(self):
        # dump() doesn't flush the output buffer

        tree = ET.XML("<doc><table><tbody/></table></doc>")
        with support.captured_stdout() as stdout:
            ET.dump(tree)
            # Read the capture while stdout is still redirected.
            self.assertEqual(stdout.getvalue(), '<doc><table><tbody /></table></doc>\n')

    def test_bug_xmltoolkit39(self):
        # non-ascii element and attribute names don't work

        # Parsed from ISO-8859-1 input: non-ASCII tag name.
        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
        self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')

        # Non-ASCII attribute name, plus a character reference in the value.
        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
                      b"<tag \xe4ttr='v&#228;lue' />")
        self.assertEqual(tree.attrib, {u'\xe4ttr': u'v\xe4lue'})
        self.assertEqual(ET.tostring(tree, "utf-8"),
                b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')

        # Non-ASCII tag name on an element that has text content.
        tree = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
                      b'<t\xe4g>text</t\xe4g>')
        self.assertEqual(ET.tostring(tree, "utf-8"),
                b'<t\xc3\xa4g>text</t\xc3\xa4g>')

        # The same names, built through the API instead of the parser.
        tree = ET.Element(u"t\u00e4g")
        self.assertEqual(ET.tostring(tree, "utf-8"), b'<t\xc3\xa4g />')

        tree = ET.Element("tag")
        tree.set(u"\u00e4ttr", u"v\u00e4lue")
        self.assertEqual(ET.tostring(tree, "utf-8"),
                b'<tag \xc3\xa4ttr="v\xc3\xa4lue" />')

    def test_bug_xmltoolkit54(self):
        # problems handling internally defined entities

        e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]>"
                   '<doc>&ldots;</doc>')
        # 0x8230 == 33328: the entity's character comes back as a
        # decimal character reference.
        self.assertEqual(serialize(e), '<doc>&#33328;</doc>')

    def test_bug_xmltoolkit55(self):
        # make sure we're reporting the first error, not the last

        with self.assertRaises(ET.ParseError) as cm:
            ET.XML("<!DOCTYPE doc SYSTEM 'doc.dtd'>"
                   '<doc>&ldots;&ndots;&rdots;</doc>')
        self.assertEqual(str(cm.exception),
                'undefined entity &ldots;: line 1, column 36')

    def test_bug_xmltoolkit60(self):
        # Handle crash in stream source.

        class ExceptionFile:
            # File-like object whose read() always fails.
            def read(self, x):
                raise IOError

        # The source's own exception must reach the caller of parse().
        self.assertRaises(IOError, ET.parse, ExceptionFile())

    def test_bug_xmltoolkit62(self):
        # Don't crash when using custom entities.

        ENTITIES = {u'rsquo': u'\u2019', u'lsquo': u'\u2018'}
        parser = ET.XMLTreeBuilder()
        parser.entity.update(ENTITIES)
        parser.feed("""<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
<patent-application-publication>
<subdoc-abstract>
<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
</subdoc-abstract>
</patent-application-publication>""")
        t = parser.close()
        # Both custom entities must have been expanded in the text.
        self.assertEqual(t.find('.//paragraph').text,
            u'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.')

    @unittest.skipIf(sys.gettrace(), "Skips under coverage.")
    def test_bug_xmltoolkit63(self):
        # Check reference leak.
        def xmltoolkit63():
            tree = ET.TreeBuilder()
            tree.start("tag", {})
            tree.data("text")
            tree.end("tag")

        # One call happens before the baseline is sampled (presumably so
        # that first-call effects are not counted); after 1000 more
        # calls the reference count of None must be unchanged.
        xmltoolkit63()
        count = sys.getrefcount(None)
        for i in range(1000):
            xmltoolkit63()
        self.assertEqual(sys.getrefcount(None), count)

    def test_bug_200708_newline(self):
        # Preserve newlines in attributes.

        e = ET.Element('SomeTag', text="def _f():\n  return 3\n")
        # Newlines are written as &#10; references ...
        self.assertEqual(ET.tostring(e),
                b'<SomeTag text="def _f():&#10;  return 3&#10;" />')
        # ... so that they survive a parse ...
        self.assertEqual(ET.XML(ET.tostring(e)).get("text"),
                'def _f():\n  return 3\n')
        # ... and a full serialize/parse/serialize round trip.
        self.assertEqual(ET.tostring(ET.XML(ET.tostring(e))),
                b'<SomeTag text="def _f():&#10;  return 3&#10;" />')

    def test_bug_200708_close(self):
        # Test default builder.
        parser = ET.XMLParser() # default
        parser.feed("<element>some text</element>")
        self.assertEqual(parser.close().tag, 'element')

        # Test custom builder.
        class EchoTarget:
            # Target that ignores every event; only close() matters.
            def start(self, tag, attrib):
                pass
            def end(self, tag):
                pass
            def data(self, text):
                pass
            def close(self):
                return ET.Element("element") # simulate root
        parser = ET.XMLParser(target=EchoTarget())
        parser.feed("<element>some text</element>")
        # parser.close() must hand back whatever the target's close() returned.
        self.assertEqual(parser.close().tag, 'element')

    def test_bug_200709_default_namespace(self):
        e = ET.Element("{default}elem")
        ET.SubElement(e, "{default}elem")
        self.assertEqual(serialize(e, default_namespace="default"), # 1
                '<elem xmlns="default"><elem /></elem>')

        e = ET.Element("{default}elem")
        ET.SubElement(e, "{default}elem")
        ET.SubElement(e, "{not-default}elem")
        self.assertEqual(serialize(e, default_namespace="default"), # 2
            '<elem xmlns="default" xmlns:ns1="not-default">'
            '<elem />'
            '<ns1:elem />'
            '</elem>')

        e = ET.Element("{default}elem")
        ET.SubElement(e, "{default}elem")
        ET.SubElement(e, "elem") # unprefixed name
        with self.assertRaises(ValueError) as cm:
            serialize(e, default_namespace="default") # 3
        self.assertEqual(str(cm.exception),
                'cannot use non-qualified names with default_namespace option')

    def test_bug_200709_register_namespace(self):
        # Before registration the URI gets a generated prefix.
        e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
        self.assertEqual(ET.tostring(e),
            '<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />')
        # After registration the chosen prefix is used instead.
        ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
        e = ET.Element("{http://namespace.invalid/does/not/exist/}title")
        self.assertEqual(ET.tostring(e),
            '<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />')

        # And the Dublin Core namespace is in the default list:

        e = ET.Element("{http://purl.org/dc/elements/1.1/}title")
        self.assertEqual(ET.tostring(e),
            '<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />')

    def test_bug_200709_element_comment(self):
        # Not sure if this can be fixed, really (since the serializer needs
        # ET.Comment, not cET.comment).

        a = ET.Element('a')
        a.append(ET.Comment('foo'))
        self.assertEqual(a[0].tag, ET.Comment)

        a = ET.Element('a')
        a.append(ET.PI('foo'))
        self.assertEqual(a[0].tag, ET.PI)

    def test_bug_200709_element_insert(self):
        a = ET.Element('a')
        b = ET.SubElement(a, 'b')
        c = ET.SubElement(a, 'c')
        d = ET.Element('d')
        a.insert(0, d)
        self.assertEqual(summarize_list(a), ['d', 'b', 'c'])
        # A negative index counts from the end, as for lists: -1 puts
        # the new child before the last one.
        a.insert(-1, d)
        self.assertEqual(summarize_list(a), ['d', 'b', 'd', 'c'])

    def test_bug_200709_iter_comment(self):
        a = ET.Element('a')
        b = ET.SubElement(a, 'b')
        comment_b = ET.Comment("TEST-b")
        b.append(comment_b)
        # iter() filtered on ET.Comment must find the nested comment.
        self.assertEqual(summarize_list(a.iter(ET.Comment)), [ET.Comment])

    # --------------------------------------------------------------------
    # reported on bugs.python.org

    def test_bug_1534630(self):
        # Character data fed before the first start tag must not break
        # the builder, and must not appear in the resulting tree.
        bob = ET.TreeBuilder()
        bob.data("data")
        bob.start("tag", {})
        bob.end("tag")
        e = bob.close()
        self.assertEqual(serialize(e), '<tag />')

    def test_issue6233(self):
        # Non-ASCII text must be written as a character reference when
        # serializing to ASCII, whatever the input encoding was.
        e = ET.XML(b"<?xml version='1.0' encoding='utf-8'?>"
                   b'<body>t\xc3\xa3g</body>')
        self.assertEqual(ET.tostring(e, 'ascii'),
                b"<?xml version='1.0' encoding='ascii'?>\n"
                b'<body>t&#227;g</body>')
        e = ET.XML(b"<?xml version='1.0' encoding='iso-8859-1'?>"
                   b'<body>t\xe3g</body>')
        self.assertEqual(ET.tostring(e, 'ascii'),
                b"<?xml version='1.0' encoding='ascii'?>\n"
                b'<body>t&#227;g</body>')

    def test_issue3151(self):
        # A namespace URI that itself contains braces.
        e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
        self.assertEqual(e.tag, '{${stuff}}localname')
        # Wrapping the element in an ElementTree is still exercised; the
        # wrapper itself is not needed afterwards.
        ET.ElementTree(e)
        self.assertEqual(ET.tostring(e), b'<ns0:localname xmlns:ns0="${stuff}" />')

    def test_issue6565(self):
        elem = ET.XML("<body><tag/></body>")
        self.assertEqual(summarize_list(elem), ['tag'])
        newelem = ET.XML(SAMPLE_XML)
        # Full-slice assignment replaces all children at once.
        elem[:] = newelem[:]
        self.assertEqual(summarize_list(elem), ['tag', 'tag', 'section'])

    def test_issue10777(self):
        # Registering a namespace twice caused a "dictionary changed size during
        # iteration" bug.

        ET.register_namespace('test10777', 'http://myuri/')
        ET.register_namespace('test10777', 'http://myuri/')

    def check_expat224_utf8_bug(self, text):
        # Parse `text` (UTF-8 bytes) as an attribute value and check
        # that it comes back as the same text, decoded.
        xml = b'<a b="%s"/>' % text
        root = ET.XML(xml)
        self.assertEqual(root.get('b'), text.decode('utf-8'))

    def test_expat224_utf8_bug(self):
        # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
        # Check that Expat 2.2.4 fixed the bug.
        #
        # Test buffer bounds at odd and even positions.

        text = b'\xc3\xa0' * 1024
        self.check_expat224_utf8_bug(text)

        # One leading ASCII byte shifts every two-byte sequence by one.
        text = b'x' + b'\xc3\xa0' * 1024
        self.check_expat224_utf8_bug(text)

    def test_expat224_utf8_bug_file(self):
        with open(UTF8_BUG_XMLFILE, 'rb') as fp:
            raw = fp.read()
        root = ET.fromstring(raw)
        xmlattr = root.get('b')

        # "Parse" manually the XML file to extract the value of the 'b'
        # attribute of the <a b='xxx' /> XML element
        text = raw.decode('utf-8').strip()
        text = text.replace('\r\n', ' ')
        # Drop the 6 leading characters (<a b=' in the layout above)
        # and the 4 trailing ones (' />).
        text = text[6:-4]
        self.assertEqual(xmlattr, text)
1534
1535
1536# --------------------------------------------------------------------
1537
1538
class BasicElementTest(unittest.TestCase):
    # Checks of basic Element object behaviour: garbage collection of
    # reference cycles, weak references and keyword arguments to get().

    @python_only
    def test_cyclic_gc(self):
        # Elements that take part in a reference cycle must be reclaimed
        # by the garbage collector.  Each scenario watches one object in
        # the cycle through a weak reference, drops every strong
        # reference and checks that the weak reference is dead after a
        # collection.
        class Dummy:
            pass

        # Test the shortest cycle: d->element->d
        d = Dummy()
        d.dummyref = ET.Element('joe', attr=d)
        wref = weakref.ref(d)
        del d
        gc_collect()
        self.assertIsNone(wref())

        # A longer cycle: d->e->e2->d
        e = ET.Element('joe')
        d = Dummy()
        d.dummyref = e
        wref = weakref.ref(d)
        e2 = ET.SubElement(e, 'foo', attr=d)
        del d, e, e2
        gc_collect()
        self.assertIsNone(wref())

        # A cycle between Element objects as children of one another
        # e1->e2->e3->e1
        e1 = ET.Element('e1')
        e2 = ET.Element('e2')
        e3 = ET.Element('e3')
        e1.append(e2)
        # e2 must hold e3 (not itself); otherwise e1 is not part of any
        # cycle and this scenario checks nothing about three-element
        # cycles.
        e2.append(e3)
        e3.append(e1)
        wref = weakref.ref(e1)
        del e1, e2, e3
        gc_collect()
        self.assertIsNone(wref())

    @python_only
    def test_weakref(self):
        # An Element can be weakly referenced; dropping the last strong
        # reference runs the callback and clears the weak reference.
        flag = []
        def wref_cb(w):
            flag.append(True)
        e = ET.Element('e')
        wref = weakref.ref(e, wref_cb)
        self.assertEqual(wref().tag, 'e')
        del e
        self.assertEqual(flag, [True])
        self.assertIsNone(wref())

    @python_only
    def test_get_keyword_args(self):
        # get() accepts its fallback value as the keyword "default".
        e1 = ET.Element('foo' , x=1, y=2, z=3)
        self.assertEqual(e1.get('x', default=7), 1)
        self.assertEqual(e1.get('w', default=7), 7)
1593
1594
class BadElementTest(unittest.TestCase):
    """Hand Element APIs hostile or self-mutating arguments.

    Each test either expects a specific ordinary exception or merely
    requires that the call returns at all.
    """

    def test_extend_mutable_list(self):
        # extend() receives a list whose contents are swapped out while
        # the fake element is being asked for its class.
        class X(object):
            @property
            def __class__(self):
                children[:] = [ET.Element('baz')]
                return ET.Element

        children = [X()]
        parent = ET.Element('foo')
        try:
            parent.extend(children)
        except TypeError:
            pass  # a TypeError is a tolerated outcome

        if ET is not pyET:
            return

        # Second half runs against the pure-Python implementation only.
        class Y(X, ET.Element):
            pass

        children = [Y('x')]
        parent = ET.Element('foo')
        parent.extend(children)

    def test_extend_mutable_list2(self):
        # Same idea as above, but the list is emptied rather than replaced.
        class X(object):
            @property
            def __class__(self):
                del children[:]
                return ET.Element

        children = [X(), ET.Element('baz')]
        parent = ET.Element('foo')
        try:
            parent.extend(children)
        except TypeError:
            pass  # a TypeError is a tolerated outcome

        if ET is not pyET:
            return

        # Second half runs against the pure-Python implementation only.
        class Y(X, ET.Element):
            pass

        children = [Y('bar'), ET.Element('baz')]
        parent = ET.Element('foo')
        parent.extend(children)

    @python_only
    def test_remove_with_mutating(self):
        # remove() compares children with ==; here the comparison itself
        # deletes every child of the parent and reports "not equal".
        class X(ET.Element):
            def __eq__(self, o):
                del parent[:]
                return False
            __hash__ = object.__hash__

        # Case 1: the mutating object is the existing child.
        parent = ET.Element('foo')
        parent.extend([X('bar')])
        with self.assertRaises(ValueError):
            parent.remove(ET.Element('baz'))

        # Case 2: the mutating object is the element asked to be removed.
        parent = ET.Element('foo')
        parent.extend([ET.Element('bar')])
        with self.assertRaises(ValueError):
            parent.remove(X('baz'))

    def test_recursive_repr(self):
        # Issue #25455
        elem = ET.Element('foo')
        # Temporarily make the element its own tag, then ask for its repr.
        with swap_attr(elem, 'tag', elem), self.assertRaises(RuntimeError):
            repr(elem)  # Should not crash

    def test_element_get_text(self):
        # Issue #27863
        class X(str):
            # Touches elem.text from the finalizer; NameError covers the
            # case where ``elem`` has not been bound yet.
            def __del__(self):
                try:
                    elem.text
                except NameError:
                    pass

        # NOTE: the X instance is passed as a temporary on purpose -- do
        # not keep another reference to it in this function.
        builder = ET.TreeBuilder()
        builder.start('tag', {})
        builder.data('ABCD')
        builder.data(X('EFGH'))
        builder.data('IJKL')
        builder.end('tag')

        elem = builder.close()
        self.assertEqual(elem.text, 'ABCDEFGHIJKL')

    def test_element_get_tail(self):
        # Issue #27863
        class X(str):
            # Touches elem[0].tail from the finalizer; NameError covers
            # the case where ``elem`` has not been bound yet.
            def __del__(self):
                try:
                    elem[0].tail
                except NameError:
                    pass

        # NOTE: the X instance is passed as a temporary on purpose -- do
        # not keep another reference to it in this function.
        builder = ET.TreeBuilder()
        builder.start('root', {})
        builder.start('tag', {})
        builder.end('tag')
        builder.data('ABCD')
        builder.data(X('EFGH'))
        builder.data('IJKL')
        builder.end('root')

        elem = builder.close()
        self.assertEqual(elem[0].tail, 'ABCDEFGHIJKL')

    def test_element_iter(self):
        # Issue #27863
        root = ET.Element('tag')
        root.extend([None])  # non-Element

        walker = root.iter()
        # The element itself comes first ...
        self.assertIs(next(walker), root)
        # ... and reaching the bogus child must raise, not crash.
        with self.assertRaises((AttributeError, TypeError)):
            list(walker)

    def test_subscr(self):
        # Issue #27863
        class X:
            # Index object that empties the element while the slice
            # bounds are being computed.
            def __index__(self):
                del parent[:]
                return 1

        parent = ET.Element('elem')
        parent.append(ET.Element('child'))
        parent[:X()]  # shouldn't crash

        parent.append(ET.Element('child'))
        parent[0:10:X()]  # shouldn't crash

    def test_ass_subscr(self):
        # Issue #27863
        class X:
            # Index object that clears the element while the slice
            # bounds are being computed.
            def __index__(self):
                parent[:] = []
                return 1

        parent = ET.Element('elem')
        for _ in range(10):
            parent.insert(0, ET.Element('child'))

        parent[0:10:X()] = []  # shouldn't crash
1733
1734
class MutatingElementPath(str):
    """Path string whose equality test mutates an element.

    The element given at construction is stored on the instance as
    ``elem``; every ``==`` comparison deletes all of its items and then
    reports a match.
    """

    # Hash exactly like the underlying str.
    __hash__ = str.__hash__

    def __new__(cls, elem, *args):
        # Remaining positional arguments are handed to str unchanged.
        path = str.__new__(cls, *args)
        path.elem = elem
        return path

    def __eq__(self, o):
        del self.elem[:]
        return True
1744
class BadElementPath(str):
    """Path string whose equality test always fails with ZeroDivisionError."""

    # Hash exactly like the underlying str.
    __hash__ = str.__hash__

    def __eq__(self, o):
        # The division fails while the operand of ``raise`` is being
        # evaluated, so ZeroDivisionError is what propagates.
        raise 1.0 / 0.0
1749
class BadElementPathTest(unittest.TestCase):
    """Run find/findtext/findall with path objects whose ``==`` misbehaves.

    MutatingElementPath empties the searched element when compared and
    BadElementPath raises ZeroDivisionError.  The searches only have to
    return; a ZeroDivisionError escaping from them is tolerated.
    """

    def setUp(self):
        super(BadElementPathTest, self).setUp()
        from xml.etree import ElementPath
        # Remember the current path cache and install an empty one for
        # the duration of the test; tearDown() puts the old one back.
        self.path_cache, ElementPath._cache = ElementPath._cache, {}

    def tearDown(self):
        from xml.etree import ElementPath
        ElementPath._cache = self.path_cache
        super(BadElementPathTest, self).tearDown()

    def _foo_with_bar(self):
        # Fresh <foo> element holding a single <bar> child.
        parent = ET.Element('foo')
        parent.extend([ET.Element('bar')])
        return parent

    def test_find_with_mutating(self):
        parent = self._foo_with_bar()
        parent.find(MutatingElementPath(parent, 'x'))

    def test_find_with_error(self):
        parent = self._foo_with_bar()
        try:
            parent.find(BadElementPath('x'))
        except ZeroDivisionError:
            pass  # tolerated

    def test_findtext_with_mutating(self):
        parent = self._foo_with_bar()
        parent.findtext(MutatingElementPath(parent, 'x'))

    def test_findtext_with_error(self):
        parent = self._foo_with_bar()
        try:
            parent.findtext(BadElementPath('x'))
        except ZeroDivisionError:
            pass  # tolerated

    def test_findall_with_mutating(self):
        parent = self._foo_with_bar()
        parent.findall(MutatingElementPath(parent, 'x'))

    def test_findall_with_error(self):
        parent = self._foo_with_bar()
        try:
            parent.findall(BadElementPath('x'))
        except ZeroDivisionError:
            pass  # tolerated
1800
1801
class ElementTreeTypeTest(unittest.TestCase):
    """Public ElementTree names are types, and Element can be subclassed."""

    def test_istype(self):
        for obj in (ET.ParseError, ET.QName, ET.ElementTree):
            self.assertIsInstance(obj, type)

        if ET is not pyET:
            return
        # The remaining names are checked for the pure-Python
        # implementation only.
        for obj in (ET.Element, ET.TreeBuilder, ET.XMLParser):
            self.assertIsInstance(obj, type)

    @python_only
    def test_Element_subclass_trivial(self):
        class MyElement(ET.Element):
            pass

        instance = MyElement('foo')
        for cls in (ET.Element, MyElement):
            self.assertIsInstance(instance, cls)
        self.assertEqual(instance.tag, 'foo')

        # test that attribute assignment works (issue 14849)
        instance.text = "joe"
        self.assertEqual(instance.text, "joe")

    @python_only
    def test_Element_subclass_constructor(self):
        class MyElement(ET.Element):
            # Appends '__' to the tag and forwards everything else to
            # the base constructor untouched.
            def __init__(self, tag, attrib={}, **extra):
                super(MyElement, self).__init__(tag + '__', attrib, **extra)

        instance = MyElement('foo', {'a': 1, 'b': 2}, c=3, d=4)
        self.assertEqual(instance.tag, 'foo__')
        # Positional attribute dict and keyword extras end up merged.
        merged = sorted(instance.items())
        self.assertEqual(merged, [('a', 1), ('b', 2), ('c', 3), ('d', 4)])

    @python_only
    def test_Element_subclass_new_method(self):
        class MyElement(ET.Element):
            def newmethod(self):
                return self.tag

        instance = MyElement('joe')
        self.assertEqual(instance.newmethod(), 'joe')
1845
1846
class ElementFindTest(unittest.TestCase):
    """find(), findtext() and findall() on Element and ElementTree."""

    @python_only
    def test_simplefind(self):
        # Bare attribute access: raises AttributeError right here if ET
        # has no ElementPath attribute.
        ET.ElementPath
        with swap_attr(ET, 'ElementPath', ET._SimpleElementPath()):
            root = ET.XML(SAMPLE_XML)
            self.assertEqual(root.find('tag').tag, 'tag')
            self.assertEqual(ET.ElementTree(root).find('tag').tag, 'tag')
            self.assertEqual(root.findtext('tag'), 'text')
            self.assertIsNone(root.findtext('tog'))
            self.assertEqual(root.findtext('tog', 'default'), 'default')
            self.assertEqual(ET.ElementTree(root).findtext('tag'), 'text')
            self.assertEqual(summarize_list(root.findall('tag')),
                             ['tag'] * 2)
            self.assertEqual(summarize_list(root.findall('.//tag')),
                             ['tag'] * 3)

            # Path syntax doesn't work in this case.
            nested = 'section/tag'
            self.assertIsNone(root.find(nested))
            self.assertIsNone(root.findtext(nested))
            self.assertEqual(summarize_list(root.findall(nested)), [])

    def test_find_simple(self):
        root = ET.XML(SAMPLE_XML)
        for path in ('tag', 'section/tag', './tag'):
            self.assertEqual(root.find(path).tag, 'tag')

        # Replace the third child with the richer sample section.
        root[2] = ET.XML(SAMPLE_SECTION)
        self.assertEqual(root.find('section/nexttag').tag, 'nexttag')

        self.assertEqual(root.findtext('./tag'), 'text')
        self.assertEqual(root.findtext('section/tag'), 'subtext')

        # section/nexttag is found but has no text
        self.assertEqual(root.findtext('section/nexttag'), '')
        self.assertEqual(root.findtext('section/nexttag', 'default'), '')

        # tog doesn't exist and 'default' kicks in
        self.assertIsNone(root.findtext('tog'))
        self.assertEqual(root.findtext('tog', 'default'), 'default')

        # Issue #16922
        holder = ET.XML('<tag><empty /></tag>')
        self.assertEqual(holder.findtext('empty'), '')

    def test_find_xpath(self):
        LINEAR_XML = '''
        <body>
            <tag class='a'/>
            <tag class='b'/>
            <tag class='c'/>
            <tag class='d'/>
        </body>'''
        root = ET.XML(LINEAR_XML)

        def klass(path):
            # 'class' attribute of the first element matching *path*.
            return root.find(path).attrib['class']

        # Test for numeric indexing and last()
        self.assertEqual(klass('./tag[1]'), 'a')
        self.assertEqual(klass('./tag[2]'), 'b')
        self.assertEqual(klass('./tag[last()]'), 'd')
        self.assertEqual(klass('./tag[last()-1]'), 'c')
        self.assertEqual(klass('./tag[last()-2]'), 'b')

    def test_findall(self):
        root = ET.XML(SAMPLE_XML)
        root[2] = ET.XML(SAMPLE_SECTION)

        def found(path):
            # Summary of everything findall() returns for *path*.
            return summarize_list(root.findall(path))

        section_children = ['tag', 'nexttag', 'nextsection']
        section_descendants = section_children + ['tag']

        # Plain names, wildcards and descendant searches.
        self.assertEqual(found('.'), ['body'])
        self.assertEqual(found('tag'), ['tag', 'tag'])
        self.assertEqual(found('tog'), [])
        self.assertEqual(found('tog/foo'), [])
        self.assertEqual(found('*'), ['tag', 'tag', 'section'])
        self.assertEqual(found('.//tag'), ['tag'] * 4)
        self.assertEqual(found('section/tag'), ['tag'])
        self.assertEqual(found('section//tag'), ['tag'] * 2)
        self.assertEqual(found('section/*'), section_children)
        self.assertEqual(found('section//*'), section_descendants)
        self.assertEqual(found('section/.//*'), section_descendants)
        self.assertEqual(found('*/*'), section_children)
        self.assertEqual(found('*//*'), section_descendants)
        self.assertEqual(found('*/tag'), ['tag'])
        self.assertEqual(found('*/./tag'), ['tag'])
        self.assertEqual(found('./tag'), ['tag'] * 2)
        self.assertEqual(found('././tag'), ['tag'] * 2)

        # Predicates and parent steps.
        self.assertEqual(found('.//tag[@class]'), ['tag'] * 3)
        self.assertEqual(found('.//tag[@class="a"]'), ['tag'])
        self.assertEqual(found('.//tag[@class="b"]'), ['tag'] * 2)
        self.assertEqual(found('.//tag[@id]'), ['tag'])
        self.assertEqual(found('.//section[tag]'), ['section'])
        self.assertEqual(found('.//section[element]'), [])
        self.assertEqual(found('../tag'), [])
        self.assertEqual(found('section/../tag'), ['tag'] * 2)

        # A trailing '//' gives the same elements as '//*'.
        self.assertEqual(root.findall('section//'),
                         root.findall('section//*'))

    def test_find_with_ns(self):
        root = ET.XML(SAMPLE_XML_NS)
        qualified = '{http://effbot.org/ns}tag'

        # The unqualified name matches nothing in the namespaced document.
        self.assertEqual(summarize_list(root.findall('tag')), [])
        self.assertEqual(
            summarize_list(root.findall("{http://effbot.org/ns}tag")),
            [qualified] * 2)
        self.assertEqual(
            summarize_list(root.findall(".//{http://effbot.org/ns}tag")),
            [qualified] * 3)

    def test_bad_find(self):
        root = ET.XML(SAMPLE_XML)
        expected_message = 'cannot use absolute path on element'
        with self.assertRaisesRegexp(SyntaxError, expected_message):
            root.findall('/tag')

    def test_find_through_ElementTree(self):
        root = ET.XML(SAMPLE_XML)

        def tree():
            # A new ElementTree wrapper around the same root every time.
            return ET.ElementTree(root)

        self.assertEqual(tree().find('tag').tag, 'tag')
        self.assertEqual(tree().find('./tag').tag, 'tag')
        # this produces a warning
        msg = ("This search is broken in 1.3 and earlier, and will be fixed "
               "in a future version.  If you rely on the current behaviour, "
               "change it to '.+'")
        absolute_path_warning = (msg, FutureWarning)
        with support.check_warnings(absolute_path_warning):
            self.assertEqual(tree().find('/tag').tag, 'tag')
        root[2] = ET.XML(SAMPLE_SECTION)
        self.assertEqual(tree().find('section/tag').tag, 'tag')
        self.assertIsNone(tree().find('tog'))
        self.assertIsNone(tree().find('tog/foo'))

        self.assertEqual(tree().findtext('tag'), 'text')
        self.assertIsNone(tree().findtext('tog/foo'))
        self.assertEqual(tree().findtext('tog/foo', 'default'), 'default')
        self.assertEqual(tree().findtext('./tag'), 'text')
        with support.check_warnings(absolute_path_warning):
            self.assertEqual(tree().findtext('/tag'), 'text')
        self.assertEqual(tree().findtext('section/tag'), 'subtext')

        self.assertEqual(summarize_list(tree().findall('./tag')),
                         ['tag'] * 2)
        with support.check_warnings(absolute_path_warning):
            it = tree().findall('/tag')
        self.assertEqual(summarize_list(it), ['tag'] * 2)
1996
1997
class ElementIterTest(unittest.TestCase):
    """iter(), itertext(), getiterator() and the iterator objects they return."""

    def _ilist(self, elem, tag=None):
        """Return the summary of ``elem.iter(tag)``."""
        walker = elem.iter(tag)
        return summarize_list(walker)

    def test_basic(self):
        doc = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
        self.assertEqual(self._ilist(doc), ['html', 'body', 'i'])
        self.assertEqual(self._ilist(doc.find('body')), ['body', 'i'])
        self.assertEqual(next(doc.iter()).tag, 'html')

        whole_text = ''.join(doc.itertext())
        self.assertEqual(whole_text, 'this is a paragraph...')
        body_text = ''.join(doc.find('body').itertext())
        self.assertEqual(body_text, 'this is a paragraph.')
        self.assertEqual(next(doc.itertext()), 'this is a ')

        # Method iterparse should return an iterator. See bug 6472.
        sourcefile = serialize(doc, to_string=False)
        first = next(ET.iterparse(sourcefile))
        self.assertEqual(first[0], 'end')

        if ET is pyET:
            # With an explicit parser too (issue #9708)
            sourcefile = serialize(doc, to_string=False)
            parser = ET.XMLParser(target=ET.TreeBuilder())
            first = next(ET.iterparse(sourcefile, parser=parser))
            self.assertEqual(first[0], 'end')

        # A tree without a root cannot be iterated.
        tree = ET.ElementTree(None)
        with self.assertRaises(AttributeError):
            tree.iter()

        # Issue #16913
        doc = ET.XML("<root>a&amp;<sub>b&amp;</sub>c&amp;</root>")
        self.assertEqual(''.join(doc.itertext()), 'a&b&c&')

    def test_corners(self):
        # single root, no subelements
        root = ET.Element('a')
        self.assertEqual(self._ilist(root), ['a'])

        # one child
        child = ET.SubElement(root, 'b')
        self.assertEqual(self._ilist(root), ['a', 'b'])

        # one child and one grandchild
        grandchild = ET.SubElement(child, 'c')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c'])

        # two children, only first with grandchild
        sibling = ET.SubElement(root, 'd')
        self.assertEqual(self._ilist(root), ['a', 'b', 'c', 'd'])

        # replace first child by second
        root[0] = root[1]
        del root[1]
        self.assertEqual(self._ilist(root), ['a', 'd'])

    def test_iter_by_tag(self):
        doc = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        three_rooms = ['room'] * 3
        self.assertEqual(self._ilist(doc, 'room'), three_rooms)
        self.assertEqual(self._ilist(doc, 'house'), ['house'] * 2)

        if ET is pyET:
            # test that iter also accepts 'tag' as a keyword arg
            by_keyword = summarize_list(doc.iter(tag='room'))
            self.assertEqual(by_keyword, three_rooms)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(summarize_list(doc.iter()), all_tags)
        self.assertEqual(self._ilist(doc), all_tags)
        self.assertEqual(self._ilist(doc, '*'), all_tags)

    def test_getiterator(self):
        # Element.getiterator() is deprecated.
        if not (sys.py3kwarning or ET is pyET):
            # No warning is required in this configuration.
            self._test_getiterator()
            return

        deprecation = ("This method will be removed in future versions.  "
                       "Use .+ instead.", PendingDeprecationWarning)
        with support.check_warnings(deprecation):
            self._test_getiterator()

    def _test_getiterator(self):
        """Checks shared by both branches of test_getiterator()."""
        doc = ET.XML('''
            <document>
                <house>
                    <room>bedroom1</room>
                    <room>bedroom2</room>
                </house>
                <shed>nothing here
                </shed>
                <house>
                    <room>bedroom8</room>
                </house>
            </document>''')

        three_rooms = ['room'] * 3
        self.assertEqual(summarize_list(doc.getiterator('room')),
                         three_rooms)
        self.assertEqual(summarize_list(doc.getiterator('house')),
                         ['house'] * 2)

        if ET is pyET:
            # test that getiterator also accepts 'tag' as a keyword arg
            by_keyword = summarize_list(doc.getiterator(tag='room'))
            self.assertEqual(by_keyword, three_rooms)

        # make sure both tag=None and tag='*' return all tags
        all_tags = ['document', 'house', 'room', 'room',
                    'shed', 'house', 'room']
        self.assertEqual(summarize_list(doc.getiterator()), all_tags)
        self.assertEqual(summarize_list(doc.getiterator(None)), all_tags)
        self.assertEqual(summarize_list(doc.getiterator('*')), all_tags)

    def test_copy(self):
        # Element iterators refuse to be copied.
        walker = ET.Element('a').iter()
        with self.assertRaises(TypeError):
            copy.copy(walker)

    def test_pickle(self):
        # Element iterators refuse to be pickled, whatever the protocol.
        walker = ET.Element('a').iter()
        for proto in range(pickle.HIGHEST_PROTOCOL + 1):
            with self.assertRaises((TypeError, pickle.PicklingError)):
                pickle.dumps(walker, proto)
2135
2136
class TreeBuilderTest(unittest.TestCase):
    """XMLParser driving TreeBuilder, TreeBuilder subclasses and stand-in targets."""

    sample1 = ('<!DOCTYPE html PUBLIC'
        ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        '<html>text<div>subtext</div>tail</html>')

    sample2 = '''<toplevel>sometext</toplevel>'''

    def _check_sample1_element(self, e):
        """Assert that *e* is the element tree described by ``sample1``."""
        # Root: <html> with leading text, no tail and no attributes.
        self.assertEqual((e.tag, e.text, e.tail, e.attrib),
                         ('html', 'text', None, {}))
        # Exactly one child: <div>subtext</div> followed by 'tail'.
        kids = list(e)
        self.assertEqual(len(kids), 1)
        div = kids[0]
        self.assertEqual((div.tag, div.text, div.tail, div.attrib),
                         ('div', 'subtext', 'tail', {}))

    def test_dummy_builder(self):
        class DummyBuilder:
            # Event callbacks are no-ops; only close() returns something.
            data = start = end = lambda *a: None

            def close(self):
                return 42

        parser = ET.XMLParser(target=DummyBuilder())
        parser.feed(self.sample1)
        # The parser hands back whatever the target's close() returned.
        self.assertEqual(parser.close(), 42)

    @python_only
    def test_treebuilder_elementfactory_none(self):
        builder = ET.TreeBuilder(element_factory=None)
        parser = ET.XMLParser(target=builder)
        parser.feed(self.sample1)
        self._check_sample1_element(parser.close())

    @python_only
    def test_subclass(self):
        class MyTreeBuilder(ET.TreeBuilder):
            def foobar(self, x):
                return x * 2

        builder = MyTreeBuilder()
        self.assertEqual(builder.foobar(10), 20)

        # The subclass still works as a parser target.
        parser = ET.XMLParser(target=builder)
        parser.feed(self.sample1)
        self._check_sample1_element(parser.close())

    @python_only
    def test_element_factory(self):
        created = []

        def myfactory(tag, attrib):
            # Record each tag the builder asks us to create.
            created.append(tag)
            return ET.Element(tag, attrib)

        parser = ET.XMLParser(
            target=ET.TreeBuilder(element_factory=myfactory))
        parser.feed(self.sample2)
        parser.close()

        self.assertEqual(created, ['toplevel'])

    @python_only
    def test_element_factory_subclass(self):
        class MyElement(ET.Element):
            pass

        parser = ET.XMLParser(
            target=ET.TreeBuilder(element_factory=MyElement))
        parser.feed(self.sample1)
        root = parser.close()
        self.assertIsInstance(root, MyElement)
        self._check_sample1_element(root)


    @python_only
    def test_doctype(self):
        class DoctypeParser:
            # Target that remembers the doctype and returns it from close().
            _doctype = None

            data = start = end = lambda *a: None

            def doctype(self, name, pubid, system):
                self._doctype = (name, pubid, system)

            def close(self):
                return self._doctype

        parser = ET.XMLParser(target=DoctypeParser())
        parser.feed(self.sample1)

        expected = ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
                    'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')
        self.assertEqual(parser.close(), expected)

    @cet_only  # PyET does not look up the attributes in XMLParser().__init__()
    def test_builder_lookup_errors(self):
        class RaisingBuilder(object):
            # Every attribute lookup yields a do-nothing handler, except
            # for the name in ``raise_in``, which raises ``what``.
            def __init__(self, raise_in=None, what=ValueError):
                self.raise_in = raise_in
                self.what = what

            def __getattr__(self, name):
                if name != self.raise_in:
                    return lambda *args: None
                raise self.what(self.raise_in)

        events = ('start', 'data', 'end', 'comment', 'pi')

        ET.XMLParser(target=RaisingBuilder())
        # cET also checks for 'close' and 'doctype', PyET does it only at need
        for event in events:
            with self.assertRaises(ValueError):
                ET.XMLParser(target=RaisingBuilder(event))

        # AttributeError from a lookup is not propagated: parsing still
        # completes and close() returns None.
        ET.XMLParser(target=RaisingBuilder(what=AttributeError))
        for event in events:
            target = RaisingBuilder(event, what=AttributeError)
            parser = ET.XMLParser(target=target)
            parser.feed(self.sample1)
            self.assertIsNone(parser.close())
2264
2265
class XMLParserTest(unittest.TestCase):
    """XMLParser construction, subclassing and doctype handling."""

    sample1 = b'<file><line>22</line></file>'
    sample2 = (b'<!DOCTYPE html PUBLIC'
        b' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
        b' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
        b'<html>text</html>')

    def _check_sample_element(self, e):
        """Assert that *e* is the tree described by ``sample1``."""
        self.assertEqual(e.tag, 'file')
        first = e[0]
        self.assertEqual(first.tag, 'line')
        self.assertEqual(first.text, '22')

    @python_only
    def test_constructor_args(self):
        # Positional args. The first (html) is not supported, but should be
        # nevertheless correctly accepted.
        html_warning = (r'.*\bhtml\b', DeprecationWarning)
        with support.check_py3k_warnings(html_warning):
            positional = ET.XMLParser(None, ET.TreeBuilder(), 'utf-8')
        positional.feed(self.sample1)
        self._check_sample_element(positional.close())

        # Now as keyword args.
        by_keyword = ET.XMLParser(encoding='utf-8',
                                  target=ET.TreeBuilder())
        by_keyword.feed(self.sample1)
        self._check_sample_element(by_keyword.close())

    @python_only
    def test_subclass(self):
        class MyParser(ET.XMLParser):
            pass

        parser = MyParser()
        parser.feed(self.sample1)
        self._check_sample_element(parser.close())

    @python_only
    def test_doctype_warning(self):
        parser = ET.XMLParser()
        # Calling doctype() directly must emit a DeprecationWarning.
        with support.check_warnings(('', DeprecationWarning)):
            parser.doctype('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
                'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')
        parser.feed('<html/>')
        parser.close()

    @python_only
    def test_subclass_doctype(self):
        expected = ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
                    'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')

        seen = []

        class MyParserWithDoctype(ET.XMLParser):
            def doctype(self, name, pubid, system):
                seen.append((name, pubid, system))

        # Without a custom target the parser's own doctype() is called,
        # accompanied by a DeprecationWarning.
        parser = MyParserWithDoctype()
        with support.check_warnings(('', DeprecationWarning)):
            parser.feed(self.sample2)
        parser.close()
        self.assertEqual(seen, [expected])

        # With a target that defines doctype(), the target receives the
        # doctype, the parser's override is not called, and (warnings
        # being errors here) no DeprecationWarning is raised.
        seen = []
        seen_by_target = []
        with warnings.catch_warnings():
            warnings.simplefilter('error', DeprecationWarning)

            class DoctypeParser:
                data = start = end = close = lambda *a: None

                def doctype(self, name, pubid, system):
                    seen_by_target.append((name, pubid, system))

            parser = MyParserWithDoctype(target=DoctypeParser())
            parser.feed(self.sample2)
            parser.close()
            self.assertEqual(seen, [])
            self.assertEqual(seen_by_target, [expected])
2342
2343
class NamespaceParseTest(unittest.TestCase):
    """findall() with a namespace map on a document that uses two namespaces."""

    def test_find_with_namespace(self):
        nsmap = {'h': 'hello', 'f': 'foo'}
        doc = ET.fromstring(SAMPLE_XML_NS_ELEMS)

        def hits(path):
            # Number of elements findall() returns for *path*.
            return len(doc.findall(path, nsmap))

        self.assertEqual(hits('{hello}table'), 1)
        self.assertEqual(hits('.//{hello}td'), 2)
        self.assertEqual(hits('.//{foo}name'), 1)
2352
2353
2354class ElementSlicingTest(unittest.TestCase):
2355    def _elem_tags(self, elemlist):
2356        return [e.tag for e in elemlist]
2357
2358    def _subelem_tags(self, elem):
2359        return self._elem_tags(list(elem))
2360
2361    def _make_elem_with_children(self, numchildren):
2362        """Create an Element with a tag 'a', with the given amount of children
2363           named 'a0', 'a1' ... and so on.
2364
2365        """
2366        e = ET.Element('a')
2367        for i in range(numchildren):
2368            ET.SubElement(e, 'a%s' % i)
2369        return e
2370
2371    def test_getslice_single_index(self):
2372        e = self._make_elem_with_children(10)
2373
2374        self.assertEqual(e[1].tag, 'a1')
2375        self.assertEqual(e[-2].tag, 'a8')
2376
2377        self.assertRaises(IndexError, lambda: e[12])
2378        self.assertRaises(IndexError, lambda: e[-12])
2379
2380    def test_getslice_range(self):
2381        e = self._make_elem_with_children(6)
2382
2383        self.assertEqual(self._elem_tags(e[3:]), ['a3', 'a4', 'a5'])
2384        self.assertEqual(self._elem_tags(e[3:6]), ['a3', 'a4', 'a5'])
2385        self.assertEqual(self._elem_tags(e[3:16]), ['a3', 'a4', 'a5'])
2386        self.assertEqual(self._elem_tags(e[3:5]), ['a3', 'a4'])
2387        self.assertEqual(self._elem_tags(e[3:-1]), ['a3', 'a4'])
2388        self.assertEqual(self._elem_tags(e[:2]), ['a0', 'a1'])
2389
2390    def test_getslice_steps(self):
2391        e = self._make_elem_with_children(10)
2392
2393        self.assertEqual(self._elem_tags(e[8:10:1]), ['a8', 'a9'])
2394        self.assertEqual(self._elem_tags(e[::3]), ['a0', 'a3', 'a6', 'a9'])
2395        self.assertEqual(self._elem_tags(e[::8]), ['a0', 'a8'])
2396        self.assertEqual(self._elem_tags(e[1::8]), ['a1', 'a9'])
2397        self.assertEqual(self._elem_tags(e[3::sys.maxsize]), ['a3'])
2398        self.assertEqual(self._elem_tags(e[3::sys.maxsize<<64]), ['a3'])
2399
2400    def test_getslice_negative_steps(self):
2401        e = self._make_elem_with_children(4)
2402
2403        self.assertEqual(self._elem_tags(e[::-1]), ['a3', 'a2', 'a1', 'a0'])
2404        self.assertEqual(self._elem_tags(e[::-2]), ['a3', 'a1'])
2405        self.assertEqual(self._elem_tags(e[3::-sys.maxsize]), ['a3'])
2406        self.assertEqual(self._elem_tags(e[3::-sys.maxsize-1]), ['a3'])
2407        self.assertEqual(self._elem_tags(e[3::-sys.maxsize<<64]), ['a3'])
2408
2409    def test_delslice(self):
2410        e = self._make_elem_with_children(4)
2411        del e[0:2]
2412        self.assertEqual(self._subelem_tags(e), ['a2', 'a3'])
2413
2414        e = self._make_elem_with_children(4)
2415        del e[0:]
2416        self.assertEqual(self._subelem_tags(e), [])
2417
2418        if ET is pyET:
2419            e = self._make_elem_with_children(4)
2420            del e[::-1]
2421            self.assertEqual(self._subelem_tags(e), [])
2422
2423            e = self._make_elem_with_children(4)
2424            del e[::-2]
2425            self.assertEqual(self._subelem_tags(e), ['a0', 'a2'])
2426
2427            e = self._make_elem_with_children(4)
2428            del e[1::2]
2429            self.assertEqual(self._subelem_tags(e), ['a0', 'a2'])
2430
2431            e = self._make_elem_with_children(2)
2432            del e[::2]
2433            self.assertEqual(self._subelem_tags(e), ['a1'])
2434
2435    def test_setslice_single_index(self):
2436        e = self._make_elem_with_children(4)
2437        e[1] = ET.Element('b')
2438        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
2439
2440        e[-2] = ET.Element('c')
2441        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3'])
2442
2443        with self.assertRaises(IndexError):
2444            e[5] = ET.Element('d')
2445        with self.assertRaises(IndexError):
2446            e[-5] = ET.Element('d')
2447        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'c', 'a3'])
2448
2449    def test_setslice_range(self):
2450        e = self._make_elem_with_children(4)
2451        e[1:3] = [ET.Element('b%s' % i) for i in range(2)]
2452        self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'a3'])
2453
2454        e = self._make_elem_with_children(4)
2455        e[1:3] = [ET.Element('b')]
2456        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a3'])
2457
2458        e = self._make_elem_with_children(4)
2459        e[1:3] = [ET.Element('b%s' % i) for i in range(3)]
2460        self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'b1', 'b2', 'a3'])
2461
2462    def test_setslice_steps(self):
2463        e = self._make_elem_with_children(6)
2464        e[1:5:2] = [ET.Element('b%s' % i) for i in range(2)]
2465        self.assertEqual(self._subelem_tags(e), ['a0', 'b0', 'a2', 'b1', 'a4', 'a5'])
2466
2467        e = self._make_elem_with_children(6)
2468        with self.assertRaises(ValueError):
2469            e[1:5:2] = [ET.Element('b')]
2470        with self.assertRaises(ValueError):
2471            e[1:5:2] = [ET.Element('b%s' % i) for i in range(3)]
2472        with self.assertRaises(ValueError):
2473            e[1:5:2] = []
2474        self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3', 'a4', 'a5'])
2475
2476        e = self._make_elem_with_children(4)
2477        e[1::sys.maxsize] = [ET.Element('b')]
2478        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
2479        e[1::sys.maxsize<<64] = [ET.Element('c')]
2480        self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])
2481
2482    def test_setslice_negative_steps(self):
2483        e = self._make_elem_with_children(4)
2484        e[2:0:-1] = [ET.Element('b%s' % i) for i in range(2)]
2485        self.assertEqual(self._subelem_tags(e), ['a0', 'b1', 'b0', 'a3'])
2486
2487        e = self._make_elem_with_children(4)
2488        with self.assertRaises(ValueError):
2489            e[2:0:-1] = [ET.Element('b')]
2490        with self.assertRaises(ValueError):
2491            e[2:0:-1] = [ET.Element('b%s' % i) for i in range(3)]
2492        with self.assertRaises(ValueError):
2493            e[2:0:-1] = []
2494        self.assertEqual(self._subelem_tags(e), ['a0', 'a1', 'a2', 'a3'])
2495
2496        e = self._make_elem_with_children(4)
2497        e[1::-sys.maxsize] = [ET.Element('b')]
2498        self.assertEqual(self._subelem_tags(e), ['a0', 'b', 'a2', 'a3'])
2499        e[1::-sys.maxsize-1] = [ET.Element('c')]
2500        self.assertEqual(self._subelem_tags(e), ['a0', 'c', 'a2', 'a3'])
2501        e[1::-sys.maxsize<<64] = [ET.Element('d')]
2502        self.assertEqual(self._subelem_tags(e), ['a0', 'd', 'a2', 'a3'])
2503
2504
class IOTest(unittest.TestCase):
    """Serialization encodings and I/O through files and stream objects."""

    def tearDown(self):
        # Remove the scratch file that some of the tests create.
        support.unlink(TESTFN)

    def test_encoding(self):
        # Test encoding issues.
        latin1_decl = "<?xml version='1.0' encoding='iso-8859-1'?>\n"

        def check(elem, plain, utf8, us_ascii, latin1_body):
            # Serialize with the default encoding and with three explicit
            # ones; the iso-8859-1 result is expected to carry an XML
            # declaration in front of the body.
            self.assertEqual(serialize(elem), plain)
            self.assertEqual(serialize(elem, encoding="utf-8"), utf8)
            self.assertEqual(serialize(elem, encoding="us-ascii"), us_ascii)
            self.assertEqual(serialize(elem, encoding="iso-8859-1"),
                             latin1_decl + latin1_body)

        # Plain ASCII text.
        elem = ET.Element("tag")
        elem.text = u"abc"
        check(elem,
              '<tag>abc</tag>',
              '<tag>abc</tag>',
              '<tag>abc</tag>',
              "<tag>abc</tag>")

        # Markup characters in text.
        elem = ET.Element("tag")
        elem.text = "<&\"\'>"
        check(elem,
              '<tag>&lt;&amp;"\'&gt;</tag>',
              b'<tag>&lt;&amp;"\'&gt;</tag>',
              b'<tag>&lt;&amp;"\'&gt;</tag>',
              "<tag>&lt;&amp;\"'&gt;</tag>")

        # Markup characters in an attribute value.
        elem = ET.Element("tag")
        elem.attrib["key"] = "<&\"\'>"
        check(elem,
              '<tag key="&lt;&amp;&quot;\'&gt;" />',
              b'<tag key="&lt;&amp;&quot;\'&gt;" />',
              b'<tag key="&lt;&amp;&quot;\'&gt;" />',
              "<tag key=\"&lt;&amp;&quot;'&gt;\" />")

        # Non-ASCII characters in text.
        elem = ET.Element("tag")
        elem.text = u'\xe5\xf6\xf6<>'
        check(elem,
              '<tag>&#229;&#246;&#246;&lt;&gt;</tag>',
              '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>',
              '<tag>&#229;&#246;&#246;&lt;&gt;</tag>',
              "<tag>\xe5\xf6\xf6&lt;&gt;</tag>")

        # Non-ASCII characters in an attribute value.
        elem = ET.Element("tag")
        elem.attrib["key"] = u'\xe5\xf6\xf6<>'
        check(elem,
              '<tag key="&#229;&#246;&#246;&lt;&gt;" />',
              '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />',
              '<tag key="&#229;&#246;&#246;&lt;&gt;" />',
              "<tag key=\"\xe5\xf6\xf6&lt;&gt;\" />")

    def test_write_to_filename(self):
        # write() is given a path and has to open the file itself.
        ET.ElementTree(ET.XML('''<site />''')).write(TESTFN)
        with open(TESTFN, 'rb') as written:
            content = written.read()
        self.assertEqual(content, b'''<site />''')

    def test_write_to_file(self):
        document = ET.ElementTree(ET.XML('''<site />'''))
        with open(TESTFN, 'wb') as target:
            document.write(target)
            # A file object supplied by the caller must be left open.
            self.assertFalse(target.closed)
        with open(TESTFN, 'rb') as written:
            content = written.read()
        self.assertEqual(content, b'''<site />''')

    def test_read_from_stringio(self):
        source = StringIO.StringIO('''<?xml version="1.0"?><site></site>''')
        document = ET.ElementTree()
        document.parse(source)
        self.assertEqual(document.getroot().tag, 'site')

    def test_write_to_stringio(self):
        sink = StringIO.StringIO()
        ET.ElementTree(ET.XML('''<site />''')).write(sink)
        self.assertEqual(sink.getvalue(), '''<site />''')

    # Bare attribute holder; the user reader/writer tests below attach a
    # single read or write callable to an instance of it.
    class dummy:
        pass

    def test_read_from_user_reader(self):
        # The reader object offers nothing except read().
        backing = StringIO.StringIO('''<?xml version="1.0"?><site></site>''')
        reader = self.dummy()
        reader.read = backing.read
        document = ET.ElementTree()
        document.parse(reader)
        self.assertEqual(document.getroot().tag, 'site')

    def test_write_to_user_writer(self):
        # The writer object offers nothing except write().
        document = ET.ElementTree(ET.XML('''<site />'''))
        backing = StringIO.StringIO()
        writer = self.dummy()
        writer.write = backing.write
        document.write(writer)
        self.assertEqual(backing.getvalue(), '''<site />''')

    def test_tostringlist_invariant(self):
        # Joining the tostringlist() pieces must reproduce tostring().
        root = ET.fromstring('<tag>foo</tag>')

        whole = ET.tostring(root)
        pieces = ET.tostringlist(root)
        self.assertEqual(whole, ''.join(pieces))

        whole = ET.tostring(root, 'utf-16')
        pieces = ET.tostringlist(root, 'utf-16')
        self.assertEqual(whole, b''.join(pieces))
2621
2622
class ParseErrorTest(unittest.TestCase):
    """Checks the type, position and error code carried by ET.ParseError."""

    def test_subclass(self):
        # ParseError instances must also be SyntaxError instances.
        self.assertIsInstance(ET.ParseError(), SyntaxError)

    def _get_error(self, s):
        """Parse *s* and return the ET.ParseError that it raises.

        If parsing unexpectedly succeeds, the calling test is failed
        explicitly.  Falling off the end and returning None would instead
        make the caller die with an unrelated AttributeError when it reads
        an attribute of the result.
        """
        try:
            ET.fromstring(s)
        except ET.ParseError as e:
            return e
        self.fail('ET.ParseError not raised for %r' % (s,))

    def test_error_position(self):
        # Each malformed document must report where parsing stopped.
        self.assertEqual(self._get_error('foo').position, (1, 0))
        self.assertEqual(self._get_error('<tag>&foo;</tag>').position, (1, 5))
        self.assertEqual(self._get_error('foobar<').position, (1, 6))

    @python_only
    def test_error_code(self):
        # The numeric code on the exception must translate, through expat,
        # to the syntax-error message.
        from xml.parsers import expat
        self.assertEqual(expat.ErrorString(self._get_error('foo').code),
                         expat.errors.XML_ERROR_SYNTAX)
2643
2644
class KeywordArgsTest(unittest.TestCase):
    """Keyword-argument handling in the Element constructor and methods."""

    def test_issue14818(self):
        root = ET.XML("<a>foo</a>")

        # The search methods must give the same answer whether their
        # arguments are passed by position or by keyword.
        by_position = root.find('a', None)
        by_keyword = root.find(path='a', namespaces=None)
        self.assertEqual(by_position, by_keyword)

        by_position = root.findtext('a', None, None)
        by_keyword = root.findtext(path='a', default=None, namespaces=None)
        self.assertEqual(by_position, by_keyword)

        by_position = root.findall('a', None)
        by_keyword = root.findall(path='a', namespaces=None)
        self.assertEqual(by_position, by_keyword)

        by_position = list(root.iterfind('a', None))
        by_keyword = list(root.iterfind(path='a', namespaces=None))
        self.assertEqual(by_position, by_keyword)

        # Without any attribute arguments, attrib is an empty mapping.
        self.assertEqual(ET.Element('a').attrib, {})

        # Every spelling of the same two attributes (positional dict,
        # attrib= keyword, extra keywords, or a mix) gives the same result.
        anchors = [
            ET.Element('a', dict(href="#", id="foo")),
            ET.Element('a', attrib=dict(href="#", id="foo")),
            ET.Element('a', dict(href="#"), id="foo"),
            ET.Element('a', href="#", id="foo"),
            ET.Element('a', dict(href="#", id="foo"), href="#", id="foo"),
        ]
        for anchor in anchors:
            self.assertEqual(anchor.tag, 'a')
            self.assertEqual(anchor.attrib, {'href': "#", 'id': "foo"})

        child = ET.SubElement(anchors[0], 'foobar', attrib={'key1': 'value1'})
        self.assertEqual(child.attrib['key1'], 'value1')

        # A non-dict attrib is rejected, positionally and by keyword.
        with self.assertRaisesRegexp(TypeError, 'must be dict, not str'):
            ET.Element('a', "I'm not a dict")
        with self.assertRaisesRegexp(TypeError, 'must be dict, not str'):
            ET.Element('a', attrib="I'm not a dict")
2678
2679# --------------------------------------------------------------------
2680
class NoAcceleratorTest(unittest.TestCase):
    """Checks that only apply while ET is the pure-Python module."""

    def setUp(self):
        # Proceed only when the pure-Python module is the one under test.
        if ET is pyET:
            return
        raise unittest.SkipTest('only for the Python version')

    def test_correct_import_pyET(self):
        """The C accelerator must not have been imported for pyET."""
        # Methods written in Python code have type types.FunctionType,
        # whereas methods defined inside _elementtree have type
        # <class 'wrapper_descriptor'>.
        for cls in (pyET.Element, pyET.XMLParser):
            self.assertIsInstance(cls.__init__, types.FunctionType)
2693
2694# --------------------------------------------------------------------
2695
2696
def test_main(module=None):
    """Run the ElementTree test classes against *module*.

    When *module* is omitted, the pure-Python implementation (pyET) is
    used.  The module-level ET symbol is bound to the module under test
    while the tests run and is reset to None afterwards.
    """
    global ET
    ET = pyET if module is None else module

    test_classes = [
        ModuleTest,
        ElementSlicingTest,
        BasicElementTest,
        BadElementTest,
        BadElementPathTest,
        ElementTreeTest,
        IOTest,
        ParseErrorTest,
        XIncludeTest,
        ElementTreeTypeTest,
        ElementFindTest,
        ElementIterTest,
        TreeBuilderTest,
        XMLParserTest,
        BugsTest,
    ]

    # skipUnless can't be used for this class, because ET is filled in only
    # after the module is loaded.
    # NOTE(review): NoAcceleratorTest is added only when ET is *not* pyET,
    # yet its setUp skips in exactly that case, so its test body appears
    # never to run -- confirm whether the condition is intended.
    if ET is not pyET:
        test_classes.append(NoAcceleratorTest)

    from xml.etree import ElementPath
    # Snapshot the default namespace mapping so it can be put back later.
    nsmap = pyET._namespace_map
    saved_nsmap = nsmap.copy()
    # Let the tests work on a copy of the path cache (should be empty).
    original_cache = ElementPath._cache
    ElementPath._cache = original_cache.copy()
    try:
        support.run_unittest(*test_classes)
    finally:
        from xml.etree import ElementPath
        # Restore mapping and path cache
        nsmap.clear()
        nsmap.update(saved_nsmap)
        ElementPath._cache = original_cache
        # don't interfere with subsequent tests
        ET = None
2750
2751
# Executed as a script: run the suite against the default (pure-Python)
# implementation.
if __name__ == '__main__':
    test_main(module=None)
2754