• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
# xml.etree test.  This file contains enough tests to make sure that
# all included components work as they should.
# Large parts are extracted from the upstream test suite.

# IMPORTANT: the same doctests are run from "test_xml_etree_c" in
# order to ensure consistency between the C implementation and the
# Python implementation.
#
# For this purpose, the module-level "ET" symbol is temporarily
# monkey-patched when running the "test_xml_etree_c" test suite.
# Don't re-import "xml.etree.ElementTree" module in the docstring,
# except if the test is specific to the Python implementation.
13
14import sys
15import cgi
16
17from test import test_support
18from test.test_support import findfile
19
20from xml.etree import ElementTree as ET
21
# Locations of the on-disk XML fixtures, looked up with findfile() in the
# "xmltestdata" subdirectory.
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")

# In-memory document: a <body> with two direct <tag> children and a
# <section> that holds a third <tag>.
SAMPLE_XML = """\
<body>
  <tag class='a'>text</tag>
  <tag class='b' />
  <section>
    <tag class='b' id='inner'>subtext</tag>
  </section>
</body>
"""

# Larger <section> fragment; the find() doctest swaps it in for the
# <section> child of SAMPLE_XML.
SAMPLE_SECTION = """\
<section>
  <tag class='b' id='inner'>subtext</tag>
  <nexttag />
  <nextsection>
    <tag />
  </nextsection>
</section>
"""

# Same element layout as SAMPLE_XML (without attributes), placed in the
# default namespace "http://effbot.org/ns".
SAMPLE_XML_NS = """
<body xmlns="http://effbot.org/ns">
  <tag>text</tag>
  <tag />
  <section>
    <tag>subtext</tag>
  </section>
</body>
"""
54
55
# Doctest holder: verifies that the ElementTree, ElementInclude and
# ElementPath submodules of xml.etree can be imported.
def sanity():
    """
    Import sanity.

    >>> from xml.etree import ElementTree
    >>> from xml.etree import ElementInclude
    >>> from xml.etree import ElementPath
    """
64
def check_method(method):
    """Print "<method> not callable" when *method* has no ``__call__``.

    Nothing is printed for callables, so a doctest that calls this helper
    on a valid method expects no output.
    """
    if hasattr(method, '__call__'):
        return
    # One pre-formatted argument gives the same text as printing the two
    # items separated by a space.
    print("%s %s" % (method, "not callable"))
68
def serialize(elem, to_string=True, **options):
    """Write *elem* into an in-memory file via ``ET.ElementTree.write``.

    Any extra keyword *options* are passed straight through to ``write``.
    When *to_string* is true the written text is returned; otherwise the
    StringIO object itself is returned, rewound to offset 0.
    """
    import StringIO
    buf = StringIO.StringIO()
    ET.ElementTree(elem).write(buf, **options)
    if not to_string:
        # Caller wants a readable file object: rewind before handing it over.
        buf.seek(0)
        return buf
    return buf.getvalue()
79
def summarize(elem):
    """Return the tag of *elem*, or "<Comment>" for a comment node."""
    is_comment = elem.tag == ET.Comment
    return "<Comment>" if is_comment else elem.tag
84
def summarize_list(seq):
    """Return a list with ``summarize(elem)`` for each element of *seq*."""
    labels = []
    for node in seq:
        labels.append(summarize(node))
    return labels
87
def normalize_crlf(tree):
    """Turn CR LF pairs into bare LF in every element's text and tail.

    Walks ``tree.iter()`` and rewrites the two attributes in place; empty
    or missing (None) values are left untouched.
    """
    for node in tree.iter():
        for field in ("text", "tail"):
            value = getattr(node, field)
            if value:
                setattr(node, field, value.replace("\r\n", "\n"))
94
def check_string(string):
    """Exercise *string* with the basic operations a string must support.

    Takes its length, iterates it (printing a complaint for any item whose
    length is not 1), concatenates it with plain strings and slices it.
    Any operation the object does not support raises from here.
    """
    len(string)
    for char in string:
        if len(char) == 1:
            continue
        print("expected one-character string, got %r" % char)
    # Concatenation with an empty and a non-empty string; results unused.
    string + ""
    string + " "
    # Slicing must be supported too.
    string[:0]
103
def check_mapping(mapping):
    """Exercise *mapping* with the basic operations a dict must support.

    Takes its length, fetches keys() and items(), reads every key, then
    stores "value" under "key" and prints a complaint if reading it back
    yields something else.  Note that *mapping* is modified by the store.
    """
    len(mapping)
    names = mapping.keys()
    mapping.items()
    for name in names:
        # Lookup only; the value itself is not inspected.
        mapping[name]
    mapping["key"] = "value"
    if mapping["key"] != "value":
        print("expected value string, got %r" % mapping["key"])
113
def check_element(element):
    """Recursively check that *element* looks like an ElementTree element.

    Prints a complaint when ET.iselement() rejects it or when one of the
    tag/attrib/text/tail members is missing, then runs check_string() and
    check_mapping() on the members and repeats for every child.
    """
    if not ET.iselement(element):
        print("not an element")

    required = (
        ("tag", "no tag member"),
        ("attrib", "no attrib member"),
        ("text", "no text member"),
        ("tail", "no tail member"),
    )
    for member, complaint in required:
        if not hasattr(element, member):
            print(complaint)

    check_string(element.tag)
    check_mapping(element.attrib)
    # text and tail may legitimately be None; only check real values.
    if element.text is not None:
        check_string(element.text)
    if element.tail is not None:
        check_string(element.tail)
    for child in element:
        check_element(child)
134
135# --------------------------------------------------------------------
136# element tree tests
137
# Doctest holder: runs check_element() on freshly built elements, checks
# that the listed Element methods and the iterators' next() are callable,
# and that the XML / PI / XMLParser aliases compare equal to their targets.
def interface():
    r"""
    Test element tree interface.

    >>> element = ET.Element("tag")
    >>> check_element(element)
    >>> tree = ET.ElementTree(element)
    >>> check_element(tree.getroot())

    >>> element = ET.Element("t\xe4g", key="value")
    >>> tree = ET.ElementTree(element)
    >>> repr(element)   # doctest: +ELLIPSIS
    "<Element 't\\xe4g' at 0x...>"
    >>> element = ET.Element("tag", key="value")

    Make sure all standard element methods exist.

    >>> check_method(element.append)
    >>> check_method(element.extend)
    >>> check_method(element.insert)
    >>> check_method(element.remove)
    >>> check_method(element.getchildren)
    >>> check_method(element.find)
    >>> check_method(element.iterfind)
    >>> check_method(element.findall)
    >>> check_method(element.findtext)
    >>> check_method(element.clear)
    >>> check_method(element.get)
    >>> check_method(element.set)
    >>> check_method(element.keys)
    >>> check_method(element.items)
    >>> check_method(element.iter)
    >>> check_method(element.itertext)
    >>> check_method(element.getiterator)

    These methods return an iterable. See bug 6472.

    >>> check_method(element.iter("tag").next)
    >>> check_method(element.iterfind("tag").next)
    >>> check_method(element.iterfind("*").next)
    >>> check_method(tree.iter("tag").next)
    >>> check_method(tree.iterfind("tag").next)
    >>> check_method(tree.iterfind("*").next)

    These aliases are provided:

    >>> assert ET.XML == ET.fromstring
    >>> assert ET.PI == ET.ProcessingInstruction
    >>> assert ET.XMLParser == ET.XMLTreeBuilder
    """
188
# Doctest holder: append / insert / remove / extend plus slice assignment,
# slice comparison and slice deletion on an element's children, checking
# the serialized form after each step.  Removing a child that is no longer
# present is expected to raise ValueError.
def simpleops():
    """
    Basic method sanity checks.

    >>> elem = ET.XML("<body><tag/></body>")
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> e = ET.Element("tag2")
    >>> elem.append(e)
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)
    >>> serialize(elem)
    '<body><tag /></body>'
    >>> elem.insert(0, e)
    >>> serialize(elem)
    '<body><tag2 /><tag /></body>'
    >>> elem.remove(e)
    >>> elem.extend([e])
    >>> serialize(elem)
    '<body><tag /><tag2 /></body>'
    >>> elem.remove(e)

    >>> element = ET.Element("tag", key="value")
    >>> serialize(element) # 1
    '<tag key="value" />'
    >>> subelement = ET.Element("subtag")
    >>> element.append(subelement)
    >>> serialize(element) # 2
    '<tag key="value"><subtag /></tag>'
    >>> element.insert(0, subelement)
    >>> serialize(element) # 3
    '<tag key="value"><subtag /><subtag /></tag>'
    >>> element.remove(subelement)
    >>> serialize(element) # 4
    '<tag key="value"><subtag /></tag>'
    >>> element.remove(subelement)
    >>> serialize(element) # 5
    '<tag key="value" />'
    >>> element.remove(subelement)
    Traceback (most recent call last):
    ValueError: list.remove(x): x not in list
    >>> serialize(element) # 6
    '<tag key="value" />'
    >>> element[0:0] = [subelement, subelement, subelement]
    >>> serialize(element[1])
    '<subtag />'
    >>> element[1:9] == [element[1], element[2]]
    True
    >>> element[:9:2] == [element[0], element[2]]
    True
    >>> del element[1:2]
    >>> serialize(element)
    '<tag key="value"><subtag /><subtag /></tag>'
    """
244
# Doctest holder: plain text, numeric character references and a CDATA
# section that all spell "hello" must serialize to the same output.
def cdata():
    """
    Test CDATA handling (etc).

    >>> serialize(ET.XML("<tag>hello</tag>"))
    '<tag>hello</tag>'
    >>> serialize(ET.XML("<tag>&#104;&#101;&#108;&#108;&#111;</tag>"))
    '<tag>hello</tag>'
    >>> serialize(ET.XML("<tag><![CDATA[hello]]></tag>"))
    '<tag>hello</tag>'
    """
256
# Only with Python implementation
#
# Doctest holder: temporarily replaces ElementTree.ElementPath with an
# ElementTree._SimpleElementPath() instance, runs find / findtext / findall
# against SAMPLE_XML, and restores the saved ElementPath in its last step.
# It imports xml.etree.ElementTree itself instead of using the module-level
# "ET" symbol.
def simplefind():
    """
    Test find methods using the elementpath fallback.

    >>> from xml.etree import ElementTree

    >>> CurrentElementPath = ElementTree.ElementPath
    >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
    >>> elem = ElementTree.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ElementTree.ElementTree(elem).findtext("tag")
    'text'
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']

    Path syntax doesn't work in this case.

    >>> elem.find("section/tag")
    >>> elem.findtext("section/tag")
    >>> summarize_list(elem.findall("section/tag"))
    []

    >>> ElementTree.ElementPath = CurrentElementPath
    """
292
# Doctest holder: find / findtext / findall on both Element and ElementTree
# objects built from SAMPLE_XML (with its <section> replaced by
# SAMPLE_SECTION part-way through), covering child, descendant, wildcard,
# parent and predicate path steps, then namespaced tags from SAMPLE_XML_NS.
def find():
    """
    Test find methods (including xpath syntax).

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.find("tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.find("section/tag").tag
    'tag'
    >>> elem.find("./tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("./tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("/tag").tag
    'tag'
    >>> elem[2] = ET.XML(SAMPLE_SECTION)
    >>> elem.find("section/nexttag").tag
    'nexttag'
    >>> ET.ElementTree(elem).find("section/tag").tag
    'tag'
    >>> ET.ElementTree(elem).find("tog")
    >>> ET.ElementTree(elem).find("tog/foo")
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("section/nexttag")
    ''
    >>> elem.findtext("section/nexttag", "default")
    ''
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ET.ElementTree(elem).findtext("tag")
    'text'
    >>> ET.ElementTree(elem).findtext("tog/foo")
    >>> ET.ElementTree(elem).findtext("tog/foo", "default")
    'default'
    >>> ET.ElementTree(elem).findtext("./tag")
    'text'
    >>> ET.ElementTree(elem).findtext("/tag")
    'text'
    >>> elem.findtext("section/tag")
    'subtext'
    >>> ET.ElementTree(elem).findtext("section/tag")
    'subtext'
    >>> summarize_list(elem.findall("."))
    ['body']
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("tog"))
    []
    >>> summarize_list(elem.findall("tog/foo"))
    []
    >>> summarize_list(elem.findall("*"))
    ['tag', 'tag', 'section']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("section/tag"))
    ['tag']
    >>> summarize_list(elem.findall("section//tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall("section/*"))
    ['tag', 'nexttag', 'nextsection']
    >>> summarize_list(elem.findall("section//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("section/.//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("*/*"))
    ['tag', 'nexttag', 'nextsection']
    >>> summarize_list(elem.findall("*//*"))
    ['tag', 'nexttag', 'nextsection', 'tag']
    >>> summarize_list(elem.findall("*/tag"))
    ['tag']
    >>> summarize_list(elem.findall("*/./tag"))
    ['tag']
    >>> summarize_list(elem.findall("./tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag', 'tag']
    >>> summarize_list(elem.findall("././tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class]"))
    ['tag', 'tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@class='a']"))
    ['tag']
    >>> summarize_list(elem.findall(".//tag[@class='b']"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag[@id]"))
    ['tag']
    >>> summarize_list(elem.findall(".//section[tag]"))
    ['section']
    >>> summarize_list(elem.findall(".//section[element]"))
    []
    >>> summarize_list(elem.findall("../tag"))
    []
    >>> summarize_list(elem.findall("section/../tag"))
    ['tag', 'tag']
    >>> summarize_list(ET.ElementTree(elem).findall("./tag"))
    ['tag', 'tag']

    Following example is invalid in 1.2.
    A leading '*' is assumed in 1.3.

    >>> elem.findall("section//") == elem.findall("section//*")
    True

    ET's Path module handles this case incorrectly; this gives
    a warning in 1.3, and the behaviour will be modified in 1.4.

    >>> summarize_list(ET.ElementTree(elem).findall("/tag"))
    ['tag', 'tag']

    >>> elem = ET.XML(SAMPLE_XML_NS)
    >>> summarize_list(elem.findall("tag"))
    []
    >>> summarize_list(elem.findall("{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    >>> summarize_list(elem.findall(".//{http://effbot.org/ns}tag"))
    ['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
    """
414
# Doctest holder: builds an ElementTree through the file= keyword, first
# from a StringIO object wrapping SAMPLE_XML and then from the
# SIMPLE_XMLFILE path, and runs find() on each result.
def file_init():
    """
    >>> import StringIO

    >>> stringfile = StringIO.StringIO(SAMPLE_XML)
    >>> tree = ET.ElementTree(file=stringfile)
    >>> tree.find("tag").tag
    'tag'
    >>> tree.find("section/tag").tag
    'tag'

    >>> tree = ET.ElementTree(file=SIMPLE_XMLFILE)
    >>> tree.find("element").tag
    'element'
    >>> tree.find("element/../empty-element").tag
    'empty-element'
    """
432
# Doctest holder: findall() with an absolute path ("/tag") on an Element
# (as opposed to an ElementTree) is expected to raise SyntaxError.
def bad_find():
    """
    Check bad or unsupported path expressions.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> elem.findall("/tag")
    Traceback (most recent call last):
    SyntaxError: cannot use absolute path on element
    """
442
# Doctest holder: inspects len(ET.ElementPath._cache).  Repeating the same
# ten paths must not grow the cache, ten additional paths must grow it, and
# after 600 distinct paths it must hold fewer than 500 entries.
def path_cache():
    """
    Check that the path cache behaves sanely.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> cache_len_10 = len(ET.ElementPath._cache)
    >>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) == cache_len_10
    True
    >>> for i in range(20): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) > cache_len_10
    True
    >>> for i in range(600): ET.ElementTree(elem).find('./'+str(i))
    >>> len(ET.ElementPath._cache) < 500
    True
    """
460
# Doctest holder: after renaming a child of the original, the copy.copy()
# result shows the renamed child while the copy.deepcopy() result still
# shows the old tag.
def copy():
    """
    Test copy handling (etc).

    >>> import copy
    >>> e1 = ET.XML("<tag>hello<foo/></tag>")
    >>> e2 = copy.copy(e1)
    >>> e3 = copy.deepcopy(e1)
    >>> e1.find("foo").tag = "bar"
    >>> serialize(e1)
    '<tag>hello<bar /></tag>'
    >>> serialize(e2)
    '<tag>hello<bar /></tag>'
    >>> serialize(e3)
    '<tag>hello<foo /></tag>'

    """
478
# Doctest holder: get() / set() with and without defaults, construction
# from a dict and from keywords (clearing the caller's dict afterwards must
# not affect the element), a keyword overriding the same key in the dict,
# and keys() / items() / attrib access checked against ET.tostring() output.
def attrib():
    """
    Test attribute handling.

    >>> elem = ET.Element("tag")
    >>> elem.get("key") # 1.1
    >>> elem.get("key", "default") # 1.2
    'default'
    >>> elem.set("key", "value")
    >>> elem.get("key") # 1.3
    'value'

    >>> elem = ET.Element("tag", key="value")
    >>> elem.get("key") # 2.1
    'value'
    >>> elem.attrib # 2.2
    {'key': 'value'}

    >>> attrib = {"key": "value"}
    >>> elem = ET.Element("tag", attrib)
    >>> attrib.clear() # check for aliasing issues
    >>> elem.get("key") # 3.1
    'value'
    >>> elem.attrib # 3.2
    {'key': 'value'}

    >>> attrib = {"key": "value"}
    >>> elem = ET.Element("tag", **attrib)
    >>> attrib.clear() # check for aliasing issues
    >>> elem.get("key") # 4.1
    'value'
    >>> elem.attrib # 4.2
    {'key': 'value'}

    >>> elem = ET.Element("tag", {"key": "other"}, key="value")
    >>> elem.get("key") # 5.1
    'value'
    >>> elem.attrib # 5.2
    {'key': 'value'}

    >>> elem = ET.Element('test')
    >>> elem.text = "aa"
    >>> elem.set('testa', 'testval')
    >>> elem.set('testb', 'test2')
    >>> ET.tostring(elem)
    '<test testa="testval" testb="test2">aa</test>'
    >>> sorted(elem.keys())
    ['testa', 'testb']
    >>> sorted(elem.items())
    [('testa', 'testval'), ('testb', 'test2')]
    >>> elem.attrib['testb']
    'test2'
    >>> elem.attrib['testb'] = 'test1'
    >>> elem.attrib['testc'] = 'test2'
    >>> ET.tostring(elem)
    '<test testa="testval" testb="test1" testc="test2">aa</test>'
    """
536
# Doctest holder: makeelement() must not reuse the attrib dict it is given
# (the doctest prints "attrib aliasing" if it does); also covers clear(),
# append(), extend() and whole-slice assignment from a list and a tuple.
def makeelement():
    """
    Test makeelement handling.

    >>> elem = ET.Element("tag")
    >>> attrib = {"key": "value"}
    >>> subelem = elem.makeelement("subtag", attrib)
    >>> if subelem.attrib is attrib:
    ...     print "attrib aliasing"
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'

    >>> elem.clear()
    >>> serialize(elem)
    '<tag />'
    >>> elem.append(subelem)
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'
    >>> elem.extend([subelem, subelem])
    >>> serialize(elem)
    '<tag><subtag key="value" /><subtag key="value" /><subtag key="value" /></tag>'
    >>> elem[:] = [subelem]
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'
    >>> elem[:] = tuple([subelem])
    >>> serialize(elem)
    '<tag><subtag key="value" /></tag>'

    """
567
# Doctest holder: parses the two fixture files with ET.parse() and writes
# them to stdout after normalize_crlf(), then feeds the plain fixture's
# contents to XMLParser, XMLTreeBuilder and an XMLParser with an explicit
# TreeBuilder target, expecting the same serialized document each time.
def parsefile():
    """
    Test parsing from file.

    >>> tree = ET.parse(SIMPLE_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    >>> tree = ET.parse(SIMPLE_NS_XMLFILE)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <ns0:root xmlns:ns0="namespace">
       <ns0:element key="value">text</ns0:element>
       <ns0:element>text</ns0:element>tail
       <ns0:empty-element />
    </ns0:root>

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()

    >>> parser = ET.XMLParser()
    >>> parser.version  # doctest: +ELLIPSIS
    'Expat ...'
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>

    >>> parser = ET.XMLTreeBuilder() # 1.2 compatibility
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>

    >>> target = ET.TreeBuilder()
    >>> parser = ET.XMLParser(target=target)
    >>> parser.feed(data)
    >>> print serialize(parser.close())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    """
622
# Doctest holder: parses string literals with XML(), fromstring() and
# fromstringlist(), serializes with tostring() / tostringlist(), and checks
# the id -> element mapping returned by XMLID().
def parseliteral():
    """
    >>> element = ET.XML("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> element = ET.fromstring("<html><body>text</body></html>")
    >>> ET.ElementTree(element).write(sys.stdout)
    <html><body>text</body></html>
    >>> sequence = ["<html><body>", "text</bo", "dy></html>"]
    >>> element = ET.fromstringlist(sequence)
    >>> print ET.tostring(element)
    <html><body>text</body></html>
    >>> print "".join(ET.tostringlist(element))
    <html><body>text</body></html>
    >>> ET.tostring(element, "ascii")
    "<?xml version='1.0' encoding='ascii'?>\\n<html><body>text</body></html>"
    >>> _, ids = ET.XMLID("<html><body>text</body></html>")
    >>> len(ids)
    0
    >>> _, ids = ET.XMLID("<html><body id='body'>text</body></html>")
    >>> len(ids)
    1
    >>> ids["body"].tag
    'body'
    """
648
# Doctest holder: ET.iterparse() with the default events, an empty events
# tuple, start/end events and namespace events; an unknown event name is
# expected to raise ValueError, and trailing junk after the document
# element is expected to surface as ET.ParseError while iterating.
def iterparse():
    """
    Test iterparse interface.

    >>> iterparse = ET.iterparse

    >>> context = iterparse(SIMPLE_XMLFILE)
    >>> action, elem = next(context)
    >>> print action, elem.tag
    end element
    >>> for action, elem in context:
    ...   print action, elem.tag
    end element
    end empty-element
    end root
    >>> context.root.tag
    'root'

    >>> context = iterparse(SIMPLE_NS_XMLFILE)
    >>> for action, elem in context:
    ...   print action, elem.tag
    end {namespace}element
    end {namespace}element
    end {namespace}empty-element
    end {namespace}root

    >>> events = ()
    >>> context = iterparse(SIMPLE_XMLFILE, events)
    >>> for action, elem in context:
    ...   print action, elem.tag

    >>> events = ()
    >>> context = iterparse(SIMPLE_XMLFILE, events=events)
    >>> for action, elem in context:
    ...   print action, elem.tag

    >>> events = ("start", "end")
    >>> context = iterparse(SIMPLE_XMLFILE, events)
    >>> for action, elem in context:
    ...   print action, elem.tag
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> events = ("start", "end", "start-ns", "end-ns")
    >>> context = iterparse(SIMPLE_NS_XMLFILE, events)
    >>> for action, elem in context:
    ...   if action in ("start", "end"):
    ...     print action, elem.tag
    ...   else:
    ...     print action, elem
    start-ns ('', 'namespace')
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root
    end-ns None

    >>> import StringIO

    >>> events = ('start-ns', 'end-ns')
    >>> context = ET.iterparse(StringIO.StringIO(r"<root xmlns=''/>"), events)
    >>> for action, elem in context:
    ...   print action, elem
    start-ns ('', '')
    end-ns None

    >>> events = ("start", "end", "bogus")
    >>> with open(SIMPLE_XMLFILE, "rb") as f:
    ...     iterparse(f, events)
    Traceback (most recent call last):
    ValueError: unknown event 'bogus'

    >>> source = StringIO.StringIO(
    ...     "<?xml version='1.0' encoding='iso-8859-1'?>\\n"
    ...     "<body xmlns='http://&#233;ffbot.org/ns'\\n"
    ...     "      xmlns:cl\\xe9='http://effbot.org/ns'>text</body>\\n")
    >>> events = ("start-ns",)
    >>> context = iterparse(source, events)
    >>> for action, elem in context:
    ...     print action, elem
    start-ns ('', u'http://\\xe9ffbot.org/ns')
    start-ns (u'cl\\xe9', 'http://effbot.org/ns')

    >>> source = StringIO.StringIO("<document />junk")
    >>> try:
    ...   for action, elem in iterparse(source):
    ...     print action, elem.tag
    ... except ET.ParseError, v:
    ...   print v
    end document
    junk after document element: line 1, column 12
    """
751
# Doctest holder: serializes an element with text and a SubElement, then
# sets the tag to None (only the content is written) and checks that an
# inserted Comment and a ProcessingInstruction are written in place.
def writefile():
    """
    >>> elem = ET.Element("tag")
    >>> elem.text = "text"
    >>> serialize(elem)
    '<tag>text</tag>'
    >>> ET.SubElement(elem, "subtag").text = "subtext"
    >>> serialize(elem)
    '<tag>text<subtag>subtext</subtag></tag>'

    Test tag suppression
    >>> elem.tag = None
    >>> serialize(elem)
    'text<subtag>subtext</subtag>'
    >>> elem.insert(0, ET.Comment("comment"))
    >>> serialize(elem)     # assumes 1.3
    'text<!--comment--><subtag>subtext</subtag>'
    >>> elem[0] = ET.PI("key", "value")
    >>> serialize(elem)
    'text<?key value?><subtag>subtext</subtag>'
    """
773
# Doctest holder: passes a user-defined target object to ET.XMLParser and
# checks the order of its start / end callbacks; the second Builder also
# defines pi() and comment() and is fed the namespaced fixture.
def custom_builder():
    """
    Test parser w. custom builder.

    >>> with open(SIMPLE_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    start root
    start element
    end element
    start element
    end element
    start empty-element
    end empty-element
    end root

    >>> with open(SIMPLE_NS_XMLFILE) as f:
    ...     data = f.read()
    >>> class Builder:
    ...     def start(self, tag, attrib):
    ...         print "start", tag
    ...     def end(self, tag):
    ...         print "end", tag
    ...     def data(self, text):
    ...         pass
    ...     def pi(self, target, data):
    ...         print "pi", target, repr(data)
    ...     def comment(self, data):
    ...         print "comment", repr(data)
    >>> builder = Builder()
    >>> parser = ET.XMLParser(target=builder)
    >>> parser.feed(data)
    pi pi 'data'
    comment ' comment '
    start {namespace}root
    start {namespace}element
    end {namespace}element
    start {namespace}element
    end {namespace}element
    start {namespace}empty-element
    end {namespace}empty-element
    end {namespace}root

    """
827
# Doctest holder: getchildren() for every element reached through iter()
# and getiterator(), then child identity and ordering after slice
# deletion, slice assignment and clear() on a tree built from SAMPLE_XML.
def getchildren():
    """
    Test Element.getchildren()

    >>> with open(SIMPLE_XMLFILE, "r") as f:
    ...     tree = ET.parse(f)
    >>> for elem in tree.getroot().iter():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []
    >>> for elem in tree.getiterator():
    ...     summarize_list(elem.getchildren())
    ['element', 'element', 'empty-element']
    []
    []
    []

    >>> elem = ET.XML(SAMPLE_XML)
    >>> len(elem.getchildren())
    3
    >>> len(elem[2].getchildren())
    1
    >>> elem[:] == elem.getchildren()
    True
    >>> child1 = elem[0]
    >>> child2 = elem[2]
    >>> del elem[1:2]
    >>> len(elem.getchildren())
    2
    >>> child1 == elem[0]
    True
    >>> child2 == elem[1]
    True
    >>> elem[0:2] = [child2, child1]
    >>> child2 == elem[0]
    True
    >>> child1 == elem[1]
    True
    >>> child1 == elem[0]
    False
    >>> elem.clear()
    >>> elem.getchildren()
    []
    """
874
# Doctest holder: ET.tostring() must reproduce a document parsed with
# either ET.XML() or ET.fromstring().
def writestring():
    """
    >>> elem = ET.XML("<html><body>text</body></html>")
    >>> ET.tostring(elem)
    '<html><body>text</body></html>'
    >>> elem = ET.fromstring("<html><body>text</body></html>")
    >>> ET.tostring(elem)
    '<html><body>text</body></html>'
    """
884
def check_encoding(encoding):
    """
    >>> check_encoding("ascii")
    >>> check_encoding("us-ascii")
    >>> check_encoding("iso-8859-1")
    >>> check_encoding("iso-8859-15")
    >>> check_encoding("cp437")
    >>> check_encoding("mac-roman")
    >>> check_encoding("gbk")
    Traceback (most recent call last):
    ValueError: multi-byte encodings are not supported
    >>> check_encoding("cp037")
    Traceback (most recent call last):
    ParseError: unknown encoding: line 1, column 30
    """
    # Parse a one-element document whose XML declaration names *encoding*;
    # the result is discarded, only a raised exception matters to callers.
    document = "<?xml version='1.0' encoding='%s'?><xml />" % encoding
    ET.XML(document)
901
# Doctest holder: serializes text and attribute values containing markup
# characters and non-ASCII characters with the default encoding, utf-8,
# us-ascii and iso-8859-1, checking escaping, character references and
# when an XML declaration is written.
def encoding():
    r"""
    Test encoding issues.

    >>> elem = ET.Element("tag")
    >>> elem.text = u"abc"
    >>> serialize(elem)
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="us-ascii")
    '<tag>abc</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"

    >>> elem.text = "<&\"\'>"
    >>> serialize(elem)
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="us-ascii") # cdata characters
    '<tag>&lt;&amp;"\'&gt;</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'

    >>> elem.attrib["key"] = "<&\"\'>"
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="utf-8")
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="us-ascii")
    '<tag key="&lt;&amp;&quot;\'&gt;" />'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="&lt;&amp;&quot;\'&gt;" />'

    >>> elem.text = u'\xe5\xf6\xf6<>'
    >>> elem.attrib.clear()
    >>> serialize(elem)
    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, encoding="utf-8")
    '<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
    >>> serialize(elem, encoding="us-ascii")
    '<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
    >>> serialize(elem, encoding="iso-8859-1")
    "<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"

    >>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
    >>> elem.text = None
    >>> serialize(elem)
    '<tag key="&#229;&#246;&#246;&lt;&gt;" />'
    >>> serialize(elem, encoding="utf-8")
    '<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />'
    >>> serialize(elem, encoding="us-ascii")
    '<tag key="&#229;&#246;&#246;&lt;&gt;" />'
    >>> serialize(elem, encoding="iso-8859-1")
    '<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6&lt;&gt;" />'
    """
960
# Doctest holder: serializes the same tree with method=None, "xml", "html"
# and "text" and checks how the empty <link> element and the escaped "<"
# inside <script> are written in each case.
def methods():
    r"""
    Test serialization methods.

    >>> e = ET.XML("<html><link/><script>1 &lt; 2</script></html>")
    >>> e.tail = "\n"
    >>> serialize(e)
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method=None)
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method="xml")
    '<html><link /><script>1 &lt; 2</script></html>\n'
    >>> serialize(e, method="html")
    '<html><link><script>1 < 2</script></html>\n'
    >>> serialize(e, method="text")
    '1 < 2\n'
    """
978
# Doctest holder: iter() and itertext() on an element and on a subelement,
# next() on the iterators returned by iter(), itertext() and ET.iterparse(),
# and iter() on an ElementTree whose root is None (AttributeError expected).
def iterators():
    """
    Test iterators.

    >>> e = ET.XML("<html><body>this is a <i>paragraph</i>.</body>..</html>")
    >>> summarize_list(e.iter())
    ['html', 'body', 'i']
    >>> summarize_list(e.find("body").iter())
    ['body', 'i']
    >>> summarize(next(e.iter()))
    'html'
    >>> "".join(e.itertext())
    'this is a paragraph...'
    >>> "".join(e.find("body").itertext())
    'this is a paragraph.'
    >>> next(e.itertext())
    'this is a '

    Method iterparse should return an iterator. See bug 6472.

    >>> sourcefile = serialize(e, to_string=False)
    >>> next(ET.iterparse(sourcefile))  # doctest: +ELLIPSIS
    ('end', <Element 'i' at 0x...>)

    >>> tree = ET.ElementTree(None)
    >>> tree.iter()
    Traceback (most recent call last):
    AttributeError: 'NoneType' object has no attribute 'iter'
    """
1008
# Document whose DOCTYPE pulls in an external parameter entity
# ('user-entities.xml') and whose body references &entity;, which the
# document itself never defines.  Used by the entity() doctest.
ENTITY_XML = """\
<!DOCTYPE points [
<!ENTITY % user-entities SYSTEM 'user-entities.xml'>
%user-entities;
]>
<document>&entity;</document>
"""
1016
# Doctest holder: a numeric character reference in an attribute is
# re-serialized in decimal form, an undefined entity raises ParseError
# (with and without the ENTITY_XML doctype), and an entry added to
# parser.entity lets ENTITY_XML parse with the substituted text.
def entity():
    """
    Test entity handling.

    1) good entities

    >>> e = ET.XML("<document title='&#x8230;'>test</document>")
    >>> serialize(e)
    '<document title="&#33328;">test</document>'

    2) bad entities

    >>> ET.XML("<document>&entity;</document>")
    Traceback (most recent call last):
    ParseError: undefined entity: line 1, column 10

    >>> ET.XML(ENTITY_XML)
    Traceback (most recent call last):
    ParseError: undefined entity &entity;: line 5, column 10

    3) custom entity

    >>> parser = ET.XMLParser()
    >>> parser.entity["entity"] = "text"
    >>> parser.feed(ENTITY_XML)
    >>> root = parser.close()
    >>> serialize(root)
    '<document>text</document>'
    """
1046
def error(xml):
    """
    Parse *xml* and return the ET.ParseError it raises.

    Returns None when *xml* parses without a ParseError; any other
    exception propagates to the caller.

    Test error handling.

    >>> issubclass(ET.ParseError, SyntaxError)
    True
    >>> error("foo").position
    (1, 0)
    >>> error("<tag>&foo;</tag>").position
    (1, 5)
    >>> error("foobar<").position
    (1, 6)

    """
    try:
        ET.XML(xml)
    except ET.ParseError as exc:
        # Bind the exception directly: the sys.exc_value global is
        # deprecated, is shared between threads, and no longer exists in
        # Python 3.
        return exc
    return None
1066
def namespace():
    """
    Test namespace issues.

    The xml, RDF and XHTML namespaces keep their conventional prefixes
    when serialized; the other namespace URIs used here are written with
    a generated "ns0" prefix.

    1) xml namespace

    >>> elem = ET.XML("<tag xml:lang='en' />")
    >>> serialize(elem) # 1.1
    '<tag xml:lang="en" />'

    2) other "well-known" namespaces

    >>> elem = ET.XML("<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' />")
    >>> serialize(elem) # 2.1
    '<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />'

    >>> elem = ET.XML("<html:html xmlns:html='http://www.w3.org/1999/xhtml' />")
    >>> serialize(elem) # 2.2
    '<html:html xmlns:html="http://www.w3.org/1999/xhtml" />'

    >>> elem = ET.XML("<soap:Envelope xmlns:soap='http://schemas.xmlsoap.org/soap/envelope' />")
    >>> serialize(elem) # 2.3
    '<ns0:Envelope xmlns:ns0="http://schemas.xmlsoap.org/soap/envelope" />'

    3) unknown namespaces
    >>> elem = ET.XML(SAMPLE_XML_NS)
    >>> print serialize(elem)
    <ns0:body xmlns:ns0="http://effbot.org/ns">
      <ns0:tag>text</ns0:tag>
      <ns0:tag />
      <ns0:section>
        <ns0:tag>subtext</ns0:tag>
      </ns0:section>
    </ns0:body>
    """
1102
def qname():
    """
    Test QName handling.

    Covers "{uri}local" names and ET.QName objects used as element tags,
    attribute keys and attribute values, followed by the string form and
    equality comparisons of QName itself.

    1) decorated tags

    >>> elem = ET.Element("{uri}tag")
    >>> serialize(elem) # 1.1
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("{uri}tag"))
    >>> serialize(elem) # 1.2
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("uri", "tag"))
    >>> serialize(elem) # 1.3
    '<ns0:tag xmlns:ns0="uri" />'
    >>> elem = ET.Element(ET.QName("uri", "tag"))
    >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
    >>> subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
    >>> serialize(elem) # 1.4
    '<ns0:tag xmlns:ns0="uri"><ns0:tag1 /><ns0:tag2 /></ns0:tag>'

    2) decorated attributes

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = "value"
    >>> serialize(elem) # 2.1
    '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'

    >>> elem.clear()
    >>> elem.attrib[ET.QName("{uri}key")] = "value"
    >>> serialize(elem) # 2.2
    '<ns0:tag xmlns:ns0="uri" ns0:key="value" />'

    3) decorated values are not converted by default, but the
       QName wrapper can be used for values

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = "{uri}value"
    >>> serialize(elem) # 3.1
    '<ns0:tag xmlns:ns0="uri" ns0:key="{uri}value" />'

    >>> elem.clear()
    >>> elem.attrib["{uri}key"] = ET.QName("{uri}value")
    >>> serialize(elem) # 3.2
    '<ns0:tag xmlns:ns0="uri" ns0:key="ns0:value" />'

    >>> elem.clear()
    >>> subelem = ET.Element("tag")
    >>> subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
    >>> elem.append(subelem)
    >>> elem.append(subelem)
    >>> serialize(elem) # 3.3
    '<ns0:tag xmlns:ns0="uri" xmlns:ns1="uri1" xmlns:ns2="uri2"><tag ns1:key="ns2:value" /><tag ns1:key="ns2:value" /></ns0:tag>'

    4) Direct QName tests

    >>> str(ET.QName('ns', 'tag'))
    '{ns}tag'
    >>> str(ET.QName('{ns}tag'))
    '{ns}tag'
    >>> q1 = ET.QName('ns', 'tag')
    >>> q2 = ET.QName('ns', 'tag')
    >>> q1 == q2
    True
    >>> q2 = ET.QName('ns', 'other-tag')
    >>> q1 == q2
    False
    >>> q1 == 'ns:tag'
    False
    >>> q1 == '{ns}tag'
    True
    """
1175
def doctype_public():
    """
    Test PUBLIC doctype.

    Only checks that a document carrying a PUBLIC doctype declaration
    parses without raising; the resulting element is not inspected.

    >>> elem = ET.XML('<!DOCTYPE html PUBLIC'
    ...   ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
    ...   ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
    ...   '<html>text</html>')

    """
1186
def xpath_tokenizer(p):
    """
    Flatten the token stream ElementPath produces for the path *p*.

    ElementPath.xpath_tokenizer yields (op, tag) pairs; the returned list
    holds, for each pair, the operator when it is non-empty and the tag
    otherwise.

    Test the XPath tokenizer.

    >>> # tests from the xml specification
    >>> xpath_tokenizer("*")
    ['*']
    >>> xpath_tokenizer("text()")
    ['text', '()']
    >>> xpath_tokenizer("@name")
    ['@', 'name']
    >>> xpath_tokenizer("@*")
    ['@', '*']
    >>> xpath_tokenizer("para[1]")
    ['para', '[', '1', ']']
    >>> xpath_tokenizer("para[last()]")
    ['para', '[', 'last', '()', ']']
    >>> xpath_tokenizer("*/para")
    ['*', '/', 'para']
    >>> xpath_tokenizer("/doc/chapter[5]/section[2]")
    ['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
    >>> xpath_tokenizer("chapter//para")
    ['chapter', '//', 'para']
    >>> xpath_tokenizer("//para")
    ['//', 'para']
    >>> xpath_tokenizer("//olist/item")
    ['//', 'olist', '/', 'item']
    >>> xpath_tokenizer(".")
    ['.']
    >>> xpath_tokenizer(".//para")
    ['.', '//', 'para']
    >>> xpath_tokenizer("..")
    ['..']
    >>> xpath_tokenizer("../@lang")
    ['..', '/', '@', 'lang']
    >>> xpath_tokenizer("chapter[title]")
    ['chapter', '[', 'title', ']']
    >>> xpath_tokenizer("employee[@secretary and @assistant]")
    ['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']

    >>> # additional tests
    >>> xpath_tokenizer("{http://spam}egg")
    ['{http://spam}egg']
    >>> xpath_tokenizer("./spam.egg")
    ['.', '/', 'spam.egg']
    >>> xpath_tokenizer(".//{http://spam}egg")
    ['.', '//', '{http://spam}egg']
    """
    from xml.etree import ElementPath
    # Each token is an (operator, tag) pair; keep whichever part is set.
    return [operator or name
            for operator, name in ElementPath.xpath_tokenizer(p)]
1240
def processinginstruction():
    """
    Test ProcessingInstruction directly

    ET.PI is used interchangeably with ET.ProcessingInstruction here.
    The instruction text is written verbatim, without escaping of
    markup characters.

    >>> ET.tostring(ET.ProcessingInstruction('test', 'instruction'))
    '<?test instruction?>'
    >>> ET.tostring(ET.PI('test', 'instruction'))
    '<?test instruction?>'

    Issue #2746

    >>> ET.tostring(ET.PI('test', '<testing&>'))
    '<?test <testing&>?>'
    >>> ET.tostring(ET.PI('test', u'<testing&>\xe3'), 'latin1')
    "<?xml version='1.0' encoding='latin1'?>\\n<?test <testing&>\\xe3?>"
    """
1257
1258#
1259# xinclude tests (samples from appendix C of the xinclude specification)
1260
# Maps an href to the text of the resource it names; xinclude_loader()
# serves these entries to ElementInclude instead of reading files.
XINCLUDE = {}

# C.1: basic inclusion of another XML document.
XINCLUDE["C1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml"/>
</document>
"""

XINCLUDE["disclaimer.xml"] = """\
<?xml version='1.0'?>
<disclaimer>
  <p>The opinions represented herein represent those of the individual
  and should not be interpreted as official policy endorsed by this
  organization.</p>
</disclaimer>
"""

# C.2: textual inclusion (parse="text").
XINCLUDE["C2.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been accessed
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

XINCLUDE["count.txt"] = "324387"

# Variant of C.2 in which the include follows a sibling element.
XINCLUDE["C2b.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>This document has been <em>accessed</em>
  <xi:include href="count.txt" parse="text"/> times.</p>
</document>
"""

# C.3: textual inclusion of an XML resource.
XINCLUDE["C3.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>The following is the source of the "data.xml" resource:</p>
  <example><xi:include href="data.xml" parse="text"/></example>
</document>
"""

XINCLUDE["data.xml"] = """\
<?xml version='1.0'?>
<data>
  <item><![CDATA[Brooks & Shields]]></item>
</data>
"""

# C.5: nested fallbacks; neither referenced text resource is defined here.
XINCLUDE["C5.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
  <xi:include href="example.txt" parse="text">
    <xi:fallback>
      <xi:include href="fallback-example.txt" parse="text">
        <xi:fallback><a href="mailto:bob@example.org">Report error</a></xi:fallback>
      </xi:include>
    </xi:fallback>
  </xi:include>
</div>
"""

# Includes SIMPLE_XMLFILE by path; the path is escaped (quotes included)
# because it is substituted into an attribute value.
XINCLUDE["default.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>Example.</p>
  <xi:include href="{}"/>
</document>
""".format(cgi.escape(SIMPLE_XMLFILE, True))
1333
def xinclude_loader(href, parse="xml", encoding=None):
    """Serve the XINCLUDE entry stored under *href*.

    When *parse* is "xml" the stored text is parsed and the resulting
    element is returned; for any other value the text is returned as is.
    *encoding* is accepted but not used.

    Raises IOError("resource not found") when XINCLUDE has no such key.
    """
    try:
        text = XINCLUDE[href]
    except KeyError:
        raise IOError("resource not found")
    if parse != "xml":
        return text
    from xml.etree.ElementTree import XML
    return XML(text)
1343
def xinclude():
    r"""
    Test ElementInclude.include() with xinclude_loader on the sample
    documents stored in XINCLUDE.

    Basic inclusion example (XInclude C.1)

    >>> from xml.etree import ElementTree as ET
    >>> from xml.etree import ElementInclude

    >>> document = xinclude_loader("C1.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C1
    <document>
      <p>120 Mz is adequate for an average home user.</p>
      <disclaimer>
      <p>The opinions represented herein represent those of the individual
      and should not be interpreted as official policy endorsed by this
      organization.</p>
    </disclaimer>
    </document>

    Textual inclusion example (XInclude C.2)

    >>> document = xinclude_loader("C2.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C2
    <document>
      <p>This document has been accessed
      324387 times.</p>
    </document>

    Textual inclusion after sibling element (based on modified XInclude C.2)

    >>> document = xinclude_loader("C2b.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print(serialize(document)) # C2b
    <document>
      <p>This document has been <em>accessed</em>
      324387 times.</p>
    </document>

    Textual inclusion of XML example (XInclude C.3)

    >>> document = xinclude_loader("C3.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    >>> print serialize(document) # C3
    <document>
      <p>The following is the source of the "data.xml" resource:</p>
      <example>&lt;?xml version='1.0'?&gt;
    &lt;data&gt;
      &lt;item&gt;&lt;![CDATA[Brooks &amp; Shields]]&gt;&lt;/item&gt;
    &lt;/data&gt;
    </example>
    </document>

    Fallback example (XInclude C.5)
    Note! Fallback support is not yet implemented

    >>> document = xinclude_loader("C5.xml")
    >>> ElementInclude.include(document, xinclude_loader)
    Traceback (most recent call last):
    IOError: resource not found
    >>> # print serialize(document) # C5
    """
1406
def xinclude_default():
    """
    Test ElementInclude.include() without an explicit loader.

    The "default.xml" sample references SIMPLE_XMLFILE by path, so the
    include has to be resolved by ElementInclude itself rather than by
    xinclude_loader.

    >>> from xml.etree import ElementInclude

    >>> document = xinclude_loader("default.xml")
    >>> ElementInclude.include(document)
    >>> print serialize(document) # default
    <document>
      <p>Example.</p>
      <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    </document>
    """
1423
1424#
1425# badly formatted xi:include tags
1426
# Malformed include documents used by xinclude_failures().
XINCLUDE_BAD = {}

# xi:include with a parse attribute that is neither "xml" nor "text".
XINCLUDE_BAD["B1.xml"] = """\
<?xml version='1.0'?>
<document xmlns:xi="http://www.w3.org/2001/XInclude">
  <p>120 Mz is adequate for an average home user.</p>
  <xi:include href="disclaimer.xml" parse="BAD_TYPE"/>
</document>
"""

# xi:fallback that is not a child of an xi:include element.
XINCLUDE_BAD["B2.xml"] = """\
<?xml version='1.0'?>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
    <xi:fallback></xi:fallback>
</div>
"""
1443
def xinclude_failures():
    r"""
    Test the FatalIncludeError cases of ElementInclude.include().

    Every case uses a loader that returns None for any resource.

    Test failure to locate included XML file.

    >>> from xml.etree import ElementInclude

    >>> def none_loader(href, parser, encoding=None):
    ...     return None

    >>> document = ET.XML(XINCLUDE["C1.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: cannot load 'disclaimer.xml' as 'xml'

    Test failure to locate included text file.

    >>> document = ET.XML(XINCLUDE["C2.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: cannot load 'count.txt' as 'text'

    Test bad parse type.

    >>> document = ET.XML(XINCLUDE_BAD["B1.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: unknown parse type in xi:include tag ('BAD_TYPE')

    Test xi:fallback outside xi:include.

    >>> document = ET.XML(XINCLUDE_BAD["B2.xml"])
    >>> ElementInclude.include(document, loader=none_loader)
    Traceback (most recent call last):
    FatalIncludeError: xi:fallback tag must be child of xi:include ('{http://www.w3.org/2001/XInclude}fallback')
    """
1479
1480# --------------------------------------------------------------------
1481# reported bugs
1482
def bug_xmltoolkit21():
    """

    marshaller gives obscure errors for non-string values

    The int 123 is used in turn as tag, text, tail, attribute key and
    attribute value; serializing must raise a TypeError that names the
    value and its type each time.

    >>> elem = ET.Element(123)
    >>> serialize(elem) # tag
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.text = 123
    >>> serialize(elem) # text
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.tail = 123
    >>> serialize(elem) # tail
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.set(123, "123")
    >>> serialize(elem) # attribute key
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)
    >>> elem = ET.Element("elem")
    >>> elem.set("123", 123)
    >>> serialize(elem) # attribute value
    Traceback (most recent call last):
    TypeError: cannot serialize 123 (type int)

    """
1514
def bug_xmltoolkit25():
    """

    typo in ElementTree.findtext

    Calls findtext() on an ElementTree wrapper (not on the element) for a
    direct child and for a nested path.

    >>> elem = ET.XML(SAMPLE_XML)
    >>> tree = ET.ElementTree(elem)
    >>> tree.findtext("tag")
    'text'
    >>> tree.findtext("section/tag")
    'subtext'

    """
1528
def bug_xmltoolkit28():
    """

    .//tag causes exceptions

    A descendant search must return an empty list for a tag that does
    not occur and the matching element for one that does.

    >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
    >>> summarize_list(tree.findall(".//thead"))
    []
    >>> summarize_list(tree.findall(".//tbody"))
    ['tbody']

    """
1541
def bug_xmltoolkitX1():
    """

    dump() doesn't flush the output buffer

    Text written to sys.stdout right after ET.dump() must appear after
    the dumped document, on the following line.

    >>> tree = ET.XML("<doc><table><tbody/></table></doc>")
    >>> ET.dump(tree); sys.stdout.write("tail")
    <doc><table><tbody /></table></doc>
    tail

    """
1553
def bug_xmltoolkit39():
    """

    non-ascii element and attribute names don't work

    Names containing a non-ASCII character are given both as iso-8859-1
    encoded source documents and as unicode strings passed to the API;
    in every case the UTF-8 serialization is checked.

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g />")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><tag \xe4ttr='v&#228;lue' />")
    >>> tree.attrib
    {u'\\xe4ttr': u'v\\xe4lue'}
    >>> ET.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    >>> tree = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><t\xe4g>text</t\xe4g>")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g>text</t\\xc3\\xa4g>'

    >>> tree = ET.Element(u"t\u00e4g")
    >>> ET.tostring(tree, "utf-8")
    '<t\\xc3\\xa4g />'

    >>> tree = ET.Element("tag")
    >>> tree.set(u"\u00e4ttr", u"v\u00e4lue")
    >>> ET.tostring(tree, "utf-8")
    '<tag \\xc3\\xa4ttr="v\\xc3\\xa4lue" />'

    """
1583
def bug_xmltoolkit54():
    """

    problems handling internally defined entities

    An entity declared in the internal DTD subset as a character
    reference must be expanded to that character in the parsed text.

    >>> e = ET.XML("<!DOCTYPE doc [<!ENTITY ldots '&#x8230;'>]><doc>&ldots;</doc>")
    >>> serialize(e)
    '<doc>&#33328;</doc>'

    """
1594
def bug_xmltoolkit55():
    """

    make sure we're reporting the first error, not the last

    The document references three undefined entities; the ParseError
    must name the first one.

    >>> e = ET.XML("<!DOCTYPE doc SYSTEM 'doc.dtd'><doc>&ldots;&ndots;&rdots;</doc>")
    Traceback (most recent call last):
    ParseError: undefined entity &ldots;: line 1, column 36

    """
1605
class ExceptionFile:
    """File-like stub whose read() always fails; see xmltoolkit60()."""

    def read(self, x):
        """Raise IOError whatever size *x* is requested."""
        raise IOError()
1609
def xmltoolkit60():
    """

    Handle crash in stream source.

    The IOError raised by ExceptionFile.read() must propagate out of
    ET.parse().
    >>> tree = ET.parse(ExceptionFile())
    Traceback (most recent call last):
    IOError

    """
1619
# Input for xmltoolkit62(): the body uses &lsquo; and &rsquo;, which are
# not declared in this text (the doctype only points at an external DTD).
XMLTOOLKIT62_DOC = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE patent-application-publication SYSTEM "pap-v15-2001-01-31.dtd" []>
<patent-application-publication>
<subdoc-abstract>
<paragraph id="A-0001" lvl="0">A new cultivar of Begonia plant named &lsquo;BCT9801BEG&rsquo;.</paragraph>
</subdoc-abstract>
</patent-application-publication>"""


def xmltoolkit62():
    """

    Don't crash when using custom entities.

    Registers replacements for lsquo and rsquo on the parser, feeds
    XMLTOOLKIT62_DOC and returns the text of its paragraph element.

    >>> xmltoolkit62()
    u'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.'

    """
    ENTITIES = {u'rsquo': u'\u2019', u'lsquo': u'\u2018'}
    # Use XMLParser, as the other tests in this file do; XMLTreeBuilder is
    # only a deprecated alias for it and is missing from newer versions.
    parser = ET.XMLParser()
    parser.entity.update(ENTITIES)
    parser.feed(XMLTOOLKIT62_DOC)
    t = parser.close()
    return t.find('.//paragraph').text
1644
def xmltoolkit63():
    """

    Check reference leak.

    Drives a fresh TreeBuilder through a single start/data/end cycle and
    returns nothing; the examples below call it repeatedly and require
    the reference count of None to stay unchanged.
    >>> xmltoolkit63()
    >>> count = sys.getrefcount(None)
    >>> for i in range(1000):
    ...     xmltoolkit63()
    >>> sys.getrefcount(None) - count
    0

    """
    builder = ET.TreeBuilder()
    builder.start("tag", {})
    builder.data("text")
    builder.end("tag")
1661
1662# --------------------------------------------------------------------
1663
1664
def bug_200708_newline():
    r"""

    Preserve newlines in attributes.

    Newlines in an attribute value are written as &#10; character
    references, so the value survives a serialize/parse round trip.

    >>> e = ET.Element('SomeTag', text="def _f():\n  return 3\n")
    >>> ET.tostring(e)
    '<SomeTag text="def _f():&#10;  return 3&#10;" />'
    >>> ET.XML(ET.tostring(e)).get("text")
    'def _f():\n  return 3\n'
    >>> ET.tostring(ET.XML(ET.tostring(e)))
    '<SomeTag text="def _f():&#10;  return 3&#10;" />'

    """
1679
def bug_200708_close():
    """

    XMLParser.close() must return the root element, both with the
    default builder and with a custom target whose close() supplies it.

    Test default builder.
    >>> parser = ET.XMLParser() # default
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    Test custom builder.
    >>> class EchoTarget:
    ...     def close(self):
    ...         return ET.Element("element") # simulate root
    >>> parser = ET.XMLParser(EchoTarget())
    >>> parser.feed("<element>some text</element>")
    >>> summarize(parser.close())
    'element'

    """
1699
def bug_200709_default_namespace():
    """

    Test the default_namespace serialization option.

    Names in the default namespace are written without a prefix, names
    in other namespaces get a generated prefix, and an unqualified name
    in the tree makes serialization raise ValueError.

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> serialize(e, default_namespace="default") # 1
    '<elem xmlns="default"><elem /></elem>'

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "{not-default}elem")
    >>> serialize(e, default_namespace="default") # 2
    '<elem xmlns="default" xmlns:ns1="not-default"><elem /><ns1:elem /></elem>'

    >>> e = ET.Element("{default}elem")
    >>> s = ET.SubElement(e, "{default}elem")
    >>> s = ET.SubElement(e, "elem") # unprefixed name
    >>> serialize(e, default_namespace="default") # 3
    Traceback (most recent call last):
    ValueError: cannot use non-qualified names with default_namespace option

    """
1722
def bug_200709_register_namespace():
    """

    Test ET.register_namespace(): once a prefix is registered for a URI,
    serialization uses it instead of a generated one.

    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<ns0:title xmlns:ns0="http://namespace.invalid/does/not/exist/" />'
    >>> ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
    >>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
    '<foo:title xmlns:foo="http://namespace.invalid/does/not/exist/" />'

    And the Dublin Core namespace is in the default list:

    >>> ET.tostring(ET.Element("{http://purl.org/dc/elements/1.1/}title"))
    '<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" />'

    """
1738
def bug_200709_element_comment():
    """

    Not sure if this can be fixed, really (since the serializer needs
    ET.Comment, not cET.comment).

    Checks that an appended comment or processing instruction carries
    the corresponding factory function (ET.Comment / ET.PI) as its tag.

    >>> a = ET.Element('a')
    >>> a.append(ET.Comment('foo'))
    >>> a[0].tag == ET.Comment
    True

    >>> a = ET.Element('a')
    >>> a.append(ET.PI('foo'))
    >>> a[0].tag == ET.PI
    True

    """
1756
def bug_200709_element_insert():
    """

    Test Element.insert() with index 0 and with a negative index; the
    negative index places the element before the last child.

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> c = ET.SubElement(a, 'c')
    >>> d = ET.Element('d')
    >>> a.insert(0, d)
    >>> summarize_list(a)
    ['d', 'b', 'c']
    >>> a.insert(-1, d)
    >>> summarize_list(a)
    ['d', 'b', 'd', 'c']

    """
1772
def bug_200709_iter_comment():
    """

    Element.iter(ET.Comment) must find a comment nested below a
    subelement.

    >>> a = ET.Element('a')
    >>> b = ET.SubElement(a, 'b')
    >>> comment_b = ET.Comment("TEST-b")
    >>> b.append(comment_b)
    >>> summarize_list(a.iter(ET.Comment))
    ['<Comment>']

    """
1784
def bug_18347():
    """

    The case of a mixed-case tag name must be preserved by both the
    default and the "html" serialization methods.

    >>> e = ET.XML('<html><CamelCase>text</CamelCase></html>')
    >>> serialize(e)
    '<html><CamelCase>text</CamelCase></html>'
    >>> serialize(e, method="html")
    '<html><CamelCase>text</CamelCase></html>'
    """
1794
1795# --------------------------------------------------------------------
1796# reported on bugs.python.org
1797
def bug_1534630():
    """

    TreeBuilder.data() called before any element has been started must
    not break the builder; the text does not appear in the result.

    >>> bob = ET.TreeBuilder()
    >>> e = bob.data("data")
    >>> e = bob.start("tag", {})
    >>> e = bob.end("tag")
    >>> e = bob.close()
    >>> serialize(e)
    '<tag />'

    """
1810
def check_issue6233():
    """

    Non-ASCII text parsed from utf-8 or iso-8859-1 input must be written
    as a numeric character reference when serializing to ascii.

    >>> e = ET.XML("<?xml version='1.0' encoding='utf-8'?><body>t\\xc3\\xa3g</body>")
    >>> ET.tostring(e, 'ascii')
    "<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"
    >>> e = ET.XML("<?xml version='1.0' encoding='iso-8859-1'?><body>t\\xe3g</body>")
    >>> ET.tostring(e, 'ascii')
    "<?xml version='1.0' encoding='ascii'?>\\n<body>t&#227;g</body>"

    """
1822
def check_issue3151():
    """

    A namespace URI that itself contains braces must be kept intact in
    the element tag and in the serialized xmlns declaration.

    >>> e = ET.XML('<prefix:localname xmlns:prefix="${stuff}"/>')
    >>> e.tag
    '{${stuff}}localname'
    >>> t = ET.ElementTree(e)
    >>> ET.tostring(e)
    '<ns0:localname xmlns:ns0="${stuff}" />'

    """
1834
def check_issue6565():
    """

    Assigning to the full slice of an element must replace its children,
    also when the new list is longer than the old one.

    >>> elem = ET.XML("<body><tag/></body>")
    >>> summarize_list(elem)
    ['tag']
    >>> newelem = ET.XML(SAMPLE_XML)
    >>> elem[:] = newelem[:]
    >>> summarize_list(elem)
    ['tag', 'tag', 'section']

    """
1847
def check_html_empty_elems_serialization(self):
    # issue 15970
    # from http://www.w3.org/TR/html401/index/elements.html
    # NOTE(review): the "self" parameter is unused and nothing in view calls
    # this function; it looks like a leftover from a method-style test --
    # confirm before changing the signature.
    """

    With method='html', the HTML 4.01 empty elements must be written as
    a bare start tag, in upper and in lower case, whether the source
    spelled them as self-closing or as a start/end tag pair.

    >>> empty_elems = ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
    ...                'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']
    >>> elems = ''.join('<%s />' % elem for elem in empty_elems)
    >>> serialize(ET.XML('<html>%s</html>' % elems), method='html')
    '<html><AREA><BASE><BASEFONT><BR><COL><FRAME><HR><IMG><INPUT><ISINDEX><LINK><META><PARAM></html>'
    >>> serialize(ET.XML('<html>%s</html>' % elems.lower()), method='html')
    '<html><area><base><basefont><br><col><frame><hr><img><input><isindex><link><meta><param></html>'
    >>> elems = ''.join('<%s></%s>' % (elem, elem) for elem in empty_elems)
    >>> serialize(ET.XML('<html>%s</html>' % elems), method='html')
    '<html><AREA><BASE><BASEFONT><BR><COL><FRAME><HR><IMG><INPUT><ISINDEX><LINK><META><PARAM></html>'
    >>> serialize(ET.XML('<html>%s</html>' % elems.lower()), method='html')
    '<html><area><base><basefont><br><col><frame><hr><img><input><isindex><link><meta><param></html>'

    """
1867
1868# --------------------------------------------------------------------
1869
1870
class CleanContext(object):
    """Run a block against private copies of ElementTree's module state.

    While the context is active, the module-level namespace map and the
    ElementPath cache are replaced by copies, and the original objects
    are put back on exit.  The warnings checker created in __init__ is
    entered and exited together with the context.
    """
    checkwarnings = None

    def __init__(self, quiet=False):
        """Build the warnings checker; *quiet* is passed on to it."""
        if sys.flags.optimize >= 2:
            # under -OO, doctests cannot be run and therefore not all warnings
            # will be emitted
            quiet = True
        removal_notice = ("This method will be removed in future versions.  "
                          "Use .+ instead.")
        expected = [
            # Search behaviour is broken if search path starts with "/".
            ("This search is broken in 1.3 and earlier, and will be fixed "
             "in a future version.  If you rely on the current behaviour, "
             "change it to '.+'", FutureWarning),
            # Element.getchildren() and Element.getiterator() are deprecated.
            (removal_notice, DeprecationWarning),
            (removal_notice, PendingDeprecationWarning),
            # XMLParser.doctype() is deprecated.
            ("This method of XMLParser is deprecated.  Define doctype.. "
             "method on the TreeBuilder target.", DeprecationWarning),
        ]
        self.checkwarnings = test_support.check_warnings(*expected,
                                                         quiet=quiet)

    def __enter__(self):
        """Swap in copies of the namespace map and the path cache."""
        from xml.etree import ElementTree
        # Keep the live objects so that __exit__ can put them back.
        self._nsmap = ElementTree._namespace_map
        self._path_cache = ElementTree.ElementPath._cache
        # The tests work on copies (the path cache should be empty).
        ElementTree._namespace_map = self._nsmap.copy()
        ElementTree.ElementPath._cache = self._path_cache.copy()
        self.checkwarnings.__enter__()

    def __exit__(self, *args):
        """Put the saved objects back, then leave the warnings checker."""
        from xml.etree import ElementTree
        ElementTree._namespace_map = self._nsmap
        ElementTree.ElementPath._cache = self._path_cache
        self.checkwarnings.__exit__(*args)
1912
1913
def test_main(module_name='xml.etree.ElementTree'):
    """Run the doctests of test.test_xml_etree inside a CleanContext.

    *module_name* is the name the module-level ET symbol of
    test.test_xml_etree is expected to carry; this is asserted before
    and after the run.  Warning checks are quiet unless *module_name*
    is 'xml.etree.ElementTree'.
    """
    from test import test_xml_etree

    # The same doctests are used for both the Python and the C implementations
    assert test_xml_etree.ET.__name__ == module_name

    # XXX the C module should give the same warnings as the Python module
    other_implementation = (module_name != 'xml.etree.ElementTree')
    with CleanContext(quiet=other_implementation):
        test_support.run_doctest(test_xml_etree, verbosity=True)

    # The module should not be changed by the tests
    assert test_xml_etree.ET.__name__ == module_name
1928
1929if __name__ == '__main__':
1930    test_main()
1931