• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# XXX TypeErrors on calling handlers, or on bad return values from a
2# handler, are obscure and unhelpful.
3
4from io import BytesIO
5import os
6import platform
7import sys
8import sysconfig
9import unittest
10import traceback
11
12from xml.parsers import expat
13from xml.parsers.expat import errors
14
15from test.support import sortdict
16
17
class SetAttributeTest(unittest.TestCase):
    """Exercise attribute assignment and lookup on an expat parser object."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')

    def _check_boolean_attribute(self, attr):
        # The attribute must start out as False and must report every
        # assigned value as the corresponding bool singleton.
        self.assertIs(getattr(self.parser, attr), False)
        for value in (0, 1, 2, 0):
            setattr(self.parser, attr, value)
            self.assertIs(getattr(self.parser, attr), bool(value))

    def test_buffer_text(self):
        self._check_boolean_attribute('buffer_text')

    def test_namespace_prefixes(self):
        self._check_boolean_attribute('namespace_prefixes')

    def test_ordered_attributes(self):
        self._check_boolean_attribute('ordered_attributes')

    def test_specified_attributes(self):
        self._check_boolean_attribute('specified_attributes')

    def test_invalid_attributes(self):
        # returns_unicode can be neither written nor read.
        with self.assertRaises(AttributeError):
            self.parser.returns_unicode = 1
        with self.assertRaises(AttributeError):
            self.parser.returns_unicode

        # Issue #25019: a non-string attribute name must raise TypeError.
        bad_name = range(0xF)
        self.assertRaises(TypeError, setattr, self.parser, bad_name, 0)
        self.assertRaises(TypeError, self.parser.__setattr__, bad_name, 0)
        self.assertRaises(TypeError, getattr, self.parser, bad_name)
56
57
# XML document (declared as iso-8859-1, containing the raw byte 0xB5) that is
# fed to the parser by ParseTest and InterningTest.
data = (
    b'<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>\n'
    b'<?xml-stylesheet href="stylesheet.css"?>\n'
    b'<!-- comment data -->\n'
    b'<!DOCTYPE quotations SYSTEM "quotations.dtd" [\n'
    b'<!ELEMENT root ANY>\n'
    b'<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>\n'
    b'<!NOTATION notation SYSTEM "notation.jpeg">\n'
    b'<!ENTITY acirc "&#226;">\n'
    b'<!ENTITY external_entity SYSTEM "entity.file">\n'
    b'<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>\n'
    b'%unparsed_entity;\n'
    b']>\n'
    b'\n'
    b'<root attr1="value1" attr2="value2&#8000;">\n'
    b'<myns:subelement xmlns:myns="http://www.python.org/namespace">\n'
    b'     Contents of subelements\n'
    b'</myns:subelement>\n'
    b'<sub2><![CDATA[contents of CDATA section]]></sub2>\n'
    b'&external_entity;\n'
    b'&skipped_entity;\n'
    b'\xb5\n'
    b'</root>\n'
)
82
83
84# Produce UTF-8 output
class ParseTest(unittest.TestCase):
    """Parse the module-level ``data`` document and check the callbacks fired."""

    class Outputter:
        """Handler collection that records every callback in ``self.out``."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are not recorded.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking fails if the callback receives an unexpected
            # number of arguments.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' %(args,))

        def UnparsedEntityDeclHandler(self, *args):
            # Unpacking doubles as an argument-count check.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' %(args,))

        def NotStandaloneHandler(self):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # Unpacking doubles as an argument-count check; the context
            # (first argument) is left out of the recorded entry.
            context, base, sysId, pubId = args
            self.out.append('External entity ref: %s' %(args[1:],))
            return 1

        def StartDoctypeDeclHandler(self, *args):
            self.out.append(('Start doctype', args))
            return 1

        def EndDoctypeDeclHandler(self):
            self.out.append("End doctype")
            return 1

        def EntityDeclHandler(self, *args):
            self.out.append(('Entity declaration', args))
            return 1

        def XmlDeclHandler(self, *args):
            self.out.append(('XML declaration', args))
            return 1

        def ElementDeclHandler(self, *args):
            self.out.append(('Element declaration', args))
            return 1

        def AttlistDeclHandler(self, *args):
            self.out.append(('Attribute list declaration', args))
            return 1

        def SkippedEntityHandler(self, *args):
            self.out.append(("Skipped entity", args))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    # Parser attributes that _hookup_callbacks() assigns, in this order.
    handler_names = [
        'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
        'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
        'NotationDeclHandler', 'StartNamespaceDeclHandler',
        'EndNamespaceDeclHandler', 'CommentHandler',
        'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
        'DefaultHandlerExpand', 'NotStandaloneHandler',
        'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
        'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
        'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
        ]

    def _hookup_callbacks(self, parser, handler):
        """
        Set each of the callbacks defined on handler and named in
        self.handler_names on the given parser.
        """
        for name in self.handler_names:
            setattr(parser, name, getattr(handler, name))

    def _make_parser(self):
        """Return a ``(parser, outputter)`` pair with all callbacks hooked up."""
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)
        return parser, out

    def _verify_parse_output(self, operations):
        """
        Check that *operations* starts with the full expected sequence.

        Operations recorded after the expected ones are ignored, but every
        expected operation must be present.
        """
        expected_operations = [
            ('XML declaration', ('1.0', 'iso-8859-1', 0)),
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Not standalone",
            ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
            ('Element declaration', ('root', (2, 0, None, ()))),
            ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
                1)),
            ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
                0)),
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
            ('Entity declaration', ('external_entity', 0, None, None,
                'entity.file', None, None)),
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Not standalone",
            "End doctype",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            ('Skipped entity', ('skipped_entity', 0)),
            "Character data: '\xb5'",
            "End element: 'root'",
        ]
        # A pairwise zip() would stop at the shorter sequence, so a parse that
        # produced too few operations would pass unnoticed.  Comparing a
        # prefix slice fails when operations are missing.
        self.assertEqual(operations[:len(expected_operations)],
                         expected_operations)

    def test_parse_bytes(self):
        parser, out = self._make_parser()

        parser.Parse(data, True)

        self._verify_parse_output(out.out)
        # Issue #6697.
        self.assertRaises(AttributeError, getattr, parser, '\uD800')

    def test_parse_str(self):
        parser, out = self._make_parser()

        parser.Parse(data.decode('iso-8859-1'), True)

        self._verify_parse_output(out.out)

    def test_parse_file(self):
        # Try parsing a file
        parser, out = self._make_parser()
        file = BytesIO(data)

        parser.ParseFile(file)

        self._verify_parse_output(out.out)

    def test_parse_again(self):
        parser = expat.ParserCreate()
        file = BytesIO(data)
        parser.ParseFile(file)
        # Issue 6676: ensure a meaningful exception is raised when attempting
        # to parse more than one XML document per xmlparser instance,
        # a limitation of the Expat library.
        with self.assertRaises(expat.error) as cm:
            parser.ParseFile(file)
        self.assertEqual(expat.ErrorString(cm.exception.code),
                          expat.errors.XML_ERROR_FINISHED)
274
class NamespaceSeparatorTest(unittest.TestCase):
    """Check which namespace_separator values ParserCreate() accepts."""

    def test_legal(self):
        # None of these separator choices may raise.
        expat.ParserCreate()
        for separator in (None, ' '):
            expat.ParserCreate(namespace_separator=separator)

    def test_illegal(self):
        # A separator of the wrong type is a TypeError ...
        with self.assertRaises(TypeError) as cm:
            expat.ParserCreate(namespace_separator=42)
        self.assertEqual(str(cm.exception),
            "ParserCreate() argument 'namespace_separator' must be str or None, not int")

        # ... and one longer than a single character is a ValueError.
        with self.assertRaises(ValueError) as cm:
            expat.ParserCreate(namespace_separator='too long')
        self.assertEqual(str(cm.exception),
            'namespace_separator must be at most one character, omitted, or None')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        empty_separator = ''  # too short
        expat.ParserCreate(namespace_separator=empty_separator)
308
309
class InterningTest(unittest.TestCase):
    """Tests around the parser's interning of element names."""

    def test(self):
        # Test the interning machinery.
        parser = expat.ParserCreate()
        names = []

        def collector(name, *args):
            names.append(name)

        parser.StartElementHandler = collector
        parser.EndElementHandler = collector
        parser.Parse(b"<e> <e/> <e></e> </e>", True)
        # Check the count before indexing, so that a parse producing no
        # events is reported as a test failure and not as an IndexError.
        self.assertEqual(len(names), 6)
        tag = names[0]
        for entry in names:
            # Every collected name should be the very same string object.
            self.assertIs(entry, tag)

    def test_issue9402(self):
        # create an ExternalEntityParserCreate with buffer text
        class ExternalOutputter:
            def __init__(self, parser):
                self.parser = parser
                self.parser_result = None

            def ExternalEntityRefHandler(self, context, base, sysId, pubId):
                # Parse an empty document with a sub-parser and keep the
                # result so the test can inspect it afterwards.
                external_parser = self.parser.ExternalEntityParserCreate("")
                self.parser_result = external_parser.Parse(b"", True)
                return 1

        parser = expat.ParserCreate(namespace_separator='!')
        parser.buffer_text = 1
        out = ExternalOutputter(parser)
        parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
        parser.Parse(data, True)
        self.assertEqual(out.parser_result, 1)
344
345
class BufferTextTest(unittest.TestCase):
    """Check how the buffer_text setting affects character data delivery.

    Every handler defined on this class appends a marker to ``self.stuff``;
    the tests compare that list with the expected event sequence.
    """

    def setUp(self):
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        """Assert that the recorded events equal *expected*.

        *label* is included at the start of the failure message.
        """
        # Build a real list: formatting a bare map() object would only print
        # "<map object at 0x...>" instead of the expected values.
        self.assertEqual(self.stuff, expected,
                "%s\nstuff    = %r\nexpected = %r"
                % (label, self.stuff, [str(item) for item in expected]))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute toggles buffering from inside a callback.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install the named handler methods of this test case on the parser."""
        # The default is an immutable tuple so that no mutable object is
        # shared between calls.
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", True)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        self.parser.Parse(b"<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", True)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.CharacterDataHandler = None
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", True)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", True)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", True)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")
439
440
441# Test handling of exception from callback:
442class HandlerExceptionTest(unittest.TestCase):
443    def StartElementHandler(self, name, attrs):
444        raise RuntimeError(name)
445
446    def check_traceback_entry(self, entry, filename, funcname):
447        self.assertEqual(os.path.basename(entry[0]), filename)
448        self.assertEqual(entry[2], funcname)
449
450    def test_exception(self):
451        parser = expat.ParserCreate()
452        parser.StartElementHandler = self.StartElementHandler
453        try:
454            parser.Parse(b"<a><b><c/></b></a>", True)
455            self.fail()
456        except RuntimeError as e:
457            self.assertEqual(e.args[0], 'a',
458                             "Expected RuntimeError for element 'a', but" + \
459                             " found %r" % e.args[0])
460            # Check that the traceback contains the relevant line in pyexpat.c
461            entries = traceback.extract_tb(e.__traceback__)
462            self.assertEqual(len(entries), 3)
463            self.check_traceback_entry(entries[0],
464                                       "test_pyexpat.py", "test_exception")
465            self.check_traceback_entry(entries[1],
466                                       "pyexpat.c", "StartElement")
467            self.check_traceback_entry(entries[2],
468                                       "test_pyexpat.py", "StartElementHandler")
469            if sysconfig.is_python_build() and not (sys.platform == 'win32' and platform.machine() == 'ARM'):
470                self.assertIn('call_with_frame("StartElement"', entries[1][3])
471
472
473# Test Current* members:
class PositionTest(unittest.TestCase):
    """Check the parser's Current* position attributes inside callbacks."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the parser's current position with the next expected one.

        *event* is the one-letter tag ('s' or 'e') stored as the first item
        of the tuples in ``self.expected_list``.
        """
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertTrue(self.upto < len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        # Report the expected value first and the observed one second, in
        # the order the message text announces them.
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' % (expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, True)
        # Without this check the test would pass even if some (or all) of
        # the expected callbacks never fired.
        self.assertEqual(self.upto, len(self.expected_list),
                         'too few parser events')
503
504
class sf1296433Test(unittest.TestCase):
    """Regression test for http://python.org/sf/1296433."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Expect exactly the exception raised by the handler; accepting any
        # Exception would also let an unrelated parser error pass the test.
        with self.assertRaises(SpecificException):
            parser.Parse(xml.encode('iso8859'))
523
class ChardataBufferTest(unittest.TestCase):
    """
    test setting of chardata buffer size
    """

    def _counting_parser(self):
        # New parser whose character data callback bumps self.n.
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        return parser

    def test_1025_bytes(self):
        calls = self.small_buffer_test(1025)
        self.assertEqual(calls, 2)

    def test_1000_bytes(self):
        calls = self.small_buffer_test(1000)
        self.assertEqual(calls, 1)

    def test_wrong_size(self):
        parser = expat.ParserCreate()
        parser.buffer_text = 1
        # Non-positive sizes are rejected.
        for bad_size in (-1, 0):
            with self.assertRaises(ValueError):
                parser.buffer_size = bad_size
        with self.assertRaises((ValueError, OverflowError)):
            parser.buffer_size = sys.maxsize + 1
        with self.assertRaises(TypeError):
            parser.buffer_size = 512.0

    def test_unchanged_size(self):
        first_chunk = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
        second_chunk = b'a'*512 + b'</s>'
        parser = self._counting_parser()
        parser.buffer_size = 512
        parser.buffer_text = 1

        # Feed 512 bytes of character data: the handler should be called
        # once.
        self.n = 0
        parser.Parse(first_chunk)
        self.assertEqual(self.n, 1)

        # Reassign to buffer_size, but assign the same size.
        same_size = parser.buffer_size
        parser.buffer_size = same_size
        self.assertEqual(self.n, 1)

        # Try parsing rest of the document
        parser.Parse(second_chunk)
        self.assertEqual(self.n, 2)

    def test_disabling_buffer(self):
        head = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
        middle = b'b' * 1024
        tail = b'c' * 1024 + b'</a>'
        parser = self._counting_parser()
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # Parse one chunk of XML
        self.n = 0
        parser.Parse(head, False)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Turn off buffering and parse the next chunk.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for _ in range(10):
            parser.Parse(middle, False)
        self.assertEqual(self.n, 11)

        # Turn buffering back on for the final chunk.
        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(tail, True)
        self.assertEqual(self.n, 12)

    def counting_handler(self, text):
        self.n += 1

    def small_buffer_test(self, buffer_len):
        # Parse *buffer_len* bytes of character data with a 1024-byte buffer
        # and return how often the character data handler was called.
        document = (b"<?xml version='1.0' encoding='iso8859'?><s>"
                    + b'a' * buffer_len
                    + b'</s>')
        parser = self._counting_parser()
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(document)
        return self.n

    def test_change_size_1(self):
        first_chunk = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
        second_chunk = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = self._counting_parser()
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        self.n = 0
        parser.Parse(first_chunk, False)
        # Double the buffer between the two chunks.
        parser.buffer_size = parser.buffer_size * 2
        self.assertEqual(parser.buffer_size, 2048)
        parser.Parse(second_chunk, True)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        first_chunk = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
        second_chunk = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = self._counting_parser()
        parser.buffer_text = 1
        parser.buffer_size = 2048
        self.assertEqual(parser.buffer_size, 2048)

        self.n = 0
        parser.Parse(first_chunk, False)
        # Halve the buffer between the two chunks.
        parser.buffer_size //= 2
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(second_chunk, True)
        self.assertEqual(self.n, 4)
645
class MalformedInputTest(unittest.TestCase):
    """Check the errors reported for input that is not well-formed."""

    def test1(self):
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(b"\0\r\n", True)
        self.assertEqual(str(cm.exception),
                         'unclosed token: line 2, column 0')

    def test2(self):
        # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
        document = b"<?xml version\xc2\x85='1.0'?>\r\n"
        parser = expat.ParserCreate()
        self.assertRaisesRegex(
            expat.ExpatError,
            r'XML declaration not well-formed: line 1, column \d+',
            parser.Parse, document, True)
663
class ErrorMessageTest(unittest.TestCase):
    """Check the error code/message tables of xml.parsers.expat.errors."""

    def test_codes(self):
        # verify mapping of errors.codes and errors.messages
        message = errors.XML_ERROR_SYNTAX
        code = errors.codes[message]
        self.assertEqual(message, errors.messages[code])

    def test_expaterror(self):
        # A lone '<' is reported as an unclosed token.
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(b'<', True)
        self.assertEqual(cm.exception.code,
                         errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])
679
680
class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """

    def _entity_refs(self, document, *foreign_dtd_args):
        # Parse *document* on a fresh parser after calling
        # UseForeignDTD(*foreign_dtd_args), and return the
        # (public_id, system_id) pairs passed to ExternalEntityRefHandler.
        seen = []

        def resolve_entity(context, base, system_id, public_id):
            seen.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(*foreign_dtd_args)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(document)
        return seen

    def test_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document without an external
        entity reference is parsed, ExternalEntityRefHandler is first called
        with None for the public and system ids.
        """
        document = b"<?xml version='1.0'?><element/>"
        self.assertEqual(self._entity_refs(document, True), [(None, None)])

        # test UseForeignDTD() is equal to UseForeignDTD(True)
        self.assertEqual(self._entity_refs(document), [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document with an external
        entity reference is parsed, ExternalEntityRefHandler is called with
        the public and system ids from the document.
        """
        document = (
            b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        self.assertEqual(self._entity_refs(document, True), [("bar", "baz")])
731
732
# Run the whole suite when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
735