# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.

from io import BytesIO
import os
import platform
import sys
import sysconfig
import unittest
import traceback

from xml.parsers import expat
from xml.parsers.expat import errors

from test.support import sortdict


class SetAttributeTest(unittest.TestCase):
    """Check the boolean configuration attributes of a parser object."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')

    def test_buffer_text(self):
        self.assertIs(self.parser.buffer_text, False)
        # Any integer may be assigned; it is read back as a real bool.
        for x in 0, 1, 2, 0:
            self.parser.buffer_text = x
            self.assertIs(self.parser.buffer_text, bool(x))

    def test_namespace_prefixes(self):
        self.assertIs(self.parser.namespace_prefixes, False)
        for x in 0, 1, 2, 0:
            self.parser.namespace_prefixes = x
            self.assertIs(self.parser.namespace_prefixes, bool(x))

    def test_ordered_attributes(self):
        self.assertIs(self.parser.ordered_attributes, False)
        for x in 0, 1, 2, 0:
            self.parser.ordered_attributes = x
            self.assertIs(self.parser.ordered_attributes, bool(x))

    def test_specified_attributes(self):
        self.assertIs(self.parser.specified_attributes, False)
        for x in 0, 1, 2, 0:
            self.parser.specified_attributes = x
            self.assertIs(self.parser.specified_attributes, bool(x))

    def test_invalid_attributes(self):
        with self.assertRaises(AttributeError):
            self.parser.returns_unicode = 1
        with self.assertRaises(AttributeError):
            self.parser.returns_unicode

        # Issue #25019
        self.assertRaises(TypeError, setattr, self.parser, range(0xF), 0)
        self.assertRaises(TypeError, self.parser.__setattr__, range(0xF), 0)
        self.assertRaises(TypeError, getattr, self.parser, range(0xF))


# Sample document shared by the parsing tests.  Non-ASCII characters are
# written as numeric character references (&#226; is U+00E2, &#8000; is
# U+1F40) because a bytes literal may only contain ASCII characters; the
# expected values in ParseTest._verify_parse_output are '\xe2' and
# 'value2\u1f40' respectively.
data = b'''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!ATTLIST root attr1 CDATA #REQUIRED attr2 CDATA #IMPLIED>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
&skipped_entity;
\xb5
</root>
'''


# Produce UTF-8 output
class ParseTest(unittest.TestCase):
    """Parse the sample document and compare the recorded handler calls."""

    class Outputter:
        """Handler collection that appends a record of each call to self.out."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are not recorded.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' %(args,))

        def UnparsedEntityDeclHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' %(args,))

        def NotStandaloneHandler(self):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # The unpacking doubles as a check on the number of arguments;
            # the context (first argument) is left out of the record.
            context, base, sysId, pubId = args
            self.out.append('External entity ref: %s' %(args[1:],))
            return 1

        def StartDoctypeDeclHandler(self, *args):
            self.out.append(('Start doctype', args))
            return 1

        def EndDoctypeDeclHandler(self):
            self.out.append("End doctype")
            return 1

        def EntityDeclHandler(self, *args):
            self.out.append(('Entity declaration', args))
            return 1

        def XmlDeclHandler(self, *args):
            self.out.append(('XML declaration', args))
            return 1

        def ElementDeclHandler(self, *args):
            self.out.append(('Element declaration', args))
            return 1

        def AttlistDeclHandler(self, *args):
            self.out.append(('Attribute list declaration', args))
            return 1

        def SkippedEntityHandler(self, *args):
            self.out.append(("Skipped entity", args))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    handler_names = [
        'StartElementHandler', 'EndElementHandler', 'CharacterDataHandler',
        'ProcessingInstructionHandler', 'UnparsedEntityDeclHandler',
        'NotationDeclHandler', 'StartNamespaceDeclHandler',
        'EndNamespaceDeclHandler', 'CommentHandler',
        'StartCdataSectionHandler', 'EndCdataSectionHandler', 'DefaultHandler',
        'DefaultHandlerExpand', 'NotStandaloneHandler',
        'ExternalEntityRefHandler', 'StartDoctypeDeclHandler',
        'EndDoctypeDeclHandler', 'EntityDeclHandler', 'XmlDeclHandler',
        'ElementDeclHandler', 'AttlistDeclHandler', 'SkippedEntityHandler',
        ]

    def _hookup_callbacks(self, parser, handler):
        """
        Set each of the callbacks defined on handler and named in
        self.handler_names on the given parser.
        """
        for name in self.handler_names:
            setattr(parser, name, getattr(handler, name))

    def _verify_parse_output(self, operations):
        """Compare recorded operations pairwise with the expected sequence."""
        expected_operations = [
            ('XML declaration', ('1.0', 'iso-8859-1', 0)),
            'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
            "Comment: ' comment data '",
            "Not standalone",
            ("Start doctype", ('quotations', 'quotations.dtd', None, 1)),
            ('Element declaration', ('root', (2, 0, None, ()))),
            ('Attribute list declaration', ('root', 'attr1', 'CDATA', None,
                1)),
            ('Attribute list declaration', ('root', 'attr2', 'CDATA', None,
                0)),
            "Notation declared: ('notation', None, 'notation.jpeg', None)",
            ('Entity declaration', ('acirc', 0, '\xe2', None, None, None, None)),
            ('Entity declaration', ('external_entity', 0, None, None,
                'entity.file', None, None)),
            "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
            "Not standalone",
            "End doctype",
            "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\u1f40'}",
            "NS decl: 'myns' 'http://www.python.org/namespace'",
            "Start element: 'http://www.python.org/namespace!subelement' {}",
            "Character data: 'Contents of subelements'",
            "End element: 'http://www.python.org/namespace!subelement'",
            "End of NS decl: 'myns'",
            "Start element: 'sub2' {}",
            'Start of CDATA section',
            "Character data: 'contents of CDATA section'",
            'End of CDATA section',
            "End element: 'sub2'",
            "External entity ref: (None, 'entity.file', None)",
            ('Skipped entity', ('skipped_entity', 0)),
            "Character data: '\xb5'",
            "End element: 'root'",
        ]
        # NOTE: zip() stops at the shorter sequence, so only the common
        # prefix of the two lists is compared.
        for operation, expected_operation in zip(operations, expected_operations):
            self.assertEqual(operation, expected_operation)

    def test_parse_bytes(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)

        parser.Parse(data, True)

        operations = out.out
        self._verify_parse_output(operations)
        # Issue #6697.
        self.assertRaises(AttributeError, getattr, parser, '\uD800')

    def test_parse_str(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)

        parser.Parse(data.decode('iso-8859-1'), True)

        operations = out.out
        self._verify_parse_output(operations)

    def test_parse_file(self):
        # Try parsing a file
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._hookup_callbacks(parser, out)
        file = BytesIO(data)

        parser.ParseFile(file)

        operations = out.out
        self._verify_parse_output(operations)

    def test_parse_again(self):
        parser = expat.ParserCreate()
        file = BytesIO(data)
        parser.ParseFile(file)
        # Issue 6676: ensure a meaningful exception is raised when attempting
        # to parse more than one XML document per xmlparser instance,
        # a limitation of the Expat library.
        with self.assertRaises(expat.error) as cm:
            parser.ParseFile(file)
        self.assertEqual(expat.ErrorString(cm.exception.code),
                          expat.errors.XML_ERROR_FINISHED)


class NamespaceSeparatorTest(unittest.TestCase):
    """Check validation of ParserCreate()'s namespace_separator argument."""

    def test_legal(self):
        # Tests that make sure we get errors when the namespace_separator value
        # is illegal, and that we don't for good values:
        expat.ParserCreate()
        expat.ParserCreate(namespace_separator=None)
        expat.ParserCreate(namespace_separator=' ')

    def test_illegal(self):
        with self.assertRaises(TypeError) as cm:
            expat.ParserCreate(namespace_separator=42)
        self.assertEqual(str(cm.exception),
            "ParserCreate() argument 'namespace_separator' must be str or None, not int")

        with self.assertRaises(ValueError) as cm:
            expat.ParserCreate(namespace_separator='too long')
        self.assertEqual(str(cm.exception),
            'namespace_separator must be at most one character, omitted, or None')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        expat.ParserCreate(namespace_separator='') # too short


class InterningTest(unittest.TestCase):
    """Check that repeated element names are delivered as one string object."""

    def test(self):
        # Test the interning machinery.
        p = expat.ParserCreate()
        L = []
        def collector(name, *args):
            L.append(name)
        p.StartElementHandler = collector
        p.EndElementHandler = collector
        p.Parse(b"<e> <e/> <e></e> </e>", True)
        tag = L[0]
        self.assertEqual(len(L), 6)
        for entry in L:
            # L should have the same string repeated over and over.
            self.assertIs(tag, entry)

    def test_issue9402(self):
        # create an ExternalEntityParserCreate with buffer text
        class ExternalOutputter:
            def __init__(self, parser):
                self.parser = parser
                self.parser_result = None

            def ExternalEntityRefHandler(self, context, base, sysId, pubId):
                external_parser = self.parser.ExternalEntityParserCreate("")
                self.parser_result = external_parser.Parse(b"", True)
                return 1

        parser = expat.ParserCreate(namespace_separator='!')
        parser.buffer_text = 1
        out = ExternalOutputter(parser)
        parser.ExternalEntityRefHandler = out.ExternalEntityRefHandler
        parser.Parse(data, True)
        self.assertEqual(out.parser_result, 1)


class BufferTextTest(unittest.TestCase):
    """Check how buffer_text affects the chunks given to CharacterDataHandler."""

    def setUp(self):
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        """Assert that the recorded events equal *expected*."""
        # map() returns a lazy iterator; materialize it so the failure
        # message shows the expected items rather than "<map object ...>".
        self.assertEqual(self.stuff, expected,
                "%s\nstuff = %r\nexpected = %r"
                % (label, self.stuff, list(map(str, expected))))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A "buffer-text" attribute toggles buffering from inside a callback.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        """Install the named methods of this test case on self.parser."""
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", True)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        # Entity and character references must be merged with the
        # surrounding text into a single buffered chunk.
        self.parser.Parse(b"<a>1<b/>&lt;2&gt;<c/>&#32;\n&#x20;3</a>", True)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.CharacterDataHandler = None
        self.parser.Parse(b"<a>1<b/>2<c/>3</a>", True)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse(b"<a>1<b></b>2<c/>3</a>", True)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>345</a> ", True)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", True)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")


# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    """Check that an exception raised in a handler propagates out of Parse()."""

    def StartElementHandler(self, name, attrs):
        raise RuntimeError(name)

    def check_traceback_entry(self, entry, filename, funcname):
        """Assert the file base name and function name of a traceback entry."""
        self.assertEqual(os.path.basename(entry[0]), filename)
        self.assertEqual(entry[2], funcname)

    def test_exception(self):
        # Derive the expected file name from this module instead of
        # hard-coding it, so the check survives the file being renamed.
        this_file = os.path.basename(__file__)
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        # assertRaises() is deliberately not used here: its context manager
        # strips the traceback from the exception it stores, and the
        # traceback is what is inspected below.
        try:
            parser.Parse(b"<a><b><c/></b></a>", True)
            self.fail()
        except RuntimeError as e:
            self.assertEqual(e.args[0], 'a',
                             "Expected RuntimeError for element 'a', but"
                             " found %r" % e.args[0])
            # Check that the traceback contains the relevant line in pyexpat.c
            entries = traceback.extract_tb(e.__traceback__)
            self.assertEqual(len(entries), 3)
            self.check_traceback_entry(entries[0],
                                       this_file, "test_exception")
            self.check_traceback_entry(entries[1],
                                       "pyexpat.c", "StartElement")
            self.check_traceback_entry(entries[2],
                                       this_file, "StartElementHandler")
            if sysconfig.is_python_build() and not (sys.platform == 'win32' and platform.machine() == 'ARM'):
                self.assertIn('call_with_frame("StartElement"', entries[1][3])


# Test Current* members:
class PositionTest(unittest.TestCase):
    """Check CurrentByteIndex/LineNumber/ColumnNumber inside element handlers."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        """Compare the parser's current position with the next expected one."""
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertTrue(self.upto < len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' %(expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        # Each tuple is (event, byte index, line number, column number).
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        # The indentation is significant: <b> is at column 1, <c/> at column 2.
        xml = b'<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, True)


class sf1296433Test(unittest.TestCase):
    """Regression test for http://python.org/sf/1296433."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        # Expect exactly the handler's exception; a bare Exception would
        # also accept an unrelated parser error.
        self.assertRaises(SpecificException, parser.Parse,
                          xml.encode('iso8859'))


class ChardataBufferTest(unittest.TestCase):
    """
    test setting of chardata buffer size
    """

    def test_1025_bytes(self):
        self.assertEqual(self.small_buffer_test(1025), 2)

    def test_1000_bytes(self):
        self.assertEqual(self.small_buffer_test(1000), 1)

    def test_wrong_size(self):
        parser = expat.ParserCreate()
        parser.buffer_text = 1
        with self.assertRaises(ValueError):
            parser.buffer_size = -1
        with self.assertRaises(ValueError):
            parser.buffer_size = 0
        with self.assertRaises((ValueError, OverflowError)):
            parser.buffer_size = sys.maxsize + 1
        with self.assertRaises(TypeError):
            parser.buffer_size = 512.0

    def test_unchanged_size(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * 512
        xml2 = b'a'*512 + b'</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 512
        parser.buffer_text = 1

        # Feed 512 bytes of character data: the handler should be called
        # once.
        self.n = 0
        parser.Parse(xml1)
        self.assertEqual(self.n, 1)

        # Reassign to buffer_size, but assign the same size.
        parser.buffer_size = parser.buffer_size
        self.assertEqual(self.n, 1)

        # Try parsing rest of the document
        parser.Parse(xml2)
        self.assertEqual(self.n, 2)


    def test_disabling_buffer(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
        xml2 = b'b' * 1024
        xml3 = b'c' * 1024 + b'</a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # Parse one chunk of XML
        self.n = 0
        parser.Parse(xml1, False)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Turn off buffering and parse the next chunk.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for i in range(10):
            parser.Parse(xml2, False)
        self.assertEqual(self.n, 11)

        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml3, True)
        self.assertEqual(self.n, 12)

    def counting_handler(self, text):
        """Character data handler that only counts its invocations."""
        self.n += 1

    def small_buffer_test(self, buffer_len):
        """Parse *buffer_len* bytes of text with a 1024-byte buffer.

        Returns the number of times the character data handler was called.
        """
        xml = b"<?xml version='1.0' encoding='iso8859'?><s>" + b'a' * buffer_len + b'</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(xml)
        return self.n

    def test_change_size_1(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a><s>" + b'a' * 1024
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        self.n = 0
        parser.Parse(xml1, False)
        parser.buffer_size *= 2
        self.assertEqual(parser.buffer_size, 2048)
        parser.Parse(xml2, True)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>a<s>" + b'a' * 1023
        xml2 = b'aaa</s><s>' + b'a' * 1025 + b'</s></a>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 2048
        self.assertEqual(parser.buffer_size, 2048)

        self.n = 0
        parser.Parse(xml1, False)
        parser.buffer_size = parser.buffer_size // 2
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml2, True)
        self.assertEqual(self.n, 4)


class MalformedInputTest(unittest.TestCase):
    """Check the errors reported for input that is not well-formed."""

    def test1(self):
        xml = b"\0\r\n"
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        self.assertEqual(str(cm.exception),
                         'unclosed token: line 2, column 0')

    def test2(self):
        # \xc2\x85 is UTF-8 encoded U+0085 (NEXT LINE)
        xml = b"<?xml version\xc2\x85='1.0'?>\r\n"
        parser = expat.ParserCreate()
        err_pattern = r'XML declaration not well-formed: line 1, column \d+'
        with self.assertRaisesRegex(expat.ExpatError, err_pattern):
            parser.Parse(xml, True)


class ErrorMessageTest(unittest.TestCase):
    """Check the errors.codes / errors.messages mappings and ExpatError.code."""

    def test_codes(self):
        # verify mapping of errors.codes and errors.messages
        self.assertEqual(errors.XML_ERROR_SYNTAX,
                         errors.messages[errors.codes[errors.XML_ERROR_SYNTAX]])

    def test_expaterror(self):
        xml = b'<'
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        self.assertEqual(cm.exception.code,
                         errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN])


class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """
    def test_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document without an external
        entity reference is parsed, ExternalEntityRefHandler is first called
        with None for the public and system ids.
        """
        handler_call_args = []
        def resolve_entity(context, base, system_id, public_id):
            handler_call_args.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(True)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(b"<?xml version='1.0'?><element/>")
        self.assertEqual(handler_call_args, [(None, None)])

        # test UseForeignDTD() is equal to UseForeignDTD(True)
        handler_call_args[:] = []

        parser = expat.ParserCreate()
        parser.UseForeignDTD()
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(b"<?xml version='1.0'?><element/>")
        self.assertEqual(handler_call_args, [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document with an external
        entity reference is parsed, ExternalEntityRefHandler is called with
        the public and system ids from the document.
        """
        handler_call_args = []
        def resolve_entity(context, base, system_id, public_id):
            handler_call_args.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(True)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(
            b"<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        self.assertEqual(handler_call_args, [("bar", "baz")])


if __name__ == "__main__":
    unittest.main()