# XXX TypeErrors on calling handlers, or on bad return values from a
# handler, are obscure and unhelpful.

import StringIO
import sys
import unittest

from xml.parsers import expat

from test import test_support
from test.test_support import sortdict, run_unittest


class SetAttributeTest(unittest.TestCase):
    """Check the boolean flag attributes exposed by a parser object."""

    def setUp(self):
        self.parser = expat.ParserCreate(namespace_separator='!')

    def _check_flag(self, name, default):
        # The flag must start out as *default*, and every value assigned to
        # it must read back as the corresponding bool object (identity is
        # checked, not just equality).
        self.assertIs(getattr(self.parser, name), default)
        for x in 0, 1, 2, 0:
            setattr(self.parser, name, x)
            self.assertIs(getattr(self.parser, name), bool(x))

    def test_buffer_text(self):
        self._check_flag('buffer_text', False)

    def test_namespace_prefixes(self):
        self._check_flag('namespace_prefixes', False)

    def test_returns_unicode(self):
        self._check_flag('returns_unicode', test_support.have_unicode)

    def test_ordered_attributes(self):
        self._check_flag('ordered_attributes', False)

    def test_specified_attributes(self):
        self._check_flag('specified_attributes', False)

    def test_invalid_attributes(self):
        # Unknown attributes can be neither set nor read.
        with self.assertRaises(AttributeError):
            self.parser.foo = 1
        with self.assertRaises(AttributeError):
            self.parser.foo


# The sample document is kept pure ASCII: the non-ASCII characters are
# written as numeric character references (&#226; is U+00E2, &#8000; is
# U+1F40).  The document declares iso-8859-1, which cannot represent U+1F40
# directly, and the tests below expect exactly that code point in attr2.
data = '''\
<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<?xml-stylesheet href="stylesheet.css"?>
<!-- comment data -->
<!DOCTYPE quotations SYSTEM "quotations.dtd" [
<!ELEMENT root ANY>
<!NOTATION notation SYSTEM "notation.jpeg">
<!ENTITY acirc "&#226;">
<!ENTITY external_entity SYSTEM "entity.file">
<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation>
%unparsed_entity;
]>

<root attr1="value1" attr2="value2&#8000;">
<myns:subelement xmlns:myns="http://www.python.org/namespace">
     Contents of subelements
</myns:subelement>
<sub2><![CDATA[contents of CDATA section]]></sub2>
&external_entity;
</root>
'''


# Produce UTF-8 output
class ParseTest(unittest.TestCase):
    """Parse the sample document and compare the recorded handler calls."""

    class Outputter:
        """Collect a printable description of every handler call in `out`."""

        def __init__(self):
            self.out = []

        def StartElementHandler(self, name, attrs):
            self.out.append('Start element: ' + repr(name) + ' ' +
                            sortdict(attrs))

        def EndElementHandler(self, name):
            self.out.append('End element: ' + repr(name))

        def CharacterDataHandler(self, data):
            # Whitespace-only chunks are not recorded.
            data = data.strip()
            if data:
                self.out.append('Character data: ' + repr(data))

        def ProcessingInstructionHandler(self, target, data):
            self.out.append('PI: ' + repr(target) + ' ' + repr(data))

        def StartNamespaceDeclHandler(self, prefix, uri):
            self.out.append('NS decl: ' + repr(prefix) + ' ' + repr(uri))

        def EndNamespaceDeclHandler(self, prefix):
            self.out.append('End of NS decl: ' + repr(prefix))

        def StartCdataSectionHandler(self):
            self.out.append('Start of CDATA section')

        def EndCdataSectionHandler(self):
            self.out.append('End of CDATA section')

        def CommentHandler(self, text):
            self.out.append('Comment: ' + repr(text))

        def NotationDeclHandler(self, *args):
            # The unpacking fails unless exactly four arguments are passed.
            name, base, sysid, pubid = args
            self.out.append('Notation declared: %s' %(args,))

        def UnparsedEntityDeclHandler(self, *args):
            # The unpacking fails unless exactly five arguments are passed.
            entityName, base, systemId, publicId, notationName = args
            self.out.append('Unparsed entity decl: %s' %(args,))

        def NotStandaloneHandler(self, userData):
            self.out.append('Not standalone')
            return 1

        def ExternalEntityRefHandler(self, *args):
            # The unpacking fails unless exactly four arguments are passed;
            # the context (first argument) is left out of the record.
            context, base, sysId, pubId = args
            self.out.append('External entity ref: %s' %(args[1:],))
            return 1

        def DefaultHandler(self, userData):
            pass

        def DefaultHandlerExpand(self, userData):
            pass

    handler_names = [
        'StartElementHandler', 'EndElementHandler',
        'CharacterDataHandler', 'ProcessingInstructionHandler',
        'UnparsedEntityDeclHandler', 'NotationDeclHandler',
        'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
        'CommentHandler', 'StartCdataSectionHandler',
        'EndCdataSectionHandler',
        'DefaultHandler', 'DefaultHandlerExpand',
        #'NotStandaloneHandler',
        'ExternalEntityRefHandler'
        ]

    # Expected Outputter.out entries when returns_unicode is off.
    expected_utf8 = [
        'PI: \'xml-stylesheet\' \'href="stylesheet.css"\'',
        "Comment: ' comment data '",
        "Notation declared: ('notation', None, 'notation.jpeg', None)",
        "Unparsed entity decl: ('unparsed_entity', None, 'entity.file', None, 'notation')",
        "Start element: 'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}",
        "NS decl: 'myns' 'http://www.python.org/namespace'",
        "Start element: 'http://www.python.org/namespace!subelement' {}",
        "Character data: 'Contents of subelements'",
        "End element: 'http://www.python.org/namespace!subelement'",
        "End of NS decl: 'myns'",
        "Start element: 'sub2' {}",
        'Start of CDATA section',
        "Character data: 'contents of CDATA section'",
        'End of CDATA section',
        "End element: 'sub2'",
        "External entity ref: (None, 'entity.file', None)",
        "End element: 'root'",
        ]

    # Expected Outputter.out entries when returns_unicode is on.
    expected_unicode = [
        'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'',
        "Comment: u' comment data '",
        "Notation declared: (u'notation', None, u'notation.jpeg', None)",
        "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')",
        "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}",
        "NS decl: u'myns' u'http://www.python.org/namespace'",
        "Start element: u'http://www.python.org/namespace!subelement' {}",
        "Character data: u'Contents of subelements'",
        "End element: u'http://www.python.org/namespace!subelement'",
        "End of NS decl: u'myns'",
        "Start element: u'sub2' {}",
        'Start of CDATA section',
        "Character data: u'contents of CDATA section'",
        'End of CDATA section',
        "End element: u'sub2'",
        "External entity ref: (None, u'entity.file', None)",
        "End element: u'root'",
        ]

    def _install_handlers(self, parser, out):
        # Hook every handler listed in handler_names up to the Outputter.
        for name in self.handler_names:
            setattr(parser, name, getattr(out, name))

    def _check_output(self, actual, expected):
        # Compare entry by entry so a failure names the first mismatch.
        for index, wanted in enumerate(expected):
            self.assertEqual(actual[index], wanted)

    def test_utf8(self):
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        self._install_handlers(parser, out)
        parser.returns_unicode = 0
        parser.Parse(data, 1)

        # Verify output
        self._check_output(out.out, self.expected_utf8)

    def test_unicode(self):
        # Try the parse again, this time producing Unicode output
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        parser.returns_unicode = 1
        self._install_handlers(parser, out)

        parser.Parse(data, 1)

        self._check_output(out.out, self.expected_unicode)

    def test_parse_file(self):
        # Try parsing a file
        out = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        parser.returns_unicode = 1
        self._install_handlers(parser, out)
        stream = StringIO.StringIO(data)

        parser.ParseFile(stream)

        self._check_output(out.out, self.expected_unicode)

        # Issue 4877: expat.ParseFile causes segfault on a closed file.
        fp = open(test_support.TESTFN, 'wb')
        try:
            fp.close()
            parser = expat.ParserCreate()
            with self.assertRaises(ValueError):
                parser.ParseFile(fp)
        finally:
            test_support.unlink(test_support.TESTFN)

    def test_parse_again(self):
        parser = expat.ParserCreate()
        stream = StringIO.StringIO(data)
        parser.ParseFile(stream)
        # Issue 6676: ensure a meaningful exception is raised when attempting
        # to parse more than one XML document per xmlparser instance,
        # a limitation of the Expat library.
        with self.assertRaises(expat.error) as cm:
            parser.ParseFile(stream)
        self.assertEqual(expat.ErrorString(cm.exception.code),
                         expat.errors.XML_ERROR_FINISHED)


class NamespaceSeparatorTest(unittest.TestCase):
    """Check which namespace_separator values ParserCreate() accepts."""

    def test_legal(self):
        # Tests that make sure we get errors when the namespace_separator value
        # is illegal, and that we don't for good values:
        expat.ParserCreate()
        expat.ParserCreate(namespace_separator=None)
        expat.ParserCreate(namespace_separator=' ')

    def test_illegal(self):
        with self.assertRaises(TypeError) as cm:
            expat.ParserCreate(namespace_separator=42)
        self.assertEqual(str(cm.exception),
            'ParserCreate() argument 2 must be string or None, not int')

        with self.assertRaises(ValueError) as cm:
            expat.ParserCreate(namespace_separator='too long')
        self.assertEqual(str(cm.exception),
            'namespace_separator must be at most one character, omitted, or None')

    def test_zero_length(self):
        # ParserCreate() needs to accept a namespace_separator of zero length
        # to satisfy the requirements of RDF applications that are required
        # to simply glue together the namespace URI and the localname.  Though
        # considered a wart of the RDF specifications, it needs to be supported.
        #
        # See XML-SIG mailing list thread starting with
        # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
        #
        expat.ParserCreate(namespace_separator='') # too short


class InterningTest(unittest.TestCase):
    """Check that repeated element names are delivered as one object."""

    def test(self):
        # Test the interning machinery.
        p = expat.ParserCreate()
        L = []
        def collector(name, *args):
            L.append(name)
        p.StartElementHandler = collector
        p.EndElementHandler = collector
        p.Parse("<e> <e/> <e></e> </e>", 1)
        # Three start and three end events; check the count before indexing
        # so an empty list is reported as a test failure, not an IndexError.
        self.assertEqual(len(L), 6)
        tag = L[0]
        for entry in L:
            # L should have the same string repeated over and over.
            self.assertIs(tag, entry)


class BufferTextTest(unittest.TestCase):
    """Check how buffer_text groups the character data passed to handlers."""

    def setUp(self):
        self.stuff = []
        self.parser = expat.ParserCreate()
        self.parser.buffer_text = 1
        self.parser.CharacterDataHandler = self.CharacterDataHandler

    def check(self, expected, label):
        self.assertEqual(self.stuff, expected,
                "%s\nstuff    = %r\nexpected = %r"
                % (label, self.stuff, map(unicode, expected)))

    def CharacterDataHandler(self, text):
        self.stuff.append(text)

    def StartElementHandler(self, name, attrs):
        self.stuff.append("<%s>" % name)
        # A buffer-text attribute on the element switches buffering on or
        # off while the parse is in progress.
        bt = attrs.get("buffer-text")
        if bt == "yes":
            self.parser.buffer_text = 1
        elif bt == "no":
            self.parser.buffer_text = 0

    def EndElementHandler(self, name):
        self.stuff.append("</%s>" % name)

    def CommentHandler(self, data):
        self.stuff.append("<!--%s-->" % data)

    def setHandlers(self, handlers=()):
        # Install the named methods of this test case on the parser.  The
        # default is an immutable empty tuple rather than a shared list.
        for name in handlers:
            setattr(self.parser, name, getattr(self, name))

    def test_default_to_disabled(self):
        parser = expat.ParserCreate()
        self.assertFalse(parser.buffer_text)

    def test_buffering_enabled(self):
        # Make sure buffering is turned on
        self.assertTrue(self.parser.buffer_text)
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ['123'],
                         "buffered text not properly collapsed")

    def test1(self):
        # XXX This test exposes more detail of Expat's text chunking than we
        # XXX like, but it tests what we need to concisely.
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b buffer-text='no'/>2\n3<c buffer-text='yes'/>4\n5</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "2", "\n", "3", "<c>", "4\n5"],
                         "buffering control not reacting as expected")

    def test2(self):
        # '<' and '>' must be escaped in character data; a literal "<2>"
        # would not be well-formed XML.
        self.parser.Parse("<a>1<b/>&lt;2&gt;<c/> \n 3</a>", 1)
        self.assertEqual(self.stuff, ["1<2> \n 3"],
                         "buffered text not properly collapsed")

    def test3(self):
        self.setHandlers(["StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff, ["<a>", "1", "<b>", "2", "<c>", "3"],
                         "buffered text not properly split")

    def test4(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.CharacterDataHandler = None
        self.parser.Parse("<a>1<b/>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "<b>", "</b>", "<c>", "</c>", "</a>"])

    def test5(self):
        self.setHandlers(["StartElementHandler", "EndElementHandler"])
        self.parser.Parse("<a>1<b></b>2<c/>3</a>", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3", "</a>"])

    def test6(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>345</a> ", 1)
        self.assertEqual(self.stuff,
            ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "345", "</a>"],
            "buffered text not properly split")

    def test7(self):
        self.setHandlers(["CommentHandler", "EndElementHandler",
                    "StartElementHandler"])
        self.parser.Parse("<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", 1)
        self.assertEqual(self.stuff,
                         ["<a>", "1", "<b>", "</b>", "2", "<c>", "</c>", "3",
                          "<!--abc-->", "4", "<!--def-->", "5", "</a>"],
                         "buffered text not properly split")


# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
    """Check that an exception raised in a handler reaches the caller."""

    def StartElementHandler(self, name, attrs):
        raise RuntimeError(name)

    def test(self):
        parser = expat.ParserCreate()
        parser.StartElementHandler = self.StartElementHandler
        with self.assertRaises(RuntimeError) as cm:
            parser.Parse("<a><b><c/></b></a>", 1)
        # The error must come from the first start tag, i.e. from 'a'.
        self.assertEqual(cm.exception.args[0], 'a',
                         "Expected RuntimeError for element 'a', but" + \
                         " found %r" % cm.exception.args[0])


# Test Current* members:
class PositionTest(unittest.TestCase):
    """Check CurrentByteIndex/LineNumber/ColumnNumber inside handlers."""

    def StartElementHandler(self, name, attrs):
        self.check_pos('s')

    def EndElementHandler(self, name):
        self.check_pos('e')

    def check_pos(self, event):
        # Compare the parser's current position with the next expected
        # (event, byte index, line, column) tuple.
        pos = (event,
               self.parser.CurrentByteIndex,
               self.parser.CurrentLineNumber,
               self.parser.CurrentColumnNumber)
        self.assertTrue(self.upto < len(self.expected_list),
                        'too many parser events')
        expected = self.expected_list[self.upto]
        self.assertEqual(pos, expected,
                'Expected position %s, got position %s' %(expected, pos))
        self.upto += 1

    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)]

        # The indentation is significant: <b> is indented by one space and
        # <c/> by two, which is what the byte indexes and columns in
        # expected_list are computed from.
        xml = '<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)


class sf1296433Test(unittest.TestCase):
    """Regression test for http://python.org/sf/1296433."""

    def test_parse_only_xml_data(self):
        # http://python.org/sf/1296433
        #
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * 1025)
        # this one doesn't crash
        #xml = "<?xml version='1.0'?><s>%s</s>" % ('a' * 10000)

        class SpecificException(Exception):
            pass

        def handler(text):
            raise SpecificException

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = handler

        self.assertRaises(Exception, parser.Parse, xml)


class ChardataBufferTest(unittest.TestCase):
    """
    test setting of chardata buffer size
    """

    def test_1025_bytes(self):
        self.assertEqual(self.small_buffer_test(1025), 2)

    def test_1000_bytes(self):
        self.assertEqual(self.small_buffer_test(1000), 1)

    def test_wrong_size(self):
        parser = expat.ParserCreate()
        parser.buffer_text = 1
        with self.assertRaises(ValueError):
            parser.buffer_size = -1
        with self.assertRaises(ValueError):
            parser.buffer_size = 0
        with self.assertRaises(TypeError):
            parser.buffer_size = 512.0
        with self.assertRaises(TypeError):
            parser.buffer_size = sys.maxint+1

    def test_unchanged_size(self):
        xml1 = ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512))
        xml2 = 'a'*512 + '</s>'
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 512
        parser.buffer_text = 1

        # Feed 512 bytes of character data: the handler should be called
        # once.
        self.n = 0
        parser.Parse(xml1)
        self.assertEqual(self.n, 1)

        # Reassign to buffer_size, but assign the same size.
        parser.buffer_size = parser.buffer_size
        self.assertEqual(self.n, 1)

        # Try parsing rest of the document
        parser.Parse(xml2)
        self.assertEqual(self.n, 2)

    def test_disabling_buffer(self):
        xml1 = "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512)
        xml2 = ('b' * 1024)
        xml3 = "%s</a>" % ('c' * 1024)
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # Parse one chunk of XML
        self.n = 0
        parser.Parse(xml1, 0)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Turn off buffering and parse the next chunk.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for i in range(10):
            parser.Parse(xml2, 0)
        self.assertEqual(self.n, 11)

        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml3, 1)
        self.assertEqual(self.n, 12)

    def make_document(self, bytes):
        return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>')

    def counting_handler(self, text):
        # Count handler invocations; the text itself is ignored.
        self.n += 1

    def small_buffer_test(self, buffer_len):
        # Parse buffer_len characters of text with a 1024-byte buffer and
        # return how many times the character data handler was called.
        xml = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % ('a' * buffer_len)
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(xml)
        return self.n

    def test_change_size_1(self):
        xml1 = "<?xml version='1.0' encoding='iso8859'?><a><s>%s" % ('a' * 1024)
        xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        self.n = 0
        parser.Parse(xml1, 0)
        parser.buffer_size *= 2
        self.assertEqual(parser.buffer_size, 2048)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 2)

    def test_change_size_2(self):
        xml1 = "<?xml version='1.0' encoding='iso8859'?><a>a<s>%s" % ('a' * 1023)
        xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 2048
        self.assertEqual(parser.buffer_size, 2048)

        self.n = 0
        parser.Parse(xml1, 0)
        parser.buffer_size //= 2
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(xml2, 1)
        self.assertEqual(self.n, 4)


class MalformedInputText(unittest.TestCase):
    """Check the error messages reported for malformed input."""

    def test1(self):
        xml = "\0\r\n"
        parser = expat.ParserCreate()
        with self.assertRaises(expat.ExpatError) as cm:
            parser.Parse(xml, True)
        self.assertEqual(str(cm.exception),
                         'unclosed token: line 2, column 0')

    def test2(self):
        xml = "<?xml version\xc2\x85='1.0'?>\r\n"
        parser = expat.ParserCreate()
        err_pattern = r'XML declaration not well-formed: line 1, column \d+'
        with self.assertRaisesRegexp(expat.ExpatError, err_pattern):
            parser.Parse(xml, True)


class ForeignDTDTests(unittest.TestCase):
    """
    Tests for the UseForeignDTD method of expat parser objects.
    """
    def test_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document without an external
        entity reference is parsed, ExternalEntityRefHandler is first called
        with None for the public and system ids.
        """
        handler_call_args = []
        def resolve_entity(context, base, system_id, public_id):
            handler_call_args.append((public_id, system_id))
            return 1

        # UseForeignDTD() must behave exactly like UseForeignDTD(True), so
        # the same scenario is run once with each call form.
        for args in ((True,), ()):
            handler_call_args[:] = []

            parser = expat.ParserCreate()
            parser.UseForeignDTD(*args)
            parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
            parser.ExternalEntityRefHandler = resolve_entity
            parser.Parse("<?xml version='1.0'?><element/>")
            self.assertEqual(handler_call_args, [(None, None)])

    def test_ignore_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document with an external
        entity reference is parsed, ExternalEntityRefHandler is called with
        the public and system ids from the document.
        """
        handler_call_args = []
        def resolve_entity(context, base, system_id, public_id):
            handler_call_args.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(True)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse(
            "<?xml version='1.0'?><!DOCTYPE foo PUBLIC 'bar' 'baz'><element/>")
        self.assertEqual(handler_call_args, [("bar", "baz")])


def test_main():
    """Run every test case class defined in this module."""
    run_unittest(SetAttributeTest,
                 ParseTest,
                 NamespaceSeparatorTest,
                 InterningTest,
                 BufferTextTest,
                 HandlerExceptionTest,
                 PositionTest,
                 sf1296433Test,
                 ChardataBufferTest,
                 MalformedInputText,
                 ForeignDTDTests)


if __name__ == "__main__":
    test_main()