1# regression test for SAX 2.0 -*- coding: utf-8 -*- 2# $Id$ 3 4from xml.sax import make_parser, ContentHandler, \ 5 SAXException, SAXReaderNotAvailable, SAXParseException, \ 6 saxutils 7try: 8 make_parser() 9except SAXReaderNotAvailable: 10 # don't try to test this module if we cannot create a parser 11 raise ImportError("no XML parsers available") 12from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ 13 XMLFilterBase, prepare_input_source 14from xml.sax.expatreader import create_parser 15from xml.sax.handler import feature_namespaces 16from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl 17from cStringIO import StringIO 18import io 19import gc 20import os.path 21import shutil 22import test.test_support as support 23from test.test_support import findfile, run_unittest, TESTFN 24import unittest 25 26TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata") 27TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata") 28 29supports_unicode_filenames = True 30if not os.path.supports_unicode_filenames: 31 try: 32 support.TESTFN_UNICODE.encode(support.TESTFN_ENCODING) 33 except (AttributeError, UnicodeError, TypeError): 34 # Either the file system encoding is None, or the file name 35 # cannot be encoded in the file system encoding. 36 supports_unicode_filenames = False 37requires_unicode_filenames = unittest.skipUnless( 38 supports_unicode_filenames, 39 'Requires unicode filenames support') 40 41ns_uri = "http://www.python.org/xml-ns/saxtest/" 42 43class XmlTestBase(unittest.TestCase): 44 def verify_empty_attrs(self, attrs): 45 self.assertRaises(KeyError, attrs.getValue, "attr") 46 self.assertRaises(KeyError, attrs.getValueByQName, "attr") 47 self.assertRaises(KeyError, attrs.getNameByQName, "attr") 48 self.assertRaises(KeyError, attrs.getQNameByName, "attr") 49 self.assertRaises(KeyError, attrs.__getitem__, "attr") 50 self.assertEqual(attrs.getLength(), 0) 51 self.assertEqual(attrs.getNames(), []) 52 self.assertEqual(attrs.getQNames(), []) 53 self.assertEqual(len(attrs), 0) 54 self.assertFalse(attrs.has_key("attr")) 55 self.assertEqual(attrs.keys(), []) 56 self.assertEqual(attrs.get("attrs"), None) 57 self.assertEqual(attrs.get("attrs", 25), 25) 58 self.assertEqual(attrs.items(), []) 59 self.assertEqual(attrs.values(), []) 60 61 def verify_empty_nsattrs(self, attrs): 62 self.assertRaises(KeyError, attrs.getValue, (ns_uri, "attr")) 63 self.assertRaises(KeyError, attrs.getValueByQName, "ns:attr") 64 self.assertRaises(KeyError, attrs.getNameByQName, "ns:attr") 65 self.assertRaises(KeyError, attrs.getQNameByName, (ns_uri, "attr")) 66 self.assertRaises(KeyError, attrs.__getitem__, (ns_uri, "attr")) 67 self.assertEqual(attrs.getLength(), 0) 68 self.assertEqual(attrs.getNames(), []) 69 self.assertEqual(attrs.getQNames(), []) 70 self.assertEqual(len(attrs), 0) 71 self.assertFalse(attrs.has_key((ns_uri, "attr"))) 72 self.assertEqual(attrs.keys(), []) 73 self.assertEqual(attrs.get((ns_uri, "attr")), None) 74 self.assertEqual(attrs.get((ns_uri, "attr"), 25), 25) 75 self.assertEqual(attrs.items(), []) 76 self.assertEqual(attrs.values(), []) 77 78 def verify_attrs_wattr(self, attrs): 79 self.assertEqual(attrs.getLength(), 1) 80 self.assertEqual(attrs.getNames(), ["attr"]) 81 self.assertEqual(attrs.getQNames(), ["attr"]) 82 self.assertEqual(len(attrs), 1) 83 self.assertTrue(attrs.has_key("attr")) 84 self.assertEqual(attrs.keys(), ["attr"]) 85 self.assertEqual(attrs.get("attr"), "val") 86 self.assertEqual(attrs.get("attr", 25), "val") 87 self.assertEqual(attrs.items(), [("attr", "val")]) 88 self.assertEqual(attrs.values(), ["val"]) 89 self.assertEqual(attrs.getValue("attr"), "val") 90 self.assertEqual(attrs.getValueByQName("attr"), "val") 91 self.assertEqual(attrs.getNameByQName("attr"), "attr") 92 self.assertEqual(attrs["attr"], "val") 93 self.assertEqual(attrs.getQNameByName("attr"), "attr") 94 95 96def xml_unicode(doc, encoding=None): 97 if encoding is None: 98 return doc 99 return u'<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc) 100 101def xml_bytes(doc, encoding, decl_encoding=Ellipsis): 102 if decl_encoding is Ellipsis: 103 decl_encoding = encoding 104 return xml_unicode(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace') 105 106def make_xml_file(doc, encoding, decl_encoding=Ellipsis): 107 if decl_encoding is Ellipsis: 108 decl_encoding = encoding 109 with io.open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f: 110 f.write(xml_unicode(doc, decl_encoding)) 111 112 113class ParseTest(unittest.TestCase): 114 data = support.u(r'<money value="$\xa3\u20ac\U0001017b">' 115 r'$\xa3\u20ac\U0001017b</money>') 116 117 def tearDown(self): 118 support.unlink(TESTFN) 119 120 def check_parse(self, f): 121 from xml.sax import parse 122 result = StringIO() 123 parse(f, XMLGenerator(result, 'utf-8')) 124 self.assertEqual(result.getvalue(), xml_bytes(self.data, 'utf-8')) 125 126 def test_parse_bytes(self): 127 # UTF-8 is default encoding, US-ASCII is compatible with UTF-8, 128 # UTF-16 is autodetected 129 encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be') 130 for encoding in encodings: 131 self.check_parse(io.BytesIO(xml_bytes(self.data, encoding))) 132 make_xml_file(self.data, encoding) 133 self.check_parse(TESTFN) 134 with io.open(TESTFN, 'rb') as f: 135 self.check_parse(f) 136 self.check_parse(io.BytesIO(xml_bytes(self.data, encoding, None))) 137 make_xml_file(self.data, encoding, None) 138 self.check_parse(TESTFN) 139 with io.open(TESTFN, 'rb') as f: 140 self.check_parse(f) 141 # accept UTF-8 with BOM 142 self.check_parse(io.BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))) 143 make_xml_file(self.data, 'utf-8-sig', 'utf-8') 144 self.check_parse(TESTFN) 145 with io.open(TESTFN, 'rb') as f: 146 self.check_parse(f) 147 self.check_parse(io.BytesIO(xml_bytes(self.data, 'utf-8-sig', None))) 148 make_xml_file(self.data, 'utf-8-sig', None) 149 self.check_parse(TESTFN) 150 with io.open(TESTFN, 'rb') as f: 151 self.check_parse(f) 152 # accept data with declared encoding 153 self.check_parse(io.BytesIO(xml_bytes(self.data, 'iso-8859-1'))) 154 make_xml_file(self.data, 'iso-8859-1') 155 self.check_parse(TESTFN) 156 with io.open(TESTFN, 'rb') as f: 157 self.check_parse(f) 158 # fail on non-UTF-8 incompatible data without declared encoding 159 with self.assertRaises(SAXException): 160 self.check_parse(io.BytesIO(xml_bytes(self.data, 'iso-8859-1', None))) 161 make_xml_file(self.data, 'iso-8859-1', None) 162 with self.assertRaises(SAXException): 163 self.check_parse(TESTFN) 164 with io.open(TESTFN, 'rb') as f: 165 with self.assertRaises(SAXException): 166 self.check_parse(f) 167 168 def test_parse_InputSource(self): 169 # accept data without declared but with explicitly specified encoding 170 make_xml_file(self.data, 'iso-8859-1', None) 171 with io.open(TESTFN, 'rb') as f: 172 input = InputSource() 173 input.setByteStream(f) 174 input.setEncoding('iso-8859-1') 175 self.check_parse(input) 176 177 def test_parse_close_source(self): 178 builtin_open = open 179 non_local = {'fileobj': None} 180 181 def mock_open(*args): 182 fileobj = builtin_open(*args) 183 non_local['fileobj'] = fileobj 184 return fileobj 185 186 with support.swap_attr(saxutils, 'open', mock_open): 187 make_xml_file(self.data, 'iso-8859-1', None) 188 with self.assertRaises(SAXException): 189 self.check_parse(TESTFN) 190 self.assertTrue(non_local['fileobj'].closed) 191 192 def check_parseString(self, s): 193 from xml.sax import parseString 194 result = StringIO() 195 parseString(s, XMLGenerator(result, 'utf-8')) 196 self.assertEqual(result.getvalue(), xml_bytes(self.data, 'utf-8')) 197 198 def test_parseString_bytes(self): 199 # UTF-8 is default encoding, US-ASCII is compatible with UTF-8, 200 # UTF-16 is autodetected 201 encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be') 202 for encoding in encodings: 203 self.check_parseString(xml_bytes(self.data, encoding)) 204 self.check_parseString(xml_bytes(self.data, encoding, None)) 205 # accept UTF-8 with BOM 206 self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8')) 207 self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None)) 208 # accept data with declared encoding 209 self.check_parseString(xml_bytes(self.data, 'iso-8859-1')) 210 # fail on non-UTF-8 incompatible data without declared encoding 211 with self.assertRaises(SAXException): 212 self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None)) 213 214 215class MakeParserTest(unittest.TestCase): 216 def test_make_parser2(self): 217 # Creating parsers several times in a row should succeed. 218 # Testing this because there have been failures of this kind 219 # before. 220 from xml.sax import make_parser 221 p = make_parser() 222 from xml.sax import make_parser 223 p = make_parser() 224 from xml.sax import make_parser 225 p = make_parser() 226 from xml.sax import make_parser 227 p = make_parser() 228 from xml.sax import make_parser 229 p = make_parser() 230 from xml.sax import make_parser 231 p = make_parser() 232 233 234# =========================================================================== 235# 236# saxutils tests 237# 238# =========================================================================== 239 240class SaxutilsTest(unittest.TestCase): 241 # ===== escape 242 def test_escape_basic(self): 243 self.assertEqual(escape("Donald Duck & Co"), "Donald Duck & Co") 244 245 def test_escape_all(self): 246 self.assertEqual(escape("<Donald Duck & Co>"), 247 "<Donald Duck & Co>") 248 249 def test_escape_extra(self): 250 self.assertEqual(escape("Hei på deg", {"å" : "å"}), 251 "Hei på deg") 252 253 # ===== unescape 254 def test_unescape_basic(self): 255 self.assertEqual(unescape("Donald Duck & Co"), "Donald Duck & Co") 256 257 def test_unescape_all(self): 258 self.assertEqual(unescape("<Donald Duck & Co>"), 259 "<Donald Duck & Co>") 260 261 def test_unescape_extra(self): 262 self.assertEqual(unescape("Hei på deg", {"å" : "å"}), 263 "Hei på deg") 264 265 def test_unescape_amp_extra(self): 266 self.assertEqual(unescape("&foo;", {"&foo;": "splat"}), "&foo;") 267 268 # ===== quoteattr 269 def test_quoteattr_basic(self): 270 self.assertEqual(quoteattr("Donald Duck & Co"), 271 '"Donald Duck & Co"') 272 273 def test_single_quoteattr(self): 274 self.assertEqual(quoteattr('Includes "double" quotes'), 275 '\'Includes "double" quotes\'') 276 277 def test_double_quoteattr(self): 278 self.assertEqual(quoteattr("Includes 'single' quotes"), 279 "\"Includes 'single' quotes\"") 280 281 def test_single_double_quoteattr(self): 282 self.assertEqual(quoteattr("Includes 'single' and \"double\" quotes"), 283 "\"Includes 'single' and "double" quotes\"") 284 285 # ===== make_parser 286 def test_make_parser(self): 287 # Creating a parser should succeed - it should fall back 288 # to the expatreader 289 p = make_parser(['xml.parsers.no_such_parser']) 290 291 292class PrepareInputSourceTest(unittest.TestCase): 293 294 def setUp(self): 295 self.file = support.TESTFN 296 with open(self.file, "w") as tmp: 297 tmp.write("This was read from a file.") 298 299 def tearDown(self): 300 support.unlink(self.file) 301 302 def make_byte_stream(self): 303 return io.BytesIO(b"This is a byte stream.") 304 305 def checkContent(self, stream, content): 306 self.assertIsNotNone(stream) 307 self.assertEqual(stream.read(), content) 308 stream.close() 309 310 311 def test_byte_stream(self): 312 # If the source is an InputSource that does not have a character 313 # stream but does have a byte stream, use the byte stream. 314 src = InputSource(self.file) 315 src.setByteStream(self.make_byte_stream()) 316 prep = prepare_input_source(src) 317 self.assertIsNone(prep.getCharacterStream()) 318 self.checkContent(prep.getByteStream(), 319 b"This is a byte stream.") 320 321 def test_system_id(self): 322 # If the source is an InputSource that has neither a character 323 # stream nor a byte stream, open the system ID. 324 src = InputSource(self.file) 325 prep = prepare_input_source(src) 326 self.assertIsNone(prep.getCharacterStream()) 327 self.checkContent(prep.getByteStream(), 328 b"This was read from a file.") 329 330 def test_string(self): 331 # If the source is a string, use it as a system ID and open it. 332 prep = prepare_input_source(self.file) 333 self.assertIsNone(prep.getCharacterStream()) 334 self.checkContent(prep.getByteStream(), 335 b"This was read from a file.") 336 337 def test_binary_file(self): 338 # If the source is a binary file-like object, use it as a byte 339 # stream. 340 prep = prepare_input_source(self.make_byte_stream()) 341 self.assertIsNone(prep.getCharacterStream()) 342 self.checkContent(prep.getByteStream(), 343 b"This is a byte stream.") 344 345 346# ===== XMLGenerator 347 348start = '<?xml version="1.0" encoding="iso-8859-1"?>\n' 349 350class XmlgenTest: 351 def test_xmlgen_basic(self): 352 result = self.ioclass() 353 gen = XMLGenerator(result) 354 gen.startDocument() 355 gen.startElement("doc", {}) 356 gen.endElement("doc") 357 gen.endDocument() 358 359 self.assertEqual(result.getvalue(), start + "<doc></doc>") 360 361 def test_xmlgen_content(self): 362 result = self.ioclass() 363 gen = XMLGenerator(result) 364 365 gen.startDocument() 366 gen.startElement("doc", {}) 367 gen.characters("huhei") 368 gen.endElement("doc") 369 gen.endDocument() 370 371 self.assertEqual(result.getvalue(), start + "<doc>huhei</doc>") 372 373 def test_xmlgen_pi(self): 374 result = self.ioclass() 375 gen = XMLGenerator(result) 376 377 gen.startDocument() 378 gen.processingInstruction("test", "data") 379 gen.startElement("doc", {}) 380 gen.endElement("doc") 381 gen.endDocument() 382 383 self.assertEqual(result.getvalue(), start + "<?test data?><doc></doc>") 384 385 def test_xmlgen_content_escape(self): 386 result = self.ioclass() 387 gen = XMLGenerator(result) 388 389 gen.startDocument() 390 gen.startElement("doc", {}) 391 gen.characters("<huhei&") 392 gen.endElement("doc") 393 gen.endDocument() 394 395 self.assertEqual(result.getvalue(), 396 start + "<doc><huhei&</doc>") 397 398 def test_xmlgen_attr_escape(self): 399 result = self.ioclass() 400 gen = XMLGenerator(result) 401 402 gen.startDocument() 403 gen.startElement("doc", {"a": '"'}) 404 gen.startElement("e", {"a": "'"}) 405 gen.endElement("e") 406 gen.startElement("e", {"a": "'\""}) 407 gen.endElement("e") 408 gen.startElement("e", {"a": "\n\r\t"}) 409 gen.endElement("e") 410 gen.endElement("doc") 411 gen.endDocument() 412 413 self.assertEqual(result.getvalue(), start + 414 ("<doc a='\"'><e a=\"'\"></e>" 415 "<e a=\"'"\"></e>" 416 "<e a=\" 	\"></e></doc>")) 417 418 def test_xmlgen_encoding(self): 419 encodings = ('iso-8859-15', 'utf-8', 420 'utf-16be', 'utf-16le', 421 'utf-32be', 'utf-32le') 422 for encoding in encodings: 423 result = self.ioclass() 424 gen = XMLGenerator(result, encoding=encoding) 425 426 gen.startDocument() 427 gen.startElement("doc", {"a": u'\u20ac'}) 428 gen.characters(u"\u20ac") 429 gen.endElement("doc") 430 gen.endDocument() 431 432 self.assertEqual(result.getvalue(), ( 433 u'<?xml version="1.0" encoding="%s"?>\n' 434 u'<doc a="\u20ac">\u20ac</doc>' % encoding 435 ).encode(encoding, 'xmlcharrefreplace')) 436 437 def test_xmlgen_unencodable(self): 438 result = self.ioclass() 439 gen = XMLGenerator(result, encoding='ascii') 440 441 gen.startDocument() 442 gen.startElement("doc", {"a": u'\u20ac'}) 443 gen.characters(u"\u20ac") 444 gen.endElement("doc") 445 gen.endDocument() 446 447 self.assertEqual(result.getvalue(), 448 '<?xml version="1.0" encoding="ascii"?>\n' 449 '<doc a="€">€</doc>') 450 451 def test_xmlgen_ignorable(self): 452 result = self.ioclass() 453 gen = XMLGenerator(result) 454 455 gen.startDocument() 456 gen.startElement("doc", {}) 457 gen.ignorableWhitespace(" ") 458 gen.endElement("doc") 459 gen.endDocument() 460 461 self.assertEqual(result.getvalue(), start + "<doc> </doc>") 462 463 def test_xmlgen_encoding_bytes(self): 464 encodings = ('iso-8859-15', 'utf-8', 465 'utf-16be', 'utf-16le', 466 'utf-32be', 'utf-32le') 467 for encoding in encodings: 468 result = self.ioclass() 469 gen = XMLGenerator(result, encoding=encoding) 470 471 gen.startDocument() 472 gen.startElement("doc", {"a": u'\u20ac'}) 473 gen.characters(u"\u20ac".encode(encoding)) 474 gen.ignorableWhitespace(" ".encode(encoding)) 475 gen.endElement("doc") 476 gen.endDocument() 477 478 self.assertEqual(result.getvalue(), ( 479 u'<?xml version="1.0" encoding="%s"?>\n' 480 u'<doc a="\u20ac">\u20ac </doc>' % encoding 481 ).encode(encoding, 'xmlcharrefreplace')) 482 483 def test_xmlgen_ns(self): 484 result = self.ioclass() 485 gen = XMLGenerator(result) 486 487 gen.startDocument() 488 gen.startPrefixMapping("ns1", ns_uri) 489 gen.startElementNS((ns_uri, "doc"), "ns1:doc", {}) 490 # add an unqualified name 491 gen.startElementNS((None, "udoc"), None, {}) 492 gen.endElementNS((None, "udoc"), None) 493 gen.endElementNS((ns_uri, "doc"), "ns1:doc") 494 gen.endPrefixMapping("ns1") 495 gen.endDocument() 496 497 self.assertEqual(result.getvalue(), start + \ 498 ('<ns1:doc xmlns:ns1="%s"><udoc></udoc></ns1:doc>' % 499 ns_uri)) 500 501 def test_1463026_1(self): 502 result = self.ioclass() 503 gen = XMLGenerator(result) 504 505 gen.startDocument() 506 gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'}) 507 gen.endElementNS((None, 'a'), 'a') 508 gen.endDocument() 509 510 self.assertEqual(result.getvalue(), start+'<a b="c"></a>') 511 512 def test_1463026_2(self): 513 result = self.ioclass() 514 gen = XMLGenerator(result) 515 516 gen.startDocument() 517 gen.startPrefixMapping(None, 'qux') 518 gen.startElementNS(('qux', 'a'), 'a', {}) 519 gen.endElementNS(('qux', 'a'), 'a') 520 gen.endPrefixMapping(None) 521 gen.endDocument() 522 523 self.assertEqual(result.getvalue(), start+'<a xmlns="qux"></a>') 524 525 def test_1463026_3(self): 526 result = self.ioclass() 527 gen = XMLGenerator(result) 528 529 gen.startDocument() 530 gen.startPrefixMapping('my', 'qux') 531 gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'}) 532 gen.endElementNS(('qux', 'a'), 'a') 533 gen.endPrefixMapping('my') 534 gen.endDocument() 535 536 self.assertEqual(result.getvalue(), 537 start+'<my:a xmlns:my="qux" b="c"></my:a>') 538 539 def test_5027_1(self): 540 # The xml prefix (as in xml:lang below) is reserved and bound by 541 # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had 542 # a bug whereby a KeyError is raised because this namespace is missing 543 # from a dictionary. 544 # 545 # This test demonstrates the bug by parsing a document. 546 test_xml = StringIO( 547 '<?xml version="1.0"?>' 548 '<a:g1 xmlns:a="http://example.com/ns">' 549 '<a:g2 xml:lang="en">Hello</a:g2>' 550 '</a:g1>') 551 552 parser = make_parser() 553 parser.setFeature(feature_namespaces, True) 554 result = self.ioclass() 555 gen = XMLGenerator(result) 556 parser.setContentHandler(gen) 557 parser.parse(test_xml) 558 559 self.assertEqual(result.getvalue(), 560 start + ( 561 '<a:g1 xmlns:a="http://example.com/ns">' 562 '<a:g2 xml:lang="en">Hello</a:g2>' 563 '</a:g1>')) 564 565 def test_5027_2(self): 566 # The xml prefix (as in xml:lang below) is reserved and bound by 567 # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had 568 # a bug whereby a KeyError is raised because this namespace is missing 569 # from a dictionary. 570 # 571 # This test demonstrates the bug by direct manipulation of the 572 # XMLGenerator. 573 result = self.ioclass() 574 gen = XMLGenerator(result) 575 576 gen.startDocument() 577 gen.startPrefixMapping('a', 'http://example.com/ns') 578 gen.startElementNS(('http://example.com/ns', 'g1'), 'g1', {}) 579 lang_attr = {('http://www.w3.org/XML/1998/namespace', 'lang'): 'en'} 580 gen.startElementNS(('http://example.com/ns', 'g2'), 'g2', lang_attr) 581 gen.characters('Hello') 582 gen.endElementNS(('http://example.com/ns', 'g2'), 'g2') 583 gen.endElementNS(('http://example.com/ns', 'g1'), 'g1') 584 gen.endPrefixMapping('a') 585 gen.endDocument() 586 587 self.assertEqual(result.getvalue(), 588 start + ( 589 '<a:g1 xmlns:a="http://example.com/ns">' 590 '<a:g2 xml:lang="en">Hello</a:g2>' 591 '</a:g1>')) 592 593 def test_no_close_file(self): 594 result = self.ioclass() 595 def func(out): 596 gen = XMLGenerator(out) 597 gen.startDocument() 598 gen.startElement("doc", {}) 599 func(result) 600 self.assertFalse(result.closed) 601 602 def test_xmlgen_fragment(self): 603 result = self.ioclass() 604 gen = XMLGenerator(result) 605 606 # Don't call gen.startDocument() 607 gen.startElement("foo", {"a": "1.0"}) 608 gen.characters("Hello") 609 gen.endElement("foo") 610 gen.startElement("bar", {"b": "2.0"}) 611 gen.endElement("bar") 612 # Don't call gen.endDocument() 613 614 self.assertEqual(result.getvalue(), 615 '<foo a="1.0">Hello</foo><bar b="2.0"></bar>') 616 617class StringXmlgenTest(XmlgenTest, unittest.TestCase): 618 ioclass = StringIO 619 620class BytesIOXmlgenTest(XmlgenTest, unittest.TestCase): 621 ioclass = io.BytesIO 622 623class WriterXmlgenTest(XmlgenTest, unittest.TestCase): 624 class ioclass(list): 625 write = list.append 626 closed = False 627 628 def getvalue(self): 629 return b''.join(self) 630 631 632class XMLFilterBaseTest(unittest.TestCase): 633 def test_filter_basic(self): 634 result = StringIO() 635 gen = XMLGenerator(result) 636 filter = XMLFilterBase() 637 filter.setContentHandler(gen) 638 639 filter.startDocument() 640 filter.startElement("doc", {}) 641 filter.characters("content") 642 filter.ignorableWhitespace(" ") 643 filter.endElement("doc") 644 filter.endDocument() 645 646 self.assertEqual(result.getvalue(), start + "<doc>content </doc>") 647 648# =========================================================================== 649# 650# expatreader tests 651# 652# =========================================================================== 653 654xml_test_out = open(TEST_XMLFILE_OUT).read() 655 656class ExpatReaderTest(XmlTestBase): 657 658 # ===== XMLReader support 659 660 def test_expat_binary_file(self): 661 parser = create_parser() 662 result = StringIO() 663 xmlgen = XMLGenerator(result) 664 665 parser.setContentHandler(xmlgen) 666 parser.parse(open(TEST_XMLFILE)) 667 668 self.assertEqual(result.getvalue(), xml_test_out) 669 670 @requires_unicode_filenames 671 def test_expat_file_unicode(self): 672 fname = support.TESTFN_UNICODE 673 shutil.copyfile(TEST_XMLFILE, fname) 674 self.addCleanup(support.unlink, fname) 675 676 parser = create_parser() 677 result = StringIO() 678 xmlgen = XMLGenerator(result) 679 680 parser.setContentHandler(xmlgen) 681 parser.parse(open(fname)) 682 683 self.assertEqual(result.getvalue(), xml_test_out) 684 685 # ===== DTDHandler support 686 687 class TestDTDHandler: 688 689 def __init__(self): 690 self._notations = [] 691 self._entities = [] 692 693 def notationDecl(self, name, publicId, systemId): 694 self._notations.append((name, publicId, systemId)) 695 696 def unparsedEntityDecl(self, name, publicId, systemId, ndata): 697 self._entities.append((name, publicId, systemId, ndata)) 698 699 def test_expat_dtdhandler(self): 700 parser = create_parser() 701 handler = self.TestDTDHandler() 702 parser.setDTDHandler(handler) 703 704 parser.feed('<!DOCTYPE doc [\n') 705 parser.feed(' <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n') 706 parser.feed(' <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n') 707 parser.feed(']>\n') 708 parser.feed('<doc></doc>') 709 parser.close() 710 711 self.assertEqual(handler._notations, 712 [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)]) 713 self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")]) 714 715 # ===== EntityResolver support 716 717 class TestEntityResolver: 718 719 def resolveEntity(self, publicId, systemId): 720 inpsrc = InputSource() 721 inpsrc.setByteStream(StringIO("<entity/>")) 722 return inpsrc 723 724 def test_expat_entityresolver(self): 725 parser = create_parser() 726 parser.setEntityResolver(self.TestEntityResolver()) 727 result = StringIO() 728 parser.setContentHandler(XMLGenerator(result)) 729 730 parser.feed('<!DOCTYPE doc [\n') 731 parser.feed(' <!ENTITY test SYSTEM "whatever">\n') 732 parser.feed(']>\n') 733 parser.feed('<doc>&test;</doc>') 734 parser.close() 735 736 self.assertEqual(result.getvalue(), start + 737 "<doc><entity></entity></doc>") 738 739 # ===== Attributes support 740 741 class AttrGatherer(ContentHandler): 742 743 def startElement(self, name, attrs): 744 self._attrs = attrs 745 746 def startElementNS(self, name, qname, attrs): 747 self._attrs = attrs 748 749 def test_expat_attrs_empty(self): 750 parser = create_parser() 751 gather = self.AttrGatherer() 752 parser.setContentHandler(gather) 753 754 parser.feed("<doc/>") 755 parser.close() 756 757 self.verify_empty_attrs(gather._attrs) 758 759 def test_expat_attrs_wattr(self): 760 parser = create_parser() 761 gather = self.AttrGatherer() 762 parser.setContentHandler(gather) 763 764 parser.feed("<doc attr='val'/>") 765 parser.close() 766 767 self.verify_attrs_wattr(gather._attrs) 768 769 def test_expat_nsattrs_empty(self): 770 parser = create_parser(1) 771 gather = self.AttrGatherer() 772 parser.setContentHandler(gather) 773 774 parser.feed("<doc/>") 775 parser.close() 776 777 self.verify_empty_nsattrs(gather._attrs) 778 779 def test_expat_nsattrs_wattr(self): 780 parser = create_parser(1) 781 gather = self.AttrGatherer() 782 parser.setContentHandler(gather) 783 784 parser.feed("<doc xmlns:ns='%s' ns:attr='val'/>" % ns_uri) 785 parser.close() 786 787 attrs = gather._attrs 788 789 self.assertEqual(attrs.getLength(), 1) 790 self.assertEqual(attrs.getNames(), [(ns_uri, "attr")]) 791 self.assertTrue((attrs.getQNames() == [] or 792 attrs.getQNames() == ["ns:attr"])) 793 self.assertEqual(len(attrs), 1) 794 self.assertTrue(attrs.has_key((ns_uri, "attr"))) 795 self.assertEqual(attrs.get((ns_uri, "attr")), "val") 796 self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val") 797 self.assertEqual(attrs.items(), [((ns_uri, "attr"), "val")]) 798 self.assertEqual(attrs.values(), ["val"]) 799 self.assertEqual(attrs.getValue((ns_uri, "attr")), "val") 800 self.assertEqual(attrs[(ns_uri, "attr")], "val") 801 802 # ===== InputSource support 803 804 def test_expat_inpsource_filename(self): 805 parser = create_parser() 806 result = StringIO() 807 xmlgen = XMLGenerator(result) 808 809 parser.setContentHandler(xmlgen) 810 parser.parse(TEST_XMLFILE) 811 812 self.assertEqual(result.getvalue(), xml_test_out) 813 814 def test_expat_inpsource_sysid(self): 815 parser = create_parser() 816 result = StringIO() 817 xmlgen = XMLGenerator(result) 818 819 parser.setContentHandler(xmlgen) 820 parser.parse(InputSource(TEST_XMLFILE)) 821 822 self.assertEqual(result.getvalue(), xml_test_out) 823 824 @requires_unicode_filenames 825 def test_expat_inpsource_sysid_unicode(self): 826 fname = support.TESTFN_UNICODE 827 shutil.copyfile(TEST_XMLFILE, fname) 828 self.addCleanup(support.unlink, fname) 829 830 parser = create_parser() 831 result = StringIO() 832 xmlgen = XMLGenerator(result) 833 834 parser.setContentHandler(xmlgen) 835 parser.parse(InputSource(fname)) 836 837 self.assertEqual(result.getvalue(), xml_test_out) 838 839 def test_expat_inpsource_byte_stream(self): 840 parser = create_parser() 841 result = StringIO() 842 xmlgen = XMLGenerator(result) 843 844 parser.setContentHandler(xmlgen) 845 inpsrc = InputSource() 846 inpsrc.setByteStream(open(TEST_XMLFILE)) 847 parser.parse(inpsrc) 848 849 self.assertEqual(result.getvalue(), xml_test_out) 850 851 # ===== IncrementalParser support 852 853 def test_expat_incremental(self): 854 result = StringIO() 855 xmlgen = XMLGenerator(result) 856 parser = create_parser() 857 parser.setContentHandler(xmlgen) 858 859 parser.feed("<doc>") 860 parser.feed("</doc>") 861 parser.close() 862 863 self.assertEqual(result.getvalue(), start + "<doc></doc>") 864 865 def test_expat_incremental_reset(self): 866 result = StringIO() 867 xmlgen = XMLGenerator(result) 868 parser = create_parser() 869 parser.setContentHandler(xmlgen) 870 871 parser.feed("<doc>") 872 parser.feed("text") 873 874 result = StringIO() 875 xmlgen = XMLGenerator(result) 876 parser.setContentHandler(xmlgen) 877 parser.reset() 878 879 parser.feed("<doc>") 880 parser.feed("text") 881 parser.feed("</doc>") 882 parser.close() 883 884 self.assertEqual(result.getvalue(), start + "<doc>text</doc>") 885 886 # ===== Locator support 887 888 def test_expat_locator_noinfo(self): 889 result = StringIO() 890 xmlgen = XMLGenerator(result) 891 parser = create_parser() 892 parser.setContentHandler(xmlgen) 893 894 parser.feed("<doc>") 895 parser.feed("</doc>") 896 parser.close() 897 898 self.assertEqual(parser.getSystemId(), None) 899 self.assertEqual(parser.getPublicId(), None) 900 self.assertEqual(parser.getLineNumber(), 1) 901 902 def test_expat_locator_withinfo(self): 903 result = StringIO() 904 xmlgen = XMLGenerator(result) 905 parser = create_parser() 906 parser.setContentHandler(xmlgen) 907 parser.parse(TEST_XMLFILE) 908 909 self.assertEqual(parser.getSystemId(), TEST_XMLFILE) 910 self.assertEqual(parser.getPublicId(), None) 911 912 @requires_unicode_filenames 913 def test_expat_locator_withinfo_unicode(self): 914 fname = support.TESTFN_UNICODE 915 shutil.copyfile(TEST_XMLFILE, fname) 916 self.addCleanup(support.unlink, fname) 917 918 result = StringIO() 919 xmlgen = XMLGenerator(result) 920 parser = create_parser() 921 parser.setContentHandler(xmlgen) 922 parser.parse(fname) 923 924 self.assertEqual(parser.getSystemId(), fname) 925 self.assertEqual(parser.getPublicId(), None) 926 927 928# =========================================================================== 929# 930# error reporting 931# 932# =========================================================================== 933 934class ErrorReportingTest(unittest.TestCase): 935 def test_expat_inpsource_location(self): 936 parser = create_parser() 937 parser.setContentHandler(ContentHandler()) # do nothing 938 source = InputSource() 939 source.setByteStream(StringIO("<foo bar foobar>")) #ill-formed 940 name = "a file name" 941 source.setSystemId(name) 942 try: 943 parser.parse(source) 944 self.fail() 945 except SAXException, e: 946 self.assertEqual(e.getSystemId(), name) 947 948 def test_expat_incomplete(self): 949 parser = create_parser() 950 parser.setContentHandler(ContentHandler()) # do nothing 951 self.assertRaises(SAXParseException, parser.parse, StringIO("<foo>")) 952 self.assertEqual(parser.getColumnNumber(), 5) 953 self.assertEqual(parser.getLineNumber(), 1) 954 955 def test_sax_parse_exception_str(self): 956 # pass various values from a locator to the SAXParseException to 957 # make sure that the __str__() doesn't fall apart when None is 958 # passed instead of an integer line and column number 959 # 960 # use "normal" values for the locator: 961 str(SAXParseException("message", None, 962 self.DummyLocator(1, 1))) 963 # use None for the line number: 964 str(SAXParseException("message", None, 965 self.DummyLocator(None, 1))) 966 # use None for the column number: 967 str(SAXParseException("message", None, 968 self.DummyLocator(1, None))) 969 # use None for both: 970 str(SAXParseException("message", None, 971 self.DummyLocator(None, None))) 972 973 class DummyLocator: 974 def __init__(self, lineno, colno): 975 self._lineno = lineno 976 self._colno = colno 977 978 def getPublicId(self): 979 return "pubid" 980 981 def getSystemId(self): 982 return "sysid" 983 984 def getLineNumber(self): 985 return self._lineno 986 987 def getColumnNumber(self): 988 return self._colno 989 990# =========================================================================== 991# 992# xmlreader tests 993# 994# =========================================================================== 995 996class XmlReaderTest(XmlTestBase): 997 998 # ===== AttributesImpl 999 def test_attrs_empty(self): 1000 self.verify_empty_attrs(AttributesImpl({})) 1001 1002 def test_attrs_wattr(self): 1003 self.verify_attrs_wattr(AttributesImpl({"attr" : "val"})) 1004 1005 def test_nsattrs_empty(self): 1006 self.verify_empty_nsattrs(AttributesNSImpl({}, {})) 1007 1008 def test_nsattrs_wattr(self): 1009 attrs = AttributesNSImpl({(ns_uri, "attr") : "val"}, 1010 {(ns_uri, "attr") : "ns:attr"}) 1011 1012 self.assertEqual(attrs.getLength(), 1) 1013 self.assertEqual(attrs.getNames(), [(ns_uri, "attr")]) 1014 self.assertEqual(attrs.getQNames(), ["ns:attr"]) 1015 self.assertEqual(len(attrs), 1) 1016 self.assertTrue(attrs.has_key((ns_uri, "attr"))) 1017 self.assertEqual(attrs.keys(), [(ns_uri, "attr")]) 1018 self.assertEqual(attrs.get((ns_uri, "attr")), "val") 1019 self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val") 1020 self.assertEqual(attrs.items(), [((ns_uri, "attr"), "val")]) 1021 self.assertEqual(attrs.values(), ["val"]) 1022 self.assertEqual(attrs.getValue((ns_uri, "attr")), "val") 1023 self.assertEqual(attrs.getValueByQName("ns:attr"), "val") 1024 self.assertEqual(attrs.getNameByQName("ns:attr"), (ns_uri, "attr")) 1025 self.assertEqual(attrs[(ns_uri, "attr")], "val") 1026 self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr") 1027 1028 1029 # During the development of Python 2.5, an attempt to move the "xml" 1030 # package implementation to a new package ("xmlcore") proved painful. 1031 # The goal of this change was to allow applications to be able to 1032 # obtain and rely on behavior in the standard library implementation 1033 # of the XML support without needing to be concerned about the 1034 # availability of the PyXML implementation. 1035 # 1036 # While the existing import hackery in Lib/xml/__init__.py can cause 1037 # PyXML's _xmlpus package to supplant the "xml" package, that only 1038 # works because either implementation uses the "xml" package name for 1039 # imports. 1040 # 1041 # The move resulted in a number of problems related to the fact that 1042 # the import machinery's "package context" is based on the name that's 1043 # being imported rather than the __name__ of the actual package 1044 # containment; it wasn't possible for the "xml" package to be replaced 1045 # by a simple module that indirected imports to the "xmlcore" package. 1046 # 1047 # The following two tests exercised bugs that were introduced in that 1048 # attempt. Keeping these tests around will help detect problems with 1049 # other attempts to provide reliable access to the standard library's 1050 # implementation of the XML support. 1051 1052 def test_sf_1511497(self): 1053 # Bug report: http://www.python.org/sf/1511497 1054 import sys 1055 old_modules = sys.modules.copy() 1056 for modname in sys.modules.keys(): 1057 if modname.startswith("xml."): 1058 del sys.modules[modname] 1059 try: 1060 import xml.sax.expatreader 1061 module = xml.sax.expatreader 1062 self.assertEqual(module.__name__, "xml.sax.expatreader") 1063 finally: 1064 sys.modules.update(old_modules) 1065 1066 def test_sf_1513611(self): 1067 # Bug report: http://www.python.org/sf/1513611 1068 sio = StringIO("invalid") 1069 parser = make_parser() 1070 from xml.sax import SAXParseException 1071 self.assertRaises(SAXParseException, parser.parse, sio) 1072 1073 1074def test_main(): 1075 run_unittest(MakeParserTest, 1076 ParseTest, 1077 SaxutilsTest, 1078 PrepareInputSourceTest, 1079 StringXmlgenTest, 1080 BytesIOXmlgenTest, 1081 WriterXmlgenTest, 1082 ExpatReaderTest, 1083 ErrorReportingTest, 1084 XmlReaderTest) 1085 1086if __name__ == "__main__": 1087 test_main() 1088