1# regression test for SAX 2.0 2# $Id$ 3 4from xml.sax import make_parser, ContentHandler, \ 5 SAXException, SAXReaderNotAvailable, SAXParseException 6import unittest 7from unittest import mock 8try: 9 make_parser() 10except SAXReaderNotAvailable: 11 # don't try to test this module if we cannot create a parser 12 raise unittest.SkipTest("no XML parsers available") 13from xml.sax.saxutils import XMLGenerator, escape, unescape, quoteattr, \ 14 XMLFilterBase, prepare_input_source 15from xml.sax.expatreader import create_parser 16from xml.sax.handler import (feature_namespaces, feature_external_ges, 17 LexicalHandler) 18from xml.sax.xmlreader import InputSource, AttributesImpl, AttributesNSImpl 19from io import BytesIO, StringIO 20import codecs 21import os.path 22import shutil 23import sys 24from urllib.error import URLError 25import urllib.request 26from test.support import os_helper 27from test.support import findfile 28from test.support.os_helper import FakePath, TESTFN 29 30 31TEST_XMLFILE = findfile("test.xml", subdir="xmltestdata") 32TEST_XMLFILE_OUT = findfile("test.xml.out", subdir="xmltestdata") 33try: 34 TEST_XMLFILE.encode("utf-8") 35 TEST_XMLFILE_OUT.encode("utf-8") 36except UnicodeEncodeError: 37 raise unittest.SkipTest("filename is not encodable to utf8") 38 39supports_nonascii_filenames = True 40if not os.path.supports_unicode_filenames: 41 try: 42 os_helper.TESTFN_UNICODE.encode(sys.getfilesystemencoding()) 43 except (UnicodeError, TypeError): 44 # Either the file system encoding is None, or the file name 45 # cannot be encoded in the file system encoding. 46 supports_nonascii_filenames = False 47requires_nonascii_filenames = unittest.skipUnless( 48 supports_nonascii_filenames, 49 'Requires non-ascii filenames support') 50 51ns_uri = "http://www.python.org/xml-ns/saxtest/" 52 53class XmlTestBase(unittest.TestCase): 54 def verify_empty_attrs(self, attrs): 55 self.assertRaises(KeyError, attrs.getValue, "attr") 56 self.assertRaises(KeyError, attrs.getValueByQName, "attr") 57 self.assertRaises(KeyError, attrs.getNameByQName, "attr") 58 self.assertRaises(KeyError, attrs.getQNameByName, "attr") 59 self.assertRaises(KeyError, attrs.__getitem__, "attr") 60 self.assertEqual(attrs.getLength(), 0) 61 self.assertEqual(attrs.getNames(), []) 62 self.assertEqual(attrs.getQNames(), []) 63 self.assertEqual(len(attrs), 0) 64 self.assertNotIn("attr", attrs) 65 self.assertEqual(list(attrs.keys()), []) 66 self.assertEqual(attrs.get("attrs"), None) 67 self.assertEqual(attrs.get("attrs", 25), 25) 68 self.assertEqual(list(attrs.items()), []) 69 self.assertEqual(list(attrs.values()), []) 70 71 def verify_empty_nsattrs(self, attrs): 72 self.assertRaises(KeyError, attrs.getValue, (ns_uri, "attr")) 73 self.assertRaises(KeyError, attrs.getValueByQName, "ns:attr") 74 self.assertRaises(KeyError, attrs.getNameByQName, "ns:attr") 75 self.assertRaises(KeyError, attrs.getQNameByName, (ns_uri, "attr")) 76 self.assertRaises(KeyError, attrs.__getitem__, (ns_uri, "attr")) 77 self.assertEqual(attrs.getLength(), 0) 78 self.assertEqual(attrs.getNames(), []) 79 self.assertEqual(attrs.getQNames(), []) 80 self.assertEqual(len(attrs), 0) 81 self.assertNotIn((ns_uri, "attr"), attrs) 82 self.assertEqual(list(attrs.keys()), []) 83 self.assertEqual(attrs.get((ns_uri, "attr")), None) 84 self.assertEqual(attrs.get((ns_uri, "attr"), 25), 25) 85 self.assertEqual(list(attrs.items()), []) 86 self.assertEqual(list(attrs.values()), []) 87 88 def verify_attrs_wattr(self, attrs): 89 self.assertEqual(attrs.getLength(), 1) 90 self.assertEqual(attrs.getNames(), ["attr"]) 91 self.assertEqual(attrs.getQNames(), ["attr"]) 92 self.assertEqual(len(attrs), 1) 93 self.assertIn("attr", attrs) 94 self.assertEqual(list(attrs.keys()), ["attr"]) 95 self.assertEqual(attrs.get("attr"), "val") 96 self.assertEqual(attrs.get("attr", 25), "val") 97 self.assertEqual(list(attrs.items()), [("attr", "val")]) 98 self.assertEqual(list(attrs.values()), ["val"]) 99 self.assertEqual(attrs.getValue("attr"), "val") 100 self.assertEqual(attrs.getValueByQName("attr"), "val") 101 self.assertEqual(attrs.getNameByQName("attr"), "attr") 102 self.assertEqual(attrs["attr"], "val") 103 self.assertEqual(attrs.getQNameByName("attr"), "attr") 104 105 106def xml_str(doc, encoding=None): 107 if encoding is None: 108 return doc 109 return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc) 110 111def xml_bytes(doc, encoding, decl_encoding=...): 112 if decl_encoding is ...: 113 decl_encoding = encoding 114 return xml_str(doc, decl_encoding).encode(encoding, 'xmlcharrefreplace') 115 116def make_xml_file(doc, encoding, decl_encoding=...): 117 if decl_encoding is ...: 118 decl_encoding = encoding 119 with open(TESTFN, 'w', encoding=encoding, errors='xmlcharrefreplace') as f: 120 f.write(xml_str(doc, decl_encoding)) 121 122 123class ParseTest(unittest.TestCase): 124 data = '<money value="$\xa3\u20ac\U0001017b">$\xa3\u20ac\U0001017b</money>' 125 126 def tearDown(self): 127 os_helper.unlink(TESTFN) 128 129 def check_parse(self, f): 130 from xml.sax import parse 131 result = StringIO() 132 parse(f, XMLGenerator(result, 'utf-8')) 133 self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8')) 134 135 def test_parse_text(self): 136 encodings = ('us-ascii', 'iso-8859-1', 'utf-8', 137 'utf-16', 'utf-16le', 'utf-16be') 138 for encoding in encodings: 139 self.check_parse(StringIO(xml_str(self.data, encoding))) 140 make_xml_file(self.data, encoding) 141 with open(TESTFN, 'r', encoding=encoding) as f: 142 self.check_parse(f) 143 self.check_parse(StringIO(self.data)) 144 make_xml_file(self.data, encoding, None) 145 with open(TESTFN, 'r', encoding=encoding) as f: 146 self.check_parse(f) 147 148 def test_parse_bytes(self): 149 # UTF-8 is default encoding, US-ASCII is compatible with UTF-8, 150 # UTF-16 is autodetected 151 encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be') 152 for encoding in encodings: 153 self.check_parse(BytesIO(xml_bytes(self.data, encoding))) 154 make_xml_file(self.data, encoding) 155 self.check_parse(TESTFN) 156 with open(TESTFN, 'rb') as f: 157 self.check_parse(f) 158 self.check_parse(BytesIO(xml_bytes(self.data, encoding, None))) 159 make_xml_file(self.data, encoding, None) 160 self.check_parse(TESTFN) 161 with open(TESTFN, 'rb') as f: 162 self.check_parse(f) 163 # accept UTF-8 with BOM 164 self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', 'utf-8'))) 165 make_xml_file(self.data, 'utf-8-sig', 'utf-8') 166 self.check_parse(TESTFN) 167 with open(TESTFN, 'rb') as f: 168 self.check_parse(f) 169 self.check_parse(BytesIO(xml_bytes(self.data, 'utf-8-sig', None))) 170 make_xml_file(self.data, 'utf-8-sig', None) 171 self.check_parse(TESTFN) 172 with open(TESTFN, 'rb') as f: 173 self.check_parse(f) 174 # accept data with declared encoding 175 self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1'))) 176 make_xml_file(self.data, 'iso-8859-1') 177 self.check_parse(TESTFN) 178 with open(TESTFN, 'rb') as f: 179 self.check_parse(f) 180 # fail on non-UTF-8 incompatible data without declared encoding 181 with self.assertRaises(SAXException): 182 self.check_parse(BytesIO(xml_bytes(self.data, 'iso-8859-1', None))) 183 make_xml_file(self.data, 'iso-8859-1', None) 184 with self.assertRaises(SAXException): 185 self.check_parse(TESTFN) 186 with open(TESTFN, 'rb') as f: 187 with self.assertRaises(SAXException): 188 self.check_parse(f) 189 190 def test_parse_path_object(self): 191 make_xml_file(self.data, 'utf-8', None) 192 self.check_parse(FakePath(TESTFN)) 193 194 def test_parse_InputSource(self): 195 # accept data without declared but with explicitly specified encoding 196 make_xml_file(self.data, 'iso-8859-1', None) 197 with open(TESTFN, 'rb') as f: 198 input = InputSource() 199 input.setByteStream(f) 200 input.setEncoding('iso-8859-1') 201 self.check_parse(input) 202 203 def test_parse_close_source(self): 204 builtin_open = open 205 fileobj = None 206 207 def mock_open(*args): 208 nonlocal fileobj 209 fileobj = builtin_open(*args) 210 return fileobj 211 212 with mock.patch('xml.sax.saxutils.open', side_effect=mock_open): 213 make_xml_file(self.data, 'iso-8859-1', None) 214 with self.assertRaises(SAXException): 215 self.check_parse(TESTFN) 216 self.assertTrue(fileobj.closed) 217 218 def check_parseString(self, s): 219 from xml.sax import parseString 220 result = StringIO() 221 parseString(s, XMLGenerator(result, 'utf-8')) 222 self.assertEqual(result.getvalue(), xml_str(self.data, 'utf-8')) 223 224 def test_parseString_text(self): 225 encodings = ('us-ascii', 'iso-8859-1', 'utf-8', 226 'utf-16', 'utf-16le', 'utf-16be') 227 for encoding in encodings: 228 self.check_parseString(xml_str(self.data, encoding)) 229 self.check_parseString(self.data) 230 231 def test_parseString_bytes(self): 232 # UTF-8 is default encoding, US-ASCII is compatible with UTF-8, 233 # UTF-16 is autodetected 234 encodings = ('us-ascii', 'utf-8', 'utf-16', 'utf-16le', 'utf-16be') 235 for encoding in encodings: 236 self.check_parseString(xml_bytes(self.data, encoding)) 237 self.check_parseString(xml_bytes(self.data, encoding, None)) 238 # accept UTF-8 with BOM 239 self.check_parseString(xml_bytes(self.data, 'utf-8-sig', 'utf-8')) 240 self.check_parseString(xml_bytes(self.data, 'utf-8-sig', None)) 241 # accept data with declared encoding 242 self.check_parseString(xml_bytes(self.data, 'iso-8859-1')) 243 # fail on non-UTF-8 incompatible data without declared encoding 244 with self.assertRaises(SAXException): 245 self.check_parseString(xml_bytes(self.data, 'iso-8859-1', None)) 246 247class MakeParserTest(unittest.TestCase): 248 def test_make_parser2(self): 249 # Creating parsers several times in a row should succeed. 250 # Testing this because there have been failures of this kind 251 # before. 252 from xml.sax import make_parser 253 p = make_parser() 254 from xml.sax import make_parser 255 p = make_parser() 256 from xml.sax import make_parser 257 p = make_parser() 258 from xml.sax import make_parser 259 p = make_parser() 260 from xml.sax import make_parser 261 p = make_parser() 262 from xml.sax import make_parser 263 p = make_parser() 264 265 def test_make_parser3(self): 266 # Testing that make_parser can handle different types of 267 # iterables. 268 make_parser(['module']) 269 make_parser(('module', )) 270 make_parser({'module'}) 271 make_parser(frozenset({'module'})) 272 make_parser({'module': None}) 273 make_parser(iter(['module'])) 274 275 def test_make_parser4(self): 276 # Testing that make_parser can handle empty iterables. 277 make_parser([]) 278 make_parser(tuple()) 279 make_parser(set()) 280 make_parser(frozenset()) 281 make_parser({}) 282 make_parser(iter([])) 283 284 def test_make_parser5(self): 285 # Testing that make_parser can handle iterables with more than 286 # one item. 287 make_parser(['module1', 'module2']) 288 make_parser(('module1', 'module2')) 289 make_parser({'module1', 'module2'}) 290 make_parser(frozenset({'module1', 'module2'})) 291 make_parser({'module1': None, 'module2': None}) 292 make_parser(iter(['module1', 'module2'])) 293 294# =========================================================================== 295# 296# saxutils tests 297# 298# =========================================================================== 299 300class SaxutilsTest(unittest.TestCase): 301 # ===== escape 302 def test_escape_basic(self): 303 self.assertEqual(escape("Donald Duck & Co"), "Donald Duck & Co") 304 305 def test_escape_all(self): 306 self.assertEqual(escape("<Donald Duck & Co>"), 307 "<Donald Duck & Co>") 308 309 def test_escape_extra(self): 310 self.assertEqual(escape("Hei på deg", {"å" : "å"}), 311 "Hei på deg") 312 313 # ===== unescape 314 def test_unescape_basic(self): 315 self.assertEqual(unescape("Donald Duck & Co"), "Donald Duck & Co") 316 317 def test_unescape_all(self): 318 self.assertEqual(unescape("<Donald Duck & Co>"), 319 "<Donald Duck & Co>") 320 321 def test_unescape_extra(self): 322 self.assertEqual(unescape("Hei på deg", {"å" : "å"}), 323 "Hei på deg") 324 325 def test_unescape_amp_extra(self): 326 self.assertEqual(unescape("&foo;", {"&foo;": "splat"}), "&foo;") 327 328 # ===== quoteattr 329 def test_quoteattr_basic(self): 330 self.assertEqual(quoteattr("Donald Duck & Co"), 331 '"Donald Duck & Co"') 332 333 def test_single_quoteattr(self): 334 self.assertEqual(quoteattr('Includes "double" quotes'), 335 '\'Includes "double" quotes\'') 336 337 def test_double_quoteattr(self): 338 self.assertEqual(quoteattr("Includes 'single' quotes"), 339 "\"Includes 'single' quotes\"") 340 341 def test_single_double_quoteattr(self): 342 self.assertEqual(quoteattr("Includes 'single' and \"double\" quotes"), 343 "\"Includes 'single' and "double" quotes\"") 344 345 # ===== make_parser 346 def test_make_parser(self): 347 # Creating a parser should succeed - it should fall back 348 # to the expatreader 349 p = make_parser(['xml.parsers.no_such_parser']) 350 351 352class PrepareInputSourceTest(unittest.TestCase): 353 354 def setUp(self): 355 self.file = os_helper.TESTFN 356 with open(self.file, "w") as tmp: 357 tmp.write("This was read from a file.") 358 359 def tearDown(self): 360 os_helper.unlink(self.file) 361 362 def make_byte_stream(self): 363 return BytesIO(b"This is a byte stream.") 364 365 def make_character_stream(self): 366 return StringIO("This is a character stream.") 367 368 def checkContent(self, stream, content): 369 self.assertIsNotNone(stream) 370 self.assertEqual(stream.read(), content) 371 stream.close() 372 373 374 def test_character_stream(self): 375 # If the source is an InputSource with a character stream, use it. 376 src = InputSource(self.file) 377 src.setCharacterStream(self.make_character_stream()) 378 prep = prepare_input_source(src) 379 self.assertIsNone(prep.getByteStream()) 380 self.checkContent(prep.getCharacterStream(), 381 "This is a character stream.") 382 383 def test_byte_stream(self): 384 # If the source is an InputSource that does not have a character 385 # stream but does have a byte stream, use the byte stream. 386 src = InputSource(self.file) 387 src.setByteStream(self.make_byte_stream()) 388 prep = prepare_input_source(src) 389 self.assertIsNone(prep.getCharacterStream()) 390 self.checkContent(prep.getByteStream(), 391 b"This is a byte stream.") 392 393 def test_system_id(self): 394 # If the source is an InputSource that has neither a character 395 # stream nor a byte stream, open the system ID. 396 src = InputSource(self.file) 397 prep = prepare_input_source(src) 398 self.assertIsNone(prep.getCharacterStream()) 399 self.checkContent(prep.getByteStream(), 400 b"This was read from a file.") 401 402 def test_string(self): 403 # If the source is a string, use it as a system ID and open it. 404 prep = prepare_input_source(self.file) 405 self.assertIsNone(prep.getCharacterStream()) 406 self.checkContent(prep.getByteStream(), 407 b"This was read from a file.") 408 409 def test_path_objects(self): 410 # If the source is a Path object, use it as a system ID and open it. 411 prep = prepare_input_source(FakePath(self.file)) 412 self.assertIsNone(prep.getCharacterStream()) 413 self.checkContent(prep.getByteStream(), 414 b"This was read from a file.") 415 416 def test_binary_file(self): 417 # If the source is a binary file-like object, use it as a byte 418 # stream. 419 prep = prepare_input_source(self.make_byte_stream()) 420 self.assertIsNone(prep.getCharacterStream()) 421 self.checkContent(prep.getByteStream(), 422 b"This is a byte stream.") 423 424 def test_text_file(self): 425 # If the source is a text file-like object, use it as a character 426 # stream. 427 prep = prepare_input_source(self.make_character_stream()) 428 self.assertIsNone(prep.getByteStream()) 429 self.checkContent(prep.getCharacterStream(), 430 "This is a character stream.") 431 432 433# ===== XMLGenerator 434 435class XmlgenTest: 436 def test_xmlgen_basic(self): 437 result = self.ioclass() 438 gen = XMLGenerator(result) 439 gen.startDocument() 440 gen.startElement("doc", {}) 441 gen.endElement("doc") 442 gen.endDocument() 443 444 self.assertEqual(result.getvalue(), self.xml("<doc></doc>")) 445 446 def test_xmlgen_basic_empty(self): 447 result = self.ioclass() 448 gen = XMLGenerator(result, short_empty_elements=True) 449 gen.startDocument() 450 gen.startElement("doc", {}) 451 gen.endElement("doc") 452 gen.endDocument() 453 454 self.assertEqual(result.getvalue(), self.xml("<doc/>")) 455 456 def test_xmlgen_content(self): 457 result = self.ioclass() 458 gen = XMLGenerator(result) 459 460 gen.startDocument() 461 gen.startElement("doc", {}) 462 gen.characters("huhei") 463 gen.endElement("doc") 464 gen.endDocument() 465 466 self.assertEqual(result.getvalue(), self.xml("<doc>huhei</doc>")) 467 468 def test_xmlgen_content_empty(self): 469 result = self.ioclass() 470 gen = XMLGenerator(result, short_empty_elements=True) 471 472 gen.startDocument() 473 gen.startElement("doc", {}) 474 gen.characters("huhei") 475 gen.endElement("doc") 476 gen.endDocument() 477 478 self.assertEqual(result.getvalue(), self.xml("<doc>huhei</doc>")) 479 480 def test_xmlgen_pi(self): 481 result = self.ioclass() 482 gen = XMLGenerator(result) 483 484 gen.startDocument() 485 gen.processingInstruction("test", "data") 486 gen.startElement("doc", {}) 487 gen.endElement("doc") 488 gen.endDocument() 489 490 self.assertEqual(result.getvalue(), 491 self.xml("<?test data?><doc></doc>")) 492 493 def test_xmlgen_content_escape(self): 494 result = self.ioclass() 495 gen = XMLGenerator(result) 496 497 gen.startDocument() 498 gen.startElement("doc", {}) 499 gen.characters("<huhei&") 500 gen.endElement("doc") 501 gen.endDocument() 502 503 self.assertEqual(result.getvalue(), 504 self.xml("<doc><huhei&</doc>")) 505 506 def test_xmlgen_attr_escape(self): 507 result = self.ioclass() 508 gen = XMLGenerator(result) 509 510 gen.startDocument() 511 gen.startElement("doc", {"a": '"'}) 512 gen.startElement("e", {"a": "'"}) 513 gen.endElement("e") 514 gen.startElement("e", {"a": "'\""}) 515 gen.endElement("e") 516 gen.startElement("e", {"a": "\n\r\t"}) 517 gen.endElement("e") 518 gen.endElement("doc") 519 gen.endDocument() 520 521 self.assertEqual(result.getvalue(), self.xml( 522 "<doc a='\"'><e a=\"'\"></e>" 523 "<e a=\"'"\"></e>" 524 "<e a=\" 	\"></e></doc>")) 525 526 def test_xmlgen_encoding(self): 527 encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig', 528 'utf-16', 'utf-16be', 'utf-16le', 529 'utf-32', 'utf-32be', 'utf-32le') 530 for encoding in encodings: 531 result = self.ioclass() 532 gen = XMLGenerator(result, encoding=encoding) 533 534 gen.startDocument() 535 gen.startElement("doc", {"a": '\u20ac'}) 536 gen.characters("\u20ac") 537 gen.endElement("doc") 538 gen.endDocument() 539 540 self.assertEqual(result.getvalue(), 541 self.xml('<doc a="\u20ac">\u20ac</doc>', encoding=encoding)) 542 543 def test_xmlgen_unencodable(self): 544 result = self.ioclass() 545 gen = XMLGenerator(result, encoding='ascii') 546 547 gen.startDocument() 548 gen.startElement("doc", {"a": '\u20ac'}) 549 gen.characters("\u20ac") 550 gen.endElement("doc") 551 gen.endDocument() 552 553 self.assertEqual(result.getvalue(), 554 self.xml('<doc a="€">€</doc>', encoding='ascii')) 555 556 def test_xmlgen_ignorable(self): 557 result = self.ioclass() 558 gen = XMLGenerator(result) 559 560 gen.startDocument() 561 gen.startElement("doc", {}) 562 gen.ignorableWhitespace(" ") 563 gen.endElement("doc") 564 gen.endDocument() 565 566 self.assertEqual(result.getvalue(), self.xml("<doc> </doc>")) 567 568 def test_xmlgen_ignorable_empty(self): 569 result = self.ioclass() 570 gen = XMLGenerator(result, short_empty_elements=True) 571 572 gen.startDocument() 573 gen.startElement("doc", {}) 574 gen.ignorableWhitespace(" ") 575 gen.endElement("doc") 576 gen.endDocument() 577 578 self.assertEqual(result.getvalue(), self.xml("<doc> </doc>")) 579 580 def test_xmlgen_encoding_bytes(self): 581 encodings = ('iso-8859-15', 'utf-8', 'utf-8-sig', 582 'utf-16', 'utf-16be', 'utf-16le', 583 'utf-32', 'utf-32be', 'utf-32le') 584 for encoding in encodings: 585 result = self.ioclass() 586 gen = XMLGenerator(result, encoding=encoding) 587 588 gen.startDocument() 589 gen.startElement("doc", {"a": '\u20ac'}) 590 gen.characters("\u20ac".encode(encoding)) 591 gen.ignorableWhitespace(" ".encode(encoding)) 592 gen.endElement("doc") 593 gen.endDocument() 594 595 self.assertEqual(result.getvalue(), 596 self.xml('<doc a="\u20ac">\u20ac </doc>', encoding=encoding)) 597 598 def test_xmlgen_ns(self): 599 result = self.ioclass() 600 gen = XMLGenerator(result) 601 602 gen.startDocument() 603 gen.startPrefixMapping("ns1", ns_uri) 604 gen.startElementNS((ns_uri, "doc"), "ns1:doc", {}) 605 # add an unqualified name 606 gen.startElementNS((None, "udoc"), None, {}) 607 gen.endElementNS((None, "udoc"), None) 608 gen.endElementNS((ns_uri, "doc"), "ns1:doc") 609 gen.endPrefixMapping("ns1") 610 gen.endDocument() 611 612 self.assertEqual(result.getvalue(), self.xml( 613 '<ns1:doc xmlns:ns1="%s"><udoc></udoc></ns1:doc>' % 614 ns_uri)) 615 616 def test_xmlgen_ns_empty(self): 617 result = self.ioclass() 618 gen = XMLGenerator(result, short_empty_elements=True) 619 620 gen.startDocument() 621 gen.startPrefixMapping("ns1", ns_uri) 622 gen.startElementNS((ns_uri, "doc"), "ns1:doc", {}) 623 # add an unqualified name 624 gen.startElementNS((None, "udoc"), None, {}) 625 gen.endElementNS((None, "udoc"), None) 626 gen.endElementNS((ns_uri, "doc"), "ns1:doc") 627 gen.endPrefixMapping("ns1") 628 gen.endDocument() 629 630 self.assertEqual(result.getvalue(), self.xml( 631 '<ns1:doc xmlns:ns1="%s"><udoc/></ns1:doc>' % 632 ns_uri)) 633 634 def test_1463026_1(self): 635 result = self.ioclass() 636 gen = XMLGenerator(result) 637 638 gen.startDocument() 639 gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'}) 640 gen.endElementNS((None, 'a'), 'a') 641 gen.endDocument() 642 643 self.assertEqual(result.getvalue(), self.xml('<a b="c"></a>')) 644 645 def test_1463026_1_empty(self): 646 result = self.ioclass() 647 gen = XMLGenerator(result, short_empty_elements=True) 648 649 gen.startDocument() 650 gen.startElementNS((None, 'a'), 'a', {(None, 'b'):'c'}) 651 gen.endElementNS((None, 'a'), 'a') 652 gen.endDocument() 653 654 self.assertEqual(result.getvalue(), self.xml('<a b="c"/>')) 655 656 def test_1463026_2(self): 657 result = self.ioclass() 658 gen = XMLGenerator(result) 659 660 gen.startDocument() 661 gen.startPrefixMapping(None, 'qux') 662 gen.startElementNS(('qux', 'a'), 'a', {}) 663 gen.endElementNS(('qux', 'a'), 'a') 664 gen.endPrefixMapping(None) 665 gen.endDocument() 666 667 self.assertEqual(result.getvalue(), self.xml('<a xmlns="qux"></a>')) 668 669 def test_1463026_2_empty(self): 670 result = self.ioclass() 671 gen = XMLGenerator(result, short_empty_elements=True) 672 673 gen.startDocument() 674 gen.startPrefixMapping(None, 'qux') 675 gen.startElementNS(('qux', 'a'), 'a', {}) 676 gen.endElementNS(('qux', 'a'), 'a') 677 gen.endPrefixMapping(None) 678 gen.endDocument() 679 680 self.assertEqual(result.getvalue(), self.xml('<a xmlns="qux"/>')) 681 682 def test_1463026_3(self): 683 result = self.ioclass() 684 gen = XMLGenerator(result) 685 686 gen.startDocument() 687 gen.startPrefixMapping('my', 'qux') 688 gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'}) 689 gen.endElementNS(('qux', 'a'), 'a') 690 gen.endPrefixMapping('my') 691 gen.endDocument() 692 693 self.assertEqual(result.getvalue(), 694 self.xml('<my:a xmlns:my="qux" b="c"></my:a>')) 695 696 def test_1463026_3_empty(self): 697 result = self.ioclass() 698 gen = XMLGenerator(result, short_empty_elements=True) 699 700 gen.startDocument() 701 gen.startPrefixMapping('my', 'qux') 702 gen.startElementNS(('qux', 'a'), 'a', {(None, 'b'):'c'}) 703 gen.endElementNS(('qux', 'a'), 'a') 704 gen.endPrefixMapping('my') 705 gen.endDocument() 706 707 self.assertEqual(result.getvalue(), 708 self.xml('<my:a xmlns:my="qux" b="c"/>')) 709 710 def test_5027_1(self): 711 # The xml prefix (as in xml:lang below) is reserved and bound by 712 # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had 713 # a bug whereby a KeyError is raised because this namespace is missing 714 # from a dictionary. 715 # 716 # This test demonstrates the bug by parsing a document. 717 test_xml = StringIO( 718 '<?xml version="1.0"?>' 719 '<a:g1 xmlns:a="http://example.com/ns">' 720 '<a:g2 xml:lang="en">Hello</a:g2>' 721 '</a:g1>') 722 723 parser = make_parser() 724 parser.setFeature(feature_namespaces, True) 725 result = self.ioclass() 726 gen = XMLGenerator(result) 727 parser.setContentHandler(gen) 728 parser.parse(test_xml) 729 730 self.assertEqual(result.getvalue(), 731 self.xml( 732 '<a:g1 xmlns:a="http://example.com/ns">' 733 '<a:g2 xml:lang="en">Hello</a:g2>' 734 '</a:g1>')) 735 736 def test_5027_2(self): 737 # The xml prefix (as in xml:lang below) is reserved and bound by 738 # definition to http://www.w3.org/XML/1998/namespace. XMLGenerator had 739 # a bug whereby a KeyError is raised because this namespace is missing 740 # from a dictionary. 741 # 742 # This test demonstrates the bug by direct manipulation of the 743 # XMLGenerator. 744 result = self.ioclass() 745 gen = XMLGenerator(result) 746 747 gen.startDocument() 748 gen.startPrefixMapping('a', 'http://example.com/ns') 749 gen.startElementNS(('http://example.com/ns', 'g1'), 'g1', {}) 750 lang_attr = {('http://www.w3.org/XML/1998/namespace', 'lang'): 'en'} 751 gen.startElementNS(('http://example.com/ns', 'g2'), 'g2', lang_attr) 752 gen.characters('Hello') 753 gen.endElementNS(('http://example.com/ns', 'g2'), 'g2') 754 gen.endElementNS(('http://example.com/ns', 'g1'), 'g1') 755 gen.endPrefixMapping('a') 756 gen.endDocument() 757 758 self.assertEqual(result.getvalue(), 759 self.xml( 760 '<a:g1 xmlns:a="http://example.com/ns">' 761 '<a:g2 xml:lang="en">Hello</a:g2>' 762 '</a:g1>')) 763 764 def test_no_close_file(self): 765 result = self.ioclass() 766 def func(out): 767 gen = XMLGenerator(out) 768 gen.startDocument() 769 gen.startElement("doc", {}) 770 func(result) 771 self.assertFalse(result.closed) 772 773 def test_xmlgen_fragment(self): 774 result = self.ioclass() 775 gen = XMLGenerator(result) 776 777 # Don't call gen.startDocument() 778 gen.startElement("foo", {"a": "1.0"}) 779 gen.characters("Hello") 780 gen.endElement("foo") 781 gen.startElement("bar", {"b": "2.0"}) 782 gen.endElement("bar") 783 # Don't call gen.endDocument() 784 785 self.assertEqual(result.getvalue(), 786 self.xml('<foo a="1.0">Hello</foo><bar b="2.0"></bar>')[len(self.xml('')):]) 787 788class StringXmlgenTest(XmlgenTest, unittest.TestCase): 789 ioclass = StringIO 790 791 def xml(self, doc, encoding='iso-8859-1'): 792 return '<?xml version="1.0" encoding="%s"?>\n%s' % (encoding, doc) 793 794 test_xmlgen_unencodable = None 795 796class BytesXmlgenTest(XmlgenTest, unittest.TestCase): 797 ioclass = BytesIO 798 799 def xml(self, doc, encoding='iso-8859-1'): 800 return ('<?xml version="1.0" encoding="%s"?>\n%s' % 801 (encoding, doc)).encode(encoding, 'xmlcharrefreplace') 802 803class WriterXmlgenTest(BytesXmlgenTest): 804 class ioclass(list): 805 write = list.append 806 closed = False 807 808 def seekable(self): 809 return True 810 811 def tell(self): 812 # return 0 at start and not 0 after start 813 return len(self) 814 815 def getvalue(self): 816 return b''.join(self) 817 818class StreamWriterXmlgenTest(XmlgenTest, unittest.TestCase): 819 def ioclass(self): 820 raw = BytesIO() 821 writer = codecs.getwriter('ascii')(raw, 'xmlcharrefreplace') 822 writer.getvalue = raw.getvalue 823 return writer 824 825 def xml(self, doc, encoding='iso-8859-1'): 826 return ('<?xml version="1.0" encoding="%s"?>\n%s' % 827 (encoding, doc)).encode('ascii', 'xmlcharrefreplace') 828 829class StreamReaderWriterXmlgenTest(XmlgenTest, unittest.TestCase): 830 fname = os_helper.TESTFN + '-codecs' 831 832 def ioclass(self): 833 writer = codecs.open(self.fname, 'w', encoding='ascii', 834 errors='xmlcharrefreplace', buffering=0) 835 def cleanup(): 836 writer.close() 837 os_helper.unlink(self.fname) 838 self.addCleanup(cleanup) 839 def getvalue(): 840 # Windows will not let use reopen without first closing 841 writer.close() 842 with open(writer.name, 'rb') as f: 843 return f.read() 844 writer.getvalue = getvalue 845 return writer 846 847 def xml(self, doc, encoding='iso-8859-1'): 848 return ('<?xml version="1.0" encoding="%s"?>\n%s' % 849 (encoding, doc)).encode('ascii', 'xmlcharrefreplace') 850 851start = b'<?xml version="1.0" encoding="iso-8859-1"?>\n' 852 853 854class XMLFilterBaseTest(unittest.TestCase): 855 def test_filter_basic(self): 856 result = BytesIO() 857 gen = XMLGenerator(result) 858 filter = XMLFilterBase() 859 filter.setContentHandler(gen) 860 861 filter.startDocument() 862 filter.startElement("doc", {}) 863 filter.characters("content") 864 filter.ignorableWhitespace(" ") 865 filter.endElement("doc") 866 filter.endDocument() 867 868 self.assertEqual(result.getvalue(), start + b"<doc>content </doc>") 869 870# =========================================================================== 871# 872# expatreader tests 873# 874# =========================================================================== 875 876with open(TEST_XMLFILE_OUT, 'rb') as f: 877 xml_test_out = f.read() 878 879class ExpatReaderTest(XmlTestBase): 880 881 # ===== XMLReader support 882 883 def test_expat_binary_file(self): 884 parser = create_parser() 885 result = BytesIO() 886 xmlgen = XMLGenerator(result) 887 888 parser.setContentHandler(xmlgen) 889 with open(TEST_XMLFILE, 'rb') as f: 890 parser.parse(f) 891 892 self.assertEqual(result.getvalue(), xml_test_out) 893 894 def test_expat_text_file(self): 895 parser = create_parser() 896 result = BytesIO() 897 xmlgen = XMLGenerator(result) 898 899 parser.setContentHandler(xmlgen) 900 with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f: 901 parser.parse(f) 902 903 self.assertEqual(result.getvalue(), xml_test_out) 904 905 @requires_nonascii_filenames 906 def test_expat_binary_file_nonascii(self): 907 fname = os_helper.TESTFN_UNICODE 908 shutil.copyfile(TEST_XMLFILE, fname) 909 self.addCleanup(os_helper.unlink, fname) 910 911 parser = create_parser() 912 result = BytesIO() 913 xmlgen = XMLGenerator(result) 914 915 parser.setContentHandler(xmlgen) 916 parser.parse(open(fname, 'rb')) 917 918 self.assertEqual(result.getvalue(), xml_test_out) 919 920 def test_expat_binary_file_bytes_name(self): 921 fname = os.fsencode(TEST_XMLFILE) 922 parser = create_parser() 923 result = BytesIO() 924 xmlgen = XMLGenerator(result) 925 926 parser.setContentHandler(xmlgen) 927 with open(fname, 'rb') as f: 928 parser.parse(f) 929 930 self.assertEqual(result.getvalue(), xml_test_out) 931 932 def test_expat_binary_file_int_name(self): 933 parser = create_parser() 934 result = BytesIO() 935 xmlgen = XMLGenerator(result) 936 937 parser.setContentHandler(xmlgen) 938 with open(TEST_XMLFILE, 'rb') as f: 939 with open(f.fileno(), 'rb', closefd=False) as f2: 940 parser.parse(f2) 941 942 self.assertEqual(result.getvalue(), xml_test_out) 943 944 # ===== DTDHandler support 945 946 class TestDTDHandler: 947 948 def __init__(self): 949 self._notations = [] 950 self._entities = [] 951 952 def notationDecl(self, name, publicId, systemId): 953 self._notations.append((name, publicId, systemId)) 954 955 def unparsedEntityDecl(self, name, publicId, systemId, ndata): 956 self._entities.append((name, publicId, systemId, ndata)) 957 958 959 class TestEntityRecorder: 960 def __init__(self): 961 self.entities = [] 962 963 def resolveEntity(self, publicId, systemId): 964 self.entities.append((publicId, systemId)) 965 source = InputSource() 966 source.setPublicId(publicId) 967 source.setSystemId(systemId) 968 return source 969 970 def test_expat_dtdhandler(self): 971 parser = create_parser() 972 handler = self.TestDTDHandler() 973 parser.setDTDHandler(handler) 974 975 parser.feed('<!DOCTYPE doc [\n') 976 parser.feed(' <!ENTITY img SYSTEM "expat.gif" NDATA GIF>\n') 977 parser.feed(' <!NOTATION GIF PUBLIC "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN">\n') 978 parser.feed(']>\n') 979 parser.feed('<doc></doc>') 980 parser.close() 981 982 self.assertEqual(handler._notations, 983 [("GIF", "-//CompuServe//NOTATION Graphics Interchange Format 89a//EN", None)]) 984 self.assertEqual(handler._entities, [("img", None, "expat.gif", "GIF")]) 985 986 def test_expat_external_dtd_enabled(self): 987 # clear _opener global variable 988 self.addCleanup(urllib.request.urlcleanup) 989 990 parser = create_parser() 991 parser.setFeature(feature_external_ges, True) 992 resolver = self.TestEntityRecorder() 993 parser.setEntityResolver(resolver) 994 995 with self.assertRaises(URLError): 996 parser.feed( 997 '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n' 998 ) 999 self.assertEqual( 1000 resolver.entities, [(None, 'unsupported://non-existing')] 1001 ) 1002 1003 def test_expat_external_dtd_default(self): 1004 parser = create_parser() 1005 resolver = self.TestEntityRecorder() 1006 parser.setEntityResolver(resolver) 1007 1008 parser.feed( 1009 '<!DOCTYPE external SYSTEM "unsupported://non-existing">\n' 1010 ) 1011 parser.feed('<doc />') 1012 parser.close() 1013 self.assertEqual(resolver.entities, []) 1014 1015 # ===== EntityResolver support 1016 1017 class TestEntityResolver: 1018 1019 def resolveEntity(self, publicId, systemId): 1020 inpsrc = InputSource() 1021 inpsrc.setByteStream(BytesIO(b"<entity/>")) 1022 return inpsrc 1023 1024 def test_expat_entityresolver_enabled(self): 1025 parser = create_parser() 1026 parser.setFeature(feature_external_ges, True) 1027 parser.setEntityResolver(self.TestEntityResolver()) 1028 result = BytesIO() 1029 parser.setContentHandler(XMLGenerator(result)) 1030 1031 parser.feed('<!DOCTYPE doc [\n') 1032 parser.feed(' <!ENTITY test SYSTEM "whatever">\n') 1033 parser.feed(']>\n') 1034 parser.feed('<doc>&test;</doc>') 1035 parser.close() 1036 1037 self.assertEqual(result.getvalue(), start + 1038 b"<doc><entity></entity></doc>") 1039 1040 def test_expat_entityresolver_default(self): 1041 parser = create_parser() 1042 self.assertEqual(parser.getFeature(feature_external_ges), False) 1043 parser.setEntityResolver(self.TestEntityResolver()) 1044 result = BytesIO() 1045 parser.setContentHandler(XMLGenerator(result)) 1046 1047 parser.feed('<!DOCTYPE doc [\n') 1048 parser.feed(' <!ENTITY test SYSTEM "whatever">\n') 1049 parser.feed(']>\n') 1050 parser.feed('<doc>&test;</doc>') 1051 parser.close() 1052 1053 self.assertEqual(result.getvalue(), start + 1054 b"<doc></doc>") 1055 1056 # ===== Attributes support 1057 1058 class AttrGatherer(ContentHandler): 1059 1060 def startElement(self, name, attrs): 1061 self._attrs = attrs 1062 1063 def startElementNS(self, name, qname, attrs): 1064 self._attrs = attrs 1065 1066 def test_expat_attrs_empty(self): 1067 parser = create_parser() 1068 gather = self.AttrGatherer() 1069 parser.setContentHandler(gather) 1070 1071 parser.feed("<doc/>") 1072 parser.close() 1073 1074 self.verify_empty_attrs(gather._attrs) 1075 1076 def test_expat_attrs_wattr(self): 1077 parser = create_parser() 1078 gather = self.AttrGatherer() 1079 parser.setContentHandler(gather) 1080 1081 parser.feed("<doc attr='val'/>") 1082 parser.close() 1083 1084 self.verify_attrs_wattr(gather._attrs) 1085 1086 def test_expat_nsattrs_empty(self): 1087 parser = create_parser(1) 1088 gather = self.AttrGatherer() 1089 parser.setContentHandler(gather) 1090 1091 parser.feed("<doc/>") 1092 parser.close() 1093 1094 self.verify_empty_nsattrs(gather._attrs) 1095 1096 def test_expat_nsattrs_wattr(self): 1097 parser = create_parser(1) 1098 gather = self.AttrGatherer() 1099 parser.setContentHandler(gather) 1100 1101 parser.feed("<doc xmlns:ns='%s' ns:attr='val'/>" % ns_uri) 1102 parser.close() 1103 1104 attrs = gather._attrs 1105 1106 self.assertEqual(attrs.getLength(), 1) 1107 self.assertEqual(attrs.getNames(), [(ns_uri, "attr")]) 1108 self.assertTrue((attrs.getQNames() == [] or 1109 attrs.getQNames() == ["ns:attr"])) 1110 self.assertEqual(len(attrs), 1) 1111 self.assertIn((ns_uri, "attr"), attrs) 1112 self.assertEqual(attrs.get((ns_uri, "attr")), "val") 1113 self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val") 1114 self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")]) 1115 self.assertEqual(list(attrs.values()), ["val"]) 1116 self.assertEqual(attrs.getValue((ns_uri, "attr")), "val") 1117 self.assertEqual(attrs[(ns_uri, "attr")], "val") 1118 1119 # ===== InputSource support 1120 1121 def test_expat_inpsource_filename(self): 1122 parser = create_parser() 1123 result = BytesIO() 1124 xmlgen = XMLGenerator(result) 1125 1126 parser.setContentHandler(xmlgen) 1127 parser.parse(TEST_XMLFILE) 1128 1129 self.assertEqual(result.getvalue(), xml_test_out) 1130 1131 def test_expat_inpsource_sysid(self): 1132 parser = create_parser() 1133 result = BytesIO() 1134 xmlgen = XMLGenerator(result) 1135 1136 parser.setContentHandler(xmlgen) 1137 parser.parse(InputSource(TEST_XMLFILE)) 1138 1139 self.assertEqual(result.getvalue(), xml_test_out) 1140 1141 @requires_nonascii_filenames 1142 def test_expat_inpsource_sysid_nonascii(self): 1143 fname = os_helper.TESTFN_UNICODE 1144 shutil.copyfile(TEST_XMLFILE, fname) 1145 self.addCleanup(os_helper.unlink, fname) 1146 1147 parser = create_parser() 1148 result = BytesIO() 1149 xmlgen = XMLGenerator(result) 1150 1151 parser.setContentHandler(xmlgen) 1152 parser.parse(InputSource(fname)) 1153 1154 self.assertEqual(result.getvalue(), xml_test_out) 1155 1156 def test_expat_inpsource_byte_stream(self): 1157 parser = create_parser() 1158 result = BytesIO() 1159 xmlgen = XMLGenerator(result) 1160 1161 parser.setContentHandler(xmlgen) 1162 inpsrc = InputSource() 1163 with open(TEST_XMLFILE, 'rb') as f: 1164 inpsrc.setByteStream(f) 1165 parser.parse(inpsrc) 1166 1167 self.assertEqual(result.getvalue(), xml_test_out) 1168 1169 def test_expat_inpsource_character_stream(self): 1170 parser = create_parser() 1171 result = BytesIO() 1172 xmlgen = XMLGenerator(result) 1173 1174 parser.setContentHandler(xmlgen) 1175 inpsrc = InputSource() 1176 with open(TEST_XMLFILE, 'rt', encoding='iso-8859-1') as f: 1177 inpsrc.setCharacterStream(f) 1178 parser.parse(inpsrc) 1179 1180 self.assertEqual(result.getvalue(), xml_test_out) 1181 1182 # ===== IncrementalParser support 1183 1184 def test_expat_incremental(self): 1185 result = BytesIO() 1186 xmlgen = XMLGenerator(result) 1187 parser = create_parser() 1188 parser.setContentHandler(xmlgen) 1189 1190 parser.feed("<doc>") 1191 parser.feed("</doc>") 1192 parser.close() 1193 1194 self.assertEqual(result.getvalue(), start + b"<doc></doc>") 1195 1196 def test_expat_incremental_reset(self): 1197 result = BytesIO() 1198 xmlgen = XMLGenerator(result) 1199 parser = create_parser() 1200 parser.setContentHandler(xmlgen) 1201 1202 parser.feed("<doc>") 1203 parser.feed("text") 1204 1205 result = BytesIO() 1206 xmlgen = XMLGenerator(result) 1207 parser.setContentHandler(xmlgen) 1208 parser.reset() 1209 1210 parser.feed("<doc>") 1211 parser.feed("text") 1212 parser.feed("</doc>") 1213 parser.close() 1214 1215 self.assertEqual(result.getvalue(), start + b"<doc>text</doc>") 1216 1217 # ===== Locator support 1218 1219 def test_expat_locator_noinfo(self): 1220 result = BytesIO() 1221 xmlgen = XMLGenerator(result) 1222 parser = create_parser() 1223 parser.setContentHandler(xmlgen) 1224 1225 parser.feed("<doc>") 1226 parser.feed("</doc>") 1227 parser.close() 1228 1229 self.assertEqual(parser.getSystemId(), None) 1230 self.assertEqual(parser.getPublicId(), None) 1231 self.assertEqual(parser.getLineNumber(), 1) 1232 1233 def test_expat_locator_withinfo(self): 1234 result = BytesIO() 1235 xmlgen = XMLGenerator(result) 1236 parser = create_parser() 1237 parser.setContentHandler(xmlgen) 1238 parser.parse(TEST_XMLFILE) 1239 1240 self.assertEqual(parser.getSystemId(), TEST_XMLFILE) 1241 self.assertEqual(parser.getPublicId(), None) 1242 1243 @requires_nonascii_filenames 1244 def test_expat_locator_withinfo_nonascii(self): 1245 fname = os_helper.TESTFN_UNICODE 1246 shutil.copyfile(TEST_XMLFILE, fname) 1247 self.addCleanup(os_helper.unlink, fname) 1248 1249 result = BytesIO() 1250 xmlgen = XMLGenerator(result) 1251 parser = create_parser() 1252 parser.setContentHandler(xmlgen) 1253 parser.parse(fname) 1254 1255 self.assertEqual(parser.getSystemId(), fname) 1256 self.assertEqual(parser.getPublicId(), None) 1257 1258 1259# =========================================================================== 1260# 1261# error reporting 1262# 1263# =========================================================================== 1264 1265class ErrorReportingTest(unittest.TestCase): 1266 def test_expat_inpsource_location(self): 1267 parser = create_parser() 1268 parser.setContentHandler(ContentHandler()) # do nothing 1269 source = InputSource() 1270 source.setByteStream(BytesIO(b"<foo bar foobar>")) #ill-formed 1271 name = "a file name" 1272 source.setSystemId(name) 1273 try: 1274 parser.parse(source) 1275 self.fail() 1276 except SAXException as e: 1277 self.assertEqual(e.getSystemId(), name) 1278 1279 def test_expat_incomplete(self): 1280 parser = create_parser() 1281 parser.setContentHandler(ContentHandler()) # do nothing 1282 self.assertRaises(SAXParseException, parser.parse, StringIO("<foo>")) 1283 self.assertEqual(parser.getColumnNumber(), 5) 1284 self.assertEqual(parser.getLineNumber(), 1) 1285 1286 def test_sax_parse_exception_str(self): 1287 # pass various values from a locator to the SAXParseException to 1288 # make sure that the __str__() doesn't fall apart when None is 1289 # passed instead of an integer line and column number 1290 # 1291 # use "normal" values for the locator: 1292 str(SAXParseException("message", None, 1293 self.DummyLocator(1, 1))) 1294 # use None for the line number: 1295 str(SAXParseException("message", None, 1296 self.DummyLocator(None, 1))) 1297 # use None for the column number: 1298 str(SAXParseException("message", None, 1299 self.DummyLocator(1, None))) 1300 # use None for both: 1301 str(SAXParseException("message", None, 1302 self.DummyLocator(None, None))) 1303 1304 class DummyLocator: 1305 def __init__(self, lineno, colno): 1306 self._lineno = lineno 1307 self._colno = colno 1308 1309 def getPublicId(self): 1310 return "pubid" 1311 1312 def getSystemId(self): 1313 return "sysid" 1314 1315 def getLineNumber(self): 1316 return self._lineno 1317 1318 def getColumnNumber(self): 1319 return self._colno 1320 1321# =========================================================================== 1322# 1323# xmlreader tests 1324# 1325# =========================================================================== 1326 1327class XmlReaderTest(XmlTestBase): 1328 1329 # ===== AttributesImpl 1330 def test_attrs_empty(self): 1331 self.verify_empty_attrs(AttributesImpl({})) 1332 1333 def test_attrs_wattr(self): 1334 self.verify_attrs_wattr(AttributesImpl({"attr" : "val"})) 1335 1336 def test_nsattrs_empty(self): 1337 self.verify_empty_nsattrs(AttributesNSImpl({}, {})) 1338 1339 def test_nsattrs_wattr(self): 1340 attrs = AttributesNSImpl({(ns_uri, "attr") : "val"}, 1341 {(ns_uri, "attr") : "ns:attr"}) 1342 1343 self.assertEqual(attrs.getLength(), 1) 1344 self.assertEqual(attrs.getNames(), [(ns_uri, "attr")]) 1345 self.assertEqual(attrs.getQNames(), ["ns:attr"]) 1346 self.assertEqual(len(attrs), 1) 1347 self.assertIn((ns_uri, "attr"), attrs) 1348 self.assertEqual(list(attrs.keys()), [(ns_uri, "attr")]) 1349 self.assertEqual(attrs.get((ns_uri, "attr")), "val") 1350 self.assertEqual(attrs.get((ns_uri, "attr"), 25), "val") 1351 self.assertEqual(list(attrs.items()), [((ns_uri, "attr"), "val")]) 1352 self.assertEqual(list(attrs.values()), ["val"]) 1353 self.assertEqual(attrs.getValue((ns_uri, "attr")), "val") 1354 self.assertEqual(attrs.getValueByQName("ns:attr"), "val") 1355 self.assertEqual(attrs.getNameByQName("ns:attr"), (ns_uri, "attr")) 1356 self.assertEqual(attrs[(ns_uri, "attr")], "val") 1357 self.assertEqual(attrs.getQNameByName((ns_uri, "attr")), "ns:attr") 1358 1359 1360class LexicalHandlerTest(unittest.TestCase): 1361 def setUp(self): 1362 self.parser = None 1363 1364 self.specified_version = '1.0' 1365 self.specified_encoding = 'UTF-8' 1366 self.specified_doctype = 'wish' 1367 self.specified_entity_names = ('nbsp', 'source', 'target') 1368 self.specified_comment = ('Comment in a DTD', 1369 'Really! You think so?') 1370 self.test_data = StringIO() 1371 self.test_data.write('<?xml version="{}" encoding="{}"?>\n'. 1372 format(self.specified_version, 1373 self.specified_encoding)) 1374 self.test_data.write('<!DOCTYPE {} [\n'. 1375 format(self.specified_doctype)) 1376 self.test_data.write('<!-- {} -->\n'. 1377 format(self.specified_comment[0])) 1378 self.test_data.write('<!ELEMENT {} (to,from,heading,body,footer)>\n'. 1379 format(self.specified_doctype)) 1380 self.test_data.write('<!ELEMENT to (#PCDATA)>\n') 1381 self.test_data.write('<!ELEMENT from (#PCDATA)>\n') 1382 self.test_data.write('<!ELEMENT heading (#PCDATA)>\n') 1383 self.test_data.write('<!ELEMENT body (#PCDATA)>\n') 1384 self.test_data.write('<!ELEMENT footer (#PCDATA)>\n') 1385 self.test_data.write('<!ENTITY {} " ">\n'. 1386 format(self.specified_entity_names[0])) 1387 self.test_data.write('<!ENTITY {} "Written by: Alexander.">\n'. 1388 format(self.specified_entity_names[1])) 1389 self.test_data.write('<!ENTITY {} "Hope it gets to: Aristotle.">\n'. 1390 format(self.specified_entity_names[2])) 1391 self.test_data.write(']>\n') 1392 self.test_data.write('<{}>'.format(self.specified_doctype)) 1393 self.test_data.write('<to>Aristotle</to>\n') 1394 self.test_data.write('<from>Alexander</from>\n') 1395 self.test_data.write('<heading>Supplication</heading>\n') 1396 self.test_data.write('<body>Teach me patience!</body>\n') 1397 self.test_data.write('<footer>&{};&{};&{};</footer>\n'. 1398 format(self.specified_entity_names[1], 1399 self.specified_entity_names[0], 1400 self.specified_entity_names[2])) 1401 self.test_data.write('<!-- {} -->\n'.format(self.specified_comment[1])) 1402 self.test_data.write('</{}>\n'.format(self.specified_doctype)) 1403 self.test_data.seek(0) 1404 1405 # Data received from handlers - to be validated 1406 self.version = None 1407 self.encoding = None 1408 self.standalone = None 1409 self.doctype = None 1410 self.publicID = None 1411 self.systemID = None 1412 self.end_of_dtd = False 1413 self.comments = [] 1414 1415 def test_handlers(self): 1416 class TestLexicalHandler(LexicalHandler): 1417 def __init__(self, test_harness, *args, **kwargs): 1418 super().__init__(*args, **kwargs) 1419 self.test_harness = test_harness 1420 1421 def startDTD(self, doctype, publicID, systemID): 1422 self.test_harness.doctype = doctype 1423 self.test_harness.publicID = publicID 1424 self.test_harness.systemID = systemID 1425 1426 def endDTD(self): 1427 self.test_harness.end_of_dtd = True 1428 1429 def comment(self, text): 1430 self.test_harness.comments.append(text) 1431 1432 self.parser = create_parser() 1433 self.parser.setContentHandler(ContentHandler()) 1434 self.parser.setProperty( 1435 'http://xml.org/sax/properties/lexical-handler', 1436 TestLexicalHandler(self)) 1437 source = InputSource() 1438 source.setCharacterStream(self.test_data) 1439 self.parser.parse(source) 1440 self.assertEqual(self.doctype, self.specified_doctype) 1441 self.assertIsNone(self.publicID) 1442 self.assertIsNone(self.systemID) 1443 self.assertTrue(self.end_of_dtd) 1444 self.assertEqual(len(self.comments), 1445 len(self.specified_comment)) 1446 self.assertEqual(f' {self.specified_comment[0]} ', self.comments[0]) 1447 1448 1449class CDATAHandlerTest(unittest.TestCase): 1450 def setUp(self): 1451 self.parser = None 1452 self.specified_chars = [] 1453 self.specified_chars.append(('Parseable character data', False)) 1454 self.specified_chars.append(('<> &% - assorted other XML junk.', True)) 1455 self.char_index = 0 # Used to index specified results within handlers 1456 self.test_data = StringIO() 1457 self.test_data.write('<root_doc>\n') 1458 self.test_data.write('<some_pcdata>\n') 1459 self.test_data.write(f'{self.specified_chars[0][0]}\n') 1460 self.test_data.write('</some_pcdata>\n') 1461 self.test_data.write('<some_cdata>\n') 1462 self.test_data.write(f'<![CDATA[{self.specified_chars[1][0]}]]>\n') 1463 self.test_data.write('</some_cdata>\n') 1464 self.test_data.write('</root_doc>\n') 1465 self.test_data.seek(0) 1466 1467 # Data received from handlers - to be validated 1468 self.chardata = [] 1469 self.in_cdata = False 1470 1471 def test_handlers(self): 1472 class TestLexicalHandler(LexicalHandler): 1473 def __init__(self, test_harness, *args, **kwargs): 1474 super().__init__(*args, **kwargs) 1475 self.test_harness = test_harness 1476 1477 def startCDATA(self): 1478 self.test_harness.in_cdata = True 1479 1480 def endCDATA(self): 1481 self.test_harness.in_cdata = False 1482 1483 class TestCharHandler(ContentHandler): 1484 def __init__(self, test_harness, *args, **kwargs): 1485 super().__init__(*args, **kwargs) 1486 self.test_harness = test_harness 1487 1488 def characters(self, content): 1489 if content != '\n': 1490 h = self.test_harness 1491 t = h.specified_chars[h.char_index] 1492 h.assertEqual(t[0], content) 1493 h.assertEqual(t[1], h.in_cdata) 1494 h.char_index += 1 1495 1496 self.parser = create_parser() 1497 self.parser.setContentHandler(TestCharHandler(self)) 1498 self.parser.setProperty( 1499 'http://xml.org/sax/properties/lexical-handler', 1500 TestLexicalHandler(self)) 1501 source = InputSource() 1502 source.setCharacterStream(self.test_data) 1503 self.parser.parse(source) 1504 1505 self.assertFalse(self.in_cdata) 1506 self.assertEqual(self.char_index, 2) 1507 1508 1509if __name__ == "__main__": 1510 unittest.main() 1511