1import io 2import unittest 3import xml.sax 4 5from xml.sax.xmlreader import AttributesImpl 6from xml.sax.handler import feature_external_ges 7from xml.dom import pulldom 8 9from test.support import findfile 10 11 12tstfile = findfile("test.xml", subdir="xmltestdata") 13 14# A handy XML snippet, containing attributes, a namespace prefix, and a 15# self-closing tag: 16SMALL_SAMPLE = """<?xml version="1.0"?> 17<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books"> 18<!-- A comment --> 19<title>Introduction to XSL</title> 20<hr/> 21<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p> 22</html>""" 23 24 25class PullDOMTestCase(unittest.TestCase): 26 27 def test_parse(self): 28 """Minimal test of DOMEventStream.parse()""" 29 30 # This just tests that parsing from a stream works. Actual parser 31 # semantics are tested using parseString with a more focused XML 32 # fragment. 33 34 # Test with a filename: 35 handler = pulldom.parse(tstfile) 36 self.addCleanup(handler.stream.close) 37 list(handler) 38 39 # Test with a file object: 40 with open(tstfile, "rb") as fin: 41 list(pulldom.parse(fin)) 42 43 def test_parse_semantics(self): 44 """Test DOMEventStream parsing semantics.""" 45 46 items = pulldom.parseString(SMALL_SAMPLE) 47 evt, node = next(items) 48 # Just check the node is a Document: 49 self.assertTrue(hasattr(node, "createElement")) 50 self.assertEqual(pulldom.START_DOCUMENT, evt) 51 evt, node = next(items) 52 self.assertEqual(pulldom.START_ELEMENT, evt) 53 self.assertEqual("html", node.tagName) 54 self.assertEqual(2, len(node.attributes)) 55 self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value, 56 "http://www.xml.com/books") 57 evt, node = next(items) 58 self.assertEqual(pulldom.CHARACTERS, evt) # Line break 59 evt, node = next(items) 60 # XXX - A comment should be reported here! 61 # self.assertEqual(pulldom.COMMENT, evt) 62 # Line break after swallowed comment: 63 self.assertEqual(pulldom.CHARACTERS, evt) 64 evt, node = next(items) 65 self.assertEqual("title", node.tagName) 66 title_node = node 67 evt, node = next(items) 68 self.assertEqual(pulldom.CHARACTERS, evt) 69 self.assertEqual("Introduction to XSL", node.data) 70 evt, node = next(items) 71 self.assertEqual(pulldom.END_ELEMENT, evt) 72 self.assertEqual("title", node.tagName) 73 self.assertTrue(title_node is node) 74 evt, node = next(items) 75 self.assertEqual(pulldom.CHARACTERS, evt) 76 evt, node = next(items) 77 self.assertEqual(pulldom.START_ELEMENT, evt) 78 self.assertEqual("hr", node.tagName) 79 evt, node = next(items) 80 self.assertEqual(pulldom.END_ELEMENT, evt) 81 self.assertEqual("hr", node.tagName) 82 evt, node = next(items) 83 self.assertEqual(pulldom.CHARACTERS, evt) 84 evt, node = next(items) 85 self.assertEqual(pulldom.START_ELEMENT, evt) 86 self.assertEqual("p", node.tagName) 87 evt, node = next(items) 88 self.assertEqual(pulldom.START_ELEMENT, evt) 89 self.assertEqual("xdc:author", node.tagName) 90 evt, node = next(items) 91 self.assertEqual(pulldom.CHARACTERS, evt) 92 evt, node = next(items) 93 self.assertEqual(pulldom.END_ELEMENT, evt) 94 self.assertEqual("xdc:author", node.tagName) 95 evt, node = next(items) 96 self.assertEqual(pulldom.END_ELEMENT, evt) 97 evt, node = next(items) 98 self.assertEqual(pulldom.CHARACTERS, evt) 99 evt, node = next(items) 100 self.assertEqual(pulldom.END_ELEMENT, evt) 101 # XXX No END_DOCUMENT item is ever obtained: 102 #evt, node = next(items) 103 #self.assertEqual(pulldom.END_DOCUMENT, evt) 104 105 def test_expandItem(self): 106 """Ensure expandItem works as expected.""" 107 items = pulldom.parseString(SMALL_SAMPLE) 108 # Loop through the nodes until we get to a "title" start tag: 109 for evt, item in items: 110 if evt == pulldom.START_ELEMENT and item.tagName == "title": 111 items.expandNode(item) 112 self.assertEqual(1, len(item.childNodes)) 113 break 114 else: 115 self.fail("No \"title\" element detected in SMALL_SAMPLE!") 116 # Loop until we get to the next start-element: 117 for evt, node in items: 118 if evt == pulldom.START_ELEMENT: 119 break 120 self.assertEqual("hr", node.tagName, 121 "expandNode did not leave DOMEventStream in the correct state.") 122 # Attempt to expand a standalone element: 123 items.expandNode(node) 124 self.assertEqual(next(items)[0], pulldom.CHARACTERS) 125 evt, node = next(items) 126 self.assertEqual(node.tagName, "p") 127 items.expandNode(node) 128 next(items) # Skip character data 129 evt, node = next(items) 130 self.assertEqual(node.tagName, "html") 131 with self.assertRaises(StopIteration): 132 next(items) 133 items.clear() 134 self.assertIsNone(items.parser) 135 self.assertIsNone(items.stream) 136 137 @unittest.expectedFailure 138 def test_comment(self): 139 """PullDOM does not receive "comment" events.""" 140 items = pulldom.parseString(SMALL_SAMPLE) 141 for evt, _ in items: 142 if evt == pulldom.COMMENT: 143 break 144 else: 145 self.fail("No comment was encountered") 146 147 @unittest.expectedFailure 148 def test_end_document(self): 149 """PullDOM does not receive "end-document" events.""" 150 items = pulldom.parseString(SMALL_SAMPLE) 151 # Read all of the nodes up to and including </html>: 152 for evt, node in items: 153 if evt == pulldom.END_ELEMENT and node.tagName == "html": 154 break 155 try: 156 # Assert that the next node is END_DOCUMENT: 157 evt, node = next(items) 158 self.assertEqual(pulldom.END_DOCUMENT, evt) 159 except StopIteration: 160 self.fail( 161 "Ran out of events, but should have received END_DOCUMENT") 162 163 def test_getitem_deprecation(self): 164 parser = pulldom.parseString(SMALL_SAMPLE) 165 with self.assertWarnsRegex(DeprecationWarning, 166 r'Use iterator protocol instead'): 167 # This should have returned 'END_ELEMENT'. 168 self.assertEqual(parser[-1][0], pulldom.START_DOCUMENT) 169 170 def test_external_ges_default(self): 171 parser = pulldom.parseString(SMALL_SAMPLE) 172 saxparser = parser.parser 173 ges = saxparser.getFeature(feature_external_ges) 174 self.assertEqual(ges, False) 175 176 177class ThoroughTestCase(unittest.TestCase): 178 """Test the hard-to-reach parts of pulldom.""" 179 180 def test_thorough_parse(self): 181 """Test some of the hard-to-reach parts of PullDOM.""" 182 self._test_thorough(pulldom.parse(None, parser=SAXExerciser())) 183 184 @unittest.expectedFailure 185 def test_sax2dom_fail(self): 186 """SAX2DOM can"t handle a PI before the root element.""" 187 pd = SAX2DOMTestHelper(None, SAXExerciser(), 12) 188 self._test_thorough(pd) 189 190 def test_thorough_sax2dom(self): 191 """Test some of the hard-to-reach parts of SAX2DOM.""" 192 pd = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12) 193 self._test_thorough(pd, False) 194 195 def _test_thorough(self, pd, before_root=True): 196 """Test some of the hard-to-reach parts of the parser, using a mock 197 parser.""" 198 199 evt, node = next(pd) 200 self.assertEqual(pulldom.START_DOCUMENT, evt) 201 # Just check the node is a Document: 202 self.assertTrue(hasattr(node, "createElement")) 203 204 if before_root: 205 evt, node = next(pd) 206 self.assertEqual(pulldom.COMMENT, evt) 207 self.assertEqual("a comment", node.data) 208 evt, node = next(pd) 209 self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt) 210 self.assertEqual("target", node.target) 211 self.assertEqual("data", node.data) 212 213 evt, node = next(pd) 214 self.assertEqual(pulldom.START_ELEMENT, evt) 215 self.assertEqual("html", node.tagName) 216 217 evt, node = next(pd) 218 self.assertEqual(pulldom.COMMENT, evt) 219 self.assertEqual("a comment", node.data) 220 evt, node = next(pd) 221 self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt) 222 self.assertEqual("target", node.target) 223 self.assertEqual("data", node.data) 224 225 evt, node = next(pd) 226 self.assertEqual(pulldom.START_ELEMENT, evt) 227 self.assertEqual("p", node.tagName) 228 229 evt, node = next(pd) 230 self.assertEqual(pulldom.CHARACTERS, evt) 231 self.assertEqual("text", node.data) 232 evt, node = next(pd) 233 self.assertEqual(pulldom.END_ELEMENT, evt) 234 self.assertEqual("p", node.tagName) 235 evt, node = next(pd) 236 self.assertEqual(pulldom.END_ELEMENT, evt) 237 self.assertEqual("html", node.tagName) 238 evt, node = next(pd) 239 self.assertEqual(pulldom.END_DOCUMENT, evt) 240 241 242class SAXExerciser(object): 243 """A fake sax parser that calls some of the harder-to-reach sax methods to 244 ensure it emits the correct events""" 245 246 def setContentHandler(self, handler): 247 self._handler = handler 248 249 def parse(self, _): 250 h = self._handler 251 h.startDocument() 252 253 # The next two items ensure that items preceding the first 254 # start_element are properly stored and emitted: 255 h.comment("a comment") 256 h.processingInstruction("target", "data") 257 258 h.startElement("html", AttributesImpl({})) 259 260 h.comment("a comment") 261 h.processingInstruction("target", "data") 262 263 h.startElement("p", AttributesImpl({"class": "paraclass"})) 264 h.characters("text") 265 h.endElement("p") 266 h.endElement("html") 267 h.endDocument() 268 269 def stub(self, *args, **kwargs): 270 """Stub method. Does nothing.""" 271 pass 272 setProperty = stub 273 setFeature = stub 274 275 276class SAX2DOMExerciser(SAXExerciser): 277 """The same as SAXExerciser, but without the processing instruction and 278 comment before the root element, because S2D can"t handle it""" 279 280 def parse(self, _): 281 h = self._handler 282 h.startDocument() 283 h.startElement("html", AttributesImpl({})) 284 h.comment("a comment") 285 h.processingInstruction("target", "data") 286 h.startElement("p", AttributesImpl({"class": "paraclass"})) 287 h.characters("text") 288 h.endElement("p") 289 h.endElement("html") 290 h.endDocument() 291 292 293class SAX2DOMTestHelper(pulldom.DOMEventStream): 294 """Allows us to drive SAX2DOM from a DOMEventStream.""" 295 296 def reset(self): 297 self.pulldom = pulldom.SAX2DOM() 298 # This content handler relies on namespace support 299 self.parser.setFeature(xml.sax.handler.feature_namespaces, 1) 300 self.parser.setContentHandler(self.pulldom) 301 302 303class SAX2DOMTestCase(unittest.TestCase): 304 305 def confirm(self, test, testname="Test"): 306 self.assertTrue(test, testname) 307 308 def test_basic(self): 309 """Ensure SAX2DOM can parse from a stream.""" 310 with io.StringIO(SMALL_SAMPLE) as fin: 311 sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(), 312 len(SMALL_SAMPLE)) 313 for evt, node in sd: 314 if evt == pulldom.START_ELEMENT and node.tagName == "html": 315 break 316 # Because the buffer is the same length as the XML, all the 317 # nodes should have been parsed and added: 318 self.assertGreater(len(node.childNodes), 0) 319 320 def testSAX2DOM(self): 321 """Ensure SAX2DOM expands nodes as expected.""" 322 sax2dom = pulldom.SAX2DOM() 323 sax2dom.startDocument() 324 sax2dom.startElement("doc", {}) 325 sax2dom.characters("text") 326 sax2dom.startElement("subelm", {}) 327 sax2dom.characters("text") 328 sax2dom.endElement("subelm") 329 sax2dom.characters("text") 330 sax2dom.endElement("doc") 331 sax2dom.endDocument() 332 333 doc = sax2dom.document 334 root = doc.documentElement 335 (text1, elm1, text2) = root.childNodes 336 text3 = elm1.childNodes[0] 337 338 self.assertIsNone(text1.previousSibling) 339 self.assertIs(text1.nextSibling, elm1) 340 self.assertIs(elm1.previousSibling, text1) 341 self.assertIs(elm1.nextSibling, text2) 342 self.assertIs(text2.previousSibling, elm1) 343 self.assertIsNone(text2.nextSibling) 344 self.assertIsNone(text3.previousSibling) 345 self.assertIsNone(text3.nextSibling) 346 347 self.assertIs(root.parentNode, doc) 348 self.assertIs(text1.parentNode, root) 349 self.assertIs(elm1.parentNode, root) 350 self.assertIs(text2.parentNode, root) 351 self.assertIs(text3.parentNode, elm1) 352 doc.unlink() 353 354 355if __name__ == "__main__": 356 unittest.main() 357