• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import io
2import unittest
3import xml.sax
4
5from xml.sax.xmlreader import AttributesImpl
6from xml.sax.handler import feature_external_ges
7from xml.dom import pulldom
8
9from test.support import findfile
10
11
# Path of the "test.xml" document, located with findfile() in the
# "xmltestdata" subdirectory; used by the tests that parse from a file:
tstfile = findfile("test.xml", subdir="xmltestdata")

# A handy XML snippet, containing attributes, a namespace prefix, and a
# self-closing tag:
SMALL_SAMPLE = """<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:xdc="http://www.xml.com/books">
<!-- A comment -->
<title>Introduction to XSL</title>
<hr/>
<p><xdc:author xdc:attrib="prefixed attribute" attrib="other attrib">A. Namespace</xdc:author></p>
</html>"""
23
24
class PullDOMTestCase(unittest.TestCase):
    """Check the (event, node) stream produced by pulldom's parse helpers."""

    def test_parse(self):
        """Minimal test of pulldom.parse() with a filename and a file object."""

        # This just tests that parsing from a stream works. Actual parser
        # semantics are tested using parseString with a more focused XML
        # fragment.

        # Test with a filename:
        handler = pulldom.parse(tstfile)
        # Register the close first so the stream held by the event stream is
        # released even if consuming the events below raises:
        self.addCleanup(handler.stream.close)
        list(handler)

        # Test with a file object:
        with open(tstfile, "rb") as fin:
            list(pulldom.parse(fin))

    def test_parse_semantics(self):
        """Test DOMEventStream parsing semantics.

        Walks the events generated for SMALL_SAMPLE one by one and checks
        the event type (and, where relevant, the node) of each.
        """

        items = pulldom.parseString(SMALL_SAMPLE)
        evt, node = next(items)
        # Just check the node is a Document:
        self.assertTrue(hasattr(node, "createElement"))
        self.assertEqual(pulldom.START_DOCUMENT, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("html", node.tagName)
        self.assertEqual(2, len(node.attributes))
        self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value,
              "http://www.xml.com/books")
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt) # Line break
        evt, node = next(items)
        # XXX - A comment should be reported here!
        # self.assertEqual(pulldom.COMMENT, evt)
        # Line break after swallowed comment:
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        # Remember the start node: the matching END_ELEMENT must hand back
        # the very same object.
        title_node = node
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        self.assertEqual("Introduction to XSL", node.data)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("title", node.tagName)
        self.assertIs(title_node, node)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("hr", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("p", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.START_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        self.assertEqual("xdc:author", node.tagName)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.CHARACTERS, evt)
        evt, node = next(items)
        self.assertEqual(pulldom.END_ELEMENT, evt)
        # XXX No END_DOCUMENT item is ever obtained:
        #evt, node = next(items)
        #self.assertEqual(pulldom.END_DOCUMENT, evt)

    def test_expandItem(self):
        """Ensure expandNode works as expected."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Loop through the nodes until we get to a "title" start tag:
        for evt, item in items:
            if evt == pulldom.START_ELEMENT and item.tagName == "title":
                items.expandNode(item)
                self.assertEqual(1, len(item.childNodes))
                break
        else:
            self.fail("No \"title\" element detected in SMALL_SAMPLE!")
        # Loop until we get to the next start-element:
        for evt, node in items:
            if evt == pulldom.START_ELEMENT:
                break
        self.assertEqual("hr", node.tagName,
            "expandNode did not leave DOMEventStream in the correct state.")
        # Attempt to expand a standalone element:
        items.expandNode(node)
        self.assertEqual(next(items)[0], pulldom.CHARACTERS)
        evt, node = next(items)
        self.assertEqual(node.tagName, "p")
        items.expandNode(node)
        next(items) # Skip character data
        evt, node = next(items)
        self.assertEqual(node.tagName, "html")
        with self.assertRaises(StopIteration):
            next(items)
        # clear() is expected to drop the references to parser and stream:
        items.clear()
        self.assertIsNone(items.parser)
        self.assertIsNone(items.stream)

    @unittest.expectedFailure
    def test_comment(self):
        """PullDOM does not receive "comment" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        for evt, _ in items:
            if evt == pulldom.COMMENT:
                break
        else:
            self.fail("No comment was encountered")

    @unittest.expectedFailure
    def test_end_document(self):
        """PullDOM does not receive "end-document" events."""
        items = pulldom.parseString(SMALL_SAMPLE)
        # Read all of the nodes up to and including </html>:
        for evt, node in items:
            if evt == pulldom.END_ELEMENT and node.tagName == "html":
                break
        try:
            # Assert that the next node is END_DOCUMENT:
            evt, node = next(items)
            self.assertEqual(pulldom.END_DOCUMENT, evt)
        except StopIteration:
            self.fail(
                "Ran out of events, but should have received END_DOCUMENT")

    def test_getitem_deprecation(self):
        """Indexing a DOMEventStream emits a DeprecationWarning."""
        parser = pulldom.parseString(SMALL_SAMPLE)
        with self.assertWarnsRegex(DeprecationWarning,
                                   r'Use iterator protocol instead'):
            # NOTE: index -1 arguably should have returned the final
            # 'END_ELEMENT' event; this assertion pins the current behavior,
            # where START_DOCUMENT is returned instead.
            self.assertEqual(parser[-1][0], pulldom.START_DOCUMENT)

    def test_external_ges_default(self):
        """The external general entities feature is off by default."""
        parser = pulldom.parseString(SMALL_SAMPLE)
        saxparser = parser.parser
        ges = saxparser.getFeature(feature_external_ges)
        self.assertEqual(ges, False)
175
176
class ThoroughTestCase(unittest.TestCase):
    """Cover the parts of pulldom that ordinary parsing rarely reaches."""

    def test_thorough_parse(self):
        """Run the thorough checks on PullDOM fed by the mock SAX parser."""
        event_stream = pulldom.parse(None, parser=SAXExerciser())
        self._test_thorough(event_stream)

    @unittest.expectedFailure
    def test_sax2dom_fail(self):
        """SAX2DOM can't handle a PI before the root element."""
        self._test_thorough(SAX2DOMTestHelper(None, SAXExerciser(), 12))

    def test_thorough_sax2dom(self):
        """Run the thorough checks on SAX2DOM (nothing before the root)."""
        event_stream = SAX2DOMTestHelper(None, SAX2DOMExerciser(), 12)
        self._test_thorough(event_stream, before_root=False)

    def _test_thorough(self, pd, before_root=True):
        """Verify the exact event sequence emitted through a mock parser.

        pd is an iterator yielding (event, node) pairs.  When before_root
        is true, a comment and a processing instruction are expected
        ahead of the root element as well as inside it.
        """

        def pull(expected_event):
            # Fetch the next pair, check its event type, hand back the node.
            event, current = next(pd)
            self.assertEqual(expected_event, event)
            return current

        def pull_comment_then_pi():
            # A comment immediately followed by a processing instruction.
            comment = pull(pulldom.COMMENT)
            self.assertEqual("a comment", comment.data)
            instruction = pull(pulldom.PROCESSING_INSTRUCTION)
            self.assertEqual("target", instruction.target)
            self.assertEqual("data", instruction.data)

        document = pull(pulldom.START_DOCUMENT)
        # Just check the node is a Document:
        self.assertTrue(hasattr(document, "createElement"))

        if before_root:
            pull_comment_then_pi()

        self.assertEqual("html", pull(pulldom.START_ELEMENT).tagName)
        pull_comment_then_pi()

        self.assertEqual("p", pull(pulldom.START_ELEMENT).tagName)
        self.assertEqual("text", pull(pulldom.CHARACTERS).data)

        # Elements close innermost first.
        for closing_tag in ("p", "html"):
            self.assertEqual(closing_tag, pull(pulldom.END_ELEMENT).tagName)
        pull(pulldom.END_DOCUMENT)
240
241
class SAXExerciser:
    """Fake SAX parser that fires a fixed sequence of handler callbacks.

    It calls some of the harder-to-reach SAX methods (comments and
    processing instructions, both before and inside the root element) so
    tests can check that the correct events are emitted.
    """

    def setContentHandler(self, handler):
        """Remember the handler that parse() will drive."""
        self._handler = handler

    def parse(self, _):
        """Replay the canned document on the handler; the argument is unused."""
        sink = self._handler
        sink.startDocument()

        # Emitted ahead of the first start_element to ensure such items are
        # properly stored and emitted:
        self._emit_comment_and_pi(sink)

        sink.startElement("html", AttributesImpl({}))
        self._emit_comment_and_pi(sink)

        paragraph_attrs = AttributesImpl({"class": "paraclass"})
        sink.startElement("p", paragraph_attrs)
        sink.characters("text")
        for tag in ("p", "html"):
            sink.endElement(tag)
        sink.endDocument()

    @staticmethod
    def _emit_comment_and_pi(sink):
        """Send one comment followed by one processing instruction."""
        sink.comment("a comment")
        sink.processingInstruction("target", "data")

    def stub(self, *args, **kwargs):
        """Stub method. Does nothing."""

    # Configuration calls are accepted and ignored.
    setProperty = setFeature = stub
274
275
class SAX2DOMExerciser(SAXExerciser):
    """Variant of SAXExerciser with nothing ahead of the root element.

    The comment and processing instruction before the root are left out
    because SAX2DOM can't handle them.
    """

    def parse(self, _):
        """Replay the canned document on the handler; the argument is unused."""
        sink = self._handler
        sink.startDocument()

        # Root element opens first; comment and PI only appear inside it.
        sink.startElement("html", AttributesImpl({}))
        sink.comment("a comment")
        sink.processingInstruction("target", "data")

        paragraph_attrs = AttributesImpl({"class": "paraclass"})
        sink.startElement("p", paragraph_attrs)
        sink.characters("text")
        for tag in ("p", "html"):
            sink.endElement(tag)
        sink.endDocument()
291
292
class SAX2DOMTestHelper(pulldom.DOMEventStream):
    """DOMEventStream whose content handler is a SAX2DOM instance.

    This lets the tests drive SAX2DOM through the event-stream interface.
    """

    def reset(self):
        """Install a fresh SAX2DOM handler on the wrapped parser."""
        builder = pulldom.SAX2DOM()
        self.pulldom = builder
        sax_parser = self.parser
        # This content handler relies on namespace support
        sax_parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        sax_parser.setContentHandler(builder)
301
302
class SAX2DOMTestCase(unittest.TestCase):
    """Tests for pulldom.SAX2DOM, driven via a stream and by direct calls."""

    def confirm(self, test, testname="Test"):
        """Assert that *test* is true, using *testname* as the message."""
        self.assertTrue(test, msg=testname)

    def test_basic(self):
        """Ensure SAX2DOM can parse from a stream."""
        buffer_size = len(SMALL_SAMPLE)
        with io.StringIO(SMALL_SAMPLE) as source:
            events = SAX2DOMTestHelper(source, xml.sax.make_parser(),
                                       buffer_size)
            # Advance to the start of the root element.
            for kind, node in events:
                if kind == pulldom.START_ELEMENT and node.tagName == "html":
                    break
            # Because the buffer is the same length as the XML, all the
            # nodes should have been parsed and added:
            self.assertGreater(len(node.childNodes), 0)

    def testSAX2DOM(self):
        """Ensure SAX2DOM expands nodes as expected."""
        builder = pulldom.SAX2DOM()
        # Hand-feed the events for <doc>text<subelm>text</subelm>text</doc>.
        builder.startDocument()
        builder.startElement("doc", {})
        builder.characters("text")
        builder.startElement("subelm", {})
        builder.characters("text")
        builder.endElement("subelm")
        builder.characters("text")
        builder.endElement("doc")
        builder.endDocument()

        document = builder.document
        root = document.documentElement
        leading_text, sub_element, trailing_text = root.childNodes
        nested_text = sub_element.childNodes[0]

        def check_link(actual, expected):
            # None means "no neighbour on that side".
            if expected is None:
                self.assertIsNone(actual)
            else:
                self.assertIs(actual, expected)

        # (node, expected previous sibling, expected next sibling)
        sibling_table = (
            (leading_text, None, sub_element),
            (sub_element, leading_text, trailing_text),
            (trailing_text, sub_element, None),
            (nested_text, None, None),
        )
        for node, before, after in sibling_table:
            check_link(node.previousSibling, before)
            check_link(node.nextSibling, after)

        # (node, expected parent)
        parent_table = (
            (root, document),
            (leading_text, root),
            (sub_element, root),
            (trailing_text, root),
            (nested_text, sub_element),
        )
        for child, parent in parent_table:
            self.assertIs(child.parentNode, parent)
        document.unlink()
353
354
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
357