• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1from __future__ import absolute_import, division, unicode_literals
2from six import text_type
3
4from ..constants import scopingElements, tableInsertModeElements, namespaces
5
6# The scope markers are inserted when entering object elements,
7# marquees, table cells, and table captions, and are used to prevent formatting
8# from "leaking" into tables, object elements, and marquees.
9Marker = None
10
11listElementsMap = {
12    None: (frozenset(scopingElements), False),
13    "button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
14    "list": (frozenset(scopingElements | set([(namespaces["html"], "ol"),
15                                              (namespaces["html"], "ul")])), False),
16    "table": (frozenset([(namespaces["html"], "html"),
17                         (namespaces["html"], "table")]), False),
18    "select": (frozenset([(namespaces["html"], "optgroup"),
19                          (namespaces["html"], "option")]), True)
20}
21
22
23class Node(object):
24    def __init__(self, name):
25        """Node representing an item in the tree.
26        name - The tag name associated with the node
27        parent - The parent of the current node (or None for the document node)
28        value - The value of the current node (applies to text nodes and
29        comments
30        attributes - a dict holding name, value pairs for attributes of the node
31        childNodes - a list of child nodes of the current node. This must
32        include all elements but not necessarily other node types
33        _flags - A list of miscellaneous flags that can be set on the node
34        """
35        self.name = name
36        self.parent = None
37        self.value = None
38        self.attributes = {}
39        self.childNodes = []
40        self._flags = []
41
42    def __str__(self):
43        attributesStr = " ".join(["%s=\"%s\"" % (name, value)
44                                  for name, value in
45                                  self.attributes.items()])
46        if attributesStr:
47            return "<%s %s>" % (self.name, attributesStr)
48        else:
49            return "<%s>" % (self.name)
50
51    def __repr__(self):
52        return "<%s>" % (self.name)
53
54    def appendChild(self, node):
55        """Insert node as a child of the current node
56        """
57        raise NotImplementedError
58
59    def insertText(self, data, insertBefore=None):
60        """Insert data as text in the current node, positioned before the
61        start of node insertBefore or to the end of the node's text.
62        """
63        raise NotImplementedError
64
65    def insertBefore(self, node, refNode):
66        """Insert node as a child of the current node, before refNode in the
67        list of child nodes. Raises ValueError if refNode is not a child of
68        the current node"""
69        raise NotImplementedError
70
71    def removeChild(self, node):
72        """Remove node from the children of the current node
73        """
74        raise NotImplementedError
75
76    def reparentChildren(self, newParent):
77        """Move all the children of the current node to newParent.
78        This is needed so that trees that don't store text as nodes move the
79        text in the correct way
80        """
81        # XXX - should this method be made more general?
82        for child in self.childNodes:
83            newParent.appendChild(child)
84        self.childNodes = []
85
86    def cloneNode(self):
87        """Return a shallow copy of the current node i.e. a node with the same
88        name and attributes but with no parent or child nodes
89        """
90        raise NotImplementedError
91
92    def hasContent(self):
93        """Return true if the node has children or text, false otherwise
94        """
95        raise NotImplementedError
96
97
98class ActiveFormattingElements(list):
99    def append(self, node):
100        equalCount = 0
101        if node != Marker:
102            for element in self[::-1]:
103                if element == Marker:
104                    break
105                if self.nodesEqual(element, node):
106                    equalCount += 1
107                if equalCount == 3:
108                    self.remove(element)
109                    break
110        list.append(self, node)
111
112    def nodesEqual(self, node1, node2):
113        if not node1.nameTuple == node2.nameTuple:
114            return False
115
116        if not node1.attributes == node2.attributes:
117            return False
118
119        return True
120
121
122class TreeBuilder(object):
123    """Base treebuilder implementation
124    documentClass - the class to use for the bottommost node of a document
125    elementClass - the class to use for HTML Elements
126    commentClass - the class to use for comments
127    doctypeClass - the class to use for doctypes
128    """
129
130    # Document class
131    documentClass = None
132
133    # The class to use for creating a node
134    elementClass = None
135
136    # The class to use for creating comments
137    commentClass = None
138
139    # The class to use for creating doctypes
140    doctypeClass = None
141
142    # Fragment class
143    fragmentClass = None
144
145    def __init__(self, namespaceHTMLElements):
146        if namespaceHTMLElements:
147            self.defaultNamespace = "http://www.w3.org/1999/xhtml"
148        else:
149            self.defaultNamespace = None
150        self.reset()
151
152    def reset(self):
153        self.openElements = []
154        self.activeFormattingElements = ActiveFormattingElements()
155
156        # XXX - rename these to headElement, formElement
157        self.headPointer = None
158        self.formPointer = None
159
160        self.insertFromTable = False
161
162        self.document = self.documentClass()
163
164    def elementInScope(self, target, variant=None):
165
166        # If we pass a node in we match that. if we pass a string
167        # match any node with that name
168        exactNode = hasattr(target, "nameTuple")
169
170        listElements, invert = listElementsMap[variant]
171
172        for node in reversed(self.openElements):
173            if (node.name == target and not exactNode or
174                    node == target and exactNode):
175                return True
176            elif (invert ^ (node.nameTuple in listElements)):
177                return False
178
179        assert False  # We should never reach this point
180
181    def reconstructActiveFormattingElements(self):
182        # Within this algorithm the order of steps described in the
183        # specification is not quite the same as the order of steps in the
184        # code. It should still do the same though.
185
186        # Step 1: stop the algorithm when there's nothing to do.
187        if not self.activeFormattingElements:
188            return
189
190        # Step 2 and step 3: we start with the last element. So i is -1.
191        i = len(self.activeFormattingElements) - 1
192        entry = self.activeFormattingElements[i]
193        if entry == Marker or entry in self.openElements:
194            return
195
196        # Step 6
197        while entry != Marker and entry not in self.openElements:
198            if i == 0:
199                # This will be reset to 0 below
200                i = -1
201                break
202            i -= 1
203            # Step 5: let entry be one earlier in the list.
204            entry = self.activeFormattingElements[i]
205
206        while True:
207            # Step 7
208            i += 1
209
210            # Step 8
211            entry = self.activeFormattingElements[i]
212            clone = entry.cloneNode()  # Mainly to get a new copy of the attributes
213
214            # Step 9
215            element = self.insertElement({"type": "StartTag",
216                                          "name": clone.name,
217                                          "namespace": clone.namespace,
218                                          "data": clone.attributes})
219
220            # Step 10
221            self.activeFormattingElements[i] = element
222
223            # Step 11
224            if element == self.activeFormattingElements[-1]:
225                break
226
227    def clearActiveFormattingElements(self):
228        entry = self.activeFormattingElements.pop()
229        while self.activeFormattingElements and entry != Marker:
230            entry = self.activeFormattingElements.pop()
231
232    def elementInActiveFormattingElements(self, name):
233        """Check if an element exists between the end of the active
234        formatting elements and the last marker. If it does, return it, else
235        return false"""
236
237        for item in self.activeFormattingElements[::-1]:
238            # Check for Marker first because if it's a Marker it doesn't have a
239            # name attribute.
240            if item == Marker:
241                break
242            elif item.name == name:
243                return item
244        return False
245
246    def insertRoot(self, token):
247        element = self.createElement(token)
248        self.openElements.append(element)
249        self.document.appendChild(element)
250
251    def insertDoctype(self, token):
252        name = token["name"]
253        publicId = token["publicId"]
254        systemId = token["systemId"]
255
256        doctype = self.doctypeClass(name, publicId, systemId)
257        self.document.appendChild(doctype)
258
259    def insertComment(self, token, parent=None):
260        if parent is None:
261            parent = self.openElements[-1]
262        parent.appendChild(self.commentClass(token["data"]))
263
264    def createElement(self, token):
265        """Create an element but don't insert it anywhere"""
266        name = token["name"]
267        namespace = token.get("namespace", self.defaultNamespace)
268        element = self.elementClass(name, namespace)
269        element.attributes = token["data"]
270        return element
271
272    def _getInsertFromTable(self):
273        return self._insertFromTable
274
275    def _setInsertFromTable(self, value):
276        """Switch the function used to insert an element from the
277        normal one to the misnested table one and back again"""
278        self._insertFromTable = value
279        if value:
280            self.insertElement = self.insertElementTable
281        else:
282            self.insertElement = self.insertElementNormal
283
284    insertFromTable = property(_getInsertFromTable, _setInsertFromTable)
285
286    def insertElementNormal(self, token):
287        name = token["name"]
288        assert isinstance(name, text_type), "Element %s not unicode" % name
289        namespace = token.get("namespace", self.defaultNamespace)
290        element = self.elementClass(name, namespace)
291        element.attributes = token["data"]
292        self.openElements[-1].appendChild(element)
293        self.openElements.append(element)
294        return element
295
296    def insertElementTable(self, token):
297        """Create an element and insert it into the tree"""
298        element = self.createElement(token)
299        if self.openElements[-1].name not in tableInsertModeElements:
300            return self.insertElementNormal(token)
301        else:
302            # We should be in the InTable mode. This means we want to do
303            # special magic element rearranging
304            parent, insertBefore = self.getTableMisnestedNodePosition()
305            if insertBefore is None:
306                parent.appendChild(element)
307            else:
308                parent.insertBefore(element, insertBefore)
309            self.openElements.append(element)
310        return element
311
312    def insertText(self, data, parent=None):
313        """Insert text data."""
314        if parent is None:
315            parent = self.openElements[-1]
316
317        if (not self.insertFromTable or (self.insertFromTable and
318                                         self.openElements[-1].name
319                                         not in tableInsertModeElements)):
320            parent.insertText(data)
321        else:
322            # We should be in the InTable mode. This means we want to do
323            # special magic element rearranging
324            parent, insertBefore = self.getTableMisnestedNodePosition()
325            parent.insertText(data, insertBefore)
326
327    def getTableMisnestedNodePosition(self):
328        """Get the foster parent element, and sibling to insert before
329        (or None) when inserting a misnested table node"""
330        # The foster parent element is the one which comes before the most
331        # recently opened table element
332        # XXX - this is really inelegant
333        lastTable = None
334        fosterParent = None
335        insertBefore = None
336        for elm in self.openElements[::-1]:
337            if elm.name == "table":
338                lastTable = elm
339                break
340        if lastTable:
341            # XXX - we should really check that this parent is actually a
342            # node here
343            if lastTable.parent:
344                fosterParent = lastTable.parent
345                insertBefore = lastTable
346            else:
347                fosterParent = self.openElements[
348                    self.openElements.index(lastTable) - 1]
349        else:
350            fosterParent = self.openElements[0]
351        return fosterParent, insertBefore
352
353    def generateImpliedEndTags(self, exclude=None):
354        name = self.openElements[-1].name
355        # XXX td, th and tr are not actually needed
356        if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
357                and name != exclude):
358            self.openElements.pop()
359            # XXX This is not entirely what the specification says. We should
360            # investigate it more closely.
361            self.generateImpliedEndTags(exclude)
362
363    def getDocument(self):
364        "Return the final tree"
365        return self.document
366
367    def getFragment(self):
368        "Return the final fragment"
369        # assert self.innerHTML
370        fragment = self.fragmentClass()
371        self.openElements[0].reparentChildren(fragment)
372        return fragment
373
374    def testSerializer(self, node):
375        """Serialize the subtree of node in the format required by unit tests
376        node - the node from which to start serializing"""
377        raise NotImplementedError
378