• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Helper functions for XML.
2
3This module has misc. helper functions for working with XML DOM nodes."""
4
5import re
6from compat import *
7
8import os
#Choose the XML parser at import time based on os.name: anything other than
#"java" uses the standard-library minidom; "java" (presumably Jython) uses the
#Java DOM parser. Both branches define parseDocument(s) with the same signature.
if os.name != "java":
    from xml.dom import minidom
    #saxutils is imported only in this branch; escape() in this module relies on it
    from xml.sax import saxutils

    def parseDocument(s):
        """Parse the XML string s with minidom and return the parsed document."""
        return minidom.parseString(s)
else:
    from javax.xml.parsers import *
    import java

    #One DocumentBuilder is created when the module is imported and is reused
    #by every parseDocument() call
    builder = DocumentBuilderFactory.newInstance().newDocumentBuilder()

    def parseDocument(s):
        """Parse the XML string s with the Java DocumentBuilder and return the result of builder.parse()."""
        #NOTE(review): getBytes() is called without a charset argument, so the
        #encoding is presumably the platform default -- confirm for non-ASCII input
        stream = java.io.ByteArrayInputStream(java.lang.String(s).getBytes())
        return builder.parse(stream)
24
25def parseAndStripWhitespace(s):
26    try:
27        element = parseDocument(s).documentElement
28    except BaseException, e:
29        raise SyntaxError(str(e))
30    stripWhitespace(element)
31    return element
32
def stripWhitespace(element, tab=0):
    """Check and remove the indentation whitespace inside a DOM element.

    element is the DOM element to clean up in place; tab is the number of
    tab characters its closing tag is expected to be indented by.

    Rules applied:
      * An element with no child nodes is rejected (SyntaxError).
      * An element with exactly one child must hold text. The text is left
        alone unless it is exactly a newline followed by `tab` tabs, in
        which case it is removed. A lone child element, or any other node
        type, raises SyntaxError.
      * Otherwise every child element is processed recursively with tab+1,
        and every text node is removed after checking it: the last child
        must be a newline followed by `tab` tabs, any other text node a
        newline followed by `tab`+1 tabs. A mismatch or an unexpected node
        type raises SyntaxError.
    """
    element.normalize()

    closingIndent = "\n" + ("\t"*tab)
    childIndent = closingIndent + "\t"

    #Use .length here: len(element.childNodes) doesn't work in Jython
    childCount = element.childNodes.length

    #Zero children aren't allowed (i.e. <empty/>)
    #This makes writing output simpler, and matches Canonical XML
    if childCount == 0:
        raise SyntaxError("Empty XML elements not allowed")

    #A single child must be text content
    if childCount == 1:
        only = element.firstChild
        if only.nodeType == only.ELEMENT_NODE:
            raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
        if only.nodeType != only.TEXT_NODE:
            raise SyntaxError("Unexpected node type in XML document")
        #Text that is nothing but the closing indentation means an empty element
        if only.data == closingIndent:
            element.removeChild(only)
        return

    #Otherwise there are multiple children: elements separated by whitespace
    node = element.firstChild
    while node:
        #Remember the successor first, since the node may be removed below
        following = node.nextSibling
        if node.nodeType == node.ELEMENT_NODE:
            stripWhitespace(node, tab+1)
        elif node.nodeType == node.TEXT_NODE:
            if node == element.lastChild:
                expected = closingIndent
            else:
                expected = childIndent
            if node.data != expected:
                raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
            element.removeChild(node)
        else:
            raise SyntaxError("Unexpected node type in XML document")
        node = following
76
77
def checkName(element, name):
    """Verify that element is a DOM element node with the given tag name.

    Raises SyntaxError if element is not an element node, or if name is not
    None and differs from element.tagName. Returns None on success.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Missing element: '%s'" % name)

    #A name of None means any element name is acceptable
    if name is not None and element.tagName != name:
        raise SyntaxError("Wrong element name: should be '%s', is '%s'" % (name, element.tagName))
87
def getChild(element, index, name=None):
    """Return the child node of element at position index.

    Raises SyntaxError if element is not an element node, if there is no
    child at that index, or if the child fails checkName(child, name).
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getChild()")

    node = element.childNodes.item(index)
    if node is None:
        raise SyntaxError("Missing child: '%s'" % name)
    checkName(node, name)
    return node
97
def getChildIter(element, index):
    """Return a cursor over the child nodes of element, starting at index.

    The returned object has two methods:
      next()     -- return the child at the current position and advance,
                    or return None (without advancing) once the children
                    are exhausted.
      checkEnd() -- raise SyntaxError unless every child has been consumed.
    """
    class ChildIter:
        def __init__(self, element, index):
            self.element = element
            self.index = index

        def next(self):
            #Use .length rather than len(): len() on childNodes doesn't work in Jython
            if self.index < self.element.childNodes.length:
                retVal = self.element.childNodes.item(self.index)
                self.index += 1
            else:
                retVal = None
            return retVal

        def checkEnd(self):
            #Same Jython consideration as in next()
            if self.index != self.element.childNodes.length:
                raise SyntaxError("Too many elements under: '%s'" % self.element.tagName)
    return ChildIter(element, index)
116
def getChildOrNone(element, index):
    """Return whatever element.childNodes.item(index) yields, without name checks.

    Unlike getChild(), a missing child is not an error: the result of
    item() is handed back as-is (getChild() treats None as "missing").
    Raises SyntaxError if element is not an element node.
    """
    if element.nodeType != element.ELEMENT_NODE:
        #Name this function in the message (it used to say getChild())
        raise SyntaxError("Wrong node type in getChildOrNone()")
    return element.childNodes.item(index)
122
def getLastChild(element, index, name=None):
    """Return the child of element at position index, requiring it to be the last one.

    Raises SyntaxError if element is not an element node, if there is no
    child at that index, if further children follow it, or if the child
    fails checkName(child, name).
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getLastChild()")

    node = element.childNodes.item(index)
    if node is None:
        raise SyntaxError("Missing child: '%s'" % name)
    #Anything after this child counts as surplus content
    if node != element.lastChild:
        raise SyntaxError("Too many elements under: '%s'" % element.tagName)
    checkName(node, name)
    return node
134
#Regular expressions for syntax-checking attribute and element content.
#The helpers in this module apply a pattern with re.match(), which anchors
#only at the start of the value, so each pattern ends in \Z to reject
#trailing characters. Raw strings keep the backslashes literal.
nsRegEx = r"http://trevp\.net/cryptoID\Z" #The dot is escaped so it only matches a literal '.'
cryptoIDRegEx = r"([a-km-z3-9]{5}\.){3}[a-km-z3-9]{5}\Z"
urlRegEx = r"http(s)?://.{1,100}\Z"
sha1Base64RegEx = r"[A-Za-z0-9+/]{27}=\Z"
base64RegEx = r"[A-Za-z0-9+/]+={0,4}\Z"
certsListRegEx = r"(0)?(1)?(2)?(3)?(4)?(5)?(6)?(7)?(8)?(9)?\Z"
keyRegEx = r"[A-Z]\Z"
keysListRegEx = r"(A)?(B)?(C)?(D)?(E)?(F)?(G)?(H)?(I)?(J)?(K)?(L)?(M)?(N)?(O)?(P)?(Q)?(R)?(S)?(T)?(U)?(V)?(W)?(X)?(Y)?(Z)?\Z"
dateTimeRegEx = r"\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ\Z"
shortStringRegEx = r".{1,100}\Z"
exprRegEx = r"[a-zA-Z0-9 ,()]{1,200}\Z"
#A number from 0 to (1 billion)-1. The alternation is grouped so that \Z
#applies to both branches (otherwise "0abc" would be accepted).
notAfterDeltaRegEx = r"(?:0|([1-9][0-9]{0,8}))\Z"
#Grouped and anchored so that e.g. "truex" is rejected
booleanRegEx = r"(?:(true)|(false))\Z"
149
def getReqAttribute(element, attrName, regEx=""):
    """Fetch a mandatory attribute, validate it, and remove it from element.

    Returns the attribute value converted with str(). Raises SyntaxError if
    element is not an element node, if the attribute is absent or empty, or
    if re.match(regEx, value) finds no match. With the default regEx of ""
    every value is accepted.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getReqAttribute()")

    raw = element.getAttribute(attrName)
    if not raw:
        raise SyntaxError("Missing Attribute: " + attrName)
    if re.match(regEx, raw) is None:
        raise SyntaxError("Bad Attribute Value for '%s': '%s' " % (attrName, raw))
    #Consume the attribute so leftovers can be detected afterwards
    element.removeAttribute(attrName)
    #str() de-unicodes the value; this is needed for bsddb, for example
    return str(raw)
161
def getAttribute(element, attrName, regEx=""):
    """Fetch an optional attribute, validate it, and remove it from element.

    Returns None when the attribute is absent or empty. Otherwise returns
    the value converted with str(). Raises SyntaxError if element is not an
    element node or if re.match(regEx, value) finds no match.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getAttribute()")

    raw = element.getAttribute(attrName)
    if not raw:
        #Optional attribute not supplied
        return None
    if re.match(regEx, raw) is None:
        raise SyntaxError("Bad Attribute Value for '%s': '%s' " % (attrName, raw))
    #Consume the attribute so leftovers can be detected afterwards
    element.removeAttribute(attrName)
    #str() de-unicodes the value; this is needed for bsddb, for example
    return str(raw)
172
def checkNoMoreAttributes(element):
    """Raise SyntaxError if element still carries any attributes.

    Also raises SyntaxError if element is not an element node.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in checkNoMoreAttributes()")

    remaining = element.attributes.length
    if remaining != 0:
        raise SyntaxError("Extra attributes on '%s'" % element.tagName)
179
def getText(element, regEx=""):
    """Return the text held by element's first child, converted with str().

    Raises SyntaxError if element has no first child, if that child is not
    a text node, or if re.match(regEx, text) finds no match. With the
    default regEx of "" every text is accepted.
    """
    node = element.firstChild
    if node is None:
        raise SyntaxError("Empty element '%s'" % element.tagName)
    if node.nodeType != node.TEXT_NODE:
        raise SyntaxError("Non-text node: '%s'" % element.tagName)

    content = node.data
    if re.match(regEx, content) is None:
        raise SyntaxError("Bad Text Value for '%s': '%s' " % (element.tagName, content))
    #str() de-unicodes the text; this is needed for bsddb, for example
    return str(content)
189
#Function for adding tabs to a string
def indent(s, steps, ch="\t"):
    """Prefix every line of s with ch repeated steps times.

    s     -- the text to indent; may span several lines
    steps -- how many copies of ch to put in front of each line
    ch    -- the indentation unit (a tab by default)

    A trailing newline in s stays the last character: no indentation is
    added after it. An empty s, or an empty indentation (steps of 0 or an
    empty ch), returns s unchanged.
    """
    tabs = ch*steps
    #Nothing to do; also avoids indexing an empty string and slicing with -0,
    #which would wipe out the whole result
    if not s or not tabs:
        return s
    result = tabs + s.replace("\n", "\n"+tabs)
    if s[-1] == "\n":
        #Drop the indentation that was added after the final newline
        result = result[ : -len(tabs)]
    return result
199
def escape(s):
    """Return s escaped for use as XML character data, via saxutils.escape()."""
    escaped = saxutils.escape(s)
    return escaped
202