"""Helper functions for XML.

This module has misc. helper functions for working with XML DOM nodes."""

import re
from compat import *

import os
import sys
if os.name != "java":
    from xml.dom import minidom
    from xml.sax import saxutils

    def parseDocument(s):
        """Parse the XML string s with minidom and return the DOM document."""
        return minidom.parseString(s)
else:
    from javax.xml.parsers import *
    import java

    #One shared DocumentBuilder, created at import time
    builder = DocumentBuilderFactory.newInstance().newDocumentBuilder()

    def parseDocument(s):
        """Parse the XML string s with the Java DOM parser; return the document."""
        #NOTE(review): getBytes() is called without a charset argument -
        #presumably this uses the platform default encoding; confirm
        stream = java.io.ByteArrayInputStream(java.lang.String(s).getBytes())
        return builder.parse(stream)

def parseAndStripWhitespace(s):
    """Parse s and return its root element with formatting whitespace removed.

    Any exception raised while parsing is re-raised as a SyntaxError carrying
    the original message.  stripWhitespace() is then applied to the root
    element and may raise SyntaxError itself.
    """
    try:
        element = parseDocument(s).documentElement
    except BaseException:
        #sys.exc_info() instead of "except X, e" / "except X as e", so this
        #line parses on every Python version
        raise SyntaxError(str(sys.exc_info()[1]))
    stripWhitespace(element)
    return element

#Goes through a DOM tree and removes whitespace besides child elements,
#as long as this whitespace is correctly tab-ified
def stripWhitespace(element, tab=0):
    """Recursively remove indentation-only text nodes below element.

    element is expected to sit at nesting depth tab.  Between its child
    elements the only text allowed is a newline followed by tab+1 tabs; the
    final text node (before the closing tag) must be a newline followed by
    tab tabs.  Such text nodes are removed.  An element with a single text
    child keeps it as content, unless the text is just the closing-tag
    whitespace, in which case it is removed.

    Raises SyntaxError for an element with no children, a lone child
    element with no surrounding whitespace, whitespace that doesn't match
    the expected indentation, or any node that is neither text nor element.
    """
    element.normalize()

    lastSpacer = "\n" + ("\t"*tab)
    spacer = lastSpacer + "\t"

    #Zero children aren't allowed (i.e. <empty/>)
    #This makes writing output simpler, and matches Canonical XML
    if element.childNodes.length==0: #DON'T DO len(element.childNodes) - doesn't work in Jython
        raise SyntaxError("Empty XML elements not allowed")

    #If there's a single child, it must be text content
    if element.childNodes.length==1:
        onlyChild = element.firstChild
        if onlyChild.nodeType == onlyChild.TEXT_NODE:
            #If it's an empty element, remove
            if onlyChild.data == lastSpacer:
                element.removeChild(onlyChild)
            return
        #If not text content, give an error
        elif onlyChild.nodeType == onlyChild.ELEMENT_NODE:
            raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
        else:
            raise SyntaxError("Unexpected node type in XML document")

    #Otherwise there are multiple child nodes
    child = element.firstChild
    while child is not None:
        if child.nodeType == child.ELEMENT_NODE:
            stripWhitespace(child, tab+1)
            child = child.nextSibling
        elif child.nodeType == child.TEXT_NODE:
            if child == element.lastChild:
                if child.data != lastSpacer:
                    raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
            elif child.data != spacer:
                raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
            #Grab the sibling before removing, since removal detaches child
            following = child.nextSibling
            element.removeChild(child)
            child = following
        else:
            raise SyntaxError("Unexpected node type in XML document")


def checkName(element, name):
    """Check that element is an element node and, if name is given, has that tag.

    Raises SyntaxError if element is not an element node, or if name is not
    None and differs from element.tagName.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Missing element: '%s'" % name)

    if name is None:
        return

    if element.tagName != name:
        raise SyntaxError("Wrong element name: should be '%s', is '%s'" % (name, element.tagName))

def getChild(element, index, name=None):
    """Return the child node of element at position index.

    Raises SyntaxError if element is not an element node, if there is no
    child at index, or if the child fails checkName(child, name).
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getChild()")

    child = element.childNodes.item(index)
    if child is None:
        raise SyntaxError("Missing child: '%s'" % name)
    checkName(child, name)
    return child

def getChildIter(element, index):
    """Return a cursor over element's child nodes, starting at position index.

    The returned object has two methods:
      next()     - return the child at the cursor and advance, or None once
                   all children have been consumed
      checkEnd() - raise SyntaxError unless every child has been consumed
    """
    class ChildIter:
        def __init__(self, element, index):
            self.element = element
            self.index = index

        def next(self):
            #Use .length rather than len(): len() on childNodes doesn't
            #work in Jython
            if self.index < self.element.childNodes.length:
                retVal = self.element.childNodes.item(self.index)
                self.index += 1
            else:
                retVal = None
            return retVal

        def checkEnd(self):
            if self.index != self.element.childNodes.length:
                raise SyntaxError("Too many elements under: '%s'" % self.element.tagName)
    return ChildIter(element, index)

def getChildOrNone(element, index):
    """Return the child node of element at position index, or None if absent.

    Raises SyntaxError if element is not an element node.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getChildOrNone()")
    child = element.childNodes.item(index)
    return child

def getLastChild(element, index, name=None):
    """Return the child at position index, requiring it to be the last child.

    Raises SyntaxError if element is not an element node, if there is no
    child at index, if further children follow it, or if the child fails
    checkName(child, name).
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getLastChild()")

    child = element.childNodes.item(index)
    if child is None:
        raise SyntaxError("Missing child: '%s'" % name)
    if child != element.lastChild:
        raise SyntaxError("Too many elements under: '%s'" % element.tagName)
    checkName(child, name)
    return child

#Regular expressions for syntax-checking attribute and element content.
#They are applied with re.match(), so each is anchored at the start
#implicitly and at the end with \Z.  Raw strings keep the backslashes
#literal without relying on unrecognized string escapes.
nsRegEx = r"http://trevp\.net/cryptoID\Z"
cryptoIDRegEx = r"([a-km-z3-9]{5}\.){3}[a-km-z3-9]{5}\Z"
urlRegEx = r"http(s)?://.{1,100}\Z"
sha1Base64RegEx = r"[A-Za-z0-9+/]{27}=\Z"
base64RegEx = r"[A-Za-z0-9+/]+={0,4}\Z"
certsListRegEx = r"(0)?(1)?(2)?(3)?(4)?(5)?(6)?(7)?(8)?(9)?\Z"
keyRegEx = r"[A-Z]\Z"
keysListRegEx = r"(A)?(B)?(C)?(D)?(E)?(F)?(G)?(H)?(I)?(J)?(K)?(L)?(M)?(N)?(O)?(P)?(Q)?(R)?(S)?(T)?(U)?(V)?(W)?(X)?(Y)?(Z)?\Z"
dateTimeRegEx = r"\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ\Z"
shortStringRegEx = r".{1,100}\Z"
exprRegEx = r"[a-zA-Z0-9 ,()]{1,200}\Z"
#A number from 0 to (1 billion)-1.  The non-capturing group makes \Z apply
#to both alternatives (otherwise "0" followed by anything would match).
notAfterDeltaRegEx = r"(?:0|([1-9][0-9]{0,8}))\Z"
#Anchored so that e.g. "truex" is rejected
booleanRegEx = r"(?:(true)|(false))\Z"

def getReqAttribute(element, attrName, regEx=""):
    """Return the required attribute attrName of element and remove it.

    The value must be non-empty and match regEx (via re.match); the default
    regEx of "" accepts anything.  The attribute is removed from element so
    that checkNoMoreAttributes() can later detect leftovers.

    Raises SyntaxError if element is not an element node, if the attribute
    is missing or empty, or if its value doesn't match regEx.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getReqAttribute()")

    value = element.getAttribute(attrName)
    if not value:
        raise SyntaxError("Missing Attribute: " + attrName)
    if not re.match(regEx, value):
        raise SyntaxError("Bad Attribute Value for '%s': '%s' " % (attrName, value))
    element.removeAttribute(attrName)
    return str(value) #de-unicode it; this is needed for bsddb, for example

def getAttribute(element, attrName, regEx=""):
    """Return the optional attribute attrName of element, or None if absent.

    A present (non-empty) value must match regEx (via re.match) and is
    removed from element before being returned.

    Raises SyntaxError if element is not an element node, or if the value
    is present but doesn't match regEx.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getAttribute()")

    value = element.getAttribute(attrName)
    if not value:
        return None
    if not re.match(regEx, value):
        raise SyntaxError("Bad Attribute Value for '%s': '%s' " % (attrName, value))
    element.removeAttribute(attrName)
    return str(value) #de-unicode it; this is needed for bsddb, for example

def checkNoMoreAttributes(element):
    """Raise SyntaxError if element is not an element node or has attributes left."""
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in checkNoMoreAttributes()")

    if element.attributes.length!=0:
        raise SyntaxError("Extra attributes on '%s'" % element.tagName)

def getText(element, regEx=""):
    """Return the text of element's first child, checked against regEx.

    Raises SyntaxError if element has no children, if its first child is
    not a text node, or if the text doesn't match regEx (via re.match).
    """
    textNode = element.firstChild
    if textNode is None:
        raise SyntaxError("Empty element '%s'" % element.tagName)
    if textNode.nodeType != textNode.TEXT_NODE:
        raise SyntaxError("Non-text node: '%s'" % element.tagName)
    if not re.match(regEx, textNode.data):
        raise SyntaxError("Bad Text Value for '%s': '%s' " % (element.tagName, textNode.data))
    return str(textNode.data) #de-unicode it; this is needed for bsddb, for example

#Function for adding tabs to a string
def indent(s, steps, ch="\t"):
    """Return s with every line prefixed by ch repeated steps times.

    A trailing newline is preserved and nothing is added after it.  An
    empty s, or an empty prefix (steps <= 0 or ch == ""), returns s
    unchanged.
    """
    tabs = ch*steps
    if not s or not tabs:
        return s
    if s[-1] != "\n":
        return tabs + s.replace("\n", "\n"+tabs)
    #Indent everything before the final newline, then put the newline back,
    #so no dangling prefix is left after it
    return tabs + s[:-1].replace("\n", "\n"+tabs) + "\n"

def escape(s):
    """Return s escaped for use as XML character data (via saxutils.escape)."""
    return saxutils.escape(s)