"""ANTLR3 runtime package"""

# begin[licence]
#
# [The "BSD licence"]
# Copyright (c) 2005-2012 Terence Parr
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# end[licence]

from .constants import DEFAULT_CHANNEL, EOF, INVALID_TOKEN_TYPE

############################################################################
#
# basic token interface
#
############################################################################

class Token(object):
    """@brief Abstract token baseclass.

    Holds the attributes shared by every token (type, channel, text, index,
    line, charPositionInLine and the originating input) and exposes them as
    properties. Subclasses must implement getInputStream()/setInputStream().
    """

    # Optional mapping from token type to a readable name; shared by all
    # Token subclasses and filled in by registerTokenNamesMap().
    TOKEN_NAMES_MAP = None

    @classmethod
    def registerTokenNamesMap(cls, tokenNamesMap):
        """@brief Store a mapping from token type to token name.

        This enables token.typeName to give something more meaningful
        than, e.g., '6'.

        Note that the mapping is stored by reference and an entry for EOF
        is added to it, so the caller's object is modified.
        """
        cls.TOKEN_NAMES_MAP = tokenNamesMap
        cls.TOKEN_NAMES_MAP[EOF] = "EOF"

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 index=-1, line=0, charPositionInLine=-1, input=None):
        """@brief Initialise the common token attributes.

        @param type               Token type.
        @param channel            Channel the token is on.
        @param text               Explicit token text, or None.
        @param index              Index of the token in the token stream;
                                  -1 marks an invalid index.
        @param line               Line of the token; lines are numbered
                                  1..n, so the default 0 means "no line".
        @param charPositionInLine Column of the token; -1 marks an invalid
                                  position.
        @param input              The stream this token was created from.
        """
        # We use -1 for index and charPositionInLine as an invalid index
        self._type = type
        self._channel = channel
        self._text = text
        self._index = index
        # Store the caller's value. This used to be hard-coded to 0, which
        # silently dropped the line number whenever a token was built with
        # an explicit line (e.g. when copying from an existing token).
        self._line = line
        self._charPositionInLine = charPositionInLine
        self.input = input

    # To override a property, you'll need to override both the getter and
    # setter.
    @property
    def text(self):
        """The text of this token."""
        return self._text

    @text.setter
    def text(self, value):
        self._text = value

    @property
    def type(self):
        """The type of this token."""
        return self._type

    @type.setter
    def type(self, value):
        self._type = value

    # For compatibility
    def getType(self):
        """Return the token type (method form of the `type` property)."""
        return self._type

    @property
    def typeName(self):
        """Readable name of the token type.

        Looks the type up in TOKEN_NAMES_MAP when a non-empty map has been
        registered (unknown types yield "INVALID_TOKEN_TYPE"); otherwise
        returns str() of the type.
        """
        if self.TOKEN_NAMES_MAP:
            return self.TOKEN_NAMES_MAP.get(self._type, "INVALID_TOKEN_TYPE")
        else:
            return str(self._type)

    @property
    def line(self):
        """Lines are numbered 1..n."""
        return self._line

    @line.setter
    def line(self, value):
        self._line = value

    @property
    def charPositionInLine(self):
        """Columns are numbered 0..n-1."""
        return self._charPositionInLine

    @charPositionInLine.setter
    def charPositionInLine(self, pos):
        self._charPositionInLine = pos

    @property
    def channel(self):
        """The channel this token is on."""
        return self._channel

    @channel.setter
    def channel(self, value):
        self._channel = value

    @property
    def index(self):
        """
        An index from 0..n-1 of the token object in the input stream.
        This must be valid in order to use the ANTLRWorks debugger.
        """
        return self._index

    @index.setter
    def index(self, value):
        self._index = value

    def getInputStream(self):
        """@brief From what character stream was this token created.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input."""

        raise NotImplementedError

    def setInputStream(self, input):
        """@brief Set the character stream this token was created from.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input."""

        raise NotImplementedError


############################################################################
#
# token implementations
#
# Token
# +- CommonToken
# \- ClassicToken
#
############################################################################

class CommonToken(Token):
    """@brief Basic token implementation.

    This implementation does not copy the text from the input stream upon
    creation, but keeps start/stop pointers into the stream to avoid
    unnecessary copy operations.

    """

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 input=None, start=None, stop=None, oldToken=None):
        """@brief Create a token, either from scratch or as a copy.

        @param type     Token type (ignored when oldToken is given).
        @param channel  Channel (ignored when oldToken is given).
        @param text     Explicit text override (ignored when oldToken is
                        given).
        @param input    Stream the token points into (ignored when oldToken
                        is given).
        @param start    Position in the input where the token starts.
        @param stop     Position in the input of the token's last char.
        @param oldToken Token to copy. Its type, channel, text, index, line,
                        charPositionInLine and input are copied; start/stop
                        are copied too when it is a CommonToken, otherwise
                        the start/stop arguments are used.
        """
        if oldToken:
            super().__init__(oldToken.type, oldToken.channel, oldToken.text,
                             oldToken.index, oldToken.line,
                             oldToken.charPositionInLine, oldToken.input)
            if isinstance(oldToken, CommonToken):
                self.start = oldToken.start
                self.stop = oldToken.stop
            else:
                self.start = start
                self.stop = stop

        else:
            super().__init__(type=type, channel=channel, input=input)

            # We need to be able to change the text once in a while.  If
            # this is non-null, then getText should return this.  Note that
            # start/stop are not affected by changing this.
            self._text = text

            # The char position into the input buffer where this token starts
            self.start = start

            # The char position into the input buffer where this token stops
            # This is the index of the last char, *not* the index after it!
            self.stop = stop

    @property
    def text(self):
        """The token text.

        Returns the explicitly set text if there is one, None when the
        token has no input, the input's substring start..stop when both
        positions lie inside the input, and '<EOF>' otherwise.
        """
        # Could be the empty string, and we want to return that.
        if self._text is not None:
            return self._text

        if not self.input:
            return None

        if self.start < self.input.size() and self.stop < self.input.size():
            return self.input.substring(self.start, self.stop)

        return '<EOF>'

    @text.setter
    def text(self, value):
        """
        Override the text for this token.  getText() will return this text
        rather than pulling from the buffer.  Note that this does not mean
        that start/stop indexes are not valid.  It means that that input
        was converted to a new string in the token object.
        """
        self._text = value

    def getInputStream(self):
        """Return the stream this token was created from."""
        return self.input

    def setInputStream(self, input):
        """Set the stream this token was created from."""
        self.input = input

    def __str__(self):
        """Return a one-line description of the token for debugging.

        EOF tokens render as "<EOF>". The channel is only shown when it is
        greater than 0.
        """
        if self.type == EOF:
            return "<EOF>"

        channelStr = ""
        if self.channel > 0:
            channelStr = ",channel=" + str(self.channel)

        txt = self.text
        if txt:
            # Make newline, carriage return and tab visible by replacing
            # each with two backslashes followed by its escape letter.
            txt = txt.replace("\n", r"\\n")
            txt = txt.replace("\r", r"\\r")
            txt = txt.replace("\t", r"\\t")
        else:
            txt = "<no text>"

        return ("[@{0.index},{0.start}:{0.stop}={txt!r},"
                "<{0.typeName}>{channelStr},"
                "{0.line}:{0.charPositionInLine}]"
                .format(self, txt=txt, channelStr=channelStr))


class ClassicToken(Token):
    """@brief Alternative token implementation.

    A Token object like we'd use in ANTLR 2.x; has an actual string created
    and associated with this object.  These objects are needed for imaginary
    tree nodes that have payload objects.  We need to create a Token object
    that has a string; the tree node will point at this token.  CommonToken
    has indexes into a char stream and hence cannot be used to introduce
    new strings.
    """

    def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
                 oldToken=None):
        """@brief Create a token, either from scratch or as a copy.

        @param type     Token type (ignored when oldToken is given).
        @param text     Token text (ignored when oldToken is given).
        @param channel  Channel (ignored when oldToken is given).
        @param oldToken Token to copy; its type, channel, text, line and
                        charPositionInLine are copied. The index is not
                        copied and stays at the base default.
        """
        if oldToken:
            super().__init__(type=oldToken.type, channel=oldToken.channel,
                             text=oldToken.text, line=oldToken.line,
                             charPositionInLine=oldToken.charPositionInLine)

        else:
            # line is deliberately left at the base default (0, i.e. "no
            # line"): that is the value a fresh ClassicToken has always
            # reported, and callers may compare it against 0.
            super().__init__(type=type, channel=channel, text=text,
                             index=None, charPositionInLine=None)

    def getInputStream(self):
        """A ClassicToken is not tied to an input stream; returns None."""
        return None

    def setInputStream(self, input):
        """A ClassicToken is not tied to an input stream; does nothing."""
        pass

    def toString(self):
        """Return a one-line description of the token for debugging.

        The channel is only shown when it is greater than 0.
        """
        channelStr = ""
        if self.channel > 0:
            channelStr = ",channel=" + str(self.channel)

        txt = self.text
        if not txt:
            txt = "<no text>"

        return ("[@{0.index!r},{txt!r},<{0.type!r}>{channelStr},"
                "{0.line!r}:{0.charPositionInLine!r}]"
                .format(self, txt=txt, channelStr=channelStr))

    __str__ = toString
    __repr__ = toString


INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)

# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
# will avoid creating a token for this symbol and try to fetch another.
SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)