"""ANTLR3 runtime package"""

# begin[licence]
#
# [The "BSD licence"]
# Copyright (c) 2005-2008 Terence Parr
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# end[licence]

from antlr3.constants import EOF, DEFAULT_CHANNEL, INVALID_TOKEN_TYPE

############################################################################
#
# basic token interface
#
############################################################################

class Token(object):
    """@brief Abstract token baseclass.

    Every accessor raises NotImplementedError; concrete token classes
    override them.
    """

    def getText(self):
        """@brief Get the text of the token.

        Using setter/getter methods is deprecated. Use o.text instead.
        """
        raise NotImplementedError

    def setText(self, text):
        """@brief Set the text of the token.

        Using setter/getter methods is deprecated. Use o.text instead.
        """
        raise NotImplementedError


    def getType(self):
        """@brief Get the type of the token.

        Using setter/getter methods is deprecated. Use o.type instead."""

        raise NotImplementedError

    def setType(self, ttype):
        """@brief Set the type of the token.

        Using setter/getter methods is deprecated. Use o.type instead."""

        raise NotImplementedError


    def getLine(self):
        """@brief Get the line number on which this token was matched

        Lines are numbered 1..n

        Using setter/getter methods is deprecated. Use o.line instead."""

        raise NotImplementedError

    def setLine(self, line):
        """@brief Set the line number on which this token was matched

        Using setter/getter methods is deprecated. Use o.line instead."""

        raise NotImplementedError


    def getCharPositionInLine(self):
        """@brief Get the column of the token's first character.

        Columns are numbered 0..n-1

        Using setter/getter methods is deprecated. Use o.charPositionInLine instead."""

        raise NotImplementedError

    def setCharPositionInLine(self, pos):
        """@brief Set the column of the token's first character.

        Using setter/getter methods is deprecated. Use o.charPositionInLine instead."""

        raise NotImplementedError


    def getChannel(self):
        """@brief Get the channel of the token

        Using setter/getter methods is deprecated. Use o.channel instead."""

        raise NotImplementedError

    def setChannel(self, channel):
        """@brief Set the channel of the token

        Using setter/getter methods is deprecated. Use o.channel instead."""

        raise NotImplementedError


    def getTokenIndex(self):
        """@brief Get the index in the input stream.

        An index from 0..n-1 of the token object in the input stream.
        This must be valid in order to use the ANTLRWorks debugger.

        Using setter/getter methods is deprecated. Use o.index instead."""

        raise NotImplementedError

    def setTokenIndex(self, index):
        """@brief Set the index in the input stream.

        Using setter/getter methods is deprecated. Use o.index instead."""

        raise NotImplementedError


    def getInputStream(self):
        """@brief From what character stream was this token created.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input."""

        raise NotImplementedError

    def setInputStream(self, input):
        """@brief Set the character stream this token was created from.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input."""

        raise NotImplementedError


############################################################################
#
# token implementations
#
# Token
# +- CommonToken
# \- ClassicToken
#
############################################################################

class CommonToken(Token):
    """@brief Basic token implementation.

    This implementation does not copy the text from the input stream upon
    creation, but keeps start/stop pointers into the stream to avoid
    unnecessary copy operations.

    """

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 input=None, start=None, stop=None, oldToken=None):
        """@brief Create a token, either from scratch or as a copy.

        @param type      token type
        @param channel   channel the token is emitted on
        @param text      explicit text override (None: read from input)
        @param input     character stream the token was matched in
        @param start     index of the token's first char in input
        @param stop      index of the token's last char in input
        @param oldToken  if given, every field is copied from this token
                         and all the other arguments are ignored
        """
        Token.__init__(self)

        if oldToken is not None:
            self.type = oldToken.type
            self.line = oldToken.line
            self.charPositionInLine = oldToken.charPositionInLine
            self.channel = oldToken.channel
            self.index = oldToken.index

            # Only tokens that carry a private text override expose _text;
            # other tokens (e.g. ClassicToken) only have the public text, so
            # fall back to that instead of raising AttributeError.
            if hasattr(oldToken, '_text'):
                self._text = oldToken._text
            else:
                self._text = oldToken.text

            # ClassicToken has no input attribute; treat that as "no stream".
            self.input = getattr(oldToken, 'input', None)

            if isinstance(oldToken, CommonToken):
                self.start = oldToken.start
                self.stop = oldToken.stop
            else:
                # Always define start/stop so later attribute access
                # (e.g. in __str__) cannot fail on a missing attribute.
                self.start = None
                self.stop = None

        else:
            self.type = type
            self.input = input
            self.charPositionInLine = -1 # set to invalid position
            self.line = 0
            self.channel = channel

            # What token number is this from 0..n-1 tokens; < 0 implies
            # invalid index
            self.index = -1

            # We need to be able to change the text once in a while.  If
            # this is non-null, then getText should return this.  Note that
            # start/stop are not affected by changing this.
            self._text = text

            # The char position into the input buffer where this token starts
            self.start = start

            # The char position into the input buffer where this token stops
            # This is the index of the last char, *not* the index after it!
            self.stop = stop


    def getText(self):
        """@brief Get the text of the token.

        Returns the explicit override if one was set, otherwise the
        start..stop slice of the input stream, None when there is no input
        stream, or '<EOF>' when start/stop lie outside the stream.
        """
        if self._text is not None:
            return self._text

        if self.input is None:
            return None

        if self.start < self.input.size() and self.stop < self.input.size():
            return self.input.substring(self.start, self.stop)

        return '<EOF>'


    def setText(self, text):
        """
        Override the text for this token.  getText() will return this text
        rather than pulling from the buffer.  Note that this does not mean
        that start/stop indexes are not valid.  It means that that input
        was converted to a new string in the token object.
        """
        self._text = text

    text = property(getText, setText)


    def getType(self):
        return self.type

    def setType(self, ttype):
        self.type = ttype

    def getTypeName(self):
        """@brief Get the token type rendered as a string."""
        return str(self.type)

    typeName = property(lambda s: s.getTypeName())

    def getLine(self):
        return self.line

    def setLine(self, line):
        self.line = line


    def getCharPositionInLine(self):
        return self.charPositionInLine

    def setCharPositionInLine(self, pos):
        self.charPositionInLine = pos


    def getChannel(self):
        return self.channel

    def setChannel(self, channel):
        self.channel = channel


    def getTokenIndex(self):
        return self.index

    def setTokenIndex(self, index):
        self.index = index


    def getInputStream(self):
        return self.input

    def setInputStream(self, input):
        self.input = input


    def __str__(self):
        """@brief Render the token for debugging.

        Returns "<EOF>" for EOF tokens, otherwise
        "[@index,start:stop='text',<type>,channel=N,line:column]" where the
        channel part is only present for channels > 0.
        """
        if self.type == EOF:
            return "<EOF>"

        channelStr = ""
        if self.channel > 0:
            channelStr = ",channel=" + str(self.channel)

        txt = self.text
        if txt is not None:
            # NOTE: the replacement strings hold two backslashes each; they
            # are kept exactly as they were so the output stays unchanged.
            txt = txt.replace("\n","\\\\n")
            txt = txt.replace("\r","\\\\r")
            txt = txt.replace("\t","\\\\t")
        else:
            txt = "<no text>"

        # %s instead of %d: identical output for integers, but does not
        # raise TypeError when a position is None (start/stop default to
        # None, e.g. for INVALID_TOKEN and SKIP_TOKEN).
        return "[@%s,%s:%s=%r,<%s>%s,%s:%s]" % (
            self.index,
            self.start, self.stop,
            txt,
            self.typeName, channelStr,
            self.line, self.charPositionInLine
            )


class ClassicToken(Token):
    """@brief Alternative token implementation.

    A Token object like we'd use in ANTLR 2.x; has an actual string created
    and associated with this object.  These objects are needed for imaginary
    tree nodes that have payload objects.  We need to create a Token object
    that has a string; the tree node will point at this token.  CommonToken
    has indexes into a char stream and hence cannot be used to introduce
    new strings.
    """

    def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
                 oldToken=None
                 ):
        """@brief Create a token, either from scratch or as a copy.

        @param type      token type
        @param text      token text
        @param channel   channel the token is emitted on
        @param oldToken  if given, text, type, line, charPositionInLine and
                         channel are copied from this token and the other
                         arguments are ignored
        """
        Token.__init__(self)

        if oldToken is not None:
            self.text = oldToken.text
            self.type = oldToken.type
            self.line = oldToken.line
            self.charPositionInLine = oldToken.charPositionInLine
            self.channel = oldToken.channel

        else:
            # Without this else the assignments below used to run
            # unconditionally and silently discarded the copied values.
            self.text = text
            self.type = type
            self.line = None
            self.charPositionInLine = None
            self.channel = channel

        # The index is never copied from oldToken; it starts out unset.
        self.index = None


    def getText(self):
        return self.text

    def setText(self, text):
        self.text = text


    def getType(self):
        return self.type

    def setType(self, ttype):
        self.type = ttype


    def getLine(self):
        return self.line

    def setLine(self, line):
        self.line = line


    def getCharPositionInLine(self):
        return self.charPositionInLine

    def setCharPositionInLine(self, pos):
        self.charPositionInLine = pos


    def getChannel(self):
        return self.channel

    def setChannel(self, channel):
        self.channel = channel


    def getTokenIndex(self):
        return self.index

    def setTokenIndex(self, index):
        self.index = index


    def getInputStream(self):
        # A ClassicToken is not tied to a character stream.
        return None

    def setInputStream(self, input):
        # Intentionally ignored: there is no stream to remember.
        pass


    def toString(self):
        """@brief Render the token for debugging.

        Format: "[@index,'text',<type>,channel=N,line:column]"; the channel
        part is only present for channels > 0.
        """
        channelStr = ""
        if self.channel > 0:
            channelStr = ",channel=" + str(self.channel)

        txt = self.text
        if txt is None:
            txt = "<no text>"

        return "[@%r,%r,<%r>%s,%r:%r]" % (self.index,
                                          txt,
                                          self.type,
                                          channelStr,
                                          self.line,
                                          self.charPositionInLine
                                          )


    __str__ = toString
    __repr__ = toString


INVALID_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)

# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
# will avoid creating a token for this symbol and try to fetch another.
SKIP_TOKEN = CommonToken(type=INVALID_TOKEN_TYPE)