• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""ANTLR3 runtime package"""
2
3# begin[licence]
4#
5# [The "BSD licence"]
6# Copyright (c) 2005-2008 Terence Parr
7# All rights reserved.
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions
11# are met:
12# 1. Redistributions of source code must retain the above copyright
13#    notice, this list of conditions and the following disclaimer.
14# 2. Redistributions in binary form must reproduce the above copyright
15#    notice, this list of conditions and the following disclaimer in the
16#    documentation and/or other materials provided with the distribution.
17# 3. The name of the author may not be used to endorse or promote products
18#    derived from this software without specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30#
31# end[licence]
32
33from antlr3.constants import EOF, DEFAULT_CHANNEL, INVALID_TOKEN_TYPE
34
35############################################################################
36#
37# basic token interface
38#
39############################################################################
40
class Token(object):
    """@brief Abstract token baseclass.

    Declares the accessor interface of a token. Every method in this class
    raises NotImplementedError; subclasses override the ones they support.
    """

    def getText(self):
        """@brief Get the text of the token.

        Using setter/getter methods is deprecated. Use o.text instead.
        """
        raise NotImplementedError

    def setText(self, text):
        """@brief Set the text of the token.

        Using setter/getter methods is deprecated. Use o.text instead.
        """
        raise NotImplementedError


    def getType(self):
        """@brief Get the type of the token.

        Using setter/getter methods is deprecated. Use o.type instead.
        """
        raise NotImplementedError

    def setType(self, ttype):
        """@brief Set the type of the token.

        Using setter/getter methods is deprecated. Use o.type instead.
        """
        raise NotImplementedError


    def getLine(self):
        """@brief Get the line number on which this token was matched.

        Lines are numbered 1..n

        Using setter/getter methods is deprecated. Use o.line instead.
        """
        raise NotImplementedError

    def setLine(self, line):
        """@brief Set the line number on which this token was matched.

        Using setter/getter methods is deprecated. Use o.line instead.
        """
        raise NotImplementedError


    def getCharPositionInLine(self):
        """@brief Get the column of the token's first character.

        Columns are numbered 0..n-1

        Using setter/getter methods is deprecated. Use o.charPositionInLine
        instead.
        """
        raise NotImplementedError

    def setCharPositionInLine(self, pos):
        """@brief Set the column of the token's first character.

        Using setter/getter methods is deprecated. Use o.charPositionInLine
        instead.
        """
        raise NotImplementedError


    def getChannel(self):
        """@brief Get the channel of the token.

        Using setter/getter methods is deprecated. Use o.channel instead.
        """
        raise NotImplementedError

    def setChannel(self, channel):
        """@brief Set the channel of the token.

        Using setter/getter methods is deprecated. Use o.channel instead.
        """
        raise NotImplementedError


    def getTokenIndex(self):
        """@brief Get the index in the input stream.

        An index from 0..n-1 of the token object in the input stream.
        This must be valid in order to use the ANTLRWorks debugger.

        Using setter/getter methods is deprecated. Use o.index instead.
        """
        raise NotImplementedError

    def setTokenIndex(self, index):
        """@brief Set the index in the input stream.

        Using setter/getter methods is deprecated. Use o.index instead.
        """
        raise NotImplementedError


    def getInputStream(self):
        """@brief Get the character stream this token was created from.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input.
        """
        raise NotImplementedError

    def setInputStream(self, input):
        """@brief Set the character stream this token was created from.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input.
        """
        raise NotImplementedError
156
157
158############################################################################
159#
160# token implementations
161#
162# Token
163# +- CommonToken
164# \- ClassicToken
165#
166############################################################################
167
class CommonToken(Token):
    """@brief Basic token implementation.

    This implementation does not copy the text from the input stream upon
    creation, but keeps start/stop pointers into the stream to avoid
    unnecessary copy operations.

    """

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 input=None, start=None, stop=None, oldToken=None):
        """@brief Create a new token, or a copy of oldToken.

        When oldToken is given, every field is taken from it and the other
        arguments are ignored. Otherwise the token is built from the
        arguments; line, charPositionInLine and index start out as 0, -1
        and -1 respectively.
        """
        Token.__init__(self)

        if oldToken is not None:
            self.type = oldToken.type
            self.line = oldToken.line
            self.charPositionInLine = oldToken.charPositionInLine
            self.channel = oldToken.channel
            self.index = oldToken.index

            # Only CommonToken is known to carry the private _text
            # attribute; other token classes in this module (ClassicToken)
            # expose their text as .text and have no .input attribute.
            if hasattr(oldToken, '_text'):
                self._text = oldToken._text
            else:
                self._text = oldToken.text
            self.input = getattr(oldToken, 'input', None)

            if isinstance(oldToken, CommonToken):
                self.start = oldToken.start
                self.stop = oldToken.stop
            else:
                # Always define start/stop so later attribute access
                # (e.g. in __str__) cannot fail with AttributeError.
                self.start = None
                self.stop = None

        else:
            self.type = type
            self.input = input
            self.charPositionInLine = -1 # set to invalid position
            self.line = 0
            self.channel = channel

            # What token number is this from 0..n-1 tokens; < 0 implies
            # invalid index
            self.index = -1

            # We need to be able to change the text once in a while.  If
            # this is non-null, then getText should return this.  Note that
            # start/stop are not affected by changing this.
            self._text = text

            # The char position into the input buffer where this token starts
            self.start = start

            # The char position into the input buffer where this token stops
            # This is the index of the last char, *not* the index after it!
            self.stop = stop


    def getText(self):
        """@brief Get the text of the token.

        Returns the override text if one was set. Otherwise returns None
        when there is no input stream, the substring start..stop of the
        input stream when both offsets are smaller than the stream size,
        and '<EOF>' in the remaining case.
        """
        if self._text is not None:
            return self._text

        if self.input is None:
            return None

        if self.start < self.input.size() and self.stop < self.input.size():
            return self.input.substring(self.start, self.stop)

        return '<EOF>'


    def setText(self, text):
        """
        Override the text for this token.  getText() will return this text
        rather than pulling from the buffer.  Note that this does not mean
        that start/stop indexes are not valid.  It means that that input
        was converted to a new string in the token object.
        """
        self._text = text

    text = property(getText, setText)


    def getType(self):
        """@brief Get the type of the token."""
        return self.type

    def setType(self, ttype):
        """@brief Set the type of the token."""
        self.type = ttype

    def getTypeName(self):
        """@brief Get the token type converted to a string."""
        return str(self.type)

    # Looked up through getTypeName() at access time so that a subclass
    # overriding getTypeName() also changes this property.
    typeName = property(lambda s: s.getTypeName())

    def getLine(self):
        """@brief Get the line number stored on this token."""
        return self.line

    def setLine(self, line):
        """@brief Set the line number stored on this token."""
        self.line = line


    def getCharPositionInLine(self):
        """@brief Get the column stored on this token."""
        return self.charPositionInLine

    def setCharPositionInLine(self, pos):
        """@brief Set the column stored on this token."""
        self.charPositionInLine = pos


    def getChannel(self):
        """@brief Get the channel of the token."""
        return self.channel

    def setChannel(self, channel):
        """@brief Set the channel of the token."""
        self.channel = channel


    def getTokenIndex(self):
        """@brief Get the index of the token."""
        return self.index

    def setTokenIndex(self, index):
        """@brief Set the index of the token."""
        self.index = index


    def getInputStream(self):
        """@brief Get the input stream this token refers to."""
        return self.input

    def setInputStream(self, input):
        """@brief Set the input stream this token refers to."""
        self.input = input


    def __str__(self):
        """@brief Return a one-line description of the token.

        Tokens of type EOF are rendered as "<EOF>". A channel suffix is only
        included for channels greater than 0.
        """
        if self.type == EOF:
            return "<EOF>"

        channelStr = ""
        if self.channel > 0:
            channelStr = ",channel=" + str(self.channel)

        txt = self.text
        if txt is not None:
            # NOTE(review): the replacement strings contain two backslashes
            # and %r below escapes them again; left unchanged so the
            # rendered output stays what existing callers see.
            txt = txt.replace("\n","\\\\n")
            txt = txt.replace("\r","\\\\r")
            txt = txt.replace("\t","\\\\t")
        else:
            txt = "<no text>"

        # %s instead of %d: identical output for integers, but does not
        # raise TypeError when a field is None (start/stop default to None,
        # e.g. for a token created with only a type).
        return "[@%s,%s:%s=%r,<%s>%s,%s:%s]" % (
            self.index,
            self.start, self.stop,
            txt,
            self.typeName, channelStr,
            self.line, self.charPositionInLine
            )
310
311
class ClassicToken(Token):
    """@brief Alternative token implementation.

    A Token object like we'd use in ANTLR 2.x; has an actual string created
    and associated with this object.  These objects are needed for imaginary
    tree nodes that have payload objects.  We need to create a Token object
    that has a string; the tree node will point at this token.  CommonToken
    has indexes into a char stream and hence cannot be used to introduce
    new strings.
    """

    def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
                 oldToken=None
                 ):
        """@brief Create a new token, or a copy of oldToken.

        When oldToken is given, text, type, line, charPositionInLine and
        channel are copied from it and the other arguments are ignored.
        Otherwise the token is built from the arguments, with line and
        charPositionInLine set to None. The index is None in both cases.
        """
        Token.__init__(self)

        if oldToken is not None:
            self.text = oldToken.text
            self.type = oldToken.type
            self.line = oldToken.line
            self.charPositionInLine = oldToken.charPositionInLine
            self.channel = oldToken.channel

        else:
            # Defaults apply only when there is nothing to copy from;
            # assigning them unconditionally would discard the copied
            # values.
            self.text = text
            self.type = type
            self.line = None
            self.charPositionInLine = None
            self.channel = channel

        self.index = None


    def getText(self):
        """@brief Get the text of the token."""
        return self.text

    def setText(self, text):
        """@brief Set the text of the token."""
        self.text = text


    def getType(self):
        """@brief Get the type of the token."""
        return self.type

    def setType(self, ttype):
        """@brief Set the type of the token."""
        self.type = ttype


    def getLine(self):
        """@brief Get the line number stored on this token."""
        return self.line

    def setLine(self, line):
        """@brief Set the line number stored on this token."""
        self.line = line


    def getCharPositionInLine(self):
        """@brief Get the column stored on this token."""
        return self.charPositionInLine

    def setCharPositionInLine(self, pos):
        """@brief Set the column stored on this token."""
        self.charPositionInLine = pos


    def getChannel(self):
        """@brief Get the channel of the token."""
        return self.channel

    def setChannel(self, channel):
        """@brief Set the channel of the token."""
        self.channel = channel


    def getTokenIndex(self):
        """@brief Get the index of the token."""
        return self.index

    def setTokenIndex(self, index):
        """@brief Set the index of the token."""
        self.index = index


    def getInputStream(self):
        """@brief Always returns None; no input stream is stored."""
        return None

    def setInputStream(self, input):
        """@brief Accepted for interface compatibility; the value is ignored."""
        pass


    def toString(self):
        """@brief Return a one-line description of the token.

        A channel suffix is only included for channels greater than 0, and
        a missing text is rendered as "<no text>".
        """
        channelStr = ""
        if self.channel > 0:
            channelStr = ",channel=" + str(self.channel)

        txt = self.text
        if txt is None:
            txt = "<no text>"

        return "[@%r,%r,<%r>%s,%r:%r]" % (self.index,
                                          txt,
                                          self.type,
                                          channelStr,
                                          self.line,
                                          self.charPositionInLine
                                          )


    __str__ = toString
    __repr__ = toString
412
413
# Module-level token instance whose type is INVALID_TOKEN_TYPE.
INVALID_TOKEN = CommonToken(INVALID_TOKEN_TYPE)

# In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
# will avoid creating a token for this symbol and try to fetch another.
# This is a separate instance from INVALID_TOKEN, although it carries the
# same token type.
SKIP_TOKEN = CommonToken(INVALID_TOKEN_TYPE)
419