• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
"""ANTLR3 runtime package"""
2
# begin[licence]
#
# [The "BSD licence"]
# Copyright (c) 2005-2012 Terence Parr
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# end[licence]
32
33from .constants import DEFAULT_CHANNEL, EOF, INVALID_TOKEN_TYPE
34
############################################################################
#
# basic token interface
#
############################################################################
40
class Token(object):
    """@brief Abstract token baseclass.

    Stores the attributes common to every token implementation (type,
    channel, text, index, line, charPositionInLine and the originating
    input) and exposes them through properties.  Access to the input
    stream is left to subclasses.
    """

    # Optional mapping from token type to a readable name, installed via
    # registerTokenNamesMap().  While unset, typeName falls back to
    # str(type).
    TOKEN_NAMES_MAP = None

    @classmethod
    def registerTokenNamesMap(cls, tokenNamesMap):
        """@brief Store a mapping from token type to token name.

        This enables token.typeName to give something more meaningful
        than, e.g., '6'.

        The mapping is kept by reference (not copied) and an entry
        mapping EOF to "EOF" is written into it.
        """
        cls.TOKEN_NAMES_MAP = tokenNamesMap
        cls.TOKEN_NAMES_MAP[EOF] = "EOF"

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 index=-1, line=0, charPositionInLine=-1, input=None):
        """@brief Initialise the common token attributes.

        @param type  Token type.
        @param channel  Channel the token is on.
        @param text  Text of the token, or None.
        @param index  Index of the token in the token stream; -1 marks an
            invalid index.
        @param line  Line on which the token was found.
        @param charPositionInLine  Column of the first character; -1 marks
            an invalid position.
        @param input  Stream the token was created from, or None.
        """
        # We use -1 for index and charPositionInLine as an invalid index
        self._type = type
        self._channel = channel
        self._text = text
        self._index = index
        # Keep the caller-supplied line.  Hard-coding 0 here would drop
        # the line number whenever a subclass copies it from another token.
        self._line = line
        self._charPositionInLine = charPositionInLine
        self.input = input

    # To override a property, you'll need to override both the getter and setter.
    @property
    def text(self):
        """The text of the token."""
        return self._text

    @text.setter
    def text(self, value):
        self._text = value

    @property
    def type(self):
        """The token type."""
        return self._type

    @type.setter
    def type(self, value):
        self._type = value

    # For compatibility
    def getType(self):
        """Return the token type (method-style accessor for `type`)."""
        return self._type

    @property
    def typeName(self):
        """Readable name for the token type.

        Looks the type up in TOKEN_NAMES_MAP when a non-empty map is
        registered, yielding "INVALID_TOKEN_TYPE" for unknown types;
        otherwise returns str(type).
        """
        if self.TOKEN_NAMES_MAP:
            return self.TOKEN_NAMES_MAP.get(self._type, "INVALID_TOKEN_TYPE")
        else:
            return str(self._type)

    @property
    def line(self):
        """Lines are numbered 1..n."""
        return self._line

    @line.setter
    def line(self, value):
        self._line = value

    @property
    def charPositionInLine(self):
        """Columns are numbered 0..n-1."""
        return self._charPositionInLine

    @charPositionInLine.setter
    def charPositionInLine(self, pos):
        self._charPositionInLine = pos

    @property
    def channel(self):
        """The channel this token is on."""
        return self._channel

    @channel.setter
    def channel(self, value):
        self._channel = value

    @property
    def index(self):
        """
        An index from 0..n-1 of the token object in the input stream.
        This must be valid in order to use the ANTLRWorks debugger.
        """
        return self._index

    @index.setter
    def index(self, value):
        self._index = value

    def getInputStream(self):
        """@brief From what character stream was this token created.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input.

        The base class always raises NotImplementedError.
        """

        raise NotImplementedError

    def setInputStream(self, input):
        """@brief Set the character stream this token was created from.

        You don't have to implement but it's nice to know where a Token
        comes from if you have include files etc... on the input.

        The base class always raises NotImplementedError.
        """

        raise NotImplementedError
153
154
############################################################################
#
# token implementations
#
# Token
# +- CommonToken
# \- ClassicToken
#
############################################################################
164
class CommonToken(Token):
    """@brief Basic token implementation.

    The token text is not copied out of the input stream at creation time.
    Instead the token remembers start/stop positions in the stream and only
    extracts the text on demand, unless an explicit text has been set.
    """

    def __init__(self, type=None, channel=DEFAULT_CHANNEL, text=None,
                 input=None, start=None, stop=None, oldToken=None):
        """@brief Create a token from scratch or as a copy of oldToken.

        With a truthy oldToken, its type, channel, text, index, line,
        charPositionInLine and input are forwarded to the base initialiser
        and the type/channel/text/input arguments are ignored.  start/stop
        are taken from oldToken when it is a CommonToken, and from the
        arguments otherwise.
        """
        if not oldToken:
            super().__init__(type=type, channel=channel, input=input)

            # An explicit text overrides whatever the input buffer holds;
            # start/stop are unaffected by it.
            self._text = text

            # start: position in the input buffer of the first character.
            # stop: position of the last character, *not* one past it.
            self.start = start
            self.stop = stop
            return

        super().__init__(oldToken.type, oldToken.channel, oldToken.text,
                         oldToken.index, oldToken.line,
                         oldToken.charPositionInLine, oldToken.input)

        # Only another CommonToken carries buffer positions to copy.
        hasBounds = isinstance(oldToken, CommonToken)
        self.start = oldToken.start if hasBounds else start
        self.stop = oldToken.stop if hasBounds else stop

    @property
    def text(self):
        """Text of the token.

        Returns the explicitly set text if there is one (the empty string
        counts), None when there is no input, the substring start..stop of
        the input when both positions lie below input.size(), and '<EOF>'
        otherwise.
        """
        explicit = self._text
        if explicit is not None:
            return explicit

        stream = self.input
        if not stream:
            return None

        inside = self.start < stream.size() and self.stop < stream.size()
        return stream.substring(self.start, self.stop) if inside else '<EOF>'

    @text.setter
    def text(self, value):
        """
        Replace the text of this token.  Afterwards the text property
        returns this value instead of reading from the buffer.  The
        start/stop indexes are left as they are.
        """
        self._text = value

    def getInputStream(self):
        """Return the input this token refers to."""
        return self.input

    def setInputStream(self, input):
        """Set the input this token refers to."""
        self.input = input

    def __str__(self):
        """Return a one-line description of the token ("<EOF>" for EOF)."""
        if self.type == EOF:
            return "<EOF>"

        # The channel is only shown when it is greater than 0.
        channelStr = (",channel=" + str(self.channel)) if self.channel > 0 else ""

        shown = self.text
        if not shown:
            shown = "<no text>"
        else:
            # Swap control characters for a visible backslash sequence.
            for raw, visible in (("\n", r"\\n"), ("\r", r"\\r"), ("\t", r"\\t")):
                shown = shown.replace(raw, visible)

        layout = ("[@{0.index},{0.start}:{0.stop}={txt!r},"
                  "<{0.typeName}>{channelStr},"
                  "{0.line}:{0.charPositionInLine}]")
        return layout.format(self, txt=shown, channelStr=channelStr)
257
258
class ClassicToken(Token):
    """@brief Alternative token implementation.

    A token in the ANTLR 2.x manner: it owns an actual string.  Imaginary
    tree nodes with payload objects need such a token for the node to
    point at.  CommonToken works with indexes into a char stream and so
    cannot be used to introduce new strings.
    """

    def __init__(self, type=None, text=None, channel=DEFAULT_CHANNEL,
                 oldToken=None):
        """@brief Create a token from scratch or as a copy of oldToken.

        With a truthy oldToken, its type, channel, text, line and
        charPositionInLine are forwarded to the base initialiser and the
        other arguments are ignored.  Otherwise type, channel and text are
        forwarded, with None for index, line and charPositionInLine.
        """
        if oldToken:
            fields = dict(type=oldToken.type, channel=oldToken.channel,
                          text=oldToken.text, line=oldToken.line,
                          charPositionInLine=oldToken.charPositionInLine)
        else:
            fields = dict(type=type, channel=channel, text=text,
                          index=None, line=None, charPositionInLine=None)

        super().__init__(**fields)

    def getInputStream(self):
        """Always returns None."""
        return None

    def setInputStream(self, input):
        """Accepts and ignores the given input."""
        pass

    def toString(self):
        """Return a one-line description of the token."""
        # The channel is only shown when it is greater than 0.
        channelStr = (",channel=" + str(self.channel)) if self.channel > 0 else ""

        shown = self.text or "<no text>"

        layout = ("[@{0.index!r},{txt!r},<{0.type!r}>{channelStr},"
                  "{0.line!r}:{0.charPositionInLine!r}]")
        return layout.format(self, txt=shown, channelStr=channelStr)

    __str__ = toString
    __repr__ = toString
304
305
# Module-level token whose type is INVALID_TOKEN_TYPE.
INVALID_TOKEN = CommonToken(INVALID_TOKEN_TYPE)

# A lexer rule action may set the token to SKIP_TOKEN; ANTLR then creates
# no token for this symbol and tries to fetch another one.  This is a
# separate instance from INVALID_TOKEN.
SKIP_TOKEN = CommonToken(INVALID_TOKEN_TYPE)
311