• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""ANTLR3 runtime package"""
2
3# begin[licence]
4#
5# [The "BSD licence"]
6# Copyright (c) 2005-2008 Terence Parr
7# All rights reserved.
8#
9# Redistribution and use in source and binary forms, with or without
10# modification, are permitted provided that the following conditions
11# are met:
12# 1. Redistributions of source code must retain the above copyright
13#    notice, this list of conditions and the following disclaimer.
14# 2. Redistributions in binary form must reproduce the above copyright
15#    notice, this list of conditions and the following disclaimer in the
16#    documentation and/or other materials provided with the distribution.
17# 3. The name of the author may not be used to endorse or promote products
18#    derived from this software without specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30#
31# end[licence]
32
33import codecs
34from StringIO import StringIO
35
36from antlr3.constants import DEFAULT_CHANNEL, EOF
37from antlr3.tokens import Token, CommonToken
38
39
40############################################################################
41#
42# basic interfaces
43#   IntStream
44#    +- CharStream
45#    \- TokenStream
46#
47# subclasses must implemented all methods
48#
49############################################################################
50
class IntStream(object):
    """
    @brief Base interface for streams of integer values.

    The simplest stream abstraction: a sequence of integers, used when
    only the character or token-type sequence matters (such as during
    interpretation).
    """

    def consume(self):
        """Advance the stream to the next symbol."""
        raise NotImplementedError


    def LA(self, i):
        """Return the int found i symbols ahead; i=1 is the next int.

        Negative offsets look backwards: LA(-1) is the symbol just
        matched.  Looking before the first symbol (LA(-i) past the
        start) yields -1, the invalid char / EOF value.
        """
        raise NotImplementedError


    def mark(self):
        """Start buffering (if not already) and return a marker.

        The returned value - usually the current input position from
        index(), but possibly some other token - can later be handed to
        rewind() to restore the stream to this exact spot.
        rewind(mark()) must not move the input cursor.  A lexer also
        records line/column state, so its markers are not pure input
        indexes; the same holds for tree node streams.
        """
        raise NotImplementedError


    def index(self):
        """Return the current symbol index, 0..n.

        This is the index of the symbol about to be read, not of the
        one most recently consumed; n indicates the last symbol has
        been read.
        """
        raise NotImplementedError


    def rewind(self, marker=None):
        """Restore the stream state captured by marker.

        After this call, index() returns what it returned when the
        marker was created.  The marker is usually a value from index()
        but does not have to be; conceptually this is release() plus
        seek().  Any markers created after the given one must be
        unrolled like a stack.

        When marker is None, rewind to the input position of the most
        recent marker *without* popping it off: mark(i)/rewind(i) must
        still balance.  This form is used after a cyclic DFA, just
        before starting a sem/syn predicate, to move the input back to
        the start of the decision - it is like seek(last marker's input
        position).
        """
        raise NotImplementedError


    def release(self, marker=None):
        """Drop bookkeeping for marker (and deeper) without seeking back.

        Use this when committing to a backtrack: same behavior as
        rewind() except resources are freed without the backward seek.
        Resources for every marker from the given one to the deepest
        must be discarded - releasing at depth 2 while nested 5 marks
        deep frees depths 2..5.
        """
        raise NotImplementedError


    def seek(self, index):
        """Move the input cursor to the given index.

        Normally used to seek ahead; no buffering is required unless
        the stream will also seek backwards (backtracking).  Unlike
        rewind(), this may move in either direction and takes a plain
        input cursor (index) as its argument.

        Char streams must keep derived state such as the line number
        correct when seeking forward.  Backward seeks happen via the
        mark/rewind mechanism, which already restores that state, so no
        update is needed there.

        Currently this only supports efficient memoized backtracking,
        but it may serve incremental parsing in the future.

        index is 0..n-1: after seek(i), LA(1) returns the i-th symbol,
        so seek(0) makes LA(1) return the first element of the stream.
        """
        raise NotImplementedError


    def size(self):
        """Return the number of symbols, including a single EOF.

        Only really meaningful for streams that buffer everything up;
        handy for displaying the entire stream or for testing.
        """
        raise NotImplementedError


    def getSourceName(self):
        """Describe where the symbols are coming from.

        Implementations normally pass the buck all the way to the
        lexer, which can ask its own input stream for the file name or
        whatever.
        """
        raise NotImplementedError
180
181
class CharStream(IntStream):
    """
    @brief A source of characters for an ANTLR lexer.

    This is an abstract class; a concrete subclass must implement the
    methods below.
    """

    # pylint does not realize that this is an interface, too
    #pylint: disable-msg=W0223

    # Sentinel value returned when reading past the end of the input.
    EOF = -1


    def substring(self, start, stop):
        """Return the text between two absolute indexes.

        Infinite streams do not need this; it primarily exists as a
        useful interface for action code.  Actions must simply avoid
        calling it on streams that do not support it.
        """
        raise NotImplementedError


    def LT(self, i):
        """Return the i-th character of lookahead.

        Usually identical to LA(i); generated lexer code uses it for
        labels.  Returning an int rather than a char keeps the API
        32-bit clean and consistent with LA.
        """
        raise NotImplementedError


    def getLine(self):
        """Return the current line (ANTLR tracks line info automatically)."""
        raise NotImplementedError


    def setLine(self, line):
        """Reset the line number; needed because this stream can rewind."""
        raise NotImplementedError


    def getCharPositionInLine(self):
        """Return the character's 0..n-1 index relative to the line start."""
        raise NotImplementedError


    def setCharPositionInLine(self, pos):
        """Reset the column; needed because this stream can rewind."""
        raise NotImplementedError
241
242
class TokenStream(IntStream):
    """
    @brief A stream of tokens accessing tokens from a TokenSource

    This is an abstract class; a concrete subclass must implement the
    methods below.
    """

    # pylint does not realize that this is an interface, too
    #pylint: disable-msg=W0223

    def LT(self, k):
        """Return the Token k positions ahead; k=1 is the next Token.

        Negative k reaches into the past: -1 is the previous token, -2
        is two tokens ago.  LT(0) is undefined.  For k >= n, return
        Token.EOFToken.  Return null for LT(0) and for any k whose
        absolute address would be negative.
        """
        raise NotImplementedError


    def range(self):
        """Return how far ahead the stream has been asked to look.

        The return value is a valid index from 0..n-1.
        """
        raise NotImplementedError


    def get(self, i):
        """Return the token at absolute index i; 0..n-1.

        Really only needed for profiling, debugging and token stream
        rewriting.  A stream that does not buffer its tokens cannot
        support this (and naturally cannot use the rewrite stream
        feature).  DebugTokenStream could probably be altered to drop
        this dependency.
        """
        raise NotImplementedError


    def getTokenSource(self):
        """Return the object (not its name) this stream pulls Tokens from."""
        raise NotImplementedError


    def toString(self, start=None, stop=None):
        """Return the text of all tokens from start to stop, inclusive.

        A stream that does not buffer all tokens may simply return ""
        or null; users then must not access $ruleLabel.text in actions.

        Because tokens are not required to carry an index, start/stop
        may also be two token objects that indicate the range
        themselves; implementations will most often delegate to the
        int/int form.  This parallels
        TreeNodeStream.toString(Object,Object).
        """
        raise NotImplementedError
314
315############################################################################
316#
317# character streams for use in lexers
318#   CharStream
319#   \- ANTLRStringStream
320#
321############################################################################
322
323
class ANTLRStringStream(CharStream):
    """
    @brief CharStream that pull data from a unicode string.

    A pretty quick CharStream that pulls all data from an array
    directly.  Every method call counts in the lexer.

    """


    def __init__(self, data):
        """
        @param data This should be a unicode string holding the data you want
           to parse. If you pass in a byte string, the Lexer will choke on
           non-ascii data.

        """

        CharStream.__init__(self)

        # The data being scanned, kept twice: the unicode text itself
        # (for LT/substring) and a parallel list of code points, since
        # LA() deals in ints.
        self.strdata = unicode(data)
        self.data = [ord(c) for c in self.strdata]

        # How many characters are actually in the buffer
        self.n = len(data)

        # 0..n-1 index into string of next char
        self.p = 0

        # line number 1..n within the input
        self.line = 1

        # The index of the character relative to the beginning of the
        # line 0..n-1
        self.charPositionInLine = 0

        # A list of (p, line, charPositionInLine) tuples that tracks the
        # stream state values that can change as you move through the
        # input stream.  Indexed from 0..markDepth-1.
        self._markers = [ ]
        self.lastMarker = None
        self.markDepth = 0

        # What is name or source of this char stream?
        self.name = None


    def reset(self):
        """
        Reset the stream so that it's in the same state it was
        when the object was created *except* the data array is not
        touched.
        """
        # NOTE(review): markDepth and lastMarker are left untouched
        # here - presumably no markers are outstanding when reset() is
        # called; confirm before relying on that.

        self.p = 0
        self.line = 1
        self.charPositionInLine = 0
        self._markers = [ ]


    def consume(self):
        """Advance past the current character, tracking line/column."""
        try:
            if self.data[self.p] == 10: # \n
                self.line += 1
                self.charPositionInLine = 0
            else:
                self.charPositionInLine += 1

            self.p += 1

        except IndexError:
            # happens when we reached EOF and self.data[self.p] fails;
            # just do nothing
            pass



    def LA(self, i):
        """Return the code point i chars ahead (i=1 is next); EOF at end."""
        if i == 0:
            return 0 # undefined

        if i < 0:
            i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1]

        try:
            return self.data[self.p+i-1]
        except IndexError:
            return EOF



    def LT(self, i):
        """Like LA(), but return the character itself, not its code point."""
        if i == 0:
            return 0 # undefined

        if i < 0:
            i += 1 # e.g., translate LA(-1) to use offset i=0; then data[p+0-1]

        try:
            return self.strdata[self.p+i-1]
        except IndexError:
            return EOF


    def index(self):
        """
        Return the current input symbol index 0..n where n indicates the
        last symbol has been read.  The index is the index of char to
        be returned from LA(1).
        """

        return self.p


    def size(self):
        """Return the number of characters in the buffer."""
        return self.n


    def mark(self):
        """Record the current stream state and return a 1-based marker."""
        # Reuse a slot freed by an earlier release() if one exists,
        # otherwise grow the marker list.
        state = (self.p, self.line, self.charPositionInLine)
        try:
            self._markers[self.markDepth] = state
        except IndexError:
            self._markers.append(state)
        self.markDepth += 1

        self.lastMarker = self.markDepth

        return self.lastMarker


    def rewind(self, marker=None):
        """Restore the state saved by marker (default: the last mark)."""
        if marker is None:
            marker = self.lastMarker

        # markers are 1-based, the list is 0-based
        p, line, charPositionInLine = self._markers[marker-1]

        self.seek(p)
        self.line = line
        self.charPositionInLine = charPositionInLine
        self.release(marker)


    def release(self, marker=None):
        """Drop marker and any deeper markers without moving the cursor."""
        if marker is None:
            marker = self.lastMarker

        self.markDepth = marker-1


    def seek(self, index):
        """
        consume() ahead until p==index; can't just set p=index as we must
        update line and charPositionInLine.
        """

        if index <= self.p:
            self.p = index # just jump; don't update stream state (line, ...)
            return

        # seek forward, consume until p hits index
        while self.p < index:
            self.consume()


    def substring(self, start, stop):
        """Return the text from start to stop, both indexes inclusive."""
        return self.strdata[start:stop+1]


    def getLine(self):
        """Using setter/getter methods is deprecated. Use o.line instead."""
        return self.line


    def getCharPositionInLine(self):
        """
        Using setter/getter methods is deprecated. Use o.charPositionInLine
        instead.
        """
        return self.charPositionInLine


    def setLine(self, line):
        """Using setter/getter methods is deprecated. Use o.line instead."""
        self.line = line


    def setCharPositionInLine(self, pos):
        """
        Using setter/getter methods is deprecated. Use o.charPositionInLine
        instead.
        """
        self.charPositionInLine = pos


    def getSourceName(self):
        """Return the stream's name (None unless the caller set one)."""
        return self.name
522
523
class ANTLRFileStream(ANTLRStringStream):
    """
    @brief CharStream that opens a file to read the data.

    The whole file is loaded into the char buffer at construction
    time.
    """

    def __init__(self, fileName, encoding=None):
        """
        @param fileName The path to the file to be opened. The file will be
           opened with mode 'rb'.

        @param encoding If you set the optional encoding argument, then the
           data will be decoded on the fly.

        """

        self.fileName = fileName

        # codecs.open decodes transparently when an encoding is given;
        # with encoding=None it just reads the raw bytes.
        stream = codecs.open(fileName, 'rb', encoding)
        try:
            content = stream.read()
        finally:
            stream.close()

        ANTLRStringStream.__init__(self, content)


    def getSourceName(self):
        """Deprecated, access o.fileName directly."""

        return self.fileName
557
558
class ANTLRInputStream(ANTLRStringStream):
    """
    @brief CharStream that reads data from a file-like object.

    The complete contents of the file-like object are pulled into the
    char buffer when the stream is constructed.  All input is consumed
    from the file, but it is not closed.
    """

    def __init__(self, file, encoding=None):
        """
        @param file A file-like object holding your input. Only the read()
           method must be implemented.

        @param encoding If you set the optional encoding argument, then the
           data will be decoded on the fly.

        """

        if encoding is not None:
            # codecs.lookup returns (encoder, decoder, streamreader,
            # streamwriter); wrap the input in the stream reader so
            # bytes are decoded as they are read.
            streamreader = codecs.lookup(encoding)[2]
            file = streamreader(file)

        ANTLRStringStream.__init__(self, file.read())
587
588
# The "ANTLR" prefix presumably exists only to avoid a name clash with
# the Java runtime's class names; the short aliases below are the
# preferred names in Python.
StringStream = ANTLRStringStream
FileStream = ANTLRFileStream
InputStream = ANTLRInputStream
595
596
597############################################################################
598#
599# Token streams
600#   TokenStream
601#   +- CommonTokenStream
602#   \- TokenRewriteStream
603#
604############################################################################
605
606
class CommonTokenStream(TokenStream):
    """
    @brief The most common stream of tokens

    The most common stream of tokens is one where every token is buffered up
    and tokens are prefiltered for a certain channel (the parser will only
    see these tokens and cannot change the filter channel number during the
    parse).
    """

    def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL):
        """
        @param tokenSource A TokenSource instance (usually a Lexer) to pull
            the tokens from.

        @param channel Skip tokens on any channel but this one; this is how we
            skip whitespace...

        """

        TokenStream.__init__(self)

        self.tokenSource = tokenSource

        # Record every single token pulled from the source so we can reproduce
        # chunks of it later.
        self.tokens = []

        # Map<tokentype, channel> to override some Tokens' channel numbers
        self.channelOverrideMap = {}

        # Set<tokentype>; discard any tokens with this type
        self.discardSet = set()

        # Skip tokens on any channel but this one; this is how we skip
        # whitespace...
        self.channel = channel

        # By default, track all incoming tokens
        self.discardOffChannelTokens = False

        # The index into the tokens list of the current token (next token
        # to consume).  p==-1 indicates that the tokens list is empty
        self.p = -1

        # Remember last marked position
        self.lastMarker = None

        # how deep have we gone?
        self._range = -1


    def makeEOFToken(self):
        """Ask the token source to manufacture an EOF token."""
        return self.tokenSource.makeEOFToken()


    def setTokenSource(self, tokenSource):
        """Reset this token stream by setting its token source.

        Clears the buffer and resets the listening channel to
        DEFAULT_CHANNEL.
        """

        self.tokenSource = tokenSource
        self.tokens = []
        self.p = -1
        self.channel = DEFAULT_CHANNEL


    def reset(self):
        """Rewind to the first token; the buffer itself is kept."""
        self.p = 0
        self.lastMarker = None


    def fillBuffer(self):
        """
        Load all tokens from the token source and put in tokens.
        This is done upon first LT request because you might want to
        set some token type / channel overrides before filling buffer.
        """


        index = 0
        t = self.tokenSource.nextToken()
        while t is not None and t.type != EOF:
            discard = False

            if self.discardSet is not None and t.type in self.discardSet:
                discard = True

            elif self.discardOffChannelTokens and t.channel != self.channel:
                discard = True

            # is there a channel override for token type?
            try:
                overrideChannel = self.channelOverrideMap[t.type]

            except KeyError:
                # no override for this type
                pass

            else:
                if overrideChannel == self.channel:
                    t.channel = overrideChannel
                else:
                    discard = True

            if not discard:
                # kept tokens are renumbered densely, so t.index is the
                # position in self.tokens, not in the raw source
                t.index = index
                self.tokens.append(t)
                index += 1

            t = self.tokenSource.nextToken()

        # leave p pointing at first token on channel
        self.p = 0
        self.p = self.skipOffTokenChannels(self.p)


    def consume(self):
        """
        Move the input pointer to the next incoming token.  The stream
        must become active with LT(1) available.  consume() simply
        moves the input pointer so that LT(1) points at the next
        input symbol. Consume at least one token.

        Walk past any token not on the channel the parser is listening to.
        """

        if self.p < len(self.tokens):
            self.p += 1

            self.p = self.skipOffTokenChannels(self.p) # leave p on valid token


    def skipOffTokenChannels(self, i):
        """
        Given a starting index, return the index of the first on-channel
        token.
        """

        try:
            while self.tokens[i].channel != self.channel:
                i += 1
        except IndexError:
            # hit the end of token stream
            pass

        return i


    def skipOffTokenChannelsReverse(self, i):
        """Walk backwards from i to the nearest on-channel token (or -1)."""
        while i >= 0 and self.tokens[i].channel != self.channel:
            i -= 1

        return i


    def setTokenTypeChannel(self, ttype, channel):
        """
        A simple filter mechanism whereby you can tell this token stream
        to force all tokens of type ttype to be on channel.  For example,
        when interpreting, we cannot exec actions so we need to tell
        the stream to force all WS and NEWLINE to be a different, ignored
        channel.
        """

        self.channelOverrideMap[ttype] = channel


    def discardTokenType(self, ttype):
        """Drop all tokens of the given type while filling the buffer."""
        self.discardSet.add(ttype)


    def getTokens(self, start=None, stop=None, types=None):
        """
        Given a start and stop index, return a list of all tokens in
        the token type set.  Return None if no tokens were found.  This
        method looks at both on and off channel tokens.
        """

        if self.p == -1:
            self.fillBuffer()

        if stop is None or stop >= len(self.tokens):
            stop = len(self.tokens) - 1

        # NOTE(review): 'stop < 0' looks like it was meant to be
        # 'start < 0' (the Java runtime clamps start) - confirm before
        # changing.
        if start is None or stop < 0:
            start = 0

        if start > stop:
            return None

        if isinstance(types, (int, long)):
            # called with a single type, wrap into set
            types = set([types])

        # NOTE(review): the slice excludes the token at index 'stop',
        # although the docstring implies an inclusive range - confirm
        # intended behavior.
        filteredTokens = [
            token for token in self.tokens[start:stop]
            if types is None or token.type in types
            ]

        if len(filteredTokens) == 0:
            return None

        return filteredTokens


    def LT(self, k):
        """
        Get the ith token from the current position 1..n where k=1 is the
        first symbol of lookahead.
        """

        if self.p == -1:
            self.fillBuffer()

        if k == 0:
            return None

        if k < 0:
            return self.LB(-k)

        i = self.p
        n = 1
        # find k good tokens
        while n < k:
            # skip off-channel tokens
            i = self.skipOffTokenChannels(i+1) # leave p on valid token
            n += 1

        # remember the deepest lookahead position for range()
        if i > self._range:
            self._range = i

        try:
            return self.tokens[i]
        except IndexError:
            # ran off the end of the buffer: synthesize an EOF token
            return self.makeEOFToken()


    def LB(self, k):
        """Look backwards k on-channel tokens; None if out of range."""

        if self.p == -1:
            self.fillBuffer()

        if k == 0:
            return None

        if self.p - k < 0:
            return None

        i = self.p
        n = 1
        # find k good tokens looking backwards
        while n <= k:
            # skip off-channel tokens
            i = self.skipOffTokenChannelsReverse(i-1) # leave p on valid token
            n += 1

        if i < 0:
            return None

        return self.tokens[i]


    def get(self, i):
        """
        Return absolute token i; ignore which channel the tokens are on;
        that is, count all tokens not just on-channel tokens.
        """

        return self.tokens[i]


    def slice(self, start, stop):
        """Return tokens[start..stop] inclusive; None for negative bounds."""
        if self.p == -1:
            self.fillBuffer()

        if start < 0 or stop < 0:
            return None

        return self.tokens[start:stop+1]


    def LA(self, i):
        """Return the token type i tokens ahead (via LT)."""
        return self.LT(i).type


    def mark(self):
        """Remember and return the current position as the marker."""
        self.lastMarker = self.index()
        return self.lastMarker


    def release(self, marker=None):
        # no resources to release
        pass


    def size(self):
        """Return the number of buffered tokens."""
        return len(self.tokens)


    def range(self):
        """Return the deepest index LT() has ever reached."""
        return self._range


    def index(self):
        """Return the index of the next token to consume."""
        return self.p


    def rewind(self, marker=None):
        """Seek back to marker (default: the last mark)."""
        if marker is None:
            marker = self.lastMarker

        self.seek(marker)


    def seek(self, index):
        """Jump straight to index; everything is buffered, so no state
        needs recomputing."""
        self.p = index


    def getTokenSource(self):
        """Return the TokenSource this stream pulls from."""
        return self.tokenSource


    def getSourceName(self):
        """Delegate the source name question to the token source."""
        return self.tokenSource.getSourceName()


    def toString(self, start=None, stop=None):
        """Return the concatenated text of tokens start..stop inclusive.

        start/stop may be indexes or token objects (their .index is
        used); defaults cover the whole buffer.
        """
        if self.p == -1:
            self.fillBuffer()

        if start is None:
            start = 0
        elif not isinstance(start, int):
            start = start.index

        if stop is None:
            stop = len(self.tokens) - 1
        elif not isinstance(stop, int):
            stop = stop.index

        if stop >= len(self.tokens):
            stop = len(self.tokens) - 1

        return ''.join([t.text for t in self.tokens[start:stop+1]])
951
952
class RewriteOperation(object):
    """@brief Internal helper class.

    Base class for the lazy rewrite instructions queued up by a
    rewriting token stream.  The base operation writes nothing and
    simply steps to the next token.
    """

    def __init__(self, stream, index, text):
        # The stream this instruction belongs to.
        self.stream = stream

        # What index into rewrites List are we?
        self.instructionIndex = None

        # Token buffer index.
        self.index = index
        self.text = text

    def execute(self, buf):
        """Execute the rewrite operation by possibly adding to the buffer.
        Return the index of the next token to operate on.
        """

        return self.index

    def toString(self):
        return '<%s@%d:"%s">' % (self.__class__.__name__, self.index, self.text)

    __str__ = toString
    __repr__ = toString
980
981
class InsertBeforeOp(RewriteOperation):
    """@brief Internal helper class.

    Emits its own text, then the token at its index (unless that token
    is EOF), and steps past that token.
    """

    def execute(self, buf):
        buf.write(self.text)
        token = self.stream.tokens[self.index]
        if token.type != EOF:
            buf.write(token.text)
        return self.index + 1
990
991
class ReplaceOp(RewriteOperation):
    """
    @brief Internal helper class.

    Replaces the inclusive token range first..last with self.text,
    covering the whole range with a single instruction.  A None text
    makes this a pure deletion.
    """

    def __init__(self, stream, first, last, text):
        RewriteOperation.__init__(self, stream, first, text)
        # Index of the last token of the replaced range (inclusive).
        self.lastIndex = last


    def execute(self, buf):
        """Write the replacement text (if any) and skip past the range."""
        if self.text is not None:
            buf.write(self.text)

        return self.lastIndex + 1


    def toString(self):
        if self.text is None:
            return '<DeleteOp@%d..%d>' % (self.index, self.lastIndex)

        return '<ReplaceOp@%d..%d:"%s">' % (
            self.index, self.lastIndex, self.text)

    __str__ = toString
    __repr__ = toString
1021
1022
1023class TokenRewriteStream(CommonTokenStream):
1024    """@brief CommonTokenStream that can be modified.
1025
1026    Useful for dumping out the input stream after doing some
1027    augmentation or other manipulations.
1028
1029    You can insert stuff, replace, and delete chunks.  Note that the
1030    operations are done lazily--only if you convert the buffer to a
1031    String.  This is very efficient because you are not moving data around
1032    all the time.  As the buffer of tokens is converted to strings, the
1033    toString() method(s) check to see if there is an operation at the
1034    current index.  If so, the operation is done and then normal String
1035    rendering continues on the buffer.  This is like having multiple Turing
1036    machine instruction streams (programs) operating on a single input tape. :)
1037
1038    Since the operations are done lazily at toString-time, operations do not
1039    screw up the token index values.  That is, an insert operation at token
1040    index i does not change the index values for tokens i+1..n-1.
1041
1042    Because operations never actually alter the buffer, you may always get
1043    the original token stream back without undoing anything.  Since
1044    the instructions are queued up, you can easily simulate transactions and
1045    roll back any changes if there is an error just by removing instructions.
1046    For example,
1047
1048     CharStream input = new ANTLRFileStream("input");
1049     TLexer lex = new TLexer(input);
1050     TokenRewriteStream tokens = new TokenRewriteStream(lex);
1051     T parser = new T(tokens);
1052     parser.startRule();
1053
1054     Then in the rules, you can execute
1055        Token t,u;
1056        ...
        input.insertAfter(t, "text to put after t");
        input.insertAfter(u, "text after u");
1059        System.out.println(tokens.toString());
1060
1061    Actually, you have to cast the 'input' to a TokenRewriteStream. :(
1062
1063    You can also have multiple "instruction streams" and get multiple
1064    rewrites from a single pass over the input.  Just name the instruction
1065    streams and use that name again when printing the buffer.  This could be
1066    useful for generating a C file and also its header file--all from the
1067    same buffer:
1068
        tokens.insertAfter("pass1", t, "text to put after t");
        tokens.insertAfter("pass2", u, "text after u");
1071        System.out.println(tokens.toString("pass1"));
1072        System.out.println(tokens.toString("pass2"));
1073
1074    If you don't use named rewrite streams, a "default" stream is used as
1075    the first example shows.
1076    """
1077
1078    DEFAULT_PROGRAM_NAME = "default"
1079    MIN_TOKEN_INDEX = 0
1080
1081    def __init__(self, tokenSource=None, channel=DEFAULT_CHANNEL):
1082        CommonTokenStream.__init__(self, tokenSource, channel)
1083
1084        # You may have multiple, named streams of rewrite operations.
1085        # I'm calling these things "programs."
1086        #  Maps String (name) -> rewrite (List)
1087        self.programs = {}
1088        self.programs[self.DEFAULT_PROGRAM_NAME] = []
1089
1090 	# Map String (program name) -> Integer index
1091        self.lastRewriteTokenIndexes = {}
1092
1093
1094    def rollback(self, *args):
1095        """
1096        Rollback the instruction stream for a program so that
1097        the indicated instruction (via instructionIndex) is no
1098        longer in the stream.  UNTESTED!
1099        """
1100
1101        if len(args) == 2:
1102            programName = args[0]
1103            instructionIndex = args[1]
1104        elif len(args) == 1:
1105            programName = self.DEFAULT_PROGRAM_NAME
1106            instructionIndex = args[0]
1107        else:
1108            raise TypeError("Invalid arguments")
1109
1110        p = self.programs.get(programName, None)
1111        if p is not None:
1112            self.programs[programName] = (
1113                p[self.MIN_TOKEN_INDEX:instructionIndex])
1114
1115
1116    def deleteProgram(self, programName=DEFAULT_PROGRAM_NAME):
1117        """Reset the program so that no instructions exist"""
1118
1119        self.rollback(programName, self.MIN_TOKEN_INDEX)
1120
1121
1122    def insertAfter(self, *args):
1123        if len(args) == 2:
1124            programName = self.DEFAULT_PROGRAM_NAME
1125            index = args[0]
1126            text = args[1]
1127
1128        elif len(args) == 3:
1129            programName = args[0]
1130            index = args[1]
1131            text = args[2]
1132
1133        else:
1134            raise TypeError("Invalid arguments")
1135
1136        if isinstance(index, Token):
1137            # index is a Token, grap the stream index from it
1138            index = index.index
1139
1140        # to insert after, just insert before next index (even if past end)
1141        self.insertBefore(programName, index+1, text)
1142
1143
1144    def insertBefore(self, *args):
1145        if len(args) == 2:
1146            programName = self.DEFAULT_PROGRAM_NAME
1147            index = args[0]
1148            text = args[1]
1149
1150        elif len(args) == 3:
1151            programName = args[0]
1152            index = args[1]
1153            text = args[2]
1154
1155        else:
1156            raise TypeError("Invalid arguments")
1157
1158        if isinstance(index, Token):
1159            # index is a Token, grap the stream index from it
1160            index = index.index
1161
1162        op = InsertBeforeOp(self, index, text)
1163        rewrites = self.getProgram(programName)
1164        op.instructionIndex = len(rewrites)
1165        rewrites.append(op)
1166
1167
1168    def replace(self, *args):
1169        if len(args) == 2:
1170            programName = self.DEFAULT_PROGRAM_NAME
1171            first = args[0]
1172            last = args[0]
1173            text = args[1]
1174
1175        elif len(args) == 3:
1176            programName = self.DEFAULT_PROGRAM_NAME
1177            first = args[0]
1178            last = args[1]
1179            text = args[2]
1180
1181        elif len(args) == 4:
1182            programName = args[0]
1183            first = args[1]
1184            last = args[2]
1185            text = args[3]
1186
1187        else:
1188            raise TypeError("Invalid arguments")
1189
1190        if isinstance(first, Token):
1191            # first is a Token, grap the stream index from it
1192            first = first.index
1193
1194        if isinstance(last, Token):
1195            # last is a Token, grap the stream index from it
1196            last = last.index
1197
1198        if first > last or first < 0 or last < 0 or last >= len(self.tokens):
1199            raise ValueError(
1200                "replace: range invalid: %d..%d (size=%d)"
1201                % (first, last, len(self.tokens)))
1202
1203        op = ReplaceOp(self, first, last, text)
1204        rewrites = self.getProgram(programName)
1205        op.instructionIndex = len(rewrites)
1206        rewrites.append(op)
1207
1208
1209    def delete(self, *args):
1210        self.replace(*(list(args) + [None]))
1211
1212
1213    def getLastRewriteTokenIndex(self, programName=DEFAULT_PROGRAM_NAME):
1214        return self.lastRewriteTokenIndexes.get(programName, -1)
1215
1216
1217    def setLastRewriteTokenIndex(self, programName, i):
1218        self.lastRewriteTokenIndexes[programName] = i
1219
1220
1221    def getProgram(self, name):
1222        p = self.programs.get(name, None)
1223        if p is  None:
1224            p = self.initializeProgram(name)
1225
1226        return p
1227
1228
1229    def initializeProgram(self, name):
1230        p = []
1231        self.programs[name] = p
1232        return p
1233
1234
1235    def toOriginalString(self, start=None, end=None):
1236        if self.p == -1:
1237            self.fillBuffer()
1238
1239        if start is None:
1240            start = self.MIN_TOKEN_INDEX
1241        if end is None:
1242            end = self.size() - 1
1243
1244        buf = StringIO()
1245        i = start
1246        while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens):
1247            if self.get(i).type != EOF:
1248                buf.write(self.get(i).text)
1249            i += 1
1250
1251        return buf.getvalue()
1252
1253
1254    def toString(self, *args):
1255        if self.p == -1:
1256            self.fillBuffer()
1257
1258        if len(args) == 0:
1259            programName = self.DEFAULT_PROGRAM_NAME
1260            start = self.MIN_TOKEN_INDEX
1261            end = self.size() - 1
1262
1263        elif len(args) == 1:
1264            programName = args[0]
1265            start = self.MIN_TOKEN_INDEX
1266            end = self.size() - 1
1267
1268        elif len(args) == 2:
1269            programName = self.DEFAULT_PROGRAM_NAME
1270            start = args[0]
1271            end = args[1]
1272
1273        if start is None:
1274            start = self.MIN_TOKEN_INDEX
1275        elif not isinstance(start, int):
1276            start = start.index
1277
1278        if end is None:
1279            end = len(self.tokens) - 1
1280        elif not isinstance(end, int):
1281            end = end.index
1282
1283        # ensure start/end are in range
1284        if end >= len(self.tokens):
1285            end = len(self.tokens) - 1
1286
1287        if start < 0:
1288            start = 0
1289
1290        rewrites = self.programs.get(programName)
1291        if rewrites is None or len(rewrites) == 0:
1292            # no instructions to execute
1293            return self.toOriginalString(start, end)
1294
1295        buf = StringIO()
1296
1297        # First, optimize instruction stream
1298        indexToOp = self.reduceToSingleOperationPerIndex(rewrites)
1299
1300        # Walk buffer, executing instructions and emitting tokens
1301        i = start
1302        while i <= end and i < len(self.tokens):
1303            op = indexToOp.get(i)
1304            # remove so any left have index size-1
1305            try:
1306                del indexToOp[i]
1307            except KeyError:
1308                pass
1309
1310            t = self.tokens[i]
1311            if op is None:
1312                # no operation at that index, just dump token
1313                if t.type != EOF:
1314                    buf.write(t.text)
1315                i += 1 # move to next token
1316
1317            else:
1318                i = op.execute(buf) # execute operation and skip
1319
1320        # include stuff after end if it's last index in buffer
1321        # So, if they did an insertAfter(lastValidIndex, "foo"), include
1322        # foo if end==lastValidIndex.
1323        if end == len(self.tokens) - 1:
1324            # Scan any remaining operations after last token
1325            # should be included (they will be inserts).
1326            for i in sorted(indexToOp.keys()):
1327                op = indexToOp[i]
1328                if op.index >= len(self.tokens)-1:
1329                    buf.write(op.text)
1330
1331        return buf.getvalue()
1332
1333    __str__ = toString
1334
1335
    def reduceToSingleOperationPerIndex(self, rewrites):
        """
        We need to combine operations and report invalid operations (like
        overlapping replaces that are not completed nested).  Inserts to
        same index need to be combined etc...   Here are the cases:

        I.i.u I.j.v                           leave alone, nonoverlapping
        I.i.u I.i.v                           combine: Iivu

        R.i-j.u R.x-y.v | i-j in x-y          delete first R
        R.i-j.u R.i-j.v                       delete first R
        R.i-j.u R.x-y.v | x-y in i-j          ERROR
        R.i-j.u R.x-y.v | boundaries overlap  ERROR

        Delete special case of replace (text==null):
        D.i-j.u D.x-y.v | boundaries overlap  combine to
                                              max(min)..max(right)

        I.i.u R.x-y.v   | i in (x+1)-y        delete I (since insert
                                              before, we're not deleting
                                              i)
        I.i.u R.x-y.v   | i not in (x+1)-y    leave alone,
                                              nonoverlapping

        R.x-y.v I.i.u   | i in x-y            ERROR
        R.x-y.v I.x.u                         R.x-y.uv (combine, delete I)
        R.x-y.v I.i.u   | i not in x-y        leave alone, nonoverlapping

        I.i.u = insert u before op @ index i
        R.x-y.u = replace x-y indexed tokens with u

        First we need to examine replaces.  For any replace op:

          1. wipe out any insertions before op within that range.
          2. Drop any replace op before that is contained completely within
             that range.
          3. Throw exception upon boundary overlap with any previous replace.

        Then we can deal with inserts:

          1. for any inserts to same index, combine even if not adjacent.
          2. for any prior replace with same left boundary, combine this
             insert with replace and delete this replace.
          3. throw exception if index in same range as previous replace

        Don't actually delete; make op null in list. Easier to walk list.
        Later we can throw as we add to index -> op map.

        Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
        inserted stuff would be before the replace range.  But, if you
        add tokens in front of a method body '{' and then delete the method
        body, I think the stuff before the '{' you added should disappear too.

        Return a map from token index to operation.
        """

        # WALK REPLACES
        for i, rop in enumerate(rewrites):
            # skip slots already killed (None placeholders)
            if rop is None:
                continue

            if not isinstance(rop, ReplaceOp):
                continue

            # Wipe prior inserts within range
            for j, iop in self.getKindOfOps(rewrites, InsertBeforeOp, i):
                if iop.index == rop.index:
                    # E.g., insert before 2, delete 2..2; update replace
                    # text to include insert before, kill insert
                    rewrites[iop.instructionIndex] = None
                    rop.text = self.catOpText(iop.text, rop.text)

                elif iop.index > rop.index and iop.index <= rop.lastIndex:
                    # delete insert as it's a no-op.
                    rewrites[j] = None

            # Drop any prior replaces contained within
            for j, prevRop in self.getKindOfOps(rewrites, ReplaceOp, i):
                if (prevRop.index >= rop.index
                    and prevRop.lastIndex <= rop.lastIndex):
                    # delete replace as it's a no-op.
                    rewrites[j] = None
                    continue

                # throw exception unless disjoint or identical
                disjoint = (prevRop.lastIndex < rop.index
                            or prevRop.index > rop.lastIndex)
                same = (prevRop.index == rop.index
                        and prevRop.lastIndex == rop.lastIndex)

                # Delete special case of replace (text==null):
                # D.i-j.u D.x-y.v | boundaries overlap => combine to
                # max(min)..max(right)
                if prevRop.text is None and rop.text is None and not disjoint:
                    # kill first delete
                    rewrites[prevRop.instructionIndex] = None

                    # widen the surviving delete to cover both spans
                    rop.index = min(prevRop.index, rop.index)
                    rop.lastIndex = max(prevRop.lastIndex, rop.lastIndex)

                elif not disjoint and not same:
                    raise ValueError(
                        "replace op boundaries of %s overlap with previous %s"
                        % (rop, prevRop))

        # WALK INSERTS
        for i, iop in enumerate(rewrites):
            if iop is None:
                continue

            if not isinstance(iop, InsertBeforeOp):
                continue

            # combine current insert with prior if any at same index
            for j, prevIop in self.getKindOfOps(rewrites, InsertBeforeOp, i):
                if prevIop.index == iop.index: # combine objects
                    # convert to strings...we're in process of toString'ing
                    # whole token buffer so no lazy eval issue with any
                    # templates
                    iop.text = self.catOpText(iop.text, prevIop.text)
                    # delete redundant prior insert
                    rewrites[j] = None

            # look for replaces where iop.index is in range; error
            for j, rop in self.getKindOfOps(rewrites, ReplaceOp, i):
                if iop.index == rop.index:
                    rop.text = self.catOpText(iop.text, rop.text)
                    # delete current insert
                    # NOTE(review): 'continue' resumes this inner loop, so
                    # a later same-index replace could merge iop.text a
                    # second time -- verify against the Java runtime.
                    rewrites[i] = None
                    continue

                if iop.index >= rop.index and iop.index <= rop.lastIndex:
                    raise ValueError(
                        "insert op %s within boundaries of previous %s"
                        % (iop, rop))

        # Build the final index -> op map from the surviving operations.
        m = {}
        for i, op in enumerate(rewrites):
            if op is None:
                # ignore deleted ops
                continue

            assert op.index not in m, "should only be one op per index"
            m[op.index] = op

        return m
1482
1483
1484    def catOpText(self, a, b):
1485        x = ""
1486        y = ""
1487        if a is not None:
1488            x = a
1489        if b is not None:
1490            y = b
1491        return x + y
1492
1493
1494    def getKindOfOps(self, rewrites, kind, before=None):
1495        """Get all operations before an index of a particular kind."""
1496
1497        if before is None:
1498            before = len(rewrites)
1499        elif before > len(rewrites):
1500            before = len(rewrites)
1501
1502        for i, op in enumerate(rewrites[:before]):
1503            if op is None:
1504                # ignore deleted
1505                continue
1506            if op.__class__ == kind:
1507                yield i, op
1508
1509
1510    def toDebugString(self, start=None, end=None):
1511        if start is None:
1512            start = self.MIN_TOKEN_INDEX
1513        if end is None:
1514            end = self.size() - 1
1515
1516        buf = StringIO()
1517        i = start
1518        while i >= self.MIN_TOKEN_INDEX and i <= end and i < len(self.tokens):
1519            buf.write(self.get(i))
1520            i += 1
1521
1522        return buf.getvalue()
1523