• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/** A lexer is a recognizer that pulls its input symbols from a character
 *  stream.  Lexer grammars produce subclasses of this object.  To stay
 *  fast, a Lexer relies on simplified match() and error recovery routines.
 *
 *  @param input  character stream to read from; stored as this.input only
 *                when a truthy value is supplied.
 *  @param state  recognizer state handed to the superclass constructor; the
 *                superclass constructor is invoked only when a truthy value
 *                is supplied.
 */
org.antlr.runtime.Lexer = function(input, state) {
    var Lexer = org.antlr.runtime.Lexer;

    // Only chain to the superclass constructor when state was provided.
    if (state) {
        Lexer.superclass.constructor.call(this, state);
    }

    // Leave this.input untouched when no stream was provided.
    if (input) {
        this.input = input;
    }
};
14
org.antlr.lang.extend(org.antlr.runtime.Lexer, org.antlr.runtime.BaseRecognizer, {
    /** Reset the recognizer: run the superclass reset, rewind the input
     *  stream (when one is set) and clear the per-token fields of the
     *  state object (when one is set).
     */
    reset: function() {
        // reset all recognizer state variables
        org.antlr.runtime.Lexer.superclass.reset.call(this);
        if ( org.antlr.lang.isValue(this.input) ) {
            this.input.seek(0); // rewind the input
        }
        if ( !org.antlr.lang.isValue(this.state) ) {
            return; // no shared state work to do
        }
        this.state.token = null;
        this.state.type = org.antlr.runtime.Token.INVALID_TOKEN_TYPE;
        this.state.channel = org.antlr.runtime.Token.DEFAULT_CHANNEL;
        this.state.tokenStartCharIndex = -1;
        this.state.tokenStartCharPositionInLine = -1;
        this.state.tokenStartLine = -1;
        this.state.text = null;
    },

    /** Return a token from this source; i.e., match a token on the char
     *  stream.
     *
     *  Loops until a token can be returned: rules that call skip() and
     *  rules that fail with a recognition error both cause another
     *  attempt.  Returns Token.EOF_TOKEN once the stream's lookahead is
     *  CharStream.EOF.  Exceptions that are not RecognitionExceptions are
     *  rethrown unchanged.
     */
    nextToken: function() {
        while (true) {
            // Start a fresh token: remember where it begins in the stream.
            this.state.token = null;
            this.state.channel = org.antlr.runtime.Token.DEFAULT_CHANNEL;
            this.state.tokenStartCharIndex = this.input.index();
            this.state.tokenStartCharPositionInLine = this.input.getCharPositionInLine();
            this.state.tokenStartLine = this.input.getLine();
            this.state.text = null;
            if ( this.input.LA(1)===org.antlr.runtime.CharStream.EOF ) {
                return org.antlr.runtime.Token.EOF_TOKEN;
            }
            try {
                this.mTokens();
                if ( !org.antlr.lang.isValue(this.state.token) ) {
                    // The rule did not emit anything itself; emit for it.
                    this.emit();
                }
                else if ( this.state.token===org.antlr.runtime.Token.SKIP_TOKEN ) {
                    // skip() was called: look for the next token instead.
                    continue;
                }
                return this.state.token;
            }
            catch (re) {
                if (re instanceof org.antlr.runtime.NoViableAltException) {
                    this.reportError(re);
                    this.recover(re);
                } else if ( re instanceof org.antlr.runtime.RecognitionException ) {
                    // match() and matchRange() call recover() before they
                    // throw, so only the report is needed here.
                    this.reportError(re);
                } else {
                    throw re;
                }
            }
        }
    },

    /** Instruct the lexer to skip creating a token for current lexer rule
     *  and look for another token.  nextToken() knows to keep looking when
     *  a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
     *  if token==null at end of any token rule, it creates one for you
     *  and emits it.
     */
    skip: function() {
        this.state.token = org.antlr.runtime.Token.SKIP_TOKEN;
    },

    /** Set the char stream and reset the lexer.
     *
     *  The input is cleared before reset() runs, so reset() does not seek
     *  on the previous stream; the new stream is installed afterwards and
     *  is not rewound here.
     */
    setCharStream: function(input) {
        this.input = null;
        this.reset();
        this.input = input;
    },

    /** Return the character stream this lexer reads from. */
    getCharStream: function() {
        return this.input;
    },

    /** Return the source name reported by the current input stream. */
    getSourceName: function() {
        return this.input.getSourceName();
    },

    /** Currently does not support multiple emits per nextToken invocation
     *  for efficiency reasons.  Subclass and override this method and
     *  nextToken (to push tokens into a list and pull from that list rather
     *  than a single variable as this implementation does).
     *
     *  Called without arguments, this is the standard method used to
     *  automatically emit a token at the outermost lexical rule: it builds
     *  a CommonToken spanning tokenStartCharIndex..getCharIndex()-1, copies
     *  the start line, start column and text override from the state, and
     *  stores it as the current token.  Called with one argument, that
     *  argument is stored as the current token as-is.  Override this method
     *  to emit custom Token objects.
     *
     *  If you are building trees, then you should also override
     *  Parser or TreeParser.getMissingSymbol().
     *
     *  @returns the token that was stored in this.state.token.
     */
    emit: function() {
        var t;
        if (arguments.length===0) {
            t = new org.antlr.runtime.CommonToken(this.input, this.state.type, this.state.channel, this.state.tokenStartCharIndex, this.getCharIndex()-1);
            t.setLine(this.state.tokenStartLine);
            t.setText(this.state.text);
            t.setCharPositionInLine(this.state.tokenStartCharPositionInLine);
        } else {
            t = arguments[0];
        }
        this.state.token = t;
        return t;
    },

    /** Match the given input and consume it.
     *
     *  @param s  either a string, matched character by character against
     *            the lookahead, or a number, compared against a single
     *            lookahead symbol.  Any other kind of value is ignored.
     *
     *  On a mismatch while backtracking (state.backtracking>0) the method
     *  only sets state.failed and returns.  Otherwise it calls recover()
     *  and throws a MismatchedTokenException.  Every successfully matched
     *  symbol is consumed and clears state.failed.
     */
    match: function(s) {
        var i = 0,
            mte;

        if (org.antlr.lang.isString(s)) {
            while ( i<s.length ) {
                if ( this.input.LA(1)!=s.charAt(i) ) {
                    if ( this.state.backtracking>0 ) {
                        this.state.failed = true;
                        return;
                    }
                    mte = new org.antlr.runtime.MismatchedTokenException(s.charAt(i), this.input);
                    this.recover(mte);
                    throw mte;
                }
                i++;
                this.input.consume();
                this.state.failed = false;
            }
        } else if (org.antlr.lang.isNumber(s)) {
            if ( this.input.LA(1)!=s ) {
                if ( this.state.backtracking>0 ) {
                    this.state.failed = true;
                    return;
                }
                mte = new org.antlr.runtime.MismatchedTokenException(s, this.input);
                this.recover(mte);
                throw mte;
            }
            this.input.consume();
            this.state.failed = false;
        }
    },

    /** Consume one input symbol unconditionally. */
    matchAny: function() {
        this.input.consume();
    },

    /** Match a single lookahead symbol within the inclusive range a..b and
     *  consume it.
     *
     *  On a mismatch while backtracking (state.backtracking>0) the method
     *  only sets state.failed and returns.  Otherwise it calls recover()
     *  and throws a MismatchedRangeException.
     */
    matchRange: function(a, b) {
        var c = this.input.LA(1),
            mre;

        // End of input never lies inside a character range.  It must be
        // tested explicitly: when the EOF marker is a number and the bounds
        // are strings (assumed here, as match() compares LA(1) to charAt()
        // results), both relational tests below evaluate to false because
        // the string bound coerces to NaN, and EOF would be accepted.
        if ( c===org.antlr.runtime.CharStream.EOF || c<a || c>b ) {
            if ( this.state.backtracking>0 ) {
                this.state.failed = true;
                return;
            }
            mre = new org.antlr.runtime.MismatchedRangeException(a,b,this.input);
            this.recover(mre);
            throw mre;
        }
        this.input.consume();
        this.state.failed = false;
    },

    /** Return the current line as reported by the input stream. */
    getLine: function() {
        return this.input.getLine();
    },

    /** Return the current position within the line as reported by the
     *  input stream.
     */
    getCharPositionInLine: function() {
        return this.input.getCharPositionInLine();
    },

    /** What is the index of the current character of lookahead? */
    getCharIndex: function() {
        return this.input.index();
    },

    /** Return the text matched so far for the current token or any
     *  text override.
     *
     *  A string stored via setText() wins; otherwise the text is taken from
     *  the input stream, from the token start index up to the index just
     *  before the current lookahead character.
     */
    getText: function() {
        if ( org.antlr.lang.isString(this.state.text) ) {
            return this.state.text;
        }
        return this.input.substring(this.state.tokenStartCharIndex,this.getCharIndex()-1);
    },

    /** Set the complete text of this token; it wipes any previous
     *  changes to the text.
     */
    setText: function(text) {
        this.state.text = text;
    },

    /** Report a recognition error by delegating to
     *  displayRecognitionError() with this recognizer's token names.
     *
     *  TODO: suppression of spurious follow-on errors (an errorRecovery
     *  flag) has not been thought through for the lexer yet, so every
     *  error is reported.
     */
    reportError: function(e) {
        this.displayRecognitionError(this.getTokenNames(), e);
    },

    /** Build the message text for a recognition error.
     *
     *  Character-level exceptions get lexer-specific wording; anything
     *  else is delegated to the superclass implementation.
     *
     *  @param e           the exception to describe.
     *  @param tokenNames  passed through to the superclass fallback.
     *  @returns the message string.
     */
    getErrorMessage: function(e, tokenNames) {
        var runtime = org.antlr.runtime,
            found;

        if ( e instanceof runtime.MismatchedTokenException ) {
            return "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting "+this.getCharErrorDisplay(e.expecting);
        }
        if ( e instanceof runtime.NoViableAltException ) {
            return "no viable alternative at character "+this.getCharErrorDisplay(e.c);
        }
        if ( e instanceof runtime.EarlyExitException ) {
            return "required (...)+ loop did not match anything at character "+this.getCharErrorDisplay(e.c);
        }

        found = "mismatched character "+this.getCharErrorDisplay(e.c)+" expecting set ";
        // Both set exceptions share the same wording.
        if ( e instanceof runtime.MismatchedNotSetException ||
             e instanceof runtime.MismatchedSetException ) {
            return found+e.expecting;
        }
        if ( e instanceof runtime.MismatchedRangeException ) {
            return found+this.getCharErrorDisplay(e.a)+".."+this.getCharErrorDisplay(e.b);
        }
        return runtime.Lexer.superclass.getErrorMessage.call(this, e, tokenNames);
    },

    /** Return a quoted, printable form of a lookahead symbol for use in
     *  error messages: Token.EOF becomes <EOF>, and newline, tab and
     *  carriage return are shown as escape sequences.
     */
    getCharErrorDisplay: function(c) {
        var s = c;
        switch ( s ) {
            case org.antlr.runtime.Token.EOF :
                s = "<EOF>";
                break;
            case "\n" :
                s = "\\n";
                break;
            case "\t" :
                s = "\\t";
                break;
            case "\r" :
                s = "\\r";
                break;
        }
        return "'"+s+"'";
    },

    /** Lexers can normally match any char in its vocabulary after matching
     *  a token, so do the easy thing and just kill a character and hope
     *  it all works out.  You can instead use the rule invocation stack
     *  to do sophisticated error recovery if you are in a fragment rule.
     */
    recover: function(re) {
        this.input.consume();
    },

    /** Trace entry into a rule, adding the current lookahead symbol and
     *  position to the superclass trace output.
     */
    traceIn: function(ruleName, ruleIndex)  {
        // The lookahead symbol is used as-is: the matching code in this
        // object treats it as a character string, so converting it with
        // String.fromCharCode() would garble it.
        var inputSymbol = this.input.LT(1)+" line="+this.getLine()+":"+this.getCharPositionInLine();
        org.antlr.runtime.Lexer.superclass.traceIn.call(this, ruleName, ruleIndex, inputSymbol);
    },

    /** Trace exit from a rule, adding the current lookahead symbol and
     *  position to the superclass trace output.
     */
    traceOut: function(ruleName, ruleIndex)  {
        // See traceIn(): the lookahead symbol is already printable text.
        var inputSymbol = this.input.LT(1)+" line="+this.getLine()+":"+this.getCharPositionInLine();
        org.antlr.runtime.Lexer.superclass.traceOut.call(this, ruleName, ruleIndex, inputSymbol);
    }
});
285