/*
 * [The "BSD licence"]
 * Copyright (c) 2005-2008 Terence Parr
 * All rights reserved.
 *
 * Conversion to C#:
 * Copyright (c) 2008-2009 Sam Harwell, Pixel Mine, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

namespace Antlr.Runtime
{
    using ConditionalAttribute = System.Diagnostics.ConditionalAttribute;

    /**
     *  A lexer is a recognizer that draws input symbols from a character stream.
     *  Lexer grammars result in a subclass of this object. A Lexer object
     *  uses simplified match() and error recovery mechanisms in the interest
     *  of speed.
     */
    public abstract class Lexer : BaseRecognizer, ITokenSource
    {
        /** Where is the lexer drawing characters from? */
        protected ICharStream input;

        /** Create a lexer with no input stream; assign one through CharStream before lexing. */
        public Lexer()
        {
        }

        /** Create a lexer that reads characters from the given stream. */
        public Lexer( ICharStream input )
        {
            this.input = input;
        }

        /** Create a lexer that reads from the given stream and passes the given state object to the base recognizer. */
        public Lexer( ICharStream input, RecognizerSharedState state )
            : base( state )
        {
            this.input = input;
        }

        #region Properties

        /**
         *  Get the text matched so far for the current token, or the text
         *  override if one has been set. Setting this property replaces the
         *  complete text of the token; it wipes any previous changes to the text.
         */
        public string Text
        {
            get
            {
                if ( state.text != null )
                {
                    return state.text;
                }

                // No override: slice the input from the token start up to the current position.
                return input.Substring( state.tokenStartCharIndex, CharIndex - state.tokenStartCharIndex );
            }
            set
            {
                state.text = value;
            }
        }

        /** The current line, read from and written to the input stream. */
        public int Line
        {
            get
            {
                return input.Line;
            }
            set
            {
                input.Line = value;
            }
        }

        /** The current character position within the line, read from and written to the input stream. */
        public int CharPositionInLine
        {
            get
            {
                return input.CharPositionInLine;
            }
            set
            {
                input.CharPositionInLine = value;
            }
        }

        #endregion

        /**
         *  Reset the recognizer state, rewind the input (when there is one) to
         *  index 0 and clear the per-token lexer variables held in the shared
         *  state (when there is one).
         */
        public override void Reset()
        {
            base.Reset(); // reset all recognizer state variables

            // whack Lexer state variables
            if ( input != null )
            {
                input.Seek( 0 ); // rewind the input
            }

            if ( state == null )
            {
                return; // no shared state work to do
            }

            state.token = null;
            state.type = TokenTypes.Invalid;
            state.channel = TokenChannels.Default;
            state.tokenStartCharIndex = -1;
            state.tokenStartCharPositionInLine = -1;
            state.tokenStartLine = -1;
            state.text = null;
        }

        /**
         *  Return a token from this source; i.e., match a token on the char stream.
         *  Loops until a token can be returned: an end-of-file token when the next
         *  character is EndOfFile, otherwise the token produced by ParseNextToken()
         *  (emitting one automatically when the rule did not). Skipped tokens and
         *  recognition errors cause another iteration.
         */
        public virtual IToken NextToken()
        {
            for ( ; ; )
            {
                // Start every attempt from clean per-token state anchored at the current input position.
                state.token = null;
                state.channel = TokenChannels.Default;
                state.tokenStartCharIndex = input.Index;
                state.tokenStartCharPositionInLine = input.CharPositionInLine;
                state.tokenStartLine = input.Line;
                state.text = null;

                if ( input.LA( 1 ) == CharStreamConstants.EndOfFile )
                {
                    IToken eof = new CommonToken( (ICharStream)input, CharStreamConstants.EndOfFile, TokenChannels.Default, input.Index, input.Index );
                    eof.Line = Line;
                    eof.CharPositionInLine = CharPositionInLine;
                    return eof;
                }

                try
                {
                    ParseNextToken();
                    if ( state.token == null )
                    {
                        // The rule did not emit anything itself; build the default token.
                        Emit();
                    }
                    else if ( state.token == Tokens.Skip )
                    {
                        // Skip() was called by the rule; look for another token.
                        continue;
                    }

                    return state.token;
                }
                catch ( MismatchedRangeException mre )
                {
                    ReportError( mre );
                    // MatchRange() routine has already called recover()
                }
                catch ( MismatchedTokenException mte )
                {
                    ReportError( mte );
                    // Match() routine has already called recover()
                }
                catch ( RecognitionException re )
                {
                    ReportError( re );
                    Recover( re ); // throw out current char and try again
                }
            }
        }

        /**
         *  Instruct the lexer to skip creating a token for current lexer rule
         *  and look for another token. NextToken() knows to keep looking when
         *  a lexer rule finishes with token set to Tokens.Skip. Recall that
         *  if token==null at end of any token rule, it creates one for you
         *  and emits it.
         */
        public virtual void Skip()
        {
            state.token = Tokens.Skip;
        }

        /** This is the lexer entry point that sets instance var 'token' */
        public abstract void mTokens();

        /**
         *  The character stream this lexer reads from. Setting it resets the
         *  lexer and then installs the new stream.
         */
        public virtual ICharStream CharStream
        {
            get
            {
                return input;
            }
            set
            {
                // Clear the field first so Reset() has no stream to rewind;
                // the newly assigned stream is installed as-is afterwards.
                input = null;
                Reset();
                input = value;
            }
        }

        /** The source name reported by the input stream. */
        public override string SourceName
        {
            get
            {
                return input.SourceName;
            }
        }

        /**
         *  Currently does not support multiple emits per NextToken invocation
         *  for efficiency reasons. Subclass and override this method and
         *  NextToken (to push tokens into a list and pull from that list rather
         *  than a single variable as this implementation does).
         */
        public virtual void Emit( IToken token )
        {
            state.token = token;
        }

        /**
         *  The standard method called to automatically emit a token at the
         *  outermost lexical rule. The token object should point into the
         *  char buffer start..stop. If there is a text override in 'text',
         *  use that to set the token's text. Override this method to emit
         *  custom Token objects.
         *
         *  If you are building trees, then you should also override
         *  Parser or TreeParser.getMissingSymbol().
         */
        public virtual IToken Emit()
        {
            // The stop index is the last consumed character, i.e. one before the current index.
            IToken t = new CommonToken( input, state.type, state.channel, state.tokenStartCharIndex, CharIndex - 1 );
            t.Line = state.tokenStartLine;
            t.Text = state.text;
            t.CharPositionInLine = state.tokenStartCharPositionInLine;
            Emit( t );
            return t;
        }

        /**
         *  Match the characters of s one after another against the input.
         *  On the first mismatch: while backtracking, set the failed flag and
         *  return; otherwise call Recover() and throw a MismatchedTokenException.
         *  Each matched character is consumed and clears the failed flag.
         */
        public virtual void Match( string s )
        {
            int i = 0;
            while ( i < s.Length )
            {
                if ( input.LA( 1 ) != s[i] )
                {
                    if ( state.backtracking > 0 )
                    {
                        state.failed = true;
                        return;
                    }

                    MismatchedTokenException mte = new MismatchedTokenException( s[i], input, TokenNames );
                    Recover( mte );
                    throw mte;
                }

                i++;
                input.Consume();
                state.failed = false;
            }
        }

        /** Consume the next character, whatever it is. */
        public virtual void MatchAny()
        {
            input.Consume();
        }

        /**
         *  Match the single character c against the next input character.
         *  On mismatch: while backtracking, set the failed flag and return;
         *  otherwise call Recover() and throw a MismatchedTokenException.
         *  On success the character is consumed and the failed flag cleared.
         */
        public virtual void Match( int c )
        {
            if ( input.LA( 1 ) != c )
            {
                if ( state.backtracking > 0 )
                {
                    state.failed = true;
                    return;
                }

                MismatchedTokenException mte = new MismatchedTokenException( c, input, TokenNames );
                Recover( mte ); // don't really recover; just consume in lexer
                throw mte;
            }

            input.Consume();
            state.failed = false;
        }

        /**
         *  Match the next input character against the inclusive range a..b.
         *  On mismatch: while backtracking, set the failed flag and return;
         *  otherwise call Recover() and throw a MismatchedRangeException.
         *  On success the character is consumed and the failed flag cleared.
         */
        public virtual void MatchRange( int a, int b )
        {
            if ( input.LA( 1 ) < a || input.LA( 1 ) > b )
            {
                if ( state.backtracking > 0 )
                {
                    state.failed = true;
                    return;
                }

                MismatchedRangeException mre = new MismatchedRangeException( a, b, input );
                Recover( mre );
                throw mre;
            }

            input.Consume();
            state.failed = false;
        }

        /** What is the index of the current character of lookahead? */
        public virtual int CharIndex
        {
            get
            {
                return input.Index;
            }
        }

        /** Report a recognition error by displaying it; no error suppression is applied in the lexer. */
        public override void ReportError( RecognitionException e )
        {
            /* TODO: not thought about recovery in lexer yet.
             *
            // if we've already reported an error and have not matched a token
            // yet successfully, don't report any errors.
            if ( errorRecovery ) {
                //System.err.print("[SPURIOUS] ");
                return;
            }
            errorRecovery = true;
             */

            DisplayRecognitionError( this.TokenNames, e );
        }

        /**
         *  Build the error message for a recognition exception raised while
         *  lexing. The lexer-specific exception types get a message phrased in
         *  terms of characters; anything else is delegated to the base class.
         */
        public override string GetErrorMessage( RecognitionException e, string[] tokenNames )
        {
            string msg = null;
            if ( e is MismatchedTokenException )
            {
                MismatchedTokenException mte = (MismatchedTokenException)e;
                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting " + GetCharErrorDisplay( mte.Expecting );
            }
            else if ( e is NoViableAltException )
            {
                // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
                // and "(decision="+nvae.decisionNumber+") and
                // "state "+nvae.stateNumber
                // (nvae being the exception cast to NoViableAltException)
                msg = "no viable alternative at character " + GetCharErrorDisplay( e.Character );
            }
            else if ( e is EarlyExitException )
            {
                // for development, can add "(decision="+eee.decisionNumber+")"
                // (eee being the exception cast to EarlyExitException)
                msg = "required (...)+ loop did not match anything at character " + GetCharErrorDisplay( e.Character );
            }
            else if ( e is MismatchedNotSetException )
            {
                MismatchedNotSetException mse = (MismatchedNotSetException)e;
                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + mse.Expecting;
            }
            else if ( e is MismatchedSetException )
            {
                MismatchedSetException mse = (MismatchedSetException)e;
                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " + mse.Expecting;
            }
            else if ( e is MismatchedRangeException )
            {
                MismatchedRangeException mre = (MismatchedRangeException)e;
                msg = "mismatched character " + GetCharErrorDisplay( e.Character ) + " expecting set " +
                      GetCharErrorDisplay( mre.A ) + ".." + GetCharErrorDisplay( mre.B );
            }
            else
            {
                msg = base.GetErrorMessage( e, tokenNames );
            }

            return msg;
        }

        /**
         *  Render a character code for use in an error message, wrapped in
         *  single quotes. End of file is shown as a readable marker and newline,
         *  tab and carriage return are shown as their escape sequences; every
         *  other value is shown as the character itself.
         */
        public virtual string GetCharErrorDisplay( int c )
        {
            string s;
            switch ( c )
            {
            case TokenTypes.EndOfFile:
                // End of input has no printable character; an empty string here
                // would render as '' and make the message meaningless.
                s = "<EOF>";
                break;

            case '\n':
                s = "\\n";
                break;

            case '\t':
                s = "\\t";
                break;

            case '\r':
                s = "\\r";
                break;

            default:
                s = ( (char)c ).ToString();
                break;
            }

            return "'" + s + "'";
        }

        /**
         *  Lexers can normally match any char in its vocabulary after matching
         *  a token, so do the easy thing and just kill a character and hope
         *  it all works out. You can instead use the rule invocation stack
         *  to do sophisticated error recovery if you are in a fragment rule.
         */
        public virtual void Recover( RecognitionException re )
        {
            //System.out.println("consuming char "+(char)input.LA(1)+" during recovery");
            //re.printStackTrace();
            input.Consume();
        }

        /**
         *  Trace entry into a rule, describing the next input character and the
         *  current line and column. Calls are compiled only when ANTLR_TRACE is defined.
         */
        [Conditional( "ANTLR_TRACE" )]
        public virtual void TraceIn( string ruleName, int ruleIndex )
        {
            string inputSymbol = ( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
            base.TraceIn( ruleName, ruleIndex, inputSymbol );
        }

        /**
         *  Trace exit from a rule, describing the next input character and the
         *  current line and column. Calls are compiled only when ANTLR_TRACE is defined.
         */
        [Conditional( "ANTLR_TRACE" )]
        public virtual void TraceOut( string ruleName, int ruleIndex )
        {
            string inputSymbol = ( (char)input.LT( 1 ) ) + " line=" + Line + ":" + CharPositionInLine;
            base.TraceOut( ruleName, ruleIndex, inputSymbol );
        }

        /** Run the generated token rule; NextToken() calls this once per attempt. */
        protected virtual void ParseNextToken()
        {
            mTokens();
        }
    }
}