unit Antlr.Runtime;
(*
[The "BSD licence"]
Copyright (c) 2008 Erik van Bilsen
Copyright (c) 2005-2007 Kunle Odutola
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code MUST RETAIN the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form MUST REPRODUCE the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior WRITTEN permission.
4. Unless explicitly state otherwise, any contribution intentionally
submitted for inclusion in this work to the copyright owner or licensor
shall be under the terms and conditions of this license, without any
additional terms or conditions.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)
interface
{$IF CompilerVersion < 20}
{$MESSAGE ERROR 'You need Delphi 2009 or higher to use the Antlr runtime'}
{$IFEND}
uses
SysUtils,
Classes,
Generics.Defaults,
Generics.Collections,
Antlr.Runtime.Tools,
Antlr.Runtime.Collections;
type
TCharStreamConstants = (cscEOF = -1);
type
ERecognitionException = class;
ENoViableAltException = class;
///
/// A simple stream of integers. This is useful when all we care about is the char
/// or token type sequence (such as for interpretation).
///
IIntStream = interface(IANTLRInterface)
['{6B851BDB-DD9C-422B-AD1E-567E52D2654F}']
{ Property accessors }
function GetSourceName: String;
{ Methods }
///
/// Advances the read position of the stream. Updates line and column state
///
procedure Consume;
///
/// Get int at current input pointer + I ahead (where I=1 is next int)
/// Negative indexes are allowed. LA(-1) is previous token (token just matched).
/// LA(-i) where i is before first token should yield -1, invalid char or EOF.
///
function LA(I: Integer): Integer;
function LAChar(I: Integer): Char;
/// Tell the stream to start buffering if it hasn't already.
///
/// Executing Rewind(Mark()) on a stream should not affect the input position.
/// The Lexer tracks line/col info as well as input index so its markers are
/// not pure input indexes. Same for tree node streams. */
///
/// Return a marker that can be passed to
/// to return to the current position.
/// This could be the current input position, a value return from
/// , or some other marker.
function Mark: Integer;
///
/// Return the current input symbol index 0..N where N indicates the
/// last symbol has been read. The index is the symbol about to be
/// read not the most recently read symbol.
///
function Index: Integer;
///
/// Resets the stream so that the next call to
/// would return marker.
///
///
/// The marker will usually be but
/// it doesn't have to be. It's just a marker to indicate what
/// state the stream was in. This is essentially calling
/// and .
/// If there are other markers created after the specified marker,
/// this routine must unroll them like a stack. Assumes the state the
/// stream was in when this marker was created.
///
procedure Rewind(const Marker: Integer); overload;
///
/// Rewind to the input position of the last marker.
///
///
/// Used currently only after a cyclic DFA and just before starting
/// a sem/syn predicate to get the input position back to the start
/// of the decision. Do not "pop" the marker off the state. Mark(I)
/// and Rewind(I) should balance still. It is like invoking
/// Rewind(last marker) but it should not "pop" the marker off.
/// It's like Seek(last marker's input position).
///
procedure Rewind; overload;
///
/// You may want to commit to a backtrack but don't want to force the
/// stream to keep bookkeeping objects around for a marker that is
/// no longer necessary. This will have the same behavior as
/// except it releases resources without
/// the backward seek.
///
///
/// This must throw away resources for all markers back to the marker
/// argument. So if you're nested 5 levels of Mark(), and then Release(2)
/// you have to release resources for depths 2..5.
///
procedure Release(const Marker: Integer);
///
/// Set the input cursor to the position indicated by index. This is
/// normally used to seek ahead in the input stream.
///
///
/// No buffering is required to do this unless you know your stream
/// will use seek to move backwards such as when backtracking.
///
/// This is different from rewind in its multi-directional requirement
/// and in that its argument is strictly an input cursor (index).
///
/// For char streams, seeking forward must update the stream state such
/// as line number. For seeking backwards, you will be presumably
/// backtracking using the
/// /
/// mechanism that restores state and so this method does not need to
/// update state when seeking backwards.
///
/// Currently, this method is only used for efficient backtracking using
/// memoization, but in the future it may be used for incremental parsing.
///
/// The index is 0..N-1. A seek to position i means that LA(1) will return
/// the ith symbol. So, seeking to 0 means LA(1) will return the first
/// element in the stream.
///
procedure Seek(const Index: Integer);
/// Returns the size of the entire stream.
///
/// Only makes sense for streams that buffer everything up probably,
/// but might be useful to display the entire stream or for testing.
/// This value includes a single EOF.
///
function Size: Integer;
{ Properties }
///
/// Where are you getting symbols from? Normally, implementations will
/// pass the buck all the way to the lexer who can ask its input stream
/// for the file name or whatever.
///
property SourceName: String read GetSourceName;
end;
/// A source of characters for an ANTLR lexer
ICharStream = interface(IIntStream)
['{C30EF0DB-F4BD-4CBC-8C8F-828DABB6FF36}']
{ Property accessors }
function GetLine: Integer;
procedure SetLine(const Value: Integer);
function GetCharPositionInLine: Integer;
procedure SetCharPositionInLine(const Value: Integer);
{ Methods }
///
/// Get the ith character of lookahead. This is usually the same as
/// LA(I). This will be used for labels in the generated lexer code.
/// I'd prefer to return a char here type-wise, but it's probably
/// better to be 32-bit clean and be consistent with LA.
///
function LT(const I: Integer): Integer;
///
/// This primarily a useful interface for action code (just make sure
/// actions don't use this on streams that don't support it).
/// For infinite streams, you don't need this.
///
function Substring(const Start, Stop: Integer): String;
{ Properties }
///
/// The current line in the character stream (ANTLR tracks the
/// line information automatically. To support rewinding character
/// streams, we are able to [re-]set the line.
///
property Line: Integer read GetLine write SetLine;
///
/// The index of the character relative to the beginning of the
/// line (0..N-1). To support rewinding character streams, we are
/// able to [re-]set the character position.
///
property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
end;
IToken = interface(IANTLRInterface)
['{73BF129C-2F45-4C68-838E-BF5D3536AC6D}']
{ Property accessors }
function GetTokenType: Integer;
procedure SetTokenType(const Value: Integer);
function GetLine: Integer;
procedure SetLine(const Value: Integer);
function GetCharPositionInLine: Integer;
procedure SetCharPositionInLine(const Value: Integer);
function GetChannel: Integer;
procedure SetChannel(const Value: Integer);
function GetTokenIndex: Integer;
procedure SetTokenIndex(const Value: Integer);
function GetText: String;
procedure SetText(const Value: String);
{ Properties }
property TokenType: Integer read GetTokenType write SetTokenType;
/// The line number on which this token was matched; line=1..N
property Line: Integer read GetLine write SetLine;
///
/// The index of the first character relative to the beginning of the line 0..N-1
///
property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
/// The line number on which this token was matched; line=1..N
property Channel: Integer read GetChannel write SetChannel;
///
/// An index from 0..N-1 of the token object in the input stream
///
///
/// This must be valid in order to use the ANTLRWorks debugger.
///
property TokenIndex: Integer read GetTokenIndex write SetTokenIndex;
/// The text of the token
///
/// When setting the text, it might be a NOP such as for the CommonToken,
/// which doesn't have string pointers, just indexes into a char buffer.
///
property Text: String read GetText write SetText;
end;
///
/// A source of tokens must provide a sequence of tokens via NextToken()
/// and also must reveal it's source of characters; CommonToken's text is
/// computed from a CharStream; it only store indices into the char stream.
///
/// Errors from the lexer are never passed to the parser. Either you want
/// to keep going or you do not upon token recognition error. If you do not
/// want to continue lexing then you do not want to continue parsing. Just
/// throw an exception not under RecognitionException and Delphi will naturally
/// toss you all the way out of the recognizers. If you want to continue
/// lexing then you should not throw an exception to the parser--it has already
/// requested a token. Keep lexing until you get a valid one. Just report
/// errors and keep going, looking for a valid token.
///
ITokenSource = interface(IANTLRInterface)
['{2C71FAD0-AEEE-417D-B576-4059F7C4CEB4}']
{ Property accessors }
function GetSourceName: String;
{ Methods }
///
/// Returns a Token object from the input stream (usually a CharStream).
/// Does not fail/return upon lexing error; just keeps chewing on the
/// characters until it gets a good one; errors are not passed through
/// to the parser.
///
function NextToken: IToken;
{ Properties }
///
/// Where are you getting tokens from? normally the implication will simply
/// ask lexers input stream.
///
property SourceName: String read GetSourceName;
end;
/// A stream of tokens accessing tokens from a TokenSource
ITokenStream = interface(IIntStream)
['{59E5B39D-31A6-496D-9FA9-AC75CC584B68}']
{ Property accessors }
function GetTokenSource: ITokenSource;
procedure SetTokenSource(const Value: ITokenSource);
{ Methods }
///
/// Get Token at current input pointer + I ahead (where I=1 is next
/// Token).
/// I < 0 indicates tokens in the past. So -1 is previous token and -2 is
/// two tokens ago. LT(0) is undefined. For I>=N, return Token.EOFToken.
/// Return null for LT(0) and any index that results in an absolute address
/// that is negative.
///
function LT(const K: Integer): IToken;
///
/// Get a token at an absolute index I; 0..N-1. This is really only
/// needed for profiling and debugging and token stream rewriting.
/// If you don't want to buffer up tokens, then this method makes no
/// sense for you. Naturally you can't use the rewrite stream feature.
/// I believe DebugTokenStream can easily be altered to not use
/// this method, removing the dependency.
///
function Get(const I: Integer): IToken;
/// Return the text of all tokens from start to stop, inclusive.
/// If the stream does not buffer all the tokens then it can just
/// return ''; Users should not access $ruleLabel.text in
/// an action of course in that case.
///
function ToString(const Start, Stop: Integer): String; overload;
/// Because the user is not required to use a token with an index stored
/// in it, we must provide a means for two token objects themselves to
/// indicate the start/end location. Most often this will just delegate
/// to the other ToString(Integer,Integer). This is also parallel with
/// the TreeNodeStream.ToString(Object,Object).
///
function ToString(const Start, Stop: IToken): String; overload;
{ Properties }
property TokenSource: ITokenSource read GetTokenSource write SetTokenSource;
end;
///
/// This is the complete state of a stream.
///
/// When walking ahead with cyclic DFA for syntactic predicates, we
/// need to record the state of the input stream (char index, line,
/// etc...) so that we can rewind the state after scanning ahead.
///
ICharStreamState = interface(IANTLRInterface)
['{62D2A1CD-ED3A-4C95-A366-AB8F2E54060B}']
{ Property accessors }
function GetP: Integer;
procedure SetP(const Value: Integer);
function GetLine: Integer;
procedure SetLine(const Value: Integer);
function GetCharPositionInLine: Integer;
procedure SetCharPositionInLine(const Value: Integer);
{ Properties }
/// Index into the char stream of next lookahead char
property P: Integer read GetP write SetP;
/// What line number is the scanner at before processing buffer[P]?
property Line: Integer read GetLine write SetLine;
/// What char position 0..N-1 in line is scanner before processing buffer[P]?
property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
end;
///
/// A pretty quick that uses a character array
/// directly as it's underlying source.
///
IANTLRStringStream = interface(ICharStream)
['{2FA24299-FF97-4AB6-8CA6-5D3DA13C4AB2}']
{ Methods }
///
/// Resets the stream so that it is in the same state it was
/// when the object was created *except* the data array is not
/// touched.
///
procedure Reset;
end;
///
/// A character stream - an - that loads
/// and caches the contents of it's underlying file fully during
/// object construction
///
///
/// This looks very much like an ANTLReaderStream or an ANTLRInputStream
/// but, it is a special case. Since we know the exact size of the file to
/// load, we can avoid lots of data copying and buffer resizing.
///
IANTLRFileStream = interface(IANTLRStringStream)
['{2B0145DB-2DAA-48A0-8316-B47A69EDDD1A}']
{ Methods }
///
/// Loads and buffers the specified file to be used as this
/// ANTLRFileStream's source
///
/// File to load
/// Encoding to apply to file
procedure Load(const FileName: String; const Encoding: TEncoding);
end;
///
/// A stripped-down version of org.antlr.misc.BitSet that is just
/// good enough to handle runtime requirements such as FOLLOW sets
/// for automatic error recovery.
///
IBitSet = interface(IANTLRInterface)
['{F2045045-FC46-4779-A65D-56C65D257A8E}']
{ Property accessors }
function GetIsNil: Boolean;
{ Methods }
/// return "this or a" in a new set
function BitSetOr(const A: IBitSet): IBitSet;
/// Or this element into this set (grow as necessary to accommodate)
procedure Add(const El: Integer);
/// Grows the set to a larger number of bits.
/// element that must fit in set
///
procedure GrowToInclude(const Bit: Integer);
procedure OrInPlace(const A: IBitSet);
function Size: Integer;
function Member(const El: Integer): Boolean;
// remove this element from this set
procedure Remove(const El: Integer);
function NumBits: Integer;
/// return how much space is being used by the bits array not
/// how many actually have member bits on.
///
function LengthInLongWords: Integer;
function ToArray: TIntegerArray;
function ToPackedArray: TUInt64Array;
function ToString: String; overload;
function ToString(const TokenNames: TStringArray): String; overload;
function Equals(Obj: TObject): Boolean;
{ Properties }
property IsNil: Boolean read GetIsNil;
end;
TBitSetArray = array of IBitSet;
///
/// The set of fields needed by an abstract recognizer to recognize input
/// and recover from errors
///
///
/// As a separate state object, it can be shared among multiple grammars;
/// e.g., when one grammar imports another.
/// These fields are publicly visible but the actual state pointer per
/// parser is protected.
///
IRecognizerSharedState = interface(IANTLRInterface)
['{6CB6E17A-0B01-4AA7-8D49-5742A3CB8901}']
{ Property accessors }
function GetFollowing: TBitSetArray;
procedure SetFollowing(const Value: TBitSetArray);
function GetFollowingStackPointer: Integer;
procedure SetFollowingStackPointer(const Value: Integer);
function GetErrorRecovery: Boolean;
procedure SetErrorRecovery(const Value: Boolean);
function GetLastErrorIndex: Integer;
procedure SetLastErrorIndex(const Value: Integer);
function GetFailed: Boolean;
procedure SetFailed(const Value: Boolean);
function GetSyntaxErrors: Integer;
procedure SetSyntaxErrors(const Value: Integer);
function GetBacktracking: Integer;
procedure SetBacktracking(const Value: Integer);
function GetRuleMemo: TDictionaryArray;
function GetRuleMemoCount: Integer;
procedure SetRuleMemoCount(const Value: Integer);
function GetToken: IToken;
procedure SetToken(const Value: IToken);
function GetTokenStartCharIndex: Integer;
procedure SetTokenStartCharIndex(const Value: Integer);
function GetTokenStartLine: Integer;
procedure SetTokenStartLine(const Value: Integer);
function GetTokenStartCharPositionInLine: Integer;
procedure SetTokenStartCharPositionInLine(const Value: Integer);
function GetChannel: Integer;
procedure SetChannel(const Value: Integer);
function GetTokenType: Integer;
procedure SetTokenType(const Value: Integer);
function GetText: String;
procedure SetText(const Value: String);
{ Properties }
///
/// Tracks the set of token types that can follow any rule invocation.
/// Stack grows upwards. When it hits the max, it grows 2x in size
/// and keeps going.
///
property Following: TBitSetArray read GetFollowing write SetFollowing;
property FollowingStackPointer: Integer read GetFollowingStackPointer write SetFollowingStackPointer;
///
/// This is true when we see an error and before having successfully
/// matched a token. Prevents generation of more than one error message
/// per error.
///
property ErrorRecovery: Boolean read GetErrorRecovery write SetErrorRecovery;
///
/// The index into the input stream where the last error occurred.
///
///
/// This is used to prevent infinite loops where an error is found
/// but no token is consumed during recovery...another error is found,
/// ad naseum. This is a failsafe mechanism to guarantee that at least
/// one token/tree node is consumed for two errors.
///
property LastErrorIndex: Integer read GetLastErrorIndex write SetLastErrorIndex;
///
/// In lieu of a return value, this indicates that a rule or token
/// has failed to match. Reset to false upon valid token match.
///
property Failed: Boolean read GetFailed write SetFailed;
///
/// Did the recognizer encounter a syntax error? Track how many.
///
property SyntaxErrors: Integer read GetSyntaxErrors write SetSyntaxErrors;
///
/// If 0, no backtracking is going on. Safe to exec actions etc...
/// If >0 then it's the level of backtracking.
///
property Backtracking: Integer read GetBacktracking write SetBacktracking;
///
/// An array[size num rules] of Map<Integer,Integer> that tracks
/// the stop token index for each rule.
///
///
/// RuleMemo[RuleIndex] is the memoization table for RuleIndex.
/// For key RuleStartIndex, you get back the stop token for
/// associated rule or MEMO_RULE_FAILED.
///
/// This is only used if rule memoization is on (which it is by default).
///
property RuleMemo: TDictionaryArray read GetRuleMemo;
property RuleMemoCount: Integer read GetRuleMemoCount write SetRuleMemoCount;
// Lexer Specific Members
// LEXER FIELDS (must be in same state object to avoid casting
// constantly in generated code and Lexer object) :(
///
/// Token object normally returned by NextToken() after matching lexer rules.
///
///
/// The goal of all lexer rules/methods is to create a token object.
/// This is an instance variable as multiple rules may collaborate to
/// create a single token. NextToken will return this object after
/// matching lexer rule(s). If you subclass to allow multiple token
/// emissions, then set this to the last token to be matched or
/// something nonnull so that the auto token emit mechanism will not
/// emit another token.
///
property Token: IToken read GetToken write SetToken;
///
/// What character index in the stream did the current token start at?
///
///
/// Needed, for example, to get the text for current token. Set at
/// the start of nextToken.
///
property TokenStartCharIndex: Integer read GetTokenStartCharIndex write SetTokenStartCharIndex;
///
/// The line on which the first character of the token resides
///
property TokenStartLine: Integer read GetTokenStartLine write SetTokenStartLine;
/// The character position of first character within the line
property TokenStartCharPositionInLine: Integer read GetTokenStartCharPositionInLine write SetTokenStartCharPositionInLine;
/// The channel number for the current token
property Channel: Integer read GetChannel write SetChannel;
/// The token type for the current token
property TokenType: Integer read GetTokenType write SetTokenType;
///
/// You can set the text for the current token to override what is in
/// the input char buffer. Use setText() or can set this instance var.
///
property Text: String read GetText write SetText;
end;
ICommonToken = interface(IToken)
['{06B1B0C3-2A0D-477A-AE30-414F51ACE8A0}']
{ Property accessors }
function GetStartIndex: Integer;
procedure SetStartIndex(const Value: Integer);
function GetStopIndex: Integer;
procedure SetStopIndex(const Value: Integer);
function GetInputStream: ICharStream;
procedure SetInputStream(const Value: ICharStream);
{ Methods }
function ToString: String;
{ Properties }
property StartIndex: Integer read GetStartIndex write SetStartIndex;
property StopIndex: Integer read GetStopIndex write SetStopIndex;
property InputStream: ICharStream read GetInputStream write SetInputStream;
end;
///
/// A Token object like we'd use in ANTLR 2.x; has an actual string created
/// and associated with this object. These objects are needed for imaginary
/// tree nodes that have payload objects. We need to create a Token object
/// that has a string; the tree node will point at this token. CommonToken
/// has indexes into a char stream and hence cannot be used to introduce
/// new strings.
///
IClassicToken = interface(IToken)
{ Property accessors }
function GetTokenType: Integer;
procedure SetTokenType(const Value: Integer);
function GetLine: Integer;
procedure SetLine(const Value: Integer);
function GetCharPositionInLine: Integer;
procedure SetCharPositionInLine(const Value: Integer);
function GetChannel: Integer;
procedure SetChannel(const Value: Integer);
function GetTokenIndex: Integer;
procedure SetTokenIndex(const Value: Integer);
function GetText: String;
procedure SetText(const Value: String);
function GetInputStream: ICharStream;
procedure SetInputStream(const Value: ICharStream);
{ Properties }
property TokenType: Integer read GetTokenType write SetTokenType;
property Line: Integer read GetLine write SetLine;
property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
property Channel: Integer read GetChannel write SetChannel;
property TokenIndex: Integer read GetTokenIndex write SetTokenIndex;
property Text: String read GetText write SetText;
property InputStream: ICharStream read GetInputStream write SetInputStream;
end;
///
/// A generic recognizer that can handle recognizers generated from
/// lexer, parser, and tree grammars. This is all the parsing
/// support code essentially; most of it is error recovery stuff and
/// backtracking.
///
IBaseRecognizer = interface(IANTLRObject)
['{90813CE2-614B-4773-A26E-936E7DE7E9E9}']
{ Property accessors }
function GetInput: IIntStream;
function GetBacktrackingLevel: Integer;
function GetState: IRecognizerSharedState;
function GetNumberOfSyntaxErrors: Integer;
function GetGrammarFileName: String;
function GetSourceName: String;
function GetTokenNames: TStringArray;
{ Methods }
procedure BeginBacktrack(const Level: Integer);
procedure EndBacktrack(const Level: Integer; const Successful: Boolean);
/// Reset the parser's state. Subclasses must rewind the input stream.
procedure Reset;
///
/// Match current input symbol against ttype. Attempt
/// single token insertion or deletion error recovery. If
/// that fails, throw EMismatchedTokenException.
///
///
/// To turn off single token insertion or deletion error
/// recovery, override MismatchRecover() and have it call
/// plain Mismatch(), which does not recover. Then any error
/// in a rule will cause an exception and immediate exit from
/// rule. Rule would recover by resynchronizing to the set of
/// symbols that can follow rule ref.
///
function Match(const Input: IIntStream; const TokenType: Integer;
const Follow: IBitSet): IANTLRInterface;
function MismatchIsUnwantedToken(const Input: IIntStream;
const TokenType: Integer): Boolean;
function MismatchIsMissingToken(const Input: IIntStream;
const Follow: IBitSet): Boolean;
/// A hook to listen in on the token consumption during error recovery.
/// The DebugParser subclasses this to fire events to the listenter.
///
procedure BeginResync;
procedure EndResync;
///
/// Report a recognition problem.
///
///
/// This method sets errorRecovery to indicate the parser is recovering
/// not parsing. Once in recovery mode, no errors are generated.
/// To get out of recovery mode, the parser must successfully Match
/// a token (after a resync). So it will go:
///
/// 1. error occurs
/// 2. enter recovery mode, report error
/// 3. consume until token found in resynch set
/// 4. try to resume parsing
/// 5. next Match() will reset errorRecovery mode
///
/// If you override, make sure to update syntaxErrors if you care about that.
///
procedure ReportError(const E: ERecognitionException);
/// Match the wildcard: in a symbol
procedure MatchAny(const Input: IIntStream);
procedure DisplayRecognitionError(const TokenNames: TStringArray;
const E: ERecognitionException);
///
/// What error message should be generated for the various exception types?
///
/// Not very object-oriented code, but I like having all error message generation
/// within one method rather than spread among all of the exception classes. This
/// also makes it much easier for the exception handling because the exception
/// classes do not have to have pointers back to this object to access utility
/// routines and so on. Also, changing the message for an exception type would be
/// difficult because you would have to subclassing exception, but then somehow get
/// ANTLR to make those kinds of exception objects instead of the default.
///
/// This looks weird, but trust me--it makes the most sense in terms of flexibility.
///
/// For grammar debugging, you will want to override this to add more information
/// such as the stack frame with GetRuleInvocationStack(e, this.GetType().Fullname)
/// and, for no viable alts, the decision description and state etc...
///
/// Override this to change the message generated for one or more exception types.
///
function GetErrorMessage(const E: ERecognitionException;
const TokenNames: TStringArray): String;
///
/// What is the error header, normally line/character position information?
///
function GetErrorHeader(const E: ERecognitionException): String;
///
/// How should a token be displayed in an error message? The default
/// is to display just the text, but during development you might
/// want to have a lot of information spit out. Override in that case
/// to use t.ToString() (which, for CommonToken, dumps everything about
/// the token). This is better than forcing you to override a method in
/// your token objects because you don't have to go modify your lexer
/// so that it creates a new type.
///
function GetTokenErrorDisplay(const T: IToken): String;
///
/// Override this method to change where error messages go
///
procedure EmitErrorMessage(const Msg: String);
///
/// Recover from an error found on the input stream. This is
/// for NoViableAlt and mismatched symbol exceptions. If you enable
/// single token insertion and deletion, this will usually not
/// handle mismatched symbol exceptions but there could be a mismatched
/// token that the Match() routine could not recover from.
///
procedure Recover(const Input: IIntStream; const RE: ERecognitionException);
// Not currently used
function RecoverFromMismatchedSet(const Input: IIntStream;
const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface;
procedure ConsumeUntil(const Input: IIntStream; const TokenType: Integer); overload;
/// Consume tokens until one matches the given token set
procedure ConsumeUntil(const Input: IIntStream; const BitSet: IBitSet); overload;
///
/// Returns List <String> of the rules in your parser instance
/// leading up to a call to this method. You could override if
/// you want more details such as the file/line info of where
/// in the parser source code a rule is invoked.
///
///
/// NOT IMPLEMENTED IN THE DELPHI VERSION YET
/// This is very useful for error messages and for context-sensitive
/// error recovery.
///
//function GetRuleInvocationStack: IList; overload;
///
/// A more general version of GetRuleInvocationStack where you can
/// pass in, for example, a RecognitionException to get it's rule
/// stack trace. This routine is shared with all recognizers, hence,
/// static.
///
/// TODO: move to a utility class or something; weird having lexer call this
///
///
/// NOT IMPLEMENTED IN THE DELPHI VERSION YET
///
//function GetRuleInvocationStack(const E: Exception;
// const RecognizerClassName: String): IList; overload;
/// A convenience method for use most often with template rewrites.
/// Convert a List<Token> to List<String>
///
function ToStrings(const Tokens: IList): IList;
///
/// Given a rule number and a start token index number, return
/// MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
/// start index. If this rule has parsed input starting from the
/// start index before, then return where the rule stopped parsing.
/// It returns the index of the last token matched by the rule.
///
///
/// For now we use a hashtable and just the slow Object-based one.
/// Later, we can make a special one for ints and also one that
/// tosses out data after we commit past input position i.
///
function GetRuleMemoization(const RuleIndex, RuleStartIndex: Integer): Integer;
///
/// Has this rule already parsed input at the current index in the
/// input stream? Return the stop token index or MEMO_RULE_UNKNOWN.
/// If we attempted but failed to parse properly before, return
/// MEMO_RULE_FAILED.
///
/// This method has a side-effect: if we have seen this input for
/// this rule and successfully parsed before, then seek ahead to
/// 1 past the stop token matched for this rule last time.
///
function AlreadyParsedRule(const Input: IIntStream;
const RuleIndex: Integer): Boolean;
///
/// Record whether or not this rule parsed the input at this position
/// successfully. Use a standard hashtable for now.
///
procedure Memoize(const Input: IIntStream; const RuleIndex,
RuleStartIndex: Integer);
///
/// Return how many rule/input-index pairs there are in total.
/// TODO: this includes synpreds. :(
///
///
function GetRuleMemoizationChaceSize: Integer;
procedure TraceIn(const RuleName: String; const RuleIndex: Integer;
const InputSymbol: String);
procedure TraceOut(const RuleName: String; const RuleIndex: Integer;
const InputSymbol: String);
{ Properties }
property Input: IIntStream read GetInput;
property BacktrackingLevel: Integer read GetBacktrackingLevel;
property State: IRecognizerSharedState read GetState;
///
/// Get number of recognition errors (lexer, parser, tree parser). Each
/// recognizer tracks its own number. So parser and lexer each have
/// separate count. Does not count the spurious errors found between
/// an error and next valid token match
///
/// See also ReportError()
///
property NumberOfSyntaxErrors: Integer read GetNumberOfSyntaxErrors;
///
/// For debugging and other purposes, might want the grammar name.
/// Have ANTLR generate an implementation for this property.
///
///
property GrammarFileName: String read GetGrammarFileName;
///
/// For debugging and other purposes, might want the source name.
/// Have ANTLR provide a hook for this property.
///
/// The source name
property SourceName: String read GetSourceName;
///
/// Used to print out token names like ID during debugging and
/// error reporting. The generated parsers implement a method
/// that overrides this to point to their string[] tokenNames.
///
property TokenNames: TStringArray read GetTokenNames;
end;
///
/// The most common stream of tokens is one where every token is buffered up
/// and tokens are prefiltered for a certain channel (the parser will only
/// see these tokens and cannot change the filter channel number during the
/// parse).
///
/// TODO: how to access the full token stream? How to track all tokens matched per rule?
///
ICommonTokenStream = interface(ITokenStream)
{ Methods }
///
/// A simple filter mechanism whereby you can tell this token stream
/// to force all tokens of type TType to be on Channel.
///
///
///
/// For example,
/// when interpreting, we cannot exec actions so we need to tell
/// the stream to force all WS and NEWLINE to be a different, ignored
/// channel.
///
procedure SetTokenTypeChannel(const TType, Channel: Integer);
procedure DiscardTokenType(const TType: Integer);
procedure DiscardOffChannelTokens(const Discard: Boolean);
function GetTokens: IList; overload;
function GetTokens(const Start, Stop: Integer): IList; overload;
/// Given a start and stop index, return a List of all tokens in
/// the token type BitSet. Return null if no tokens were found. This
/// method looks at both on and off channel tokens.
///
function GetTokens(const Start, Stop: Integer;
const Types: IBitSet): IList; overload;
function GetTokens(const Start, Stop: Integer;
const Types: IList): IList; overload;
function GetTokens(const Start, Stop,
TokenType: Integer): IList; overload;
procedure Reset;
end;
IDFA = interface;
TSpecialStateTransitionHandler = function(const DFA: IDFA; S: Integer;
const Input: IIntStream): Integer of Object;
///
/// A DFA implemented as a set of transition tables.
///
///
///
/// Any state that has a semantic predicate edge is special; those states are
/// generated with if-then-else structures in a SpecialStateTransition()
/// which is generated by cyclicDFA template.
///
///
/// There are at most 32767 states (16-bit signed short). Could get away with byte
/// sometimes but would have to generate different types and the simulation code too.
///
///
/// As a point of reference, the Tokens rule DFA for the lexer in the Java grammar
/// sample has approximately 326 states.
///
///
IDFA = interface(IANTLRInterface)
['{36312B59-B718-48EF-A0EC-4529DE70F4C2}']
{ Property accessors }
function GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler;
procedure SetSpecialStateTransitionHandler(const Value: TSpecialStateTransitionHandler);
{ Methods }
///
/// From the input stream, predict what alternative will succeed using this
/// DFA (representing the covering regular approximation to the underlying CFL).
///
/// Input stream
/// Return an alternative number 1..N. Throw an exception upon error.
function Predict(const Input: IIntStream): Integer;
///
/// A hook for debugging interface
///
///
procedure Error(const NVAE: ENoViableAltException);
function SpecialStateTransition(const S: Integer; const Input: IIntStream): Integer;
function Description: String;
function SpecialTransition(const State, Symbol: Integer): Integer;
{ Properties }
property SpecialStateTransitionHandler: TSpecialStateTransitionHandler read GetSpecialStateTransitionHandler write SetSpecialStateTransitionHandler;
end;
///
/// A lexer is recognizer that draws input symbols from a character stream.
/// lexer grammars result in a subclass of this object. A Lexer object
/// uses simplified Match() and error recovery mechanisms in the interest
/// of speed.
///
ILexer = interface(IBaseRecognizer)
['{331AAB49-E7CD-40E7-AEF5-427F7D6577AD}']
{ Property accessors }
function GetCharStream: ICharStream;
procedure SetCharStream(const Value: ICharStream);
function GetLine: Integer;
function GetCharPositionInLine: Integer;
function GetCharIndex: Integer;
function GetText: String;
procedure SetText(const Value: String);
{ Methods }
///
/// Return a token from this source; i.e., Match a token on the char stream.
///
function NextToken: IToken;
///
/// Instruct the lexer to skip creating a token for current lexer rule and
/// look for another token. NextToken() knows to keep looking when a lexer
/// rule finishes with token set to SKIP_TOKEN. Recall that if token==null
/// at end of any token rule, it creates one for you and emits it.
///
procedure Skip;
/// This is the lexer entry point that sets instance var 'token'
procedure DoTokens;
///
/// Currently does not support multiple emits per nextToken invocation
/// for efficiency reasons. Subclass and override this method and
/// NextToken (to push tokens into a list and pull from that list rather
/// than a single variable as this implementation does).
///
procedure Emit(const Token: IToken); overload;
///
/// The standard method called to automatically emit a token at the
/// outermost lexical rule. The token object should point into the
/// char buffer start..stop. If there is a text override in 'text',
/// use that to set the token's text.
///
/// Override this method to emit custom Token objects.
/// If you are building trees, then you should also override
/// Parser or TreeParser.GetMissingSymbol().
///
function Emit: IToken; overload;
procedure Match(const S: String); overload;
procedure Match(const C: Integer); overload;
procedure MatchAny;
procedure MatchRange(const A, B: Integer);
///
/// Lexers can normally Match any char in it's vocabulary after matching
/// a token, so do the easy thing and just kill a character and hope
/// it all works out. You can instead use the rule invocation stack
/// to do sophisticated error recovery if you are in a Fragment rule.
///
procedure Recover(const RE: ERecognitionException);
function GetCharErrorDisplay(const C: Integer): String;
procedure TraceIn(const RuleName: String; const RuleIndex: Integer);
procedure TraceOut(const RuleName: String; const RuleIndex: Integer);
{ Properties }
/// Set the char stream and reset the lexer
property CharStream: ICharStream read GetCharStream write SetCharStream;
property Line: Integer read GetLine;
property CharPositionInLine: Integer read GetCharPositionInLine;
/// What is the index of the current character of lookahead?
property CharIndex: Integer read GetCharIndex;
///
/// Gets or sets the 'lexeme' for the current token.
///
///
///
/// The getter returns the text matched so far for the current token or any
/// text override.
///
///
/// The setter sets the complete text of this token. It overrides/wipes any
/// previous changes to the text.
///
///
property Text: String read GetText write SetText;
end;
/// A parser for TokenStreams. Parser grammars result in a subclass
/// of this.
///
IParser = interface(IBaseRecognizer)
['{7420879A-5D1F-43CA-BD49-2264D7514501}']
{ Property accessors }
function GetTokenStream: ITokenStream;
procedure SetTokenStream(const Value: ITokenStream);
{ Methods }
procedure TraceIn(const RuleName: String; const RuleIndex: Integer);
procedure TraceOut(const RuleName: String; const RuleIndex: Integer);
{ Properties }
/// Set the token stream and reset the parser
property TokenStream: ITokenStream read GetTokenStream write SetTokenStream;
end;
///
/// Rules can return start/stop info as well as possible trees and templates
///
IRuleReturnScope = interface(IANTLRInterface)
['{E9870056-BF6D-4CB2-B71C-10B80797C0B4}']
{ Property accessors }
function GetStart: IANTLRInterface;
procedure SetStart(const Value: IANTLRInterface);
function GetStop: IANTLRInterface;
procedure SetStop(const Value: IANTLRInterface);
function GetTree: IANTLRInterface;
procedure SetTree(const Value: IANTLRInterface);
function GetTemplate: IANTLRInterface;
{ Properties }
/// Return the start token or tree
property Start: IANTLRInterface read GetStart write SetStart;
/// Return the stop token or tree
property Stop: IANTLRInterface read GetStop write SetStop;
/// Has a value potentially if output=AST;
property Tree: IANTLRInterface read GetTree write SetTree;
///
/// Has a value potentially if output=template;
/// Don't use StringTemplate type to avoid dependency on ST assembly
///
property Template: IANTLRInterface read GetTemplate;
end;
///
/// Rules that return more than a single value must return an object
/// containing all the values. Besides the properties defined in
/// RuleLabelScope.PredefinedRulePropertiesScope there may be user-defined
/// return values. This class simply defines the minimum properties that
/// are always defined and methods to access the others that might be
/// available depending on output option such as template and tree.
///
/// Note text is not an actual property of the return value, it is computed
/// from start and stop using the input stream's ToString() method. I
/// could add a ctor to this so that we can pass in and store the input
/// stream, but I'm not sure we want to do that. It would seem to be undefined
/// to get the .text property anyway if the rule matches tokens from multiple
/// input streams.
///
/// I do not use getters for fields of objects that are used simply to
/// group values such as this aggregate.
///
IParserRuleReturnScope = interface(IRuleReturnScope)
['{9FB62050-E23B-4FE4-87D5-2C1EE67AEC3E}']
end;
/// Useful for dumping out the input stream after doing some
/// augmentation or other manipulations.
///
///
///
/// You can insert stuff, Replace, and delete chunks. Note that the
/// operations are done lazily--only if you convert the buffer to a
/// String. This is very efficient because you are not moving data around
/// all the time. As the buffer of tokens is converted to strings, the
/// ToString() method(s) check to see if there is an operation at the
/// current index. If so, the operation is done and then normal String
/// rendering continues on the buffer. This is like having multiple Turing
/// machine instruction streams (programs) operating on a single input tape. :)
///
/// Since the operations are done lazily at ToString-time, operations do not
/// screw up the token index values. That is, an insert operation at token
/// index I does not change the index values for tokens I+1..N-1.
///
/// Because operations never actually alter the buffer, you may always get
/// the original token stream back without undoing anything. Since
/// the instructions are queued up, you can easily simulate transactions and
/// roll back any changes if there is an error just by removing instructions.
/// For example,
///
/// var
/// Input: ICharStream;
/// Lex: ILexer;
/// Tokens: ITokenRewriteStream;
/// Parser: IParser;
/// Input := TANTLRFileStream.Create('input');
/// Lex := TLexer.Create(Input);
/// Tokens := TTokenRewriteStream.Create(Lex);
/// Parser := TParser.Create(tokens);
/// Parser.startRule();
///
/// Then in the rules, you can execute
/// var
/// t,u: IToken;
/// ...
/// Input.InsertAfter(t, 'text to put after t');
/// Input.InsertAfter(u, 'text after u');
/// WriteLn(Tokens.ToString());
///
/// Actually, you have to cast the 'input' to a TokenRewriteStream. :(
///
/// You can also have multiple "instruction streams" and get multiple
/// rewrites from a single pass over the input. Just name the instruction
/// streams and use that name again when printing the buffer. This could be
/// useful for generating a C file and also its header file--all from the
/// same buffer:
///
/// Tokens.InsertAfter('pass1', t, 'text to put after t');
/// Tokens.InsertAfter('pass2', u, 'text after u');
/// WriteLn(Tokens.ToString('pass1'));
/// WriteLn(Tokens.ToString('pass2'));
///
/// If you don't use named rewrite streams, a "default" stream is used as
/// the first example shows.
///
ITokenRewriteStream = interface(ICommonTokenStream)
['{7B49CBB6-9395-4781-B616-F201889EEA13}']
{ Methods }
procedure Rollback(const InstructionIndex: Integer); overload;
/// Rollback the instruction stream for a program so that
/// the indicated instruction (via instructionIndex) is no
/// longer in the stream. UNTESTED!
///
procedure Rollback(const ProgramName: String;
const InstructionIndex: Integer); overload;
procedure DeleteProgram; overload;
/// Reset the program so that no instructions exist
procedure DeleteProgram(const ProgramName: String); overload;
procedure InsertAfter(const T: IToken; const Text: IANTLRInterface); overload;
procedure InsertAfter(const Index: Integer; const Text: IANTLRInterface); overload;
procedure InsertAfter(const ProgramName: String; const T: IToken;
const Text: IANTLRInterface); overload;
procedure InsertAfter(const ProgramName: String; const Index: Integer;
const Text: IANTLRInterface); overload;
procedure InsertAfter(const T: IToken; const Text: String); overload;
procedure InsertAfter(const Index: Integer; const Text: String); overload;
procedure InsertAfter(const ProgramName: String; const T: IToken;
const Text: String); overload;
procedure InsertAfter(const ProgramName: String; const Index: Integer;
const Text: String); overload;
procedure InsertBefore(const T: IToken; const Text: IANTLRInterface); overload;
procedure InsertBefore(const Index: Integer; const Text: IANTLRInterface); overload;
procedure InsertBefore(const ProgramName: String; const T: IToken;
const Text: IANTLRInterface); overload;
procedure InsertBefore(const ProgramName: String; const Index: Integer;
const Text: IANTLRInterface); overload;
procedure InsertBefore(const T: IToken; const Text: String); overload;
procedure InsertBefore(const Index: Integer; const Text: String); overload;
procedure InsertBefore(const ProgramName: String; const T: IToken;
const Text: String); overload;
procedure InsertBefore(const ProgramName: String; const Index: Integer;
const Text: String); overload;
procedure Replace(const Index: Integer; const Text: IANTLRInterface); overload;
procedure Replace(const Start, Stop: Integer; const Text: IANTLRInterface); overload;
procedure Replace(const IndexT: IToken; const Text: IANTLRInterface); overload;
procedure Replace(const Start, Stop: IToken; const Text: IANTLRInterface); overload;
procedure Replace(const ProgramName: String; const Start, Stop: Integer;
const Text: IANTLRInterface); overload;
procedure Replace(const ProgramName: String; const Start, Stop: IToken;
const Text: IANTLRInterface); overload;
procedure Replace(const Index: Integer; const Text: String); overload;
procedure Replace(const Start, Stop: Integer; const Text: String); overload;
procedure Replace(const IndexT: IToken; const Text: String); overload;
procedure Replace(const Start, Stop: IToken; const Text: String); overload;
procedure Replace(const ProgramName: String; const Start, Stop: Integer;
const Text: String); overload;
procedure Replace(const ProgramName: String; const Start, Stop: IToken;
const Text: String); overload;
procedure Delete(const Index: Integer); overload;
procedure Delete(const Start, Stop: Integer); overload;
procedure Delete(const IndexT: IToken); overload;
procedure Delete(const Start, Stop: IToken); overload;
procedure Delete(const ProgramName: String; const Start, Stop: Integer); overload;
procedure Delete(const ProgramName: String; const Start, Stop: IToken); overload;
function GetLastRewriteTokenIndex: Integer;
function ToOriginalString: String; overload;
function ToOriginalString(const Start, Stop: Integer): String; overload;
function ToString(const ProgramName: String): String; overload;
function ToString(const ProgramName: String;
const Start, Stop: Integer): String; overload;
function ToDebugString: String; overload;
function ToDebugString(const Start, Stop: Integer): String; overload;
end;
/// The root of the ANTLR exception hierarchy.
///
/// To avoid English-only error messages and to generally make things
/// as flexible as possible, these exceptions are not created with strings,
/// but rather the information necessary to generate an error. Then
/// the various reporting methods in Parser and Lexer can be overridden
/// to generate a localized error message. For example, MismatchedToken
/// exceptions are built with the expected token type.
/// So, don't expect getMessage() to return anything.
///
/// You can access the stack trace, which means that you can compute the
/// complete trace of rules from the start symbol. This gives you considerable
/// context information with which to generate useful error messages.
///
/// ANTLR generates code that throws exceptions upon recognition error and
/// also generates code to catch these exceptions in each rule. If you
/// want to quit upon first error, you can turn off the automatic error
/// handling mechanism using rulecatch action, but you still need to
/// override methods mismatch and recoverFromMismatchSet.
///
/// In general, the recognition exceptions can track where in a grammar a
/// problem occurred and/or what was the expected input. While the parser
/// knows its state (such as current input symbol and line info) that
/// state can change before the exception is reported so current token index
/// is computed and stored at exception time. From this info, you can
/// perhaps print an entire line of input not just a single token, for example.
/// Better to just say the recognizer had a problem and then let the parser
/// figure out a fancy report.
///
ERecognitionException = class(Exception)
strict private
FApproximateLineInfo: Boolean;
strict protected
/// What input stream did the error occur in?
FInput: IIntStream;
///
/// What is index of token/char were we looking at when the error occurred?
///
FIndex: Integer;
///
/// The current Token when an error occurred. Since not all streams
/// can retrieve the ith Token, we have to track the Token object.
///
FToken: IToken;
/// [Tree parser] Node with the problem.
FNode: IANTLRInterface;
/// The current char when an error occurred. For lexers.
FC: Integer;
/// Track the line at which the error occurred in case this is
/// generated from a lexer. We need to track this since the
/// unexpected char doesn't carry the line info.
///
FLine: Integer;
FCharPositionInLine: Integer;
strict protected
procedure ExtractInformationFromTreeNodeStream(const Input: IIntStream);
function GetUnexpectedType: Integer; virtual;
public
/// Used for remote debugger deserialization
constructor Create; overload;
constructor Create(const AMessage: String); overload;
constructor Create(const AInput: IIntStream); overload;
constructor Create(const AMessage: String; const AInput: IIntStream); overload;
///
/// If you are parsing a tree node stream, you will encounter some
/// imaginary nodes w/o line/col info. We now search backwards looking
/// for most recent token with line/col info, but notify getErrorHeader()
/// that info is approximate.
///
property ApproximateLineInfo: Boolean read FApproximateLineInfo write FApproximateLineInfo;
///
/// Returns the current Token when the error occurred (for parsers
/// although a tree parser might also set the token)
///
property Token: IToken read FToken write FToken;
///
/// Returns the [tree parser] node where the error occured (for tree parsers).
///
property Node: IANTLRInterface read FNode write FNode;
///
/// Returns the line at which the error occurred (for lexers)
///
property Line: Integer read FLine write FLine;
///
/// Returns the character position in the line when the error
/// occurred (for lexers)
///
property CharPositionInLine: Integer read FCharPositionInLine write FCharPositionInLine;
/// Returns the input stream in which the error occurred
property Input: IIntStream read FInput write FInput;
///
/// Returns the token type or char of the unexpected input element
///
property UnexpectedType: Integer read GetUnexpectedType;
///
/// Returns the current char when the error occurred (for lexers)
///
property Character: Integer read FC write FC;
///
/// Returns the token/char index in the stream when the error occurred
///
property Index: Integer read FIndex write FIndex;
end;
///
/// A mismatched char or Token or tree node.
///
EMismatchedTokenException = class(ERecognitionException)
strict private
FExpecting: Integer;
public
constructor Create(const AExpecting: Integer; const AInput: IIntStream);
function ToString: String; override;
property Expecting: Integer read FExpecting write FExpecting;
end;
EUnwantedTokenException = class(EMismatchedTokenException)
strict private
function GetUnexpectedToken: IToken;
public
property UnexpectedToken: IToken read GetUnexpectedToken;
function ToString: String; override;
end;
///
/// We were expecting a token but it's not found. The current token
/// is actually what we wanted next. Used for tree node errors too.
///
EMissingTokenException = class(EMismatchedTokenException)
strict private
FInserted: IANTLRInterface;
function GetMissingType: Integer;
public
constructor Create(const AExpecting: Integer; const AInput: IIntStream;
const AInserted: IANTLRInterface);
function ToString: String; override;
property MissingType: Integer read GetMissingType;
property Inserted: IANTLRInterface read FInserted write FInserted;
end;
EMismatchedTreeNodeException = class(ERecognitionException)
strict private
FExpecting: Integer;
public
constructor Create(const AExpecting: Integer; const AInput: IIntStream);
function ToString: String; override;
property Expecting: Integer read FExpecting write FExpecting;
end;
ENoViableAltException = class(ERecognitionException)
strict private
FGrammarDecisionDescription: String;
FDecisionNumber: Integer;
FStateNumber: Integer;
public
constructor Create(const AGrammarDecisionDescription: String;
const ADecisionNumber, AStateNumber: Integer; const AInput: IIntStream);
function ToString: String; override;
property GrammarDecisionDescription: String read FGrammarDecisionDescription;
property DecisionNumber: Integer read FDecisionNumber;
property StateNumber: Integer read FStateNumber;
end;
EEarlyExitException = class(ERecognitionException)
strict private
FDecisionNumber: Integer;
public
constructor Create(const ADecisionNumber: Integer; const AInput: IIntStream);
property DecisionNumber: Integer read FDecisionNumber;
end;
EMismatchedSetException = class(ERecognitionException)
strict private
FExpecting: IBitSet;
public
constructor Create(const AExpecting: IBitSet; const AInput: IIntStream);
function ToString: String; override;
property Expecting: IBitSet read FExpecting write FExpecting;
end;
EMismatchedNotSetException = class(EMismatchedSetException)
public
function ToString: String; override;
end;
EFailedPredicateException = class(ERecognitionException)
strict private
FRuleName: String;
FPredicateText: String;
public
constructor Create(const AInput: IIntStream; const ARuleName,
APredicateText: String);
function ToString: String; override;
property RuleName: String read FRuleName write FRuleName;
property PredicateText: String read FPredicateText write FPredicateText;
end;
EMismatchedRangeException = class(ERecognitionException)
strict private
FA: Integer;
FB: Integer;
public
constructor Create(const AA, AB: Integer; const AInput: IIntStream);
function ToString: String; override;
property A: Integer read FA write FA;
property B: Integer read FB write FB;
end;
type
TCharStreamState = class(TANTLRObject, ICharStreamState)
strict private
FP: Integer;
FLine: Integer;
FCharPositionInLine: Integer;
protected
{ ICharStreamState }
function GetP: Integer;
procedure SetP(const Value: Integer);
function GetLine: Integer;
procedure SetLine(const Value: Integer);
function GetCharPositionInLine: Integer;
procedure SetCharPositionInLine(const Value: Integer);
end;
type
TANTLRStringStream = class(TANTLRObject, IANTLRStringStream, ICharStream)
private
FData: PChar;
FOwnsData: Boolean;
/// How many characters are actually in the buffer?
FN: Integer;
/// Current line number within the input (1..n )
FLine: Integer;
/// Index in our array for the next char (0..n-1)
FP: Integer;
///
/// The index of the character relative to the beginning of the
/// line (0..n-1)
///
FCharPositionInLine: Integer;
///
/// Tracks the depth of nested calls
///
FMarkDepth: Integer;
///
/// A list of CharStreamState objects that tracks the stream state
/// (i.e. line, charPositionInLine, and p) that can change as you
/// move through the input stream. Indexed from 1..markDepth.
/// A null is kept @ index 0. Create upon first call to Mark().
///
FMarkers: IList;
///
/// Track the last Mark() call result value for use in Rewind().
///
FLastMarker: Integer;
///
/// What is name or source of this char stream?
///
FName: String;
protected
{ IIntStream }
function GetSourceName: String; virtual;
procedure Consume; virtual;
function LA(I: Integer): Integer; virtual;
function LAChar(I: Integer): Char;
function Index: Integer;
function Size: Integer;
function Mark: Integer; virtual;
procedure Rewind(const Marker: Integer); overload; virtual;
procedure Rewind; overload; virtual;
procedure Release(const Marker: Integer); virtual;
procedure Seek(const Index: Integer); virtual;
property SourceName: String read GetSourceName write FName;
protected
{ ICharStream }
function GetLine: Integer; virtual;
procedure SetLine(const Value: Integer); virtual;
function GetCharPositionInLine: Integer; virtual;
procedure SetCharPositionInLine(const Value: Integer); virtual;
function LT(const I: Integer): Integer; virtual;
function Substring(const Start, Stop: Integer): String; virtual;
protected
{ IANTLRStringStream }
procedure Reset; virtual;
public
constructor Create; overload;
///
/// Initializes a new instance of the ANTLRStringStream class for the
/// specified string. This copies data from the string to a local
/// character array
///
constructor Create(const AInput: String); overload;
///
/// Initializes a new instance of the ANTLRStringStream class for the
/// specified character array. This is the preferred constructor as
/// no data is copied
///
constructor Create(const AData: PChar;
const ANumberOfActualCharsInArray: Integer); overload;
destructor Destroy; override;
end;
TANTLRFileStream = class(TANTLRStringStream, IANTLRFileStream)
strict private
/// Fully qualified name of the stream's underlying file
FFileName: String;
protected
{ IIntStream }
function GetSourceName: String; override;
protected
{ IANTLRFileStream }
procedure Load(const FileName: String; const Encoding: TEncoding); virtual;
public
///
/// Initializes a new instance of the ANTLRFileStream class for the
/// specified file name
///
constructor Create(const AFileName: String); overload;
///
/// Initializes a new instance of the ANTLRFileStream class for the
/// specified file name and encoding
///
constructor Create(const AFileName: String; const AEncoding: TEncoding); overload;
end;
TBitSet = class(TANTLRObject, IBitSet, ICloneable)
strict private
const
BITS = 64; // number of bits / ulong
LOG_BITS = 6; // 2 shl 6 = 64
/// We will often need to do a mod operator (i mod nbits).
/// Its turns out that, for powers of two, this mod operation is
/// same as . Since mod is slow, we use a precomputed
/// mod mask to do the mod instead.
///
MOD_MASK = BITS - 1;
strict private
/// The actual data bits
FBits: TUInt64Array;
strict private
class function WordNumber(const Bit: Integer): Integer; static;
class function BitMask(const BitNumber: Integer): UInt64; static;
class function NumWordsToHold(const El: Integer): Integer; static;
protected
{ ICloneable }
function Clone: IANTLRInterface; virtual;
protected
{ IBitSet }
function GetIsNil: Boolean; virtual;
function BitSetOr(const A: IBitSet): IBitSet; virtual;
procedure Add(const El: Integer); virtual;
procedure GrowToInclude(const Bit: Integer); virtual;
procedure OrInPlace(const A: IBitSet); virtual;
function Size: Integer; virtual;
function Member(const El: Integer): Boolean; virtual;
procedure Remove(const El: Integer); virtual;
function NumBits: Integer; virtual;
function LengthInLongWords: Integer; virtual;
function ToArray: TIntegerArray; virtual;
function ToPackedArray: TUInt64Array; virtual;
function ToString(const TokenNames: TStringArray): String; reintroduce; overload; virtual;
public
/// Construct a bitset of size one word (64 bits)
constructor Create; overload;
/// Construction from a static array of ulongs
constructor Create(const ABits: array of UInt64); overload;
/// Construction from a list of integers
constructor Create(const AItems: IList); overload;
/// Construct a bitset given the size
/// The size of the bitset in bits
constructor Create(const ANBits: Integer); overload;
class function BitSetOf(const El: Integer): IBitSet; overload; static;
class function BitSetOf(const A, B: Integer): IBitSet; overload; static;
class function BitSetOf(const A, B, C: Integer): IBitSet; overload; static;
class function BitSetOf(const A, B, C, D: Integer): IBitSet; overload; static;
function ToString: String; overload; override;
function Equals(Obj: TObject): Boolean; override;
end;
TRecognizerSharedState = class(TANTLRObject, IRecognizerSharedState)
strict private
FFollowing: TBitSetArray;
FFollowingStackPointer: Integer;
FErrorRecovery: Boolean;
FLastErrorIndex: Integer;
FFailed: Boolean;
FSyntaxErrors: Integer;
FBacktracking: Integer;
FRuleMemo: TDictionaryArray;
FToken: IToken;
FTokenStartCharIndex: Integer;
FTokenStartLine: Integer;
FTokenStartCharPositionInLine: Integer;
FChannel: Integer;
FTokenType: Integer;
FText: String;
protected
{ IRecognizerSharedState }
function GetFollowing: TBitSetArray;
procedure SetFollowing(const Value: TBitSetArray);
function GetFollowingStackPointer: Integer;
procedure SetFollowingStackPointer(const Value: Integer);
function GetErrorRecovery: Boolean;
procedure SetErrorRecovery(const Value: Boolean);
function GetLastErrorIndex: Integer;
procedure SetLastErrorIndex(const Value: Integer);
function GetFailed: Boolean;
procedure SetFailed(const Value: Boolean);
function GetSyntaxErrors: Integer;
procedure SetSyntaxErrors(const Value: Integer);
function GetBacktracking: Integer;
procedure SetBacktracking(const Value: Integer);
function GetRuleMemo: TDictionaryArray;
function GetRuleMemoCount: Integer;
procedure SetRuleMemoCount(const Value: Integer);
function GetToken: IToken;
procedure SetToken(const Value: IToken);
function GetTokenStartCharIndex: Integer;
procedure SetTokenStartCharIndex(const Value: Integer);
function GetTokenStartLine: Integer;
procedure SetTokenStartLine(const Value: Integer);
function GetTokenStartCharPositionInLine: Integer;
procedure SetTokenStartCharPositionInLine(const Value: Integer);
function GetChannel: Integer;
procedure SetChannel(const Value: Integer);
function GetTokenType: Integer;
procedure SetTokenType(const Value: Integer);
function GetText: String;
procedure SetText(const Value: String);
public
constructor Create;
end;
TCommonToken = class(TANTLRObject, ICommonToken, IToken)
strict protected
FTokenType: Integer;
FLine: Integer;
FCharPositionInLine: Integer;
FChannel: Integer;
FInput: ICharStream;
/// We need to be able to change the text once in a while. If
/// this is non-null, then getText should return this. Note that
/// start/stop are not affected by changing this.
///
FText: String;
/// What token number is this from 0..n-1 tokens; < 0 implies invalid index
FIndex: Integer;
/// The char position into the input buffer where this token starts
FStart: Integer;
/// The char position into the input buffer where this token stops
FStop: Integer;
protected
{ IToken }
function GetTokenType: Integer; virtual;
procedure SetTokenType(const Value: Integer); virtual;
function GetLine: Integer; virtual;
procedure SetLine(const Value: Integer); virtual;
function GetCharPositionInLine: Integer; virtual;
procedure SetCharPositionInLine(const Value: Integer); virtual;
function GetChannel: Integer; virtual;
procedure SetChannel(const Value: Integer); virtual;
function GetTokenIndex: Integer; virtual;
procedure SetTokenIndex(const Value: Integer); virtual;
function GetText: String; virtual;
procedure SetText(const Value: String); virtual;
protected
{ ICommonToken }
function GetStartIndex: Integer;
procedure SetStartIndex(const Value: Integer);
function GetStopIndex: Integer;
procedure SetStopIndex(const Value: Integer);
function GetInputStream: ICharStream;
procedure SetInputStream(const Value: ICharStream);
protected
constructor Create; overload;
public
constructor Create(const ATokenType: Integer); overload;
constructor Create(const AInput: ICharStream; const ATokenType, AChannel,
AStart, AStop: Integer); overload;
constructor Create(const ATokenType: Integer; const AText: String); overload;
constructor Create(const AOldToken: IToken); overload;
function ToString: String; override;
end;
TClassicToken = class(TANTLRObject, IClassicToken, IToken)
strict private
FText: String;
FTokenType: Integer;
FLine: Integer;
FCharPositionInLine: Integer;
FChannel: Integer;
/// What token number is this from 0..n-1 tokens
FIndex: Integer;
protected
{ IClassicToken }
function GetTokenType: Integer; virtual;
procedure SetTokenType(const Value: Integer); virtual;
function GetLine: Integer; virtual;
procedure SetLine(const Value: Integer); virtual;
function GetCharPositionInLine: Integer; virtual;
procedure SetCharPositionInLine(const Value: Integer); virtual;
function GetChannel: Integer; virtual;
procedure SetChannel(const Value: Integer); virtual;
function GetTokenIndex: Integer; virtual;
procedure SetTokenIndex(const Value: Integer); virtual;
function GetText: String; virtual;
procedure SetText(const Value: String); virtual;
function GetInputStream: ICharStream; virtual;
procedure SetInputStream(const Value: ICharStream); virtual;
public
constructor Create(const ATokenType: Integer); overload;
constructor Create(const AOldToken: IToken); overload;
constructor Create(const ATokenType: Integer; const AText: String); overload;
constructor Create(const ATokenType: Integer; const AText: String;
const AChannel: Integer); overload;
function ToString: String; override;
end;
TToken = class sealed
public
const
EOR_TOKEN_TYPE = 1;
/// imaginary tree navigation type; traverse "get child" link
DOWN = 2;
/// imaginary tree navigation type; finish with a child list
UP = 3;
MIN_TOKEN_TYPE = UP + 1;
EOF = Integer(cscEOF);
INVALID_TOKEN_TYPE = 0;
///
/// All tokens go to the parser (unless skip() is called in that rule)
/// on a particular "channel". The parser tunes to a particular channel
/// so that whitespace etc... can go to the parser on a "hidden" channel.
///
DEFAULT_CHANNEL = 0;
///
/// Anything on different channel than DEFAULT_CHANNEL is not parsed by parser.
///
HIDDEN_CHANNEL = 99;
public
class var
EOF_TOKEN: IToken;
INVALID_TOKEN: IToken;
///
/// In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR
/// will avoid creating a token for this symbol and try to fetch another.
///
SKIP_TOKEN: IToken;
private
class procedure Initialize; static;
end;
///
/// Global constants
///
TConstants = class sealed
public
const
VERSION = '3.1b1';
// Moved to version 2 for v3.1: added grammar name to enter/exit Rule
DEBUG_PROTOCOL_VERSION = '2';
ANTLRWORKS_DIR = 'antlrworks';
end;
TBaseRecognizer = class abstract(TANTLRObject, IBaseRecognizer)
public
const
MEMO_RULE_FAILED = -2;
MEMO_RULE_UNKNOWN = -1;
INITIAL_FOLLOW_STACK_SIZE = 100;
NEXT_TOKEN_RULE_NAME = 'nextToken';
// copies from Token object for convenience in actions
DEFAULT_TOKEN_CHANNEL = TToken.DEFAULT_CHANNEL;
HIDDEN = TToken.HIDDEN_CHANNEL;
strict protected
///
/// An externalized representation of the - shareable - internal state of
/// this lexer, parser or tree parser.
///
///
/// The state of a lexer, parser, or tree parser are collected into
/// external state objects so that the state can be shared. This sharing
/// is needed to have one grammar import others and share same error
/// variables and other state variables. It's a kind of explicit multiple
/// inheritance via delegation of methods and shared state.
///
FState: IRecognizerSharedState;
property State: IRecognizerSharedState read FState;
strict protected
///
/// Match needs to return the current input symbol, which gets put
/// into the label for the associated token ref; e.g., x=ID. Token
/// and tree parsers need to return different objects. Rather than test
/// for input stream type or change the IntStream interface, I use
/// a simple method to ask the recognizer to tell me what the current
/// input symbol is.
///
/// This is ignored for lexers.
function GetCurrentInputSymbol(const Input: IIntStream): IANTLRInterface; virtual;
///
/// Factor out what to do upon token mismatch so tree parsers can behave
/// differently. Override and call MismatchRecover(input, ttype, follow)
/// to get single token insertion and deletion. Use this to turn off
/// single token insertion and deletion. Override mismatchRecover
/// to call this instead.
///
procedure Mismatch(const Input: IIntStream; const TokenType: Integer;
const Follow: IBitSet); virtual;
///
/// Attempt to Recover from a single missing or extra token.
///
///
/// EXTRA TOKEN
///
/// LA(1) is not what we are looking for. If LA(2) has the right token,
/// however, then assume LA(1) is some extra spurious token. Delete it
/// and LA(2) as if we were doing a normal Match(), which advances the
/// input.
///
/// MISSING TOKEN
///
/// If current token is consistent with what could come after
/// ttype then it is ok to "insert" the missing token, else throw
/// exception For example, Input "i=(3;" is clearly missing the
/// ')'. When the parser returns from the nested call to expr, it
/// will have call chain:
///
/// stat -> expr -> atom
///
/// and it will be trying to Match the ')' at this point in the
/// derivation:
///
/// => ID '=' '(' INT ')' ('+' atom)* ';'
/// ^
/// Match() will see that ';' doesn't Match ')' and report a
/// mismatched token error. To Recover, it sees that LA(1)==';'
/// is in the set of tokens that can follow the ')' token
/// reference in rule atom. It can assume that you forgot the ')'.
///
function RecoverFromMismatchedToken(const Input: IIntStream;
const TokenType: Integer; const Follow: IBitSet): IANTLRInterface; virtual;
///
/// Conjure up a missing token during error recovery.
///
///
/// The recognizer attempts to recover from single missing
/// symbols. But, actions might refer to that missing symbol.
/// For example, x=ID {f($x);}. The action clearly assumes
/// that there has been an identifier matched previously and that
/// $x points at that token. If that token is missing, but
/// the next token in the stream is what we want we assume that
/// this token is missing and we keep going. Because we
/// have to return some token to replace the missing token,
/// we have to conjure one up. This method gives the user control
/// over the tokens returned for missing tokens. Mostly,
/// you will want to create something special for identifier
/// tokens. For literals such as '{' and ',', the default
/// action in the parser or tree parser works. It simply creates
/// a CommonToken of the appropriate type. The text will be the token.
/// If you change what tokens must be created by the lexer,
/// override this method to create the appropriate tokens.
///
function GetMissingSymbol(const Input: IIntStream;
const E: ERecognitionException; const ExpectedTokenType: Integer;
const Follow: IBitSet): IANTLRInterface; virtual;
///
/// Push a rule's follow set using our own hardcoded stack
///
///
procedure PushFollow(const FSet: IBitSet);
/// Compute the context-sensitive FOLLOW set for current rule.
/// This is set of token types that can follow a specific rule
/// reference given a specific call chain. You get the set of
/// viable tokens that can possibly come next (lookahead depth 1)
/// given the current call chain. Contrast this with the
/// definition of plain FOLLOW for rule r:
///
/// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
///
/// where x in T* and alpha, beta in V*; T is set of terminals and
/// V is the set of terminals and nonterminals. In other words,
/// FOLLOW(r) is the set of all tokens that can possibly follow
/// references to r in *any* sentential form (context). At
/// runtime, however, we know precisely which context applies as
/// we have the call chain. We may compute the exact (rather
/// than covering superset) set of following tokens.
///
/// For example, consider grammar:
///
/// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
/// | "return" expr '.'
/// ;
/// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
/// atom : INT // FOLLOW(atom)=={'+',')',';','.'}
/// | '(' expr ')'
/// ;
///
/// The FOLLOW sets are all inclusive whereas context-sensitive
/// FOLLOW sets are precisely what could follow a rule reference.
/// For input input "i=(3);", here is the derivation:
///
/// stat => ID '=' expr ';'
/// => ID '=' atom ('+' atom)* ';'
/// => ID '=' '(' expr ')' ('+' atom)* ';'
/// => ID '=' '(' atom ')' ('+' atom)* ';'
/// => ID '=' '(' INT ')' ('+' atom)* ';'
/// => ID '=' '(' INT ')' ';'
///
/// At the "3" token, you'd have a call chain of
///
/// stat -> expr -> atom -> expr -> atom
///
/// What can follow that specific nested ref to atom? Exactly ')'
/// as you can see by looking at the derivation of this specific
/// input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
///
/// You want the exact viable token set when recovering from a
/// token mismatch. Upon token mismatch, if LA(1) is member of
/// the viable next token set, then you know there is most likely
/// a missing token in the input stream. "Insert" one by just not
/// throwing an exception.
///
function ComputeContextSensitiveRuleFOLLOW: IBitSet; virtual;
(* Compute the error recovery set for the current rule. During
* rule invocation, the parser pushes the set of tokens that can
* follow that rule reference on the stack; this amounts to
* computing FIRST of what follows the rule reference in the
* enclosing rule. This local follow set only includes tokens
* from within the rule; i.e., the FIRST computation done by
* ANTLR stops at the end of a rule.
*
* EXAMPLE
*
* When you find a "no viable alt exception", the input is not
* consistent with any of the alternatives for rule r. The best
* thing to do is to consume tokens until you see something that
* can legally follow a call to r *or* any rule that called r.
* You don't want the exact set of viable next tokens because the
* input might just be missing a token--you might consume the
* rest of the input looking for one of the missing tokens.
*
* Consider grammar:
*
* a : '[' b ']'
* | '(' b ')'
* ;
* b : c '^' INT ;
* c : ID
* | INT
* ;
*
* At each rule invocation, the set of tokens that could follow
* that rule is pushed on a stack. Here are the various "local"
* follow sets:
*
* FOLLOW(b1_in_a) = FIRST(']') = ']'
* FOLLOW(b2_in_a) = FIRST(')') = ')'
* FOLLOW(c_in_b) = FIRST('^') = '^'
*
* Upon erroneous input "[]", the call chain is
*
* a -> b -> c
*
* and, hence, the follow context stack is:
*
* depth local follow set after call to rule
* 0 a (from main())
* 1 ']' b
* 3 '^' c
*
* Notice that ')' is not included, because b would have to have
* been called from a different context in rule a for ')' to be
* included.
*
* For error recovery, we cannot consider FOLLOW(c)
* (context-sensitive or otherwise). We need the combined set of
* all context-sensitive FOLLOW sets--the set of all tokens that
* could follow any reference in the call chain. We need to
* resync to one of those tokens. Note that FOLLOW(c)='^' and if
* we resync'd to that token, we'd consume until EOF. We need to
* sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
* In this case, for input "[]", LA(1) is in this set so we would
* not consume anything and after printing an error rule c would
* return normally. It would not find the required '^' though.
* At this point, it gets a mismatched token error and throws an
* exception (since LA(1) is not in the viable following token
* set). The rule exception handler tries to Recover, but finds
* the same recovery set and doesn't consume anything. Rule b
* exits normally returning to rule a. Now it finds the ']' (and
* with the successful Match exits errorRecovery mode).
*
* So, you cna see that the parser walks up call chain looking
* for the token that was a member of the recovery set.
*
* Errors are not generated in errorRecovery mode.
*
* ANTLR's error recovery mechanism is based upon original ideas:
*
* "Algorithms + Data Structures = Programs" by Niklaus Wirth
*
* and
*
* "A note on error recovery in recursive descent parsers":
* http://portal.acm.org/citation.cfm?id=947902.947905
*
* Later, Josef Grosch had some good ideas:
*
* "Efficient and Comfortable Error Recovery in Recursive Descent
* Parsers":
* ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
*
* Like Grosch I implemented local FOLLOW sets that are combined
* at run-time upon error to avoid overhead during parsing.
*)
function ComputeErrorRecoverySet: IBitSet; virtual;
function CombineFollows(const Exact: Boolean): IBitSet;
protected
{ IBaseRecognizer }
function GetInput: IIntStream; virtual; abstract;
function GetBacktrackingLevel: Integer;
function GetState: IRecognizerSharedState;
function GetNumberOfSyntaxErrors: Integer;
function GetGrammarFileName: String; virtual;
function GetSourceName: String; virtual; abstract;
function GetTokenNames: TStringArray; virtual;
procedure BeginBacktrack(const Level: Integer); virtual;
procedure EndBacktrack(const Level: Integer; const Successful: Boolean); virtual;
procedure Reset; virtual;
function Match(const Input: IIntStream; const TokenType: Integer;
const Follow: IBitSet): IANTLRInterface; virtual;
function MismatchIsUnwantedToken(const Input: IIntStream;
const TokenType: Integer): Boolean;
function MismatchIsMissingToken(const Input: IIntStream;
const Follow: IBitSet): Boolean;
procedure BeginResync; virtual;
procedure EndResync; virtual;
procedure ReportError(const E: ERecognitionException); virtual;
procedure MatchAny(const Input: IIntStream); virtual;
procedure DisplayRecognitionError(const TokenNames: TStringArray;
const E: ERecognitionException); virtual;
function GetErrorMessage(const E: ERecognitionException;
const TokenNames: TStringArray): String; virtual;
function GetErrorHeader(const E: ERecognitionException): String; virtual;
function GetTokenErrorDisplay(const T: IToken): String; virtual;
procedure EmitErrorMessage(const Msg: String); virtual;
procedure Recover(const Input: IIntStream; const RE: ERecognitionException); virtual;
function RecoverFromMismatchedSet(const Input: IIntStream;
const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface; virtual;
procedure ConsumeUntil(const Input: IIntStream; const TokenType: Integer); overload; virtual;
procedure ConsumeUntil(const Input: IIntStream; const BitSet: IBitSet); overload; virtual;
//function GetRuleInvocationStack: IList; overload; virtual;
//function GetRuleInvocationStack(const E: Exception;
// const RecognizerClassName: String): IList; overload;
function ToStrings(const Tokens: IList): IList; virtual;
function GetRuleMemoization(const RuleIndex, RuleStartIndex: Integer): Integer; virtual;
function AlreadyParsedRule(const Input: IIntStream;
const RuleIndex: Integer): Boolean; virtual;
procedure Memoize(const Input: IIntStream; const RuleIndex,
RuleStartIndex: Integer); virtual;
function GetRuleMemoizationChaceSize: Integer;
procedure TraceIn(const RuleName: String; const RuleIndex: Integer;
const InputSymbol: String); virtual;
procedure TraceOut(const RuleName: String; const RuleIndex: Integer;
const InputSymbol: String); virtual;
property Input: IIntStream read GetInput;
public
constructor Create; overload;
constructor Create(const AState: IRecognizerSharedState); overload;
end;
TCommonTokenStream = class(TANTLRObject, ICommonTokenStream, ITokenStream)
strict private
FTokenSource: ITokenSource;
/// Record every single token pulled from the source so we can reproduce
/// chunks of it later.
///
FTokens: IList;
/// ]]> to override some Tokens' channel numbers
FChannelOverrideMap: IDictionary;
/// ;]]> discard any tokens with this type
FDiscardSet: IHashList;
/// Skip tokens on any channel but this one; this is how we skip whitespace...
FChannel: Integer;
/// By default, track all incoming tokens
FDiscardOffChannelTokens: Boolean;
/// Track the last Mark() call result value for use in Rewind().
FLastMarker: Integer;
///
/// The index into the tokens list of the current token (next token
/// to consume). p==-1 indicates that the tokens list is empty
///
FP: Integer;
strict protected
/// Load all tokens from the token source and put in tokens.
/// This is done upon first LT request because you might want to
/// set some token type / channel overrides before filling buffer.
///
procedure FillBuffer; virtual;
/// Look backwards k tokens on-channel tokens
function LB(const K: Integer): IToken; virtual;
/// Given a starting index, return the index of the first on-channel
/// token.
///
function SkipOffTokenChannels(const I: Integer): Integer; virtual;
function SkipOffTokenChannelsReverse(const I: Integer): Integer; virtual;
protected
{ IIntStream }
function GetSourceName: String; virtual;
procedure Consume; virtual;
function LA(I: Integer): Integer; virtual;
function LAChar(I: Integer): Char;
function Mark: Integer; virtual;
function Index: Integer; virtual;
procedure Rewind(const Marker: Integer); overload; virtual;
procedure Rewind; overload; virtual;
procedure Release(const Marker: Integer); virtual;
procedure Seek(const Index: Integer); virtual;
function Size: Integer; virtual;
protected
{ ITokenStream }
function GetTokenSource: ITokenSource; virtual;
procedure SetTokenSource(const Value: ITokenSource); virtual;
function LT(const K: Integer): IToken; virtual;
function Get(const I: Integer): IToken; virtual;
function ToString(const Start, Stop: Integer): String; reintroduce; overload; virtual;
function ToString(const Start, Stop: IToken): String; reintroduce; overload; virtual;
protected
{ ICommonTokenStream }
procedure SetTokenTypeChannel(const TType, Channel: Integer);
procedure DiscardTokenType(const TType: Integer);
procedure DiscardOffChannelTokens(const Discard: Boolean);
function GetTokens: IList; overload;
function GetTokens(const Start, Stop: Integer): IList; overload;
function GetTokens(const Start, Stop: Integer;
const Types: IBitSet): IList; overload;
function GetTokens(const Start, Stop: Integer;
const Types: IList): IList; overload;
function GetTokens(const Start, Stop,
TokenType: Integer): IList; overload;
procedure Reset; virtual;
public
constructor Create; overload;
constructor Create(const ATokenSource: ITokenSource); overload;
constructor Create(const ATokenSource: ITokenSource;
const AChannel: Integer); overload;
constructor Create(const ALexer: ILexer); overload;
constructor Create(const ALexer: ILexer;
const AChannel: Integer); overload;
function ToString: String; overload; override;
end;
TDFA = class abstract(TANTLRObject, IDFA)
strict private
FSpecialStateTransitionHandler: TSpecialStateTransitionHandler;
FEOT: TSmallintArray;
FEOF: TSmallintArray;
FMin: TCharArray;
FMax: TCharArray;
FAccept: TSmallintArray;
FSpecial: TSmallintArray;
FTransition: TSmallintMatrix;
FDecisionNumber: Integer;
FRecognizer: Pointer; { IBaseRecognizer }
function GetRecognizer: IBaseRecognizer;
procedure SetRecognizer(const Value: IBaseRecognizer);
strict protected
procedure NoViableAlt(const S: Integer; const Input: IIntStream);
property Recognizer: IBaseRecognizer read GetRecognizer write SetRecognizer;
property DecisionNumber: Integer read FDecisionNumber write FDecisionNumber;
property EOT: TSmallintArray read FEOT write FEOT;
property EOF: TSmallintArray read FEOF write FEOF;
property Min: TCharArray read FMin write FMin;
property Max: TCharArray read FMax write FMax;
property Accept: TSmallintArray read FAccept write FAccept;
property Special: TSmallintArray read FSpecial write FSpecial;
property Transition: TSmallintMatrix read FTransition write FTransition;
protected
{ IDFA }
function GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler;
procedure SetSpecialStateTransitionHandler(const Value: TSpecialStateTransitionHandler);
function Predict(const Input: IIntStream): Integer;
procedure Error(const NVAE: ENoViableAltException); virtual;
function SpecialStateTransition(const S: Integer;
const Input: IIntStream): Integer; virtual;
function Description: String; virtual;
function SpecialTransition(const State, Symbol: Integer): Integer;
public
class function UnpackEncodedString(const EncodedString: String): TSmallintArray; static;
class function UnpackEncodedStringArray(const EncodedStrings: TStringArray): TSmallintMatrix; overload; static;
class function UnpackEncodedStringArray(const EncodedStrings: array of String): TSmallintMatrix; overload; static;
class function UnpackEncodedStringToUnsignedChars(const EncodedString: String): TCharArray; static;
end;
TLexer = class abstract(TBaseRecognizer, ILexer, ITokenSource)
strict private
const
TOKEN_dot_EOF = Ord(cscEOF);
strict private
/// Where is the lexer drawing characters from?
FInput: ICharStream;
protected
{ IBaseRecognizer }
function GetSourceName: String; override;
function GetInput: IIntStream; override;
procedure Reset; override;
procedure ReportError(const E: ERecognitionException); override;
function GetErrorMessage(const E: ERecognitionException;
const TokenNames: TStringArray): String; override;
protected
{ ILexer }
function GetCharStream: ICharStream; virtual;
procedure SetCharStream(const Value: ICharStream); virtual;
function GetLine: Integer; virtual;
function GetCharPositionInLine: Integer; virtual;
function GetCharIndex: Integer; virtual;
function GetText: String; virtual;
procedure SetText(const Value: String); virtual;
function NextToken: IToken; virtual;
procedure Skip;
procedure DoTokens; virtual; abstract;
procedure Emit(const Token: IToken); overload; virtual;
function Emit: IToken; overload; virtual;
procedure Match(const S: String); reintroduce; overload; virtual;
procedure Match(const C: Integer); reintroduce; overload; virtual;
procedure MatchAny; reintroduce; overload; virtual;
procedure MatchRange(const A, B: Integer); virtual;
procedure Recover(const RE: ERecognitionException); reintroduce; overload; virtual;
function GetCharErrorDisplay(const C: Integer): String;
procedure TraceIn(const RuleName: String; const RuleIndex: Integer); reintroduce; overload; virtual;
procedure TraceOut(const RuleName: String; const RuleIndex: Integer); reintroduce; overload; virtual;
strict protected
property Input: ICharStream read FInput;
property CharIndex: Integer read GetCharIndex;
property Text: String read GetText write SetText;
public
constructor Create; overload;
constructor Create(const AInput: ICharStream); overload;
constructor Create(const AInput: ICharStream;
const AState: IRecognizerSharedState); overload;
end;
TParser = class(TBaseRecognizer, IParser)
strict private
FInput: ITokenStream;
protected
property Input: ITokenStream read FInput;
protected
{ IBaseRecognizer }
procedure Reset; override;
function GetCurrentInputSymbol(const Input: IIntStream): IANTLRInterface; override;
function GetMissingSymbol(const Input: IIntStream;
const E: ERecognitionException; const ExpectedTokenType: Integer;
const Follow: IBitSet): IANTLRInterface; override;
function GetSourceName: String; override;
function GetInput: IIntStream; override;
protected
{ IParser }
function GetTokenStream: ITokenStream; virtual;
procedure SetTokenStream(const Value: ITokenStream); virtual;
procedure TraceIn(const RuleName: String; const RuleIndex: Integer); reintroduce; overload;
procedure TraceOut(const RuleName: String; const RuleIndex: Integer); reintroduce; overload;
public
constructor Create(const AInput: ITokenStream); overload;
constructor Create(const AInput: ITokenStream;
const AState: IRecognizerSharedState); overload;
end;
TRuleReturnScope = class(TANTLRObject, IRuleReturnScope)
protected
{ IRuleReturnScope }
function GetStart: IANTLRInterface; virtual;
procedure SetStart(const Value: IANTLRInterface); virtual;
function GetStop: IANTLRInterface; virtual;
procedure SetStop(const Value: IANTLRInterface); virtual;
function GetTree: IANTLRInterface; virtual;
procedure SetTree(const Value: IANTLRInterface); virtual;
function GetTemplate: IANTLRInterface; virtual;
end;
TParserRuleReturnScope = class(TRuleReturnScope, IParserRuleReturnScope)
strict private
FStart: IToken;
FStop: IToken;
protected
{ IRuleReturnScope }
function GetStart: IANTLRInterface; override;
procedure SetStart(const Value: IANTLRInterface); override;
function GetStop: IANTLRInterface; override;
procedure SetStop(const Value: IANTLRInterface); override;
end;
TTokenRewriteStream = class(TCommonTokenStream, ITokenRewriteStream)
public
const
DEFAULT_PROGRAM_NAME = 'default';
PROGRAM_INIT_SIZE = 100;
MIN_TOKEN_INDEX = 0;
strict protected
// Define the rewrite operation hierarchy
type
IRewriteOperation = interface(IANTLRInterface)
['{285A54ED-58FF-44B1-A268-2686476D4419}']
{ Property accessors }
function GetInstructionIndex: Integer;
procedure SetInstructionIndex(const Value: Integer);
function GetIndex: Integer;
procedure SetIndex(const Value: Integer);
function GetText: IANTLRInterface;
procedure SetText(const Value: IANTLRInterface);
function GetParent: ITokenRewriteStream;
procedure SetParent(const Value: ITokenRewriteStream);
{ Methods }
/// Execute the rewrite operation by possibly adding to the buffer.
/// Return the index of the next token to operate on.
///
function Execute(const Buf: TStringBuilder): Integer;
{ Properties }
property InstructionIndex: Integer read GetInstructionIndex write SetInstructionIndex;
property Index: Integer read GetIndex write SetIndex;
property Text: IANTLRInterface read GetText write SetText;
property Parent: ITokenRewriteStream read GetParent write SetParent;
end;
TRewriteOperation = class(TANTLRObject, IRewriteOperation)
strict private
// What index into rewrites List are we?
FInstructionIndex: Integer;
// Token buffer index
FIndex: Integer;
FText: IANTLRInterface;
FParent: Pointer; {ITokenRewriteStream;}
protected
{ IRewriteOperation }
function GetInstructionIndex: Integer;
procedure SetInstructionIndex(const Value: Integer);
function GetIndex: Integer;
procedure SetIndex(const Value: Integer);
function GetText: IANTLRInterface;
procedure SetText(const Value: IANTLRInterface);
function GetParent: ITokenRewriteStream;
procedure SetParent(const Value: ITokenRewriteStream);
function Execute(const Buf: TStringBuilder): Integer; virtual;
protected
constructor Create(const AIndex: Integer; const AText: IANTLRInterface;
const AParent: ITokenRewriteStream);
property Index: Integer read FIndex write FIndex;
property Text: IANTLRInterface read FText write FText;
property Parent: ITokenRewriteStream read GetParent write SetParent;
public
function ToString: String; override;
end;
IInsertBeforeOp = interface(IRewriteOperation)
['{BFB732E2-BE6A-4691-AE3B-5C8013DE924E}']
end;
TInsertBeforeOp = class(TRewriteOperation, IInsertBeforeOp)
protected
{ IRewriteOperation }
function Execute(const Buf: TStringBuilder): Integer; override;
end;
/// I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
/// instructions.
///
IReplaceOp = interface(IRewriteOperation)
['{630C434A-99EA-4589-A65D-64A7B3DAC407}']
{ Property accessors }
function GetLastIndex: Integer;
procedure SetLastIndex(const Value: Integer);
{ Properties }
property LastIndex: Integer read GetLastIndex write SetLastIndex;
end;
TReplaceOp = class(TRewriteOperation, IReplaceOp)
private
FLastIndex: Integer;
protected
{ IRewriteOperation }
function Execute(const Buf: TStringBuilder): Integer; override;
protected
{ IReplaceOp }
function GetLastIndex: Integer;
procedure SetLastIndex(const Value: Integer);
public
constructor Create(const AStart, AStop: Integer;
const AText: IANTLRInterface; const AParent: ITokenRewriteStream);
function ToString: String; override;
end;
IDeleteOp = interface(IRewriteOperation)
['{C39345BC-F170-4C3A-A989-65E6B9F0712B}']
end;
TDeleteOp = class(TReplaceOp)
public
function ToString: String; override;
end;
strict private
type
TRewriteOpComparer = class(TComparer)
public
function Compare(const Left, Right: T): Integer; override;
end;
strict private
/// You may have multiple, named streams of rewrite operations.
/// I'm calling these things "programs."
/// Maps String (name) -> rewrite (IList)
///
FPrograms: IDictionary>;
/// Map String (program name) -> Integer index
FLastRewriteTokenIndexes: IDictionary;
strict private
function InitializeProgram(const Name: String): IList;
protected
{ ITokenRewriteStream }
procedure Rollback(const InstructionIndex: Integer); overload; virtual;
procedure Rollback(const ProgramName: String;
const InstructionIndex: Integer); overload; virtual;
procedure DeleteProgram; overload; virtual;
procedure DeleteProgram(const ProgramName: String); overload; virtual;
procedure InsertAfter(const T: IToken; const Text: IANTLRInterface); overload; virtual;
procedure InsertAfter(const Index: Integer; const Text: IANTLRInterface); overload; virtual;
procedure InsertAfter(const ProgramName: String; const T: IToken;
const Text: IANTLRInterface); overload; virtual;
procedure InsertAfter(const ProgramName: String; const Index: Integer;
const Text: IANTLRInterface); overload; virtual;
procedure InsertAfter(const T: IToken; const Text: String); overload;
procedure InsertAfter(const Index: Integer; const Text: String); overload;
procedure InsertAfter(const ProgramName: String; const T: IToken;
const Text: String); overload;
procedure InsertAfter(const ProgramName: String; const Index: Integer;
const Text: String); overload;
procedure InsertBefore(const T: IToken; const Text: IANTLRInterface); overload; virtual;
procedure InsertBefore(const Index: Integer; const Text: IANTLRInterface); overload; virtual;
procedure InsertBefore(const ProgramName: String; const T: IToken;
const Text: IANTLRInterface); overload; virtual;
procedure InsertBefore(const ProgramName: String; const Index: Integer;
const Text: IANTLRInterface); overload; virtual;
procedure InsertBefore(const T: IToken; const Text: String); overload;
procedure InsertBefore(const Index: Integer; const Text: String); overload;
procedure InsertBefore(const ProgramName: String; const T: IToken;
const Text: String); overload;
procedure InsertBefore(const ProgramName: String; const Index: Integer;
const Text: String); overload;
procedure Replace(const Index: Integer; const Text: IANTLRInterface); overload; virtual;
procedure Replace(const Start, Stop: Integer; const Text: IANTLRInterface); overload; virtual;
procedure Replace(const IndexT: IToken; const Text: IANTLRInterface); overload; virtual;
procedure Replace(const Start, Stop: IToken; const Text: IANTLRInterface); overload; virtual;
procedure Replace(const ProgramName: String; const Start, Stop: Integer;
const Text: IANTLRInterface); overload; virtual;
procedure Replace(const ProgramName: String; const Start, Stop: IToken;
const Text: IANTLRInterface); overload; virtual;
procedure Replace(const Index: Integer; const Text: String); overload;
procedure Replace(const Start, Stop: Integer; const Text: String); overload;
procedure Replace(const IndexT: IToken; const Text: String); overload;
procedure Replace(const Start, Stop: IToken; const Text: String); overload;
procedure Replace(const ProgramName: String; const Start, Stop: Integer;
const Text: String); overload;
procedure Replace(const ProgramName: String; const Start, Stop: IToken;
const Text: String); overload;
procedure Delete(const Index: Integer); overload; virtual;
procedure Delete(const Start, Stop: Integer); overload; virtual;
procedure Delete(const IndexT: IToken); overload; virtual;
procedure Delete(const Start, Stop: IToken); overload; virtual;
procedure Delete(const ProgramName: String; const Start, Stop: Integer); overload; virtual;
procedure Delete(const ProgramName: String; const Start, Stop: IToken); overload; virtual;
function GetLastRewriteTokenIndex: Integer; overload; virtual;
function ToOriginalString: String; overload; virtual;
function ToOriginalString(const Start, Stop: Integer): String; overload; virtual;
function ToString(const ProgramName: String): String; overload; virtual;
function ToString(const ProgramName: String;
const Start, Stop: Integer): String; overload; virtual;
function ToDebugString: String; overload; virtual;
function ToDebugString(const Start, Stop: Integer): String; overload; virtual;
protected
{ ITokenStream }
function ToString(const Start, Stop: Integer): String; overload; override;
strict protected
procedure Init; virtual;
function GetProgram(const Name: String): IList; virtual;
function GetLastRewriteTokenIndex(const ProgramName: String): Integer; overload; virtual;
procedure SetLastRewriteTokenIndex(const ProgramName: String; const I: Integer); overload; virtual;
///
/// Return a map from token index to operation.
///
/// We need to combine operations and report invalid operations (like
/// overlapping replaces that are not completed nested). Inserts to
/// same index need to be combined etc... Here are the cases:
///
/// I.i.u I.j.v leave alone, nonoverlapping
/// I.i.u I.i.v combine: Iivu
///
/// R.i-j.u R.x-y.v | i-j in x-y delete first R
/// R.i-j.u R.i-j.v delete first R
/// R.i-j.u R.x-y.v | x-y in i-j ERROR
/// R.i-j.u R.x-y.v | boundaries overlap ERROR
///
/// I.i.u R.x-y.v | i in x-y delete I
/// I.i.u R.x-y.v | i not in x-y leave alone, nonoverlapping
/// R.x-y.v I.i.u | i in x-y ERROR
/// R.x-y.v I.x.u R.x-y.uv (combine, delete I)
/// R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping
///
/// I.i.u = insert u before op @ index i
/// R.x-y.u = replace x-y indexed tokens with u
///
/// First we need to examine replaces. For any replace op:
///
/// 1. wipe out any insertions before op within that range.
/// 2. Drop any replace op before that is contained completely within
/// that range.
/// 3. Throw exception upon boundary overlap with any previous replace.
///
/// Then we can deal with inserts:
///
/// 1. for any inserts to same index, combine even if not adjacent.
/// 2. for any prior replace with same left boundary, combine this
/// insert with replace and delete this replace.
/// 3. throw exception if index in same range as previous replace
///
/// Don't actually delete; make op null in list. Easier to walk list.
/// Later we can throw as we add to index -> op map.
///
/// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the
/// inserted stuff would be before the replace range. But, if you
/// add tokens in front of a method body '{' and then delete the method
/// body, I think the stuff before the '{' you added should disappear too.
///
function ReduceToSingleOperationPerIndex(
const Rewrites: IList): IDictionary;
function GetKindOfOps(const Rewrites: IList;
const Kind: TGUID): IList; overload;
///
/// Get all operations before an index of a particular kind
///
function GetKindOfOps(const Rewrites: IList;
const Kind: TGUID; const Before: Integer): IList; overload;
function CatOpText(const A, B: IANTLRInterface): IANTLRInterface;
public
constructor Create; overload;
constructor Create(const ATokenSource: ITokenSource); overload;
constructor Create(const ATokenSource: ITokenSource;
const AChannel: Integer); overload;
constructor Create(const ALexer: ILexer); overload;
constructor Create(const ALexer: ILexer;
const AChannel: Integer); overload;
function ToString: String; overload; override;
end;
{ These functions return X or, if X = nil, an empty default instance }
function Def(const X: IToken): IToken; overload;
function Def(const X: IRuleReturnScope): IRuleReturnScope; overload;
implementation
uses
StrUtils,
Math,
Antlr.Runtime.Tree;
{ ERecognitionException }
constructor ERecognitionException.Create;
begin
Create('', nil);
end;
constructor ERecognitionException.Create(const AMessage: String);
begin
Create(AMessage, nil);
end;
constructor ERecognitionException.Create(const AInput: IIntStream);
begin
Create('', AInput);
end;
constructor ERecognitionException.Create(const AMessage: String;
const AInput: IIntStream);
var
TokenStream: ITokenStream;
CharStream: ICharStream;
begin
inherited Create(AMessage);
FInput := AInput;
FIndex := AInput.Index;
if Supports(AInput, ITokenStream, TokenStream) then
begin
FToken := TokenStream.LT(1);
FLine := FToken.Line;
FCharPositionInLine := FToken.CharPositionInLine;
end;
if Supports(AInput, ITreeNodeStream) then
ExtractInformationFromTreeNodeStream(AInput)
else
begin
if Supports(AInput, ICharStream, CharStream) then
begin
FC := AInput.LA(1);
FLine := CharStream.Line;
FCharPositionInLine := CharStream.CharPositionInLine;
end
else
FC := AInput.LA(1);
end;
end;
procedure ERecognitionException.ExtractInformationFromTreeNodeStream(
const Input: IIntStream);
var
Nodes: ITreeNodeStream;
Adaptor: ITreeAdaptor;
Payload, PriorPayload: IToken;
I, NodeType: Integer;
PriorNode: IANTLRInterface;
Tree: ITree;
Text: String;
CommonTree: ICommonTree;
begin
Nodes := Input as ITreeNodeStream;
FNode := Nodes.LT(1);
Adaptor := Nodes.TreeAdaptor;
Payload := Adaptor.GetToken(FNode);
if Assigned(Payload) then
begin
FToken := Payload;
if (Payload.Line <= 0) then
begin
// imaginary node; no line/pos info; scan backwards
I := -1;
PriorNode := Nodes.LT(I);
while Assigned(PriorNode) do
begin
PriorPayload := Adaptor.GetToken(PriorNode);
if Assigned(PriorPayload) and (PriorPayload.Line > 0) then
begin
// we found the most recent real line / pos info
FLine := PriorPayload.Line;
FCharPositionInLine := PriorPayload.CharPositionInLine;
FApproximateLineInfo := True;
Break;
end;
Dec(I);
PriorNode := Nodes.LT(I)
end;
end
else
begin
// node created from real token
FLine := Payload.Line;
FCharPositionInLine := Payload.CharPositionInLine;
end;
end else
if Supports(FNode, ITree, Tree) then
begin
FLine := Tree.Line;
FCharPositionInLine := Tree.CharPositionInLine;
if Supports(FNode, ICommonTree, CommonTree) then
FToken := CommonTree.Token;
end
else
begin
NodeType := Adaptor.GetNodeType(FNode);
Text := Adaptor.GetNodeText(FNode);
FToken := TCommonToken.Create(NodeType, Text);
end;
end;
function ERecognitionException.GetUnexpectedType: Integer;
var
Nodes: ITreeNodeStream;
Adaptor: ITreeAdaptor;
begin
if Supports(FInput, ITokenStream) then
Result := FToken.TokenType
else
if Supports(FInput, ITreeNodeStream, Nodes) then
begin
Adaptor := Nodes.TreeAdaptor;
Result := Adaptor.GetNodeType(FNode);
end else
Result := FC;
end;
{ EMismatchedTokenException }
constructor EMismatchedTokenException.Create(const AExpecting: Integer;
const AInput: IIntStream);
begin
inherited Create(AInput);
FExpecting := AExpecting;
end;
function EMismatchedTokenException.ToString: String;
begin
Result := 'MismatchedTokenException(' + IntToStr(UnexpectedType)
+ '!=' + IntToStr(Expecting) + ')';
end;
{ EUnwantedTokenException }
function EUnwantedTokenException.GetUnexpectedToken: IToken;
begin
Result := FToken;
end;
function EUnwantedTokenException.ToString: String;
var
Exp: String;
begin
if (Expecting = TToken.INVALID_TOKEN_TYPE) then
Exp := ''
else
Exp := ', expected ' + IntToStr(Expecting);
if (Token = nil) then
Result := 'UnwantedTokenException(found=nil' + Exp + ')'
else
Result := 'UnwantedTokenException(found=' + Token.Text + Exp + ')'
end;
{ EMissingTokenException }
constructor EMissingTokenException.Create(const AExpecting: Integer;
const AInput: IIntStream; const AInserted: IANTLRInterface);
begin
inherited Create(AExpecting, AInput);
FInserted := AInserted;
end;
function EMissingTokenException.GetMissingType: Integer;
begin
Result := Expecting;
end;
function EMissingTokenException.ToString: String;
begin
if Assigned(FInserted) and Assigned(FToken) then
Result := 'MissingTokenException(inserted ' + FInserted.ToString
+ ' at ' + FToken.Text + ')'
else
if Assigned(FToken) then
Result := 'MissingTokenException(at ' + FToken.Text + ')'
else
Result := 'MissingTokenException';
end;
{ EMismatchedTreeNodeException }
constructor EMismatchedTreeNodeException.Create(const AExpecting: Integer;
const AInput: IIntStream);
begin
inherited Create(AInput);
FExpecting := AExpecting;
end;
function EMismatchedTreeNodeException.ToString: String;
begin
Result := 'MismatchedTreeNodeException(' + IntToStr(UnexpectedType)
+ '!=' + IntToStr(Expecting) + ')';
end;
{ ENoViableAltException }
constructor ENoViableAltException.Create(
const AGrammarDecisionDescription: String; const ADecisionNumber,
AStateNumber: Integer; const AInput: IIntStream);
begin
inherited Create(AInput);
FGrammarDecisionDescription := AGrammarDecisionDescription;
FDecisionNumber := ADecisionNumber;
FStateNumber := AStateNumber;
end;
function ENoViableAltException.ToString: String;
begin
if Supports(Input, ICharStream) then
Result := 'NoViableAltException(''' + Char(UnexpectedType) + '''@['
+ FGrammarDecisionDescription + '])'
else
Result := 'NoViableAltException(''' + IntToStr(UnexpectedType) + '''@['
+ FGrammarDecisionDescription + '])'
end;
{ EEarlyExitException }
constructor EEarlyExitException.Create(const ADecisionNumber: Integer;
const AInput: IIntStream);
begin
inherited Create(AInput);
FDecisionNumber := ADecisionNumber;
end;
{ EMismatchedSetException }
constructor EMismatchedSetException.Create(const AExpecting: IBitSet;
const AInput: IIntStream);
begin
inherited Create(AInput);
FExpecting := AExpecting;
end;
function EMismatchedSetException.ToString: String;
begin
Result := 'MismatchedSetException(' + IntToStr(UnexpectedType)
+ '!=' + Expecting.ToString + ')';
end;
{ EMismatchedNotSetException }
function EMismatchedNotSetException.ToString: String;
begin
Result := 'MismatchedNotSetException(' + IntToStr(UnexpectedType)
+ '!=' + Expecting.ToString + ')';
end;
{ EFailedPredicateException }
constructor EFailedPredicateException.Create(const AInput: IIntStream;
const ARuleName, APredicateText: String);
begin
inherited Create(AInput);
FRuleName := ARuleName;
FPredicateText := APredicateText;
end;
function EFailedPredicateException.ToString: String;
begin
Result := 'FailedPredicateException(' + FRuleName + ',{' + FPredicateText + '}?)';
end;
{ EMismatchedRangeException }
constructor EMismatchedRangeException.Create(const AA, AB: Integer;
const AInput: IIntStream);
begin
inherited Create(FInput);
FA := AA;
FB := AB;
end;
function EMismatchedRangeException.ToString: String;
begin
Result := 'MismatchedNotSetException(' + IntToStr(UnexpectedType)
+ ' not in [' + IntToStr(FA)+ ',' + IntToStr(FB) + '])';
end;
{ TCharStreamState }
function TCharStreamState.GetCharPositionInLine: Integer;
begin
Result := FCharPositionInLine;
end;
function TCharStreamState.GetLine: Integer;
begin
Result := FLine;
end;
function TCharStreamState.GetP: Integer;
begin
Result := FP;
end;
procedure TCharStreamState.SetCharPositionInLine(const Value: Integer);
begin
FCharPositionInLine := Value;
end;
procedure TCharStreamState.SetLine(const Value: Integer);
begin
FLine := Value;
end;
procedure TCharStreamState.SetP(const Value: Integer);
begin
FP := Value;
end;
{ TANTLRStringStream }
constructor TANTLRStringStream.Create(const AInput: String);
begin
inherited Create;
FLine := 1;
FOwnsData := True;
FN := Length(AInput);
if (FN > 0) then
begin
GetMem(FData,FN * SizeOf(Char));
Move(AInput[1],FData^,FN * SizeOf(Char));
end;
end;
procedure TANTLRStringStream.Consume;
begin
if (FP < FN) then
begin
Inc(FCharPositionInLine);
if (FData[FP] = #10) then
begin
Inc(FLine);
FCharPositionInLine := 0;
end;
Inc(FP);
end;
end;
constructor TANTLRStringStream.Create(const AData: PChar;
const ANumberOfActualCharsInArray: Integer);
begin
inherited Create;
FLine := 1;
FOwnsData := False;
FData := AData;
FN := ANumberOfActualCharsInArray;
end;
constructor TANTLRStringStream.Create;
begin
inherited Create;
FLine := 1;
end;
destructor TANTLRStringStream.Destroy;
begin
if (FOwnsData) then
FreeMem(FData);
inherited;
end;
function TANTLRStringStream.GetCharPositionInLine: Integer;
begin
Result := FCharPositionInLine;
end;
function TANTLRStringStream.GetLine: Integer;
begin
Result := FLine;
end;
function TANTLRStringStream.GetSourceName: String;
begin
Result := FName;
end;
function TANTLRStringStream.Index: Integer;
begin
Result := FP;
end;
function TANTLRStringStream.LA(I: Integer): Integer;
begin
if (I = 0) then
Result := 0 // undefined
else begin
if (I < 0) then
begin
Inc(I); // e.g., translate LA(-1) to use offset i=0; then data[p+0-1]
if ((FP + I - 1) < 0) then
begin
Result := Integer(cscEOF);
Exit;
end;
end;
if ((FP + I - 1) >= FN) then
Result := Integer(cscEOF)
else
Result := Integer(FData[FP + I - 1]);
end;
end;
function TANTLRStringStream.LAChar(I: Integer): Char;
begin
Result := Char(LA(I));
end;
function TANTLRStringStream.LT(const I: Integer): Integer;
begin
Result := LA(I);
end;
function TANTLRStringStream.Mark: Integer;
var
State: ICharStreamState;
begin
if (FMarkers = nil) then
begin
FMarkers := TList.Create;
FMarkers.Add(nil); // depth 0 means no backtracking, leave blank
end;
Inc(FMarkDepth);
if (FMarkDepth >= FMarkers.Count) then
begin
State := TCharStreamState.Create;
FMarkers.Add(State);
end
else
State := FMarkers[FMarkDepth];
State.P := FP;
State.Line := FLine;
State.CharPositionInLine := FCharPositionInLine;
FLastMarker := FMarkDepth;
Result := FMarkDepth;
end;
procedure TANTLRStringStream.Release(const Marker: Integer);
begin
// unwind any other markers made after m and release m
FMarkDepth := Marker;
// release this marker
Dec(FMarkDepth);
end;
procedure TANTLRStringStream.Reset;
begin
FP := 0;
FLine := 1;
FCharPositionInLine := 0;
FMarkDepth := 0;
end;
procedure TANTLRStringStream.Rewind(const Marker: Integer);
var
State: ICharStreamState;
begin
State := FMarkers[Marker];
// restore stream state
Seek(State.P);
FLine := State.Line;
FCharPositionInLine := State.CharPositionInLine;
Release(Marker);
end;
procedure TANTLRStringStream.Rewind;
begin
Rewind(FLastMarker);
end;
procedure TANTLRStringStream.Seek(const Index: Integer);
begin
if (Index <= FP) then
FP := Index // just jump; don't update stream state (line, ...)
else begin
// seek forward, consume until p hits index
while (FP < Index) do
Consume;
end;
end;
procedure TANTLRStringStream.SetCharPositionInLine(const Value: Integer);
begin
FCharPositionInLine := Value;
end;
procedure TANTLRStringStream.SetLine(const Value: Integer);
begin
FLine := Value;
end;
function TANTLRStringStream.Size: Integer;
begin
Result := FN;
end;
function TANTLRStringStream.Substring(const Start, Stop: Integer): String;
begin
Result := Copy(FData, Start + 1, Stop - Start + 1);
end;
{ TANTLRFileStream }
constructor TANTLRFileStream.Create(const AFileName: String);
begin
Create(AFilename,TEncoding.Default);
end;
constructor TANTLRFileStream.Create(const AFileName: String;
const AEncoding: TEncoding);
begin
inherited Create;
FFileName := AFileName;
Load(FFileName, AEncoding);
end;
function TANTLRFileStream.GetSourceName: String;
begin
Result := FFileName;
end;
procedure TANTLRFileStream.Load(const FileName: String;
const Encoding: TEncoding);
var
FR: TStreamReader;
S: String;
begin
if (FFileName <> '') then
begin
if (Encoding = nil) then
FR := TStreamReader.Create(FileName,TEncoding.Default)
else
FR := TStreamReader.Create(FileName,Encoding);
try
if (FOwnsData) then
begin
FreeMem(FData);
FData := nil;
end;
FOwnsData := True;
S := FR.ReadToEnd;
FN := Length(S);
if (FN > 0) then
begin
GetMem(FData,FN * SizeOf(Char));
Move(S[1],FData^,FN * SizeOf(Char));
end;
finally
FR.Free;
end;
end;
end;
{ TBitSet }
class function TBitSet.BitSetOf(const El: Integer): IBitSet;
begin
Result := TBitSet.Create(El + 1);
Result.Add(El);
end;
class function TBitSet.BitSetOf(const A, B: Integer): IBitSet;
begin
Result := TBitSet.Create(Max(A,B) + 1);
Result.Add(A);
Result.Add(B);
end;
class function TBitSet.BitSetOf(const A, B, C: Integer): IBitSet;
begin
Result := TBitSet.Create;
Result.Add(A);
Result.Add(B);
Result.Add(C);
end;
class function TBitSet.BitSetOf(const A, B, C, D: Integer): IBitSet;
begin
Result := TBitSet.Create;
Result.Add(A);
Result.Add(B);
Result.Add(C);
Result.Add(D);
end;
procedure TBitSet.Add(const El: Integer);
var
N: Integer;
begin
N := WordNumber(El);
if (N >= Length(FBits)) then
GrowToInclude(El);
FBits[N] := FBits[N] or BitMask(El);
end;
class function TBitSet.BitMask(const BitNumber: Integer): UInt64;
var
BitPosition: Integer;
begin
BitPosition := BitNumber and MOD_MASK;
Result := UInt64(1) shl BitPosition;
end;
function TBitSet.BitSetOr(const A: IBitSet): IBitSet;
begin
Result := Clone as IBitSet;
Result.OrInPlace(A);
end;
function TBitSet.Clone: IANTLRInterface;
var
BS: TBitSet;
begin
BS := TBitSet.Create;
Result := BS;
SetLength(BS.FBits,Length(FBits));
if (Length(FBits) > 0) then
Move(FBits[0],BS.FBits[0],Length(FBits) * SizeOf(UInt64));
end;
constructor TBitSet.Create;
begin
Create(BITS);
end;
constructor TBitSet.Create(const ABits: array of UInt64);
begin
inherited Create;
SetLength(FBits, Length(ABits));
if (Length(ABits) > 0) then
Move(ABits[0], FBits[0], Length(ABits) * SizeOf(UInt64));
end;
constructor TBitSet.Create(const AItems: IList);
var
V: Integer;
begin
Create(BITS);
for V in AItems do
Add(V);
end;
constructor TBitSet.Create(const ANBits: Integer);
begin
inherited Create;
SetLength(FBits,((ANBits - 1) shr LOG_BITS) + 1);
end;
function TBitSet.Equals(Obj: TObject): Boolean;
var
OtherSet: TBitSet absolute Obj;
I, N: Integer;
begin
Result := False;
if (Obj = nil) or (not (Obj is TBitSet)) then
Exit;
N := Min(Length(FBits), Length(OtherSet.FBits));
// for any bits in common, compare
for I := 0 to N - 1 do
begin
if (FBits[I] <> OtherSet.FBits[I]) then
Exit;
end;
// make sure any extra bits are off
if (Length(FBits) > N) then
begin
for I := N + 1 to Length(FBits) - 1 do
begin
if (FBits[I] <> 0) then
Exit;
end;
end
else
if (Length(OtherSet.FBits) > N) then
begin
for I := N + 1 to Length(OtherSet.FBits) - 1 do
begin
if (OtherSet.FBits[I] <> 0) then
Exit;
end;
end;
Result := True;
end;
function TBitSet.GetIsNil: Boolean;
var
I: Integer;
begin
for I := Length(FBits) - 1 downto 0 do
if (FBits[I] <> 0) then
begin
Result := False;
Exit;
end;
Result := True;
end;
procedure TBitSet.GrowToInclude(const Bit: Integer);
var
NewSize: Integer;
begin
NewSize := Max(Length(FBits) shl 1,NumWordsToHold(Bit));
SetLength(FBits,NewSize);
end;
function TBitSet.LengthInLongWords: Integer;
begin
Result := Length(FBits);
end;
function TBitSet.Member(const El: Integer): Boolean;
var
N: Integer;
begin
if (El < 0) then
Result := False
else
begin
N := WordNumber(El);
if (N >= Length(FBits)) then
Result := False
else
Result := ((FBits[N] and BitMask(El)) <> 0);
end;
end;
function TBitSet.NumBits: Integer;
begin
Result := Length(FBits) shl LOG_BITS;
end;
class function TBitSet.NumWordsToHold(const El: Integer): Integer;
begin
Result := (El shr LOG_BITS) + 1;
end;
procedure TBitSet.OrInPlace(const A: IBitSet);
var
I, M: Integer;
ABits: TUInt64Array;
begin
if Assigned(A) then
begin
// If this is smaller than a, grow this first
if (A.LengthInLongWords > Length(FBits)) then
SetLength(FBits,A.LengthInLongWords);
M := Min(Length(FBits), A.LengthInLongWords);
ABits := A.ToPackedArray;
for I := M - 1 downto 0 do
FBits[I] := FBits[I] or ABits[I];
end;
end;
procedure TBitSet.Remove(const El: Integer);
var
N: Integer;
begin
N := WordNumber(El);
if (N < Length(FBits)) then
FBits[N] := (FBits[N] and not BitMask(El));
end;
function TBitSet.Size: Integer;
var
I, Bit: Integer;
W: UInt64;
begin
Result := 0;
for I := Length(FBits) - 1 downto 0 do
begin
W := FBits[I];
if (W <> 0) then
begin
for Bit := BITS - 1 downto 0 do
begin
if ((W and (UInt64(1) shl Bit)) <> 0) then
Inc(Result);
end;
end;
end;
end;
function TBitSet.ToArray: TIntegerArray;
var
I, En: Integer;
begin
SetLength(Result,Size);
En := 0;
for I := 0 to (Length(FBits) shl LOG_BITS) - 1 do
begin
if Member(I) then
begin
Result[En] := I;
Inc(En);
end;
end;
end;
function TBitSet.ToPackedArray: TUInt64Array;
begin
Result := FBits;
end;
function TBitSet.ToString: String;
begin
Result := ToString(nil);
end;
function TBitSet.ToString(const TokenNames: TStringArray): String;
var
Buf: TStringBuilder;
I: Integer;
HavePrintedAnElement: Boolean;
begin
HavePrintedAnElement := False;
Buf := TStringBuilder.Create;
try
Buf.Append('{');
for I := 0 to (Length(FBits) shl LOG_BITS) - 1 do
begin
if Member(I) then
begin
if (I > 0) and HavePrintedAnElement then
Buf.Append(',');
if Assigned(TokenNames) then
Buf.Append(TokenNames[I])
else
Buf.Append(I);
HavePrintedAnElement := True;
end;
end;
Buf.Append('}');
Result := Buf.ToString;
finally
Buf.Free;
end;
end;
class function TBitSet.WordNumber(const Bit: Integer): Integer;
begin
Result := Bit shr LOG_BITS; // Bit / BITS
end;
{ TRecognizerSharedState }
constructor TRecognizerSharedState.Create;
var
I: Integer;
begin
inherited;
SetLength(FFollowing,TBaseRecognizer.INITIAL_FOLLOW_STACK_SIZE);
for I := 0 to TBaseRecognizer.INITIAL_FOLLOW_STACK_SIZE - 1 do
FFollowing[I] := TBitSet.Create;
FFollowingStackPointer := -1;
FLastErrorIndex := -1;
FTokenStartCharIndex := -1;
end;
function TRecognizerSharedState.GetBacktracking: Integer;
begin
Result := FBacktracking;
end;
function TRecognizerSharedState.GetChannel: Integer;
begin
Result := FChannel;
end;
function TRecognizerSharedState.GetErrorRecovery: Boolean;
begin
Result := FErrorRecovery;
end;
function TRecognizerSharedState.GetFailed: Boolean;
begin
Result := FFailed;
end;
function TRecognizerSharedState.GetFollowing: TBitSetArray;
begin
Result := FFollowing;
end;
function TRecognizerSharedState.GetFollowingStackPointer: Integer;
begin
Result := FFollowingStackPointer;
end;
function TRecognizerSharedState.GetLastErrorIndex: Integer;
begin
Result := FLastErrorIndex;
end;
function TRecognizerSharedState.GetRuleMemo: TDictionaryArray;
begin
Result := FRuleMemo;
end;
function TRecognizerSharedState.GetRuleMemoCount: Integer;
begin
Result := Length(FRuleMemo);
end;
function TRecognizerSharedState.GetSyntaxErrors: Integer;
begin
Result := FSyntaxErrors;
end;
function TRecognizerSharedState.GetText: String;
begin
Result := FText;
end;
function TRecognizerSharedState.GetToken: IToken;
begin
Result := FToken;
end;
function TRecognizerSharedState.GetTokenStartCharIndex: Integer;
begin
Result := FTokenStartCharIndex;
end;
function TRecognizerSharedState.GetTokenStartCharPositionInLine: Integer;
begin
Result := FTokenStartCharPositionInLine;
end;
function TRecognizerSharedState.GetTokenStartLine: Integer;
begin
Result := FTokenStartLine;
end;
function TRecognizerSharedState.GetTokenType: Integer;
begin
Result := FTokenType;
end;
procedure TRecognizerSharedState.SetBacktracking(const Value: Integer);
begin
FBacktracking := Value;
end;
procedure TRecognizerSharedState.SetChannel(const Value: Integer);
begin
FChannel := Value;
end;
procedure TRecognizerSharedState.SetErrorRecovery(const Value: Boolean);
begin
FErrorRecovery := Value;
end;
procedure TRecognizerSharedState.SetFailed(const Value: Boolean);
begin
FFailed := Value;
end;
procedure TRecognizerSharedState.SetFollowing(const Value: TBitSetArray);
begin
FFollowing := Value;
end;
procedure TRecognizerSharedState.SetFollowingStackPointer(const Value: Integer);
begin
FFollowingStackPointer := Value;
end;
procedure TRecognizerSharedState.SetLastErrorIndex(const Value: Integer);
begin
FLastErrorIndex := Value;
end;
procedure TRecognizerSharedState.SetRuleMemoCount(const Value: Integer);
begin
SetLength(FRuleMemo, Value);
end;
procedure TRecognizerSharedState.SetSyntaxErrors(const Value: Integer);
begin
FSyntaxErrors := Value;
end;
procedure TRecognizerSharedState.SetText(const Value: String);
begin
FText := Value;
end;
procedure TRecognizerSharedState.SetToken(const Value: IToken);
begin
FToken := Value;
end;
procedure TRecognizerSharedState.SetTokenStartCharIndex(const Value: Integer);
begin
FTokenStartCharIndex := Value;
end;
procedure TRecognizerSharedState.SetTokenStartCharPositionInLine(
const Value: Integer);
begin
FTokenStartCharPositionInLine := Value;
end;
procedure TRecognizerSharedState.SetTokenStartLine(const Value: Integer);
begin
FTokenStartLine := Value;
end;
procedure TRecognizerSharedState.SetTokenType(const Value: Integer);
begin
FTokenType := Value;
end;
{ TCommonToken }
constructor TCommonToken.Create;
begin
inherited;
FChannel := TToken.DEFAULT_CHANNEL;
FCharPositionInLine := -1;
FIndex := -1;
end;
constructor TCommonToken.Create(const ATokenType: Integer);
begin
Create;
FTokenType := ATokenType;
end;
constructor TCommonToken.Create(const AInput: ICharStream; const ATokenType,
AChannel, AStart, AStop: Integer);
begin
Create;
FInput := AInput;
FTokenType := ATokenType;
FChannel := AChannel;
FStart := AStart;
FStop := AStop;
end;
constructor TCommonToken.Create(const ATokenType: Integer; const AText: String);
begin
Create;
FTokenType := ATokenType;
FChannel := TToken.DEFAULT_CHANNEL;
FText := AText;
end;
function TCommonToken.GetChannel: Integer;
begin
Result := FChannel;
end;
function TCommonToken.GetCharPositionInLine: Integer;
begin
Result := FCharPositionInLine;
end;
function TCommonToken.GetInputStream: ICharStream;
begin
Result := FInput;
end;
function TCommonToken.GetLine: Integer;
begin
Result := FLine;
end;
function TCommonToken.GetStartIndex: Integer;
begin
Result := FStart;
end;
function TCommonToken.GetStopIndex: Integer;
begin
Result := FStop;
end;
function TCommonToken.GetText: String;
begin
if (FText <> '') then
Result := FText
else
if (FInput = nil) then
Result := ''
else
Result := FInput.Substring(FStart, FStop);
end;
function TCommonToken.GetTokenIndex: Integer;
begin
Result := FIndex;
end;
function TCommonToken.GetTokenType: Integer;
begin
Result := FTokenType;
end;
procedure TCommonToken.SetChannel(const Value: Integer);
begin
FChannel := Value;
end;
procedure TCommonToken.SetCharPositionInLine(const Value: Integer);
begin
FCharPositionInLine := Value;
end;
procedure TCommonToken.SetInputStream(const Value: ICharStream);
begin
FInput := Value;
end;
procedure TCommonToken.SetLine(const Value: Integer);
begin
FLine := Value;
end;
procedure TCommonToken.SetStartIndex(const Value: Integer);
begin
FStart := Value;
end;
procedure TCommonToken.SetStopIndex(const Value: Integer);
begin
FStop := Value;
end;
procedure TCommonToken.SetText(const Value: String);
begin
(* Override the text for this token. The property getter
* will return this text rather than pulling from the buffer.
* Note that this does not mean that start/stop indexes are
* not valid. It means that the input was converted to a new
* string in the token object.
*)
FText := Value;
end;
procedure TCommonToken.SetTokenIndex(const Value: Integer);
begin
FIndex := Value;
end;
procedure TCommonToken.SetTokenType(const Value: Integer);
begin
FTokenType := Value;
end;
function TCommonToken.ToString: String;
var
ChannelStr, Txt: String;
begin
if (FChannel > 0) then
ChannelStr := ',channel=' + IntToStr(FChannel)
else
ChannelStr := '';
Txt := GetText;
if (Txt <> '') then
begin
Txt := ReplaceStr(Txt,#10,'\n');
Txt := ReplaceStr(Txt,#13,'\r');
Txt := ReplaceStr(Txt,#9,'\t');
end else
Txt := '';
Result := Format('[@%d,%d:%d=''%s'',<%d>%s,%d:%d]',
[FIndex,FStart,FStop,Txt,FTokenType,ChannelStr,FLine,FCharPositionInLine]);
end;
constructor TCommonToken.Create(const AOldToken: IToken);
var
OldCommonToken: ICommonToken;
begin
Create;
FText := AOldToken.Text;
FTokenType := AOldToken.TokenType;
FLine := AOldToken.Line;
FIndex := AOldToken.TokenIndex;
FCharPositionInLine := AOldToken.CharPositionInLine;
FChannel := AOldToken.Channel;
if Supports(AOldToken, ICommonToken, OldCommonToken) then
begin
FStart := OldCommonToken.StartIndex;
FStop := OldCommonToken.StopIndex;
end;
end;
{ TClassicToken }
constructor TClassicToken.Create(const AOldToken: IToken);
begin
inherited Create;
FText := AOldToken.Text;
FTokenType := AOldToken.TokenType;
FLine := AOldToken.Line;
FCharPositionInLine := AOldToken.CharPositionInLine;
FChannel := AOldToken.Channel;
end;
constructor TClassicToken.Create(const ATokenType: Integer);
begin
inherited Create;
FTokenType := ATokenType;
end;
constructor TClassicToken.Create(const ATokenType: Integer; const AText: String;
const AChannel: Integer);
begin
inherited Create;
FTokenType := ATokenType;
FText := AText;
FChannel := AChannel;
end;
constructor TClassicToken.Create(const ATokenType: Integer;
const AText: String);
begin
inherited Create;
FTokenType := ATokenType;
FText := AText;
end;
function TClassicToken.GetChannel: Integer;
begin
Result := FChannel;
end;
function TClassicToken.GetCharPositionInLine: Integer;
begin
Result := FCharPositionInLine;
end;
function TClassicToken.GetInputStream: ICharStream;
begin
// No default implementation
Result := nil;
end;
function TClassicToken.GetLine: Integer;
begin
Result := FLine;
end;
function TClassicToken.GetText: String;
begin
Result := FText;
end;
function TClassicToken.GetTokenIndex: Integer;
begin
Result := FIndex;
end;
function TClassicToken.GetTokenType: Integer;
begin
Result := FTokenType;
end;
procedure TClassicToken.SetChannel(const Value: Integer);
begin
FChannel := Value;
end;
procedure TClassicToken.SetCharPositionInLine(const Value: Integer);
begin
FCharPositionInLine := Value;
end;
procedure TClassicToken.SetInputStream(const Value: ICharStream);
begin
// No default implementation
end;
procedure TClassicToken.SetLine(const Value: Integer);
begin
FLine := Value;
end;
procedure TClassicToken.SetText(const Value: String);
begin
FText := Value;
end;
procedure TClassicToken.SetTokenIndex(const Value: Integer);
begin
FIndex := Value;
end;
procedure TClassicToken.SetTokenType(const Value: Integer);
begin
FTokenType := Value;
end;
function TClassicToken.ToString: String;
var
ChannelStr, Txt: String;
begin
if (FChannel > 0) then
ChannelStr := ',channel=' + IntToStr(FChannel)
else
ChannelStr := '';
Txt := FText;
if (Txt <> '') then
begin
Txt := ReplaceStr(Txt,#10,'\n');
Txt := ReplaceStr(Txt,#13,'\r');
Txt := ReplaceStr(Txt,#9,'\t');
end else
Txt := '';
Result := Format('[@%d,''%s'',<%d>%s,%d:%d]',
[FIndex,Txt,FTokenType,ChannelStr,FLine,FCharPositionInLine]);
end;
{ TToken }
class procedure TToken.Initialize;
begin
EOF_TOKEN := TCommonToken.Create(EOF);
INVALID_TOKEN := TCommonToken.Create(INVALID_TOKEN_TYPE);
SKIP_TOKEN := TCommonToken.Create(INVALID_TOKEN_TYPE);
end;
{ TBaseRecognizer }
constructor TBaseRecognizer.Create;
begin
inherited;
FState := TRecognizerSharedState.Create;
end;
function TBaseRecognizer.AlreadyParsedRule(const Input: IIntStream;
const RuleIndex: Integer): Boolean;
var
StopIndex: Integer;
begin
StopIndex := GetRuleMemoization(RuleIndex, Input.Index);
Result := (StopIndex <> MEMO_RULE_UNKNOWN);
if Result then
begin
if (StopIndex = MEMO_RULE_FAILED) then
FState.Failed := True
else
Input.Seek(StopIndex + 1); // jump to one past stop token
end;
end;
procedure TBaseRecognizer.BeginBacktrack(const Level: Integer);
begin
// No defeault implementation
end;
procedure TBaseRecognizer.BeginResync;
begin
// No defeault implementation
end;
procedure TBaseRecognizer.ConsumeUntil(const Input: IIntStream;
const TokenType: Integer);
var
TType: Integer;
begin
TType := Input.LA(1);
while (TType <> TToken.EOF) and (TType <> TokenType) do
begin
Input.Consume;
TType := Input.LA(1);
end;
end;
function TBaseRecognizer.CombineFollows(const Exact: Boolean): IBitSet;
var
I, Top: Integer;
LocalFollowSet: IBitSet;
begin
Top := FState.FollowingStackPointer;
Result := TBitSet.Create;
for I := Top downto 0 do
begin
LocalFollowSet := FState.Following[I];
Result.OrInPlace(LocalFollowSet);
if (Exact) then
begin
// can we see end of rule?
if LocalFollowSet.Member(TToken.EOR_TOKEN_TYPE) then
begin
// Only leave EOR in set if at top (start rule); this lets
// us know if have to include follow(start rule); i.e., EOF
if (I > 0) then
Result.Remove(TToken.EOR_TOKEN_TYPE);
end
else
// can't see end of rule, quit
Break;
end;
end;
end;
function TBaseRecognizer.ComputeContextSensitiveRuleFOLLOW: IBitSet;
begin
Result := CombineFollows(True);
end;
function TBaseRecognizer.ComputeErrorRecoverySet: IBitSet;
begin
Result := CombineFollows(False);
end;
procedure TBaseRecognizer.ConsumeUntil(const Input: IIntStream;
const BitSet: IBitSet);
var
TType: Integer;
begin
TType := Input.LA(1);
while (TType <> TToken.EOF) and (not BitSet.Member(TType)) do
begin
Input.Consume;
TType := Input.LA(1);
end;
end;
constructor TBaseRecognizer.Create(const AState: IRecognizerSharedState);
begin
if (AState = nil) then
Create
else
begin
inherited Create;
FState := AState;
end;
end;
procedure TBaseRecognizer.DisplayRecognitionError(
const TokenNames: TStringArray; const E: ERecognitionException);
var
Hdr, Msg: String;
begin
Hdr := GetErrorHeader(E);
Msg := GetErrorMessage(E, TokenNames);
EmitErrorMessage(Hdr + ' ' + Msg);
end;
procedure TBaseRecognizer.EmitErrorMessage(const Msg: String);
begin
WriteLn(Msg);
end;
procedure TBaseRecognizer.EndBacktrack(const Level: Integer;
const Successful: Boolean);
begin
// No defeault implementation
end;
procedure TBaseRecognizer.EndResync;
begin
// No defeault implementation
end;
function TBaseRecognizer.GetBacktrackingLevel: Integer;
begin
Result := FState.Backtracking;
end;
function TBaseRecognizer.GetCurrentInputSymbol(
const Input: IIntStream): IANTLRInterface;
begin
// No defeault implementation
Result := nil;
end;
function TBaseRecognizer.GetErrorHeader(const E: ERecognitionException): String;
begin
Result := 'line ' + IntToStr(E.Line) + ':' + IntToStr(E.CharPositionInLine);
end;
function TBaseRecognizer.GetErrorMessage(const E: ERecognitionException;
const TokenNames: TStringArray): String;
var
UTE: EUnwantedTokenException absolute E;
MTE: EMissingTokenException absolute E;
MMTE: EMismatchedTokenException absolute E;
MTNE: EMismatchedTreeNodeException absolute E;
NVAE: ENoViableAltException absolute E;
EEE: EEarlyExitException absolute E;
MSE: EMismatchedSetException absolute E;
MNSE: EMismatchedNotSetException absolute E;
FPE: EFailedPredicateException absolute E;
TokenName: String;
begin
Result := E.Message;
if (E is EUnwantedTokenException) then
begin
if (UTE.Expecting = TToken.EOF) then
TokenName := 'EOF'
else
TokenName := TokenNames[UTE.Expecting];
Result := 'extraneous input ' + GetTokenErrorDisplay(UTE.UnexpectedToken)
+ ' expecting ' + TokenName;
end
else
if (E is EMissingTokenException) then
begin
if (MTE.Expecting = TToken.EOF) then
TokenName := 'EOF'
else
TokenName := TokenNames[MTE.Expecting];
Result := 'missing ' + TokenName + ' at ' + GetTokenErrorDisplay(E.Token);
end
else
if (E is EMismatchedTokenException) then
begin
if (MMTE.Expecting = TToken.EOF) then
TokenName := 'EOF'
else
TokenName := TokenNames[MMTE.Expecting];
Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token)
+ ' expecting ' + TokenName;
end
else
if (E is EMismatchedTreeNodeException) then
begin
if (MTNE.Expecting = TToken.EOF) then
Result := 'EOF'
else
Result := TokenNames[MTNE.Expecting];
// The ternary operator is only necessary because of a bug in the .NET framework
Result := 'mismatched tree node: ';
if (MTNE.Node <> nil) and (MTNE.Node.ToString <> '') then
Result := Result + MTNE.Node.ToString;
Result := Result + ' expecting ' + TokenName;
end
else
if (E is ENoViableAltException) then
begin
// for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
// and "(decision="+nvae.decisionNumber+") and
// "state "+nvae.stateNumber
Result := 'no viable alternative at input ' + GetTokenErrorDisplay(E.Token);
end
else
if (E is EEarlyExitException) then
begin
// for development, can add "(decision="+eee.decisionNumber+")"
Result := 'required (...)+ loop did not match anyting at input '
+ GetTokenErrorDisplay(E.Token);
end else
if (E is EMismatchedSetException) then
begin
Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token)
+ ' expecting set ' + MSE.Expecting.ToString;
end
else
if (E is EMismatchedNotSetException) then
begin
Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token)
+ ' expecting set ' + MSE.Expecting.ToString;
end
else
if (E is EFailedPredicateException) then
begin
Result := 'rule ' + FPE.RuleName
+ ' failed predicate: {' + FPE.PredicateText + '}?';
end;
end;
function TBaseRecognizer.GetGrammarFileName: String;
begin
// No defeault implementation
Result := '';
end;
function TBaseRecognizer.GetMissingSymbol(const Input: IIntStream;
const E: ERecognitionException; const ExpectedTokenType: Integer;
const Follow: IBitSet): IANTLRInterface;
begin
// No defeault implementation
Result := nil;
end;
function TBaseRecognizer.GetNumberOfSyntaxErrors: Integer;
begin
Result := FState.SyntaxErrors;
end;
function TBaseRecognizer.GetRuleMemoization(const RuleIndex,
RuleStartIndex: Integer): Integer;
var
Dict: IDictionary;
begin
Dict := FState.RuleMemo[RuleIndex];
if (Dict = nil) then
begin
Dict := TDictionary.Create;
FState.RuleMemo[RuleIndex] := Dict;
end;
if (not Dict.TryGetValue(RuleStartIndex, Result)) then
Result := MEMO_RULE_UNKNOWN;
end;
function TBaseRecognizer.GetRuleMemoizationChaceSize: Integer;
var
RuleMap: IDictionary;
begin
Result := 0;
if Assigned(FState.RuleMemo) then
begin
for RuleMap in FState.RuleMemo do
if Assigned(RuleMap) then
Inc(Result,RuleMap.Count); // how many input indexes are recorded?
end;
end;
function TBaseRecognizer.GetState: IRecognizerSharedState;
begin
Result := FState;
end;
function TBaseRecognizer.GetTokenErrorDisplay(const T: IToken): String;
begin
Result := T.Text;
if (Result = '') then
begin
if (T.TokenType = TToken.EOF) then
Result := ''
else
Result := '<' + IntToStr(T.TokenType) + '>';
end;
Result := ReplaceStr(Result,#10,'\n');
Result := ReplaceStr(Result,#13,'\r');
Result := ReplaceStr(Result,#9,'\t');
Result := '''' + Result + '''';
end;
function TBaseRecognizer.GetTokenNames: TStringArray;
begin
// no default implementation
Result := nil;
end;
function TBaseRecognizer.Match(const Input: IIntStream;
const TokenType: Integer; const Follow: IBitSet): IANTLRInterface;
begin
Result := GetCurrentInputSymbol(Input);
if (Input.LA(1) = TokenType) then
begin
Input.Consume;
FState.ErrorRecovery := False;
FState.Failed := False;
end else
begin
if (FState.Backtracking > 0) then
FState.Failed := True
else
begin
Mismatch(Input, TokenType, Follow);
Result := RecoverFromMismatchedToken(Input, TokenType, Follow);
end;
end;
end;
procedure TBaseRecognizer.MatchAny(const Input: IIntStream);
begin
FState.ErrorRecovery := False;
FState.Failed := False;
Input.Consume;
end;
procedure TBaseRecognizer.Memoize(const Input: IIntStream; const RuleIndex,
RuleStartIndex: Integer);
var
StopTokenIndex: Integer;
Dict: IDictionary;
begin
Dict := FState.RuleMemo[RuleIndex];
if Assigned(Dict) then
begin
if FState.Failed then
StopTokenIndex := MEMO_RULE_FAILED
else
StopTokenIndex := Input.Index - 1;
Dict.AddOrSetValue(RuleStartIndex, StopTokenIndex);
end;
end;
procedure TBaseRecognizer.Mismatch(const Input: IIntStream;
const TokenType: Integer; const Follow: IBitSet);
begin
if MismatchIsUnwantedToken(Input, TokenType) then
raise EUnwantedTokenException.Create(TokenType, Input)
else
if MismatchIsMissingToken(Input, Follow) then
raise EMissingTokenException.Create(TokenType, Input, nil)
else
raise EMismatchedTokenException.Create(TokenType, Input);
end;
function TBaseRecognizer.MismatchIsMissingToken(const Input: IIntStream;
const Follow: IBitSet): Boolean;
var
ViableTokensFollowingThisRule, Follow2: IBitSet;
begin
if (Follow = nil) then
// we have no information about the follow; we can only consume
// a single token and hope for the best
Result := False
else
begin
Follow2 := Follow;
// compute what can follow this grammar element reference
if (Follow.Member(TToken.EOR_TOKEN_TYPE)) then
begin
ViableTokensFollowingThisRule := ComputeContextSensitiveRuleFOLLOW();
Follow2 := Follow.BitSetOr(ViableTokensFollowingThisRule);
if (FState.FollowingStackPointer >= 0) then
// remove EOR if we're not the start symbol
Follow2.Remove(TToken.EOR_TOKEN_TYPE);
end;
// if current token is consistent with what could come after set
// then we know we're missing a token; error recovery is free to
// "insert" the missing token
// BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR
// in follow set to indicate that the fall of the start symbol is
// in the set (EOF can follow).
if (Follow2.Member(Input.LA(1)) or Follow2.Member(TToken.EOR_TOKEN_TYPE)) then
Result := True
else
Result := False;
end;
end;
function TBaseRecognizer.MismatchIsUnwantedToken(const Input: IIntStream;
const TokenType: Integer): Boolean;
begin
Result := (Input.LA(2) = TokenType);
end;
procedure TBaseRecognizer.PushFollow(const FSet: IBitSet);
var
F: TBitSetArray;
I: Integer;
begin
if ((FState.FollowingStackPointer + 1) >= Length(FState.Following)) then
begin
SetLength(F, Length(FState.Following) * 2);
FillChar(F[0], Length(F) * SizeOf(IBitSet), 0);
for I := 0 to Length(FState.Following) - 1 do
F[I] := FState.Following[I];
FState.Following := F;
end;
FState.FollowingStackPointer := FState.FollowingStackPointer + 1;
FState.Following[FState.FollowingStackPointer] := FSet;
end;
procedure TBaseRecognizer.Recover(const Input: IIntStream;
const RE: ERecognitionException);
var
FollowSet: IBitSet;
begin
if (FState.LastErrorIndex = Input.Index) then
// uh oh, another error at same token index; must be a case
// where LT(1) is in the recovery token set so nothing is
// consumed; consume a single token so at least to prevent
// an infinite loop; this is a failsafe.
Input.Consume;
FState.LastErrorIndex := Input.Index;
FollowSet := ComputeErrorRecoverySet;
BeginResync;
ConsumeUntil(Input,FollowSet);
EndResync;
end;
function TBaseRecognizer.RecoverFromMismatchedSet(const Input: IIntStream;
const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface;
begin
if MismatchIsMissingToken(Input, Follow) then
begin
ReportError(E);
// we don't know how to conjure up a token for sets yet
Result := GetMissingSymbol(Input, E, TToken.INVALID_TOKEN_TYPE, Follow);
end
else
begin
// TODO do single token deletion like above for Token mismatch
Result := nil;
raise E;
end;
end;
function TBaseRecognizer.RecoverFromMismatchedToken(const Input: IIntStream;
const TokenType: Integer; const Follow: IBitSet): IANTLRInterface;
var
E: ERecognitionException;
begin
// if next token is what we are looking for then "delete" this token
if MismatchIsUnwantedToken(Input, TokenType) then
begin
E := EUnwantedTokenException.Create(TokenType, Input);
BeginResync;
Input.Consume; // simply delete extra token
EndResync;
ReportError(E); // report after consuming so AW sees the token in the exception
// we want to return the token we're actually matching
Result := GetCurrentInputSymbol(Input);
Input.Consume; // move past ttype token as if all were ok
end
else
begin
// can't recover with single token deletion, try insertion
if MismatchIsMissingToken(Input, Follow) then
begin
E := nil;
Result := GetMissingSymbol(Input, E, TokenType, Follow);
E := EMissingTokenException.Create(TokenType, Input, Result);
ReportError(E); // report after inserting so AW sees the token in the exception
end
else
begin
// even that didn't work; must throw the exception
raise EMismatchedTokenException.Create(TokenType, Input);
end;
end;
end;
procedure TBaseRecognizer.ReportError(const E: ERecognitionException);
begin
// if we've already reported an error and have not matched a token
// yet successfully, don't report any errors.
if (not FState.ErrorRecovery) then
begin
FState.SyntaxErrors := FState.SyntaxErrors + 1; // don't count spurious
FState.ErrorRecovery := True;
DisplayRecognitionError(GetTokenNames, E);
end;
end;
procedure TBaseRecognizer.Reset;
var
I: Integer;
begin
// wack everything related to error recovery
if (FState = nil) then
Exit; // no shared state work to do
FState.FollowingStackPointer := -1;
FState.ErrorRecovery := False;
FState.LastErrorIndex := -1;
FState.Failed := False;
FState.SyntaxErrors := 0;
// wack everything related to backtracking and memoization
FState.Backtracking := 0;
if Assigned(FState.RuleMemo) then
for I := 0 to Length(FState.RuleMemo) - 1 do
begin
// wipe cache
FState.RuleMemo[I] := nil;
end;
end;
function TBaseRecognizer.ToStrings(const Tokens: IList): IList;
var
Token: IToken;
begin
if (Tokens = nil) then
Result := nil
else
begin
Result := TList.Create;
for Token in Tokens do
Result.Add(Token.Text);
end;
end;
procedure TBaseRecognizer.TraceIn(const RuleName: String;
const RuleIndex: Integer; const InputSymbol: String);
begin
Write('enter ' + RuleName + ' ' + InputSymbol);
if (FState.Failed) then
WriteLn(' failed=True');
if (FState.Backtracking > 0) then
Write(' backtracking=' + IntToStr(FState.Backtracking));
WriteLn;
end;
procedure TBaseRecognizer.TraceOut(const RuleName: String;
const RuleIndex: Integer; const InputSymbol: String);
begin
Write('exit ' + RuleName + ' ' + InputSymbol);
if (FState.Failed) then
WriteLn(' failed=True');
if (FState.Backtracking > 0) then
Write(' backtracking=' + IntToStr(FState.Backtracking));
WriteLn;
end;
{ TCommonTokenStream }
procedure TCommonTokenStream.Consume;
begin
if (FP < FTokens.Count) then
begin
Inc(FP);
FP := SkipOffTokenChannels(FP); // leave p on valid token
end;
end;
constructor TCommonTokenStream.Create;
begin
inherited;
FP := -1;
FChannel := TToken.DEFAULT_CHANNEL;
FTokens := TList.Create;
FTokens.Capacity := 500;
end;
constructor TCommonTokenStream.Create(const ATokenSource: ITokenSource);
begin
Create;
FTokenSource := ATokenSource;
end;
procedure TCommonTokenStream.DiscardOffChannelTokens(const Discard: Boolean);
begin
FDiscardOffChannelTokens := Discard;
end;
procedure TCommonTokenStream.DiscardTokenType(const TType: Integer);
begin
if (FDiscardSet = nil) then
FDiscardSet := THashList.Create;
FDiscardSet.Add(TType, TType);
end;
procedure TCommonTokenStream.FillBuffer;
var
Index: Integer;
T: IToken;
Discard: Boolean;
begin
Index := 0;
T := FTokenSource.NextToken;
while Assigned(T) and (T.TokenType <> Integer(cscEOF)) do
begin
Discard := False;
// is there a channel override for token type?
if Assigned(FChannelOverrideMap) then
if FChannelOverrideMap.ContainsKey(T.TokenType) then
T.Channel := FChannelOverrideMap[T.TokenType];
if Assigned(FDiscardSet) and FDiscardSet.ContainsKey(T.TokenType) then
Discard := True
else
if FDiscardOffChannelTokens and (T.Channel <> FChannel) then
Discard := True;
if (not Discard) then
begin
T.TokenIndex := Index;
FTokens.Add(T);
Inc(Index);
end;
T := FTokenSource.NextToken;
end;
// leave p pointing at first token on channel
FP := 0;
FP := SkipOffTokenChannels(FP);
end;
function TCommonTokenStream.Get(const I: Integer): IToken;
begin
Result := FTokens[I];
end;
function TCommonTokenStream.GetSourceName: String;
begin
Result := FTokenSource.SourceName;
end;
function TCommonTokenStream.GetTokens(const Start, Stop: Integer;
const Types: IList): IList;
begin
Result := GetTokens(Start, Stop, TBitSet.Create(Types));
end;
function TCommonTokenStream.GetTokens(const Start, Stop,
TokenType: Integer): IList;
begin
Result := GetTokens(Start, Stop, TBitSet.BitSetOf(TokenType));
end;
function TCommonTokenStream.GetTokens(const Start, Stop: Integer;
const Types: IBitSet): IList;
var
I, StartIndex, StopIndex: Integer;
T: IToken;
begin
if (FP = -1) then
FillBuffer;
StopIndex := Min(Stop,FTokens.Count - 1);
StartIndex := Max(Start,0);
if (StartIndex > StopIndex) then
Result := nil
else
begin
Result := TList.Create;
for I := StartIndex to StopIndex do
begin
T := FTokens[I];
if (Types = nil) or Types.Member(T.TokenType) then
Result.Add(T);
end;
if (Result.Count = 0) then
Result := nil;
end;
end;
function TCommonTokenStream.GetTokens: IList;
begin
if (FP = -1) then
FillBuffer;
Result := FTokens;
end;
function TCommonTokenStream.GetTokens(const Start,
Stop: Integer): IList;
begin
Result := GetTokens(Start, Stop, IBitSet(nil));
end;
function TCommonTokenStream.GetTokenSource: ITokenSource;
begin
Result := FTokenSource;
end;
function TCommonTokenStream.Index: Integer;
begin
Result := FP;
end;
function TCommonTokenStream.LA(I: Integer): Integer;
begin
Result := LT(I).TokenType;
end;
function TCommonTokenStream.LAChar(I: Integer): Char;
begin
Result := Char(LA(I));
end;
function TCommonTokenStream.LB(const K: Integer): IToken;
var
I, N: Integer;
begin
if (FP = -1) then
FillBuffer;
if (K = 0) then
Result := nil
else
if ((FP - K) < 0) then
Result := nil
else
begin
I := FP;
N := 1;
// find k good tokens looking backwards
while (N <= K) do
begin
// skip off-channel tokens
I := SkipOffTokenChannelsReverse(I - 1); // leave p on valid token
Inc(N);
end;
if (I < 0) then
Result := nil
else
Result := FTokens[I];
end;
end;
function TCommonTokenStream.LT(const K: Integer): IToken;
var
I, N: Integer;
begin
if (FP = -1) then
FillBuffer;
if (K = 0) then
Result := nil
else
if (K < 0) then
Result := LB(-K)
else
if ((FP + K - 1) >= FTokens.Count) then
Result := TToken.EOF_TOKEN
else
begin
I := FP;
N := 1;
// find k good tokens
while (N < K) do
begin
// skip off-channel tokens
I := SkipOffTokenChannels(I + 1); // leave p on valid token
Inc(N);
end;
if (I >= FTokens.Count) then
Result := TToken.EOF_TOKEN
else
Result := FTokens[I];
end;
end;
function TCommonTokenStream.Mark: Integer;
begin
if (FP = -1) then
FillBuffer;
FLastMarker := Index;
Result := FLastMarker;
end;
procedure TCommonTokenStream.Release(const Marker: Integer);
begin
// no resources to release
end;
procedure TCommonTokenStream.Reset;
begin
FP := 0;
FLastMarker := 0;
end;
procedure TCommonTokenStream.Rewind(const Marker: Integer);
begin
Seek(Marker);
end;
procedure TCommonTokenStream.Rewind;
begin
Seek(FLastMarker);
end;
procedure TCommonTokenStream.Seek(const Index: Integer);
begin
FP := Index;
end;
procedure TCommonTokenStream.SetTokenSource(const Value: ITokenSource);
begin
FTokenSource := Value;
FTokens.Clear;
FP := -1;
FChannel := TToken.DEFAULT_CHANNEL;
end;
procedure TCommonTokenStream.SetTokenTypeChannel(const TType, Channel: Integer);
begin
if (FChannelOverrideMap = nil) then
FChannelOverrideMap := TDictionary.Create;
FChannelOverrideMap[TType] := Channel;
end;
function TCommonTokenStream.Size: Integer;
begin
Result := FTokens.Count;
end;
function TCommonTokenStream.SkipOffTokenChannels(const I: Integer): Integer;
var
N: Integer;
begin
Result := I;
N := FTokens.Count;
while (Result < N) and (FTokens[Result].Channel <> FChannel) do
Inc(Result);
end;
function TCommonTokenStream.SkipOffTokenChannelsReverse(
const I: Integer): Integer;
begin
Result := I;
while (Result >= 0) and (FTokens[Result].Channel <> FChannel) do
Dec(Result);
end;
function TCommonTokenStream.ToString: String;
begin
if (FP = -1) then
FillBuffer;
Result := ToString(0, FTokens.Count - 1);
end;
function TCommonTokenStream.ToString(const Start, Stop: Integer): String;
var
I, Finish: Integer;
Buf: TStringBuilder;
T: IToken;
begin
if (Start < 0) or (Stop < 0) then
Result := ''
else
begin
if (FP = -1) then
FillBuffer;
if (Stop >= FTokens.Count) then
Finish := FTokens.Count - 1
else
Finish := Stop;
Buf := TStringBuilder.Create;
try
for I := Start to Finish do
begin
T := FTokens[I];
Buf.Append(T.Text);
end;
Result := Buf.ToString;
finally
Buf.Free;
end;
end;
end;
function TCommonTokenStream.ToString(const Start, Stop: IToken): String;
begin
if Assigned(Start) and Assigned(Stop) then
Result := ToString(Start.TokenIndex, Stop.TokenIndex)
else
Result := '';
end;
constructor TCommonTokenStream.Create(const ATokenSource: ITokenSource;
const AChannel: Integer);
begin
Create(ATokenSource);
FChannel := AChannel;
end;
constructor TCommonTokenStream.Create(const ALexer: ILexer);
begin
Create(ALexer as ITokenSource);
end;
constructor TCommonTokenStream.Create(const ALexer: ILexer;
const AChannel: Integer);
begin
Create(ALexer as ITokenSource, AChannel);
end;
{ TDFA }
function TDFA.Description: String;
begin
Result := 'n/a';
end;
procedure TDFA.Error(const NVAE: ENoViableAltException);
begin
// No default implementation
end;
function TDFA.GetRecognizer: IBaseRecognizer;
begin
Result := IBaseRecognizer(FRecognizer);
end;
function TDFA.GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler;
begin
Result := FSpecialStateTransitionHandler;
end;
procedure TDFA.NoViableAlt(const S: Integer; const Input: IIntStream);
var
NVAE: ENoViableAltException;
begin
if (Recognizer.State.Backtracking > 0) then
Recognizer.State.Failed := True
else
begin
NVAE := ENoViableAltException.Create(Description, FDecisionNumber, S, Input);
Error(NVAE);
raise NVAE;
end;
end;
function TDFA.Predict(const Input: IIntStream): Integer;
var
Mark, S, SNext, SpecialState: Integer;
C: Char;
begin
Result := 0;
Mark := Input.Mark; // remember where decision started in input
S := 0; // we always start at s0
try
while True do
begin
SpecialState := FSpecial[S];
if (SpecialState >= 0) then
begin
S := FSpecialStateTransitionHandler(Self, SpecialState, Input);
if (S = -1) then
begin
NoViableAlt(S, Input);
Exit;
end;
Input.Consume;
Continue;
end;
if (FAccept[S] >= 1) then
begin
Result := FAccept[S];
Exit;
end;
// look for a normal char transition
C := Char(Input.LA(1)); // -1 == \uFFFF, all tokens fit in 65000 space
if (C >= FMin[S]) and (C <= FMax[S]) then
begin
SNext := FTransition[S,Integer(C) - Integer(FMin[S])]; // move to next state
if (SNext < 0) then
begin
// was in range but not a normal transition
// must check EOT, which is like the else clause.
// eot[s]>=0 indicates that an EOT edge goes to another
// state.
if (FEOT[S] >= 0) then // EOT Transition to accept state?
begin
S := FEOT[S];
Input.Consume;
// TODO: I had this as return accept[eot[s]]
// which assumed here that the EOT edge always
// went to an accept...faster to do this, but
// what about predicated edges coming from EOT
// target?
Continue;
end;
NoViableAlt(S, Input);
Exit;
end;
S := SNext;
Input.Consume;
Continue;
end;
if (FEOT[S] >= 0) then
begin
// EOT Transition?
S := FEOT[S];
Input.Consume;
Continue;
end;
if (C = Char(TToken.EOF)) and (FEOF[S] >= 0) then
begin
// EOF Transition to accept state?
Result := FAccept[FEOF[S]];
Exit;
end;
// not in range and not EOF/EOT, must be invalid symbol
NoViableAlt(S, Input);
Exit;
end;
finally
Input.Rewind(Mark);
end;
end;
procedure TDFA.SetRecognizer(const Value: IBaseRecognizer);
begin
FRecognizer := Pointer(Value);
end;
procedure TDFA.SetSpecialStateTransitionHandler(
const Value: TSpecialStateTransitionHandler);
begin
FSpecialStateTransitionHandler := Value;
end;
function TDFA.SpecialStateTransition(const S: Integer;
const Input: IIntStream): Integer;
begin
// No default implementation
Result := -1;
end;
function TDFA.SpecialTransition(const State, Symbol: Integer): Integer;
begin
Result := 0;
end;
class function TDFA.UnpackEncodedString(
const EncodedString: String): TSmallintArray;
var
I, J, DI, Size: Integer;
N, V: Char;
begin
Size := 0;
I := 1;
while (I <= Length(EncodedString)) do
begin
Inc(Size,Integer(EncodedString[I]));
Inc(I,2);
end;
SetLength(Result,Size);
DI := 0;
I := 1;
while (I <= Length(EncodedString)) do
begin
N := EncodedString[I];
V := EncodedString[I + 1];
// add v n times to data
for J := 1 to Integer(N) do
begin
Result[DI] := Smallint(V);
Inc(DI);
end;
Inc(I,2);
end;
end;
class function TDFA.UnpackEncodedStringArray(
const EncodedStrings: array of String): TSmallintMatrix;
var
I: Integer;
begin
SetLength(Result,Length(EncodedStrings));
for I := 0 to Length(EncodedStrings) - 1 do
Result[I] := UnpackEncodedString(EncodedStrings[I]);
end;
class function TDFA.UnpackEncodedStringArray(
const EncodedStrings: TStringArray): TSmallintMatrix;
var
I: Integer;
begin
SetLength(Result,Length(EncodedStrings));
for I := 0 to Length(EncodedStrings) - 1 do
Result[I] := UnpackEncodedString(EncodedStrings[I]);
end;
class function TDFA.UnpackEncodedStringToUnsignedChars(
const EncodedString: String): TCharArray;
var
I, J, DI, Size: Integer;
N, V: Char;
begin
Size := 0;
I := 1;
while (I <= Length(EncodedString)) do
begin
Inc(Size,Integer(EncodedString[I]));
Inc(I,2);
end;
SetLength(Result,Size);
DI := 0;
I := 1;
while (I <= Length(EncodedString)) do
begin
N := EncodedString[I];
V := EncodedString[I + 1];
// add v n times to data
for J := 1 to Integer(N) do
begin
Result[DI] := V;
Inc(DI);
end;
Inc(I,2);
end;
end;
{ TLexer }
constructor TLexer.Create;
begin
inherited;
end;
constructor TLexer.Create(const AInput: ICharStream);
begin
inherited Create;
FInput := AInput;
end;
constructor TLexer.Create(const AInput: ICharStream;
const AState: IRecognizerSharedState);
begin
inherited Create(AState);
FInput := AInput;
end;
function TLexer.Emit: IToken;
begin
Result := TCommonToken.Create(FInput, FState.TokenType, FState.Channel,
FState.TokenStartCharIndex, GetCharIndex - 1);
Result.Line := FState.TokenStartLine;
Result.Text := FState.Text;
Result.CharPositionInLine := FState.TokenStartCharPositionInLine;
Emit(Result);
end;
procedure TLexer.Emit(const Token: IToken);
begin
FState.Token := Token;
end;
function TLexer.GetCharErrorDisplay(const C: Integer): String;
begin
case C of
// TToken.EOF
TOKEN_dot_EOF:
Result := '';
10:
Result := '\n';
9:
Result := '\t';
13:
Result := '\r';
else
Result := Char(C);
end;
Result := '''' + Result + '''';
end;
function TLexer.GetCharIndex: Integer;
begin
Result := FInput.Index;
end;
function TLexer.GetCharPositionInLine: Integer;
begin
Result := FInput.CharPositionInLine;
end;
function TLexer.GetCharStream: ICharStream;
begin
Result := FInput;
end;
function TLexer.GetErrorMessage(const E: ERecognitionException;
const TokenNames: TStringArray): String;
var
MTE: EMismatchedTokenException absolute E;
NVAE: ENoViableAltException absolute E;
EEE: EEarlyExitException absolute E;
MNSE: EMismatchedNotSetException absolute E;
MSE: EMismatchedSetException absolute E;
MRE: EMismatchedRangeException absolute E;
begin
if (E is EMismatchedTokenException) then
Result := 'mismatched character ' + GetCharErrorDisplay(E.Character)
+ ' expecting ' + GetCharErrorDisplay(MTE.Expecting)
else
if (E is ENoViableAltException) then
// for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
// and "(decision="+nvae.decisionNumber+") and
// "state "+nvae.stateNumber
Result := 'no viable alternative at character ' + GetCharErrorDisplay(NVAE.Character)
else
if (E is EEarlyExitException) then
// for development, can add "(decision="+eee.decisionNumber+")"
Result := 'required (...)+ loop did not match anything at character '
+ GetCharErrorDisplay(EEE.Character)
else
if (E is EMismatchedNotSetException) then
Result := 'mismatched character ' + GetCharErrorDisplay(MNSE.Character)
+ ' expecting set ' + MNSE.Expecting.ToString
else
if (E is EMismatchedSetException) then
Result := 'mismatched character ' + GetCharErrorDisplay(MSE.Character)
+ ' expecting set ' + MSE.Expecting.ToString
else
if (E is EMismatchedRangeException) then
Result := 'mismatched character ' + GetCharErrorDisplay(MRE.Character)
+ ' expecting set ' + GetCharErrorDisplay(MRE.A) + '..'
+ GetCharErrorDisplay(MRE.B)
else
Result := inherited GetErrorMessage(E, TokenNames);
end;
function TLexer.GetInput: IIntStream;
begin
Result := FInput;
end;
function TLexer.GetLine: Integer;
begin
Result := FInput.Line;
end;
function TLexer.GetSourceName: String;
begin
Result := FInput.SourceName;
end;
function TLexer.GetText: String;
begin
if (FState.Text <> '') then
Result := FState.Text
else
Result := FInput.Substring(FState.TokenStartCharIndex, GetCharIndex - 1)
end;
procedure TLexer.Match(const S: String);
var
I: Integer;
MTE: EMismatchedTokenException;
begin
for I := 1 to Length(S) do
begin
if (FInput.LA(1) <> Integer(S[I])) then
begin
if (FState.Backtracking > 0) then
begin
FState.Failed := True;
Exit;
end;
MTE := EMismatchedTokenException.Create(Integer(S[I]), FInput);
Recover(MTE); // don't really recover; just consume in lexer
raise MTE;
end;
FInput.Consume;
FState.Failed := False;
end;
end;
procedure TLexer.Match(const C: Integer);
var
MTE: EMismatchedTokenException;
begin
if (FInput.LA(1) <> C) then
begin
if (FState.Backtracking > 0) then
begin
FState.Failed := True;
Exit;
end;
MTE := EMismatchedTokenException.Create(C, FInput);
Recover(MTE);
raise MTE;
end;
FInput.Consume;
FState.Failed := False;
end;
procedure TLexer.MatchAny;
begin
FInput.Consume;
end;
procedure TLexer.MatchRange(const A, B: Integer);
var
MRE: EMismatchedRangeException;
begin
if (FInput.LA(1) < A) or (FInput.LA(1) > B) then
begin
if (FState.Backtracking > 0) then
begin
FState.Failed := True;
Exit;
end;
MRE := EMismatchedRangeException.Create(A, B, FInput);
Recover(MRE);
raise MRE;
end;
FInput.Consume;
FState.Failed := False;
end;
function TLexer.NextToken: IToken;
begin
while True do
begin
FState.Token := nil;
FState.Channel := TToken.DEFAULT_CHANNEL;
FState.TokenStartCharIndex := FInput.Index;
FState.TokenStartCharPositionInLine := FInput.CharPositionInLine;
FState.TokenStartLine := Finput.Line;
FState.Text := '';
if (FInput.LA(1) = Integer(cscEOF)) then
begin
Result := TToken.EOF_TOKEN;
Exit;
end;
try
DoTokens;
if (FState.Token = nil) then
Emit
else
if (FState.Token = TToken.SKIP_TOKEN) then
Continue;
Exit(FState.Token);
except
on NVA: ENoViableAltException do
begin
ReportError(NVA);
Recover(NVA); // throw out current char and try again
end;
on RE: ERecognitionException do
begin
ReportError(RE);
// Match() routine has already called Recover()
end;
end;
end;
end;
procedure TLexer.Recover(const RE: ERecognitionException);
begin
FInput.Consume;
end;
procedure TLexer.ReportError(const E: ERecognitionException);
begin
DisplayRecognitionError(GetTokenNames, E);
end;
procedure TLexer.Reset;
begin
inherited; // reset all recognizer state variables
// wack Lexer state variables
if Assigned(FInput) then
FInput.Seek(0); // rewind the input
if (FState = nil) then
Exit; // no shared state work to do
FState.Token := nil;
FState.TokenType := TToken.INVALID_TOKEN_TYPE;
FState.Channel := TToken.DEFAULT_CHANNEL;
FState.TokenStartCharIndex := -1;
FState.TokenStartCharPositionInLine := -1;
FState.TokenStartLine := -1;
FState.Text := '';
end;
procedure TLexer.SetCharStream(const Value: ICharStream);
begin
FInput := nil;
Reset;
FInput := Value;
end;
procedure TLexer.SetText(const Value: String);
begin
FState.Text := Value;
end;
procedure TLexer.Skip;
begin
FState.Token := TToken.SKIP_TOKEN;
end;
procedure TLexer.TraceIn(const RuleName: String; const RuleIndex: Integer);
var
InputSymbol: String;
begin
InputSymbol := Char(FInput.LT(1)) + ' line=' + IntToStr(GetLine) + ':'
+ IntToStr(GetCharPositionInLine);
inherited TraceIn(RuleName, RuleIndex, InputSymbol);
end;
procedure TLexer.TraceOut(const RuleName: String; const RuleIndex: Integer);
var
InputSymbol: String;
begin
InputSymbol := Char(FInput.LT(1)) + ' line=' + IntToStr(GetLine) + ':'
+ IntToStr(GetCharPositionInLine);
inherited TraceOut(RuleName, RuleIndex, InputSymbol);
end;
{ TParser }
constructor TParser.Create(const AInput: ITokenStream);
begin
inherited Create; // highlight that we go to base class to set state object
SetTokenStream(AInput);
end;
constructor TParser.Create(const AInput: ITokenStream;
const AState: IRecognizerSharedState);
begin
inherited Create(AState); // share the state object with another parser
SetTokenStream(AInput);
end;
function TParser.GetCurrentInputSymbol(
const Input: IIntStream): IANTLRInterface;
begin
Result := FInput.LT(1)
end;
function TParser.GetInput: IIntStream;
begin
Result := FInput;
end;
function TParser.GetMissingSymbol(const Input: IIntStream;
const E: ERecognitionException; const ExpectedTokenType: Integer;
const Follow: IBitSet): IANTLRInterface;
var
TokenText: String;
T: ICommonToken;
Current: IToken;
begin
if (ExpectedTokenType = TToken.EOF) then
TokenText := ''
else
TokenText := '';
T := TCommonToken.Create(ExpectedTokenType, TokenText);
Current := FInput.LT(1);
if (Current.TokenType = TToken.EOF) then
Current := FInput.LT(-1);
T.Line := Current.Line;
T.CharPositionInLine := Current.CharPositionInLine;
T.Channel := DEFAULT_TOKEN_CHANNEL;
Result := T;
end;
function TParser.GetSourceName: String;
begin
Result := FInput.SourceName;
end;
function TParser.GetTokenStream: ITokenStream;
begin
Result := FInput;
end;
procedure TParser.Reset;
begin
inherited; // reset all recognizer state variables
if Assigned(FInput) then
FInput.Seek(0); // rewind the input
end;
procedure TParser.SetTokenStream(const Value: ITokenStream);
begin
FInput := nil;
Reset;
FInput := Value;
end;
procedure TParser.TraceIn(const RuleName: String; const RuleIndex: Integer);
begin
inherited TraceIn(RuleName, RuleIndex, FInput.LT(1).ToString);
end;
procedure TParser.TraceOut(const RuleName: String; const RuleIndex: Integer);
begin
inherited TraceOut(RuleName, RuleIndex, FInput.LT(1).ToString);
end;
{ TRuleReturnScope }
function TRuleReturnScope.GetStart: IANTLRInterface;
begin
Result := nil;
end;
function TRuleReturnScope.GetStop: IANTLRInterface;
begin
Result := nil;
end;
function TRuleReturnScope.GetTemplate: IANTLRInterface;
begin
Result := nil;
end;
function TRuleReturnScope.GetTree: IANTLRInterface;
begin
Result := nil;
end;
procedure TRuleReturnScope.SetStart(const Value: IANTLRInterface);
begin
raise EInvalidOperation.Create('Setter has not been defined for this property.');
end;
procedure TRuleReturnScope.SetStop(const Value: IANTLRInterface);
begin
raise EInvalidOperation.Create('Setter has not been defined for this property.');
end;
procedure TRuleReturnScope.SetTree(const Value: IANTLRInterface);
begin
raise EInvalidOperation.Create('Setter has not been defined for this property.');
end;
{ TParserRuleReturnScope }
function TParserRuleReturnScope.GetStart: IANTLRInterface;
begin
Result := FStart;
end;
function TParserRuleReturnScope.GetStop: IANTLRInterface;
begin
Result := FStop;
end;
procedure TParserRuleReturnScope.SetStart(const Value: IANTLRInterface);
begin
FStart := Value as IToken;
end;
procedure TParserRuleReturnScope.SetStop(const Value: IANTLRInterface);
begin
FStop := Value as IToken;
end;
{ TTokenRewriteStream }
procedure TTokenRewriteStream.Delete(const Start, Stop: IToken);
begin
Delete(DEFAULT_PROGRAM_NAME, Start, Stop);
end;
procedure TTokenRewriteStream.Delete(const IndexT: IToken);
begin
Delete(DEFAULT_PROGRAM_NAME, IndexT, IndexT);
end;
constructor TTokenRewriteStream.Create;
begin
inherited;
Init;
end;
constructor TTokenRewriteStream.Create(const ATokenSource: ITokenSource);
begin
inherited Create(ATokenSource);
Init;
end;
constructor TTokenRewriteStream.Create(const ALexer: ILexer);
begin
Create(ALexer as ITokenSource);
end;
constructor TTokenRewriteStream.Create(const ALexer: ILexer;
const AChannel: Integer);
begin
Create(ALexer as ITokenSource, AChannel);
end;
function TTokenRewriteStream.CatOpText(const A, B: IANTLRInterface): IANTLRInterface;
var
X, Y: String;
begin
if Assigned(A) then
X := A.ToString
else
X := '';
if Assigned(B) then
Y := B.ToString
else
Y := '';
Result := TANTLRString.Create(X + Y);
end;
constructor TTokenRewriteStream.Create(const ATokenSource: ITokenSource;
const AChannel: Integer);
begin
inherited Create(ATokenSource, AChannel);
Init;
end;
procedure TTokenRewriteStream.Delete(const ProgramName: String; const Start,
Stop: IToken);
begin
Replace(ProgramName, Start, Stop, nil);
end;
procedure TTokenRewriteStream.Delete(const ProgramName: String; const Start,
Stop: Integer);
begin
Replace(ProgramName, Start, Stop, nil);
end;
procedure TTokenRewriteStream.Delete(const Start, Stop: Integer);
begin
Delete(DEFAULT_PROGRAM_NAME, Start, Stop);
end;
procedure TTokenRewriteStream.Delete(const Index: Integer);
begin
Delete(DEFAULT_PROGRAM_NAME, Index, Index);
end;
procedure TTokenRewriteStream.DeleteProgram(const ProgramName: String);
begin
Rollback(ProgramName, MIN_TOKEN_INDEX);
end;
procedure TTokenRewriteStream.DeleteProgram;
begin
DeleteProgram(DEFAULT_PROGRAM_NAME);
end;
function TTokenRewriteStream.GetLastRewriteTokenIndex: Integer;
begin
Result := GetLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
end;
function TTokenRewriteStream.GetKindOfOps(
const Rewrites: IList;
const Kind: TGUID): IList;
begin
Result := GetKindOfOps(Rewrites, Kind, Rewrites.Count);
end;
function TTokenRewriteStream.GetKindOfOps(
const Rewrites: IList; const Kind: TGUID;
const Before: Integer): IList;
var
I: Integer;
Op: IRewriteOperation;
Obj: IInterface;
begin
Result := TList.Create;
I := 0;
while (I < Before) and (I < Rewrites.Count) do
begin
Op := Rewrites[I];
if Assigned(Op) and (Op.QueryInterface(Kind, Obj) = 0) then
Result.Add(Op);
Inc(I);
end;
end;
function TTokenRewriteStream.GetLastRewriteTokenIndex(
const ProgramName: String): Integer;
begin
if (not FLastRewriteTokenIndexes.TryGetValue(ProgramName, Result)) then
Result := -1;
end;
function TTokenRewriteStream.GetProgram(
const Name: String): IList;
var
InstructionStream: IList;
begin
InstructionStream := FPrograms[Name];
if (InstructionStream = nil) then
InstructionStream := InitializeProgram(Name);
Result := InstructionStream;
end;
procedure TTokenRewriteStream.InsertAfter(const ProgramName: String;
const T: IToken; const Text: IANTLRInterface);
begin
InsertAfter(ProgramName, T.TokenIndex, Text);
end;
procedure TTokenRewriteStream.Init;
var
List: IList;
begin
FPrograms := TDictionary>.Create;
List := TList.Create;
List.Capacity := PROGRAM_INIT_SIZE;
FPrograms.Add(DEFAULT_PROGRAM_NAME, List);
FLastRewriteTokenIndexes := TDictionary.Create;
end;
function TTokenRewriteStream.InitializeProgram(
const Name: String): IList;
begin
Result := TList.Create;
Result.Capacity := PROGRAM_INIT_SIZE;
FPrograms[Name] := Result;
end;
procedure TTokenRewriteStream.InsertAfter(const ProgramName: String;
const Index: Integer; const Text: IANTLRInterface);
begin
// to insert after, just insert before next index (even if past end)
InsertBefore(ProgramName, Index + 1, Text);
end;
procedure TTokenRewriteStream.InsertAfter(const T: IToken;
const Text: IANTLRInterface);
begin
InsertAfter(DEFAULT_PROGRAM_NAME, T, Text);
end;
procedure TTokenRewriteStream.InsertAfter(const Index: Integer;
const Text: IANTLRInterface);
begin
InsertAfter(DEFAULT_PROGRAM_NAME, Index, Text);
end;
procedure TTokenRewriteStream.InsertBefore(const Index: Integer;
const Text: IANTLRInterface);
begin
InsertBefore(DEFAULT_PROGRAM_NAME, Index, Text);
end;
procedure TTokenRewriteStream.InsertBefore(const ProgramName: String;
const T: IToken; const Text: IANTLRInterface);
begin
InsertBefore(ProgramName, T.TokenIndex, Text);
end;
procedure TTokenRewriteStream.InsertBefore(const ProgramName: String;
const Index: Integer; const Text: IANTLRInterface);
var
Op: IRewriteOperation;
begin
Op := TInsertBeforeOp.Create(Index, Text, Self);
GetProgram(ProgramName).Add(Op);
end;
procedure TTokenRewriteStream.InsertBefore(const T: IToken;
const Text: IANTLRInterface);
begin
InsertBefore(DEFAULT_PROGRAM_NAME, T, Text);
end;
procedure TTokenRewriteStream.Replace(const Start, Stop: IToken;
const Text: IANTLRInterface);
begin
Replace(DEFAULT_PROGRAM_NAME, Stop, Stop, Text);
end;
procedure TTokenRewriteStream.Replace(const IndexT: IToken;
const Text: IANTLRInterface);
begin
Replace(DEFAULT_PROGRAM_NAME, IndexT, IndexT, Text);
end;
procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start,
Stop: Integer; const Text: IANTLRInterface);
var
Op: IRewriteOperation;
Rewrites: IList;
begin
if (Start > Stop) or (Start < 0) or (Stop < 0) or (Stop >= GetTokens.Count) then
raise EArgumentOutOfRangeException.Create('replace: range invalid: '
+ IntToStr(Start) + '..' + IntToStr(Stop) + '(size='
+ IntToStr(GetTokens.Count) + ')');
Op := TReplaceOp.Create(Start, Stop, Text, Self);
Rewrites := GetProgram(ProgramName);
Op.InstructionIndex := Rewrites.Count;
Rewrites.Add(Op);
end;
function TTokenRewriteStream.ReduceToSingleOperationPerIndex(
const Rewrites: IList): IDictionary;
var
I, J: Integer;
Op: IRewriteOperation;
ROp, PrevROp: IReplaceOp;
IOp, PrevIOp: IInsertBeforeOp;
Inserts, PrevInserts, PrevReplaces: IList;
Disjoint, Same: Boolean;
begin
// WALK REPLACES
for I := 0 to Rewrites.Count - 1 do
begin
Op := Rewrites[I];
if (Op = nil) then
Continue;
if (not Supports(Op, IReplaceOp, ROp)) then
Continue;
// Wipe prior inserts within range
Inserts := GetKindOfOps(Rewrites, IInsertBeforeOp, I);
for J := 0 to Inserts.Count - 1 do
begin
IOp := Inserts[J] as IInsertBeforeOp;
if (IOp.Index >= ROp.Index) and (IOp.Index <= ROp.LastIndex) then
begin
// delete insert as it's a no-op.
Rewrites[IOp.InstructionIndex] := nil;
end;
end;
// Drop any prior replaces contained within
PrevReplaces := GetKindOfOps(Rewrites, IReplaceOp, I);
for J := 0 to PrevReplaces.Count - 1 do
begin
PrevROp := PrevReplaces[J] as IReplaceOp;
if (PrevROp.Index >= ROp.Index) and (PrevROp.LastIndex <= ROp.LastIndex) then
begin
// delete replace as it's a no-op.
Rewrites[PrevROp.InstructionIndex] := nil;
Continue;
end;
// throw exception unless disjoint or identical
Disjoint := (PrevROp.LastIndex < ROp.Index) or (PrevROp.Index > ROp.LastIndex);
Same := (PrevROp.Index = ROp.Index) and (PrevROp.LastIndex = ROp.LastIndex);
if (not Disjoint) and (not Same) then
raise EArgumentOutOfRangeException.Create('replace of boundaries of '
+ ROp.ToString + ' overlap with previous ' + PrevROp.ToString);
end;
end;
// WALK INSERTS
for I := 0 to Rewrites.Count - 1 do
begin
Op := Rewrites[I];
if (Op = nil) then
Continue;
if (not Supports(Op, IInsertBeforeOp, IOp)) then
Continue;
// combine current insert with prior if any at same index
PrevInserts := GetKindOfOps(Rewrites, IInsertBeforeOp, I);
for J := 0 to PrevInserts.Count - 1 do
begin
PrevIOp := PrevInserts[J] as IInsertBeforeOp;
if (PrevIOp.Index = IOp.Index) then
begin
// combine objects
// convert to strings...we're in process of toString'ing
// whole token buffer so no lazy eval issue with any templates
IOp.Text := CatOpText(IOp.Text, PrevIOp.Text);
// delete redundant prior insert
Rewrites[PrevIOp.InstructionIndex] := nil;
end;
end;
// look for replaces where iop.index is in range; error
PrevReplaces := GetKindOfOps(Rewrites, IReplaceOp, I);
for J := 0 to PrevReplaces.Count - 1 do
begin
Rop := PrevReplaces[J] as IReplaceOp;
if (IOp.Index = ROp.Index) then
begin
ROp.Text := CatOpText(IOp.Text, ROp.Text);
Rewrites[I] := nil; // delete current insert
Continue;
end;
if (IOp.Index >= ROp.Index) and (IOp.Index <= ROp.LastIndex) then
raise EArgumentOutOfRangeException.Create('insert op '
+ IOp.ToString + ' within boundaries of previous ' + ROp.ToString);
end;
end;
Result := TDictionary.Create;
for Op in Rewrites do
begin
if (Op = nil) then
Continue; // ignore deleted ops
if (Result.ContainsKey(Op.Index)) then
raise Exception.Create('should only be one op per index');
Result.Add(Op.Index, Op);
end;
end;
procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start,
Stop: IToken; const Text: IANTLRInterface);
begin
Replace(ProgramName, Start.TokenIndex, Stop.TokenIndex, Text);
end;
procedure TTokenRewriteStream.Replace(const Index: Integer;
const Text: IANTLRInterface);
begin
Replace(DEFAULT_PROGRAM_NAME, Index, Index, Text);
end;
procedure TTokenRewriteStream.Replace(const Start, Stop: Integer;
const Text: IANTLRInterface);
begin
Replace(DEFAULT_PROGRAM_NAME, Start, Stop, Text);
end;
procedure TTokenRewriteStream.Rollback(const InstructionIndex: Integer);
begin
Rollback(DEFAULT_PROGRAM_NAME, InstructionIndex);
end;
procedure TTokenRewriteStream.Rollback(const ProgramName: String;
const InstructionIndex: Integer);
var
InstructionStream: IList;
begin
InstructionStream := FPrograms[ProgramName];
if Assigned(InstructionStream) then
FPrograms[ProgramName] := InstructionStream.GetRange(MIN_TOKEN_INDEX,
InstructionIndex - MIN_TOKEN_INDEX);
end;
procedure TTokenRewriteStream.SetLastRewriteTokenIndex(
const ProgramName: String; const I: Integer);
begin
FLastRewriteTokenIndexes[ProgramName] := I;
end;
function TTokenRewriteStream.ToDebugString: String;
begin
Result := ToDebugString(MIN_TOKEN_INDEX, Size - 1);
end;
function TTokenRewriteStream.ToDebugString(const Start, Stop: Integer): String;
var
Buf: TStringBuilder;
I: Integer;
begin
Buf := TStringBuilder.Create;
try
if (Start >= MIN_TOKEN_INDEX) then
for I := Start to Min(Stop,GetTokens.Count - 1) do
Buf.Append(Get(I).ToString);
finally
Buf.Free;
end;
end;
function TTokenRewriteStream.ToOriginalString: String;
begin
Result := ToOriginalString(MIN_TOKEN_INDEX, Size - 1);
end;
function TTokenRewriteStream.ToOriginalString(const Start,
Stop: Integer): String;
var
Buf: TStringBuilder;
I: Integer;
begin
Buf := TStringBuilder.Create;
try
if (Start >= MIN_TOKEN_INDEX) then
for I := Start to Min(Stop, GetTokens.Count - 1) do
Buf.Append(Get(I).Text);
Result := Buf.ToString;
finally
Buf.Free;
end;
end;
function TTokenRewriteStream.ToString: String;
begin
Result := ToString(MIN_TOKEN_INDEX, Size - 1);
end;
function TTokenRewriteStream.ToString(const ProgramName: String): String;
begin
Result := ToString(ProgramName, MIN_TOKEN_INDEX, Size - 1);
end;
function TTokenRewriteStream.ToString(const ProgramName: String; const Start,
Stop: Integer): String;
var
Rewrites: IList;
I, StartIndex, StopIndex: Integer;
IndexToOp: IDictionary;
Buf: TStringBuilder;
Tokens: IList;
T: IToken;
Op: IRewriteOperation;
Pair: TPair;
begin
Rewrites := FPrograms[ProgramName];
Tokens := GetTokens;
// ensure start/end are in range
StopIndex := Min(Stop,Tokens.Count - 1);
StartIndex := Max(Start,0);
if (Rewrites = nil) or (Rewrites.Count = 0) then
begin
// no instructions to execute
Result := ToOriginalString(StartIndex, StopIndex);
Exit;
end;
Buf := TStringBuilder.Create;
try
// First, optimize instruction stream
IndexToOp := ReduceToSingleOperationPerIndex(Rewrites);
// Walk buffer, executing instructions and emitting tokens
I := StartIndex;
while (I <= StopIndex) and (I < Tokens.Count) do
begin
if (not IndexToOp.TryGetValue(I, Op)) then
Op := nil;
IndexToOp.Remove(I); // remove so any left have index size-1
T := Tokens[I];
if (Op = nil) then
begin
// no operation at that index, just dump token
Buf.Append(T.Text);
Inc(I); // move to next token
end
else
I := Op.Execute(Buf); // execute operation and skip
end;
// include stuff after end if it's last index in buffer
// So, if they did an insertAfter(lastValidIndex, "foo"), include
// foo if end==lastValidIndex.
if (StopIndex = Tokens.Count - 1) then
begin
// Scan any remaining operations after last token
// should be included (they will be inserts).
for Pair in IndexToOp do
begin
if (Pair.Value.Index >= Tokens.Count - 1) then
Buf.Append(Pair.Value.Text.ToString);
end;
end;
Result := Buf.ToString;
finally
Buf.Free;
end;
end;
function TTokenRewriteStream.ToString(const Start, Stop: Integer): String;
begin
Result := ToString(DEFAULT_PROGRAM_NAME, Start, Stop);
end;
procedure TTokenRewriteStream.InsertBefore(const Index: Integer;
const Text: String);
var
S: IANTLRString;
begin
S := TANTLRString.Create(Text);
InsertBefore(Index, S);
end;
procedure TTokenRewriteStream.InsertBefore(const T: IToken; const Text: String);
var
S: IANTLRString;
begin
S := TANTLRString.Create(Text);
InsertBefore(T, S);
end;
procedure TTokenRewriteStream.InsertBefore(const ProgramName: String;
const Index: Integer; const Text: String);
var
S: IANTLRString;
begin
S := TANTLRString.Create(Text);
InsertBefore(ProgramName, Index, S);
end;
procedure TTokenRewriteStream.InsertBefore(const ProgramName: String;
const T: IToken; const Text: String);
var
S: IANTLRString;
begin
S := TANTLRString.Create(Text);
InsertBefore(ProgramName, T, S);
end;
procedure TTokenRewriteStream.InsertAfter(const Index: Integer;
const Text: String);
var
S: IANTLRString;
begin
S := TANTLRString.Create(Text);
InsertAfter(Index,S);
end;
procedure TTokenRewriteStream.InsertAfter(const T: IToken; const Text: String);
var
S: IANTLRString;
begin
S := TANTLRString.Create(Text);
InsertAfter(T,S);
end;
procedure TTokenRewriteStream.InsertAfter(const ProgramName: String;
const Index: Integer; const Text: String);
var
S: IANTLRString;
begin
S := TANTLRString.Create(Text);
InsertAfter(ProgramName,Index,S);
end;
procedure TTokenRewriteStream.InsertAfter(const ProgramName: String;
const T: IToken; const Text: String);
var
S: IANTLRString;
begin
S := TANTLRString.Create(Text);
InsertAfter(ProgramName,T,S);
end;
procedure TTokenRewriteStream.Replace(const IndexT: IToken; const Text: String);
var
S: IANTLRString;
begin
S := TANTLRString.Create(Text);
Replace(IndexT, S);
end;
procedure TTokenRewriteStream.Replace(const Start, Stop: Integer;
const Text: String);
var
S: IANTLRString;
begin
S := TANTLRString.Create(Text);
Replace(Start, Stop, S);
end;
procedure TTokenRewriteStream.Replace(const Index: Integer; const Text: String);
var
S: IANTLRString;
begin
S := TANTLRString.Create(Text);
Replace(Index, S);
end;
procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start,
Stop: IToken; const Text: String);
var
S: IANTLRString;
begin
S := TANTLRString.Create(Text);
Replace(ProgramName, Start, Stop, S);
end;
procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start,
Stop: Integer; const Text: String);
var
S: IANTLRString;
begin
S := TANTLRString.Create(Text);
Replace(ProgramName, Start, Stop, S);
end;
procedure TTokenRewriteStream.Replace(const Start, Stop: IToken;
const Text: String);
var
S: IANTLRString;
begin
S := TANTLRString.Create(Text);
Replace(Start, Stop, S);
end;
{ TTokenRewriteStream.TRewriteOperation }
constructor TTokenRewriteStream.TRewriteOperation.Create(const AIndex: Integer;
const AText: IANTLRInterface; const AParent: ITokenRewriteStream);
begin
inherited Create;
FIndex := AIndex;
FText := AText;
FParent := Pointer(AParent);
end;
function TTokenRewriteStream.TRewriteOperation.Execute(
const Buf: TStringBuilder): Integer;
begin
Result := FIndex;
end;
function TTokenRewriteStream.TRewriteOperation.GetIndex: Integer;
begin
Result := FIndex;
end;
function TTokenRewriteStream.TRewriteOperation.GetInstructionIndex: Integer;
begin
Result := FInstructionIndex;
end;
function TTokenRewriteStream.TRewriteOperation.GetParent: ITokenRewriteStream;
begin
Result := ITokenRewriteStream(FParent);
end;
function TTokenRewriteStream.TRewriteOperation.GetText: IANTLRInterface;
begin
Result := FText;
end;
procedure TTokenRewriteStream.TRewriteOperation.SetIndex(const Value: Integer);
begin
FIndex := Value;
end;
procedure TTokenRewriteStream.TRewriteOperation.SetInstructionIndex(
const Value: Integer);
begin
FInstructionIndex := Value;
end;
procedure TTokenRewriteStream.TRewriteOperation.SetParent(
const Value: ITokenRewriteStream);
begin
FParent := Pointer(Value);
end;
procedure TTokenRewriteStream.TRewriteOperation.SetText(
const Value: IANTLRInterface);
begin
FText := Value;
end;
function TTokenRewriteStream.TRewriteOperation.ToString: String;
var
OpName: String;
DollarIndex: Integer;
begin
OpName := ClassName;
DollarIndex := Pos('$',OpName) - 1; // Delphi strings are 1-based
if (DollarIndex >= 0) then
OpName := Copy(OpName,DollarIndex + 1,Length(OpName) - (DollarIndex + 1));
Result := '<' + OpName + '@' + IntToStr(FIndex) + ':"' + FText.ToString + '">';
end;
{ TTokenRewriteStream.TRewriteOpComparer }
function TTokenRewriteStream.TRewriteOpComparer.Compare(const Left,
Right: T): Integer;
begin
if (Left.GetIndex < Right.GetIndex) then
Result := -1
else
if (Left.GetIndex > Right.GetIndex) then
Result := 1
else
Result := 0;
end;
{ TTokenRewriteStream.TInsertBeforeOp }
function TTokenRewriteStream.TInsertBeforeOp.Execute(
const Buf: TStringBuilder): Integer;
begin
Buf.Append(Text.ToString);
Buf.Append(Parent.Get(Index).Text);
Result := Index + 1;
end;
{ TTokenRewriteStream.TReplaceOp }
constructor TTokenRewriteStream.TReplaceOp.Create(const AStart, AStop: Integer;
const AText: IANTLRInterface; const AParent: ITokenRewriteStream);
begin
inherited Create(AStart, AText, AParent);
FLastIndex := AStop;
end;
function TTokenRewriteStream.TReplaceOp.Execute(
const Buf: TStringBuilder): Integer;
begin
if (Text <> nil) then
Buf.Append(Text.ToString);
Result := FLastIndex + 1;
end;
function TTokenRewriteStream.TReplaceOp.GetLastIndex: Integer;
begin
Result := FLastIndex;
end;
procedure TTokenRewriteStream.TReplaceOp.SetLastIndex(const Value: Integer);
begin
FLastIndex := Value;
end;
function TTokenRewriteStream.TReplaceOp.ToString: String;
begin
Result := '';
end;
{ TTokenRewriteStream.TDeleteOp }
function TTokenRewriteStream.TDeleteOp.ToString: String;
begin
Result := '';
end;
{ Utilities }
var
EmptyToken: IToken = nil;
EmptyRuleReturnScope: IRuleReturnScope = nil;
function Def(const X: IToken): IToken; overload;
begin
if Assigned(X) then
Result := X
else
begin
if (EmptyToken = nil) then
EmptyToken := TCommonToken.Create;
Result := EmptyToken;
end;
end;
function Def(const X: IRuleReturnScope): IRuleReturnScope;
begin
if Assigned(X) then
Result := X
else
begin
if (EmptyRuleReturnScope = nil) then
EmptyRuleReturnScope := TRuleReturnScope.Create;
Result := EmptyRuleReturnScope;
end;
end;
initialization
TToken.Initialize;
end.