1 /** \file 2 * While the C runtime does not need to model the state of 3 * multiple lexers and parsers in the same way as the Java runtime does 4 * it is no overhead to reflect that model. In fact the 5 * C runtime has always been able to share recognizer state. 6 * 7 * This 'class' therefore defines all the elements of a recognizer 8 * (either lexer, parser or tree parser) that are need to 9 * track the current recognition state. Multiple recognizers 10 * may then share this state, for instance when one grammar 11 * imports another. 12 */ 13 14 #ifndef _ANTLR3_RECOGNIZER_SHARED_STATE_H 15 #define _ANTLR3_RECOGNIZER_SHARED_STATE_H 16 17 // [The "BSD licence"] 18 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC 19 // http://www.temporal-wave.com 20 // http://www.linkedin.com/in/jimidle 21 // 22 // All rights reserved. 23 // 24 // Redistribution and use in source and binary forms, with or without 25 // modification, are permitted provided that the following conditions 26 // are met: 27 // 1. Redistributions of source code must retain the above copyright 28 // notice, this list of conditions and the following disclaimer. 29 // 2. Redistributions in binary form must reproduce the above copyright 30 // notice, this list of conditions and the following disclaimer in the 31 // documentation and/or other materials provided with the distribution. 32 // 3. The name of the author may not be used to endorse or promote products 33 // derived from this software without specific prior written permission. 34 // 35 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 36 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 37 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 38 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 39 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 40 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 41 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 42 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 43 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 44 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 46 #include <antlr3defs.h> 47 48 #ifdef __cplusplus 49 extern "C" { 50 #endif 51 52 /** All the data elements required to track the current state 53 * of any recognizer (lexer, parser, tree parser). 54 * May be share between multiple recognizers such that 55 * grammar inheritance is easily supported. 56 */ 57 typedef struct ANTLR3_RECOGNIZER_SHARED_STATE_struct 58 { 59 /** If set to ANTLR3_TRUE then the recognizer has an exception 60 * condition (this is tested by the generated code for the rules of 61 * the grammar). 62 */ 63 ANTLR3_BOOLEAN error; 64 65 /** Points to the first in a possible chain of exceptions that the 66 * recognizer has discovered. 67 */ 68 pANTLR3_EXCEPTION exception; 69 70 /** Track around a hint from the creator of the recognizer as to how big this 71 * thing is going to get, as the actress said to the bishop. This allows us 72 * to tune hash tables accordingly. This might not be the best place for this 73 * in the end but we will see. 74 */ 75 ANTLR3_UINT32 sizeHint; 76 77 /** Track the set of token types that can follow any rule invocation. 78 * Stack structure, to support: List<BitSet>. 79 */ 80 pANTLR3_STACK following; 81 82 83 /** This is true when we see an error and before having successfully 84 * matched a token. Prevents generation of more than one error message 85 * per error. 86 */ 87 ANTLR3_BOOLEAN errorRecovery; 88 89 /** The index into the input stream where the last error occurred. 90 * This is used to prevent infinite loops where an error is found 91 * but no token is consumed during recovery...another error is found, 92 * ad nauseam. This is a failsafe mechanism to guarantee that at least 93 * one token/tree node is consumed for two errors. 94 */ 95 ANTLR3_MARKER lastErrorIndex; 96 97 /** In lieu of a return value, this indicates that a rule or token 98 * has failed to match. Reset to false upon valid token match. 99 */ 100 ANTLR3_BOOLEAN failed; 101 102 /** When the recognizer terminates, the error handling functions 103 * will have incremented this value if any error occurred (that was displayed). It can then be 104 * used by the grammar programmer without having to use static globals. 105 */ 106 ANTLR3_UINT32 errorCount; 107 108 /** If 0, no backtracking is going on. Safe to exec actions etc... 109 * If >0 then it's the level of backtracking. 110 */ 111 ANTLR3_INT32 backtracking; 112 113 /** ANTLR3_VECTOR of ANTLR3_LIST for rule memoizing. 114 * Tracks the stop token index for each rule. ruleMemo[ruleIndex] is 115 * the memoization table for ruleIndex. For key ruleStartIndex, you 116 * get back the stop token for associated rule or MEMO_RULE_FAILED. 117 * 118 * This is only used if rule memoization is on. 119 */ 120 pANTLR3_INT_TRIE ruleMemo; 121 122 /** Pointer to an array of token names 123 * that are generally useful in error reporting. The generated parsers install 124 * this pointer. The table it points to is statically allocated as 8 bit ascii 125 * at parser compile time - grammar token names are thus restricted in character 126 * sets, which does not seem to terrible. 127 */ 128 pANTLR3_UINT8 * tokenNames; 129 130 /** User programmable pointer that can be used for instance as a place to 131 * store some tracking structure specific to the grammar that would not normally 132 * be available to the error handling functions. 133 */ 134 void * userp; 135 136 /** The goal of all lexer rules/methods is to create a token object. 137 * This is an instance variable as multiple rules may collaborate to 138 * create a single token. For example, NUM : INT | FLOAT ; 139 * In this case, you want the INT or FLOAT rule to set token and not 140 * have it reset to a NUM token in rule NUM. 141 */ 142 pANTLR3_COMMON_TOKEN token; 143 144 /** The goal of all lexer rules being to create a token, then a lexer 145 * needs to build a token factory to create them. 146 */ 147 pANTLR3_TOKEN_FACTORY tokFactory; 148 149 /** A lexer is a source of tokens, produced by all the generated (or 150 * hand crafted if you like) matching rules. As such it needs to provide 151 * a token source interface implementation. 152 */ 153 pANTLR3_TOKEN_SOURCE tokSource; 154 155 /** The channel number for the current token 156 */ 157 ANTLR3_UINT32 channel; 158 159 /** The token type for the current token 160 */ 161 ANTLR3_UINT32 type; 162 163 /** The input line (where it makes sense) on which the first character of the current 164 * token resides. 165 */ 166 ANTLR3_INT32 tokenStartLine; 167 168 /** The character position of the first character of the current token 169 * within the line specified by tokenStartLine 170 */ 171 ANTLR3_INT32 tokenStartCharPositionInLine; 172 173 /** What character index in the stream did the current token start at? 174 * Needed, for example, to get the text for current token. Set at 175 * the start of nextToken. 176 */ 177 ANTLR3_MARKER tokenStartCharIndex; 178 179 /** Text for the current token. This can be overridden by setting this 180 * variable directly or by using the SETTEXT() macro (preferred) in your 181 * lexer rules. 182 */ 183 pANTLR3_STRING text; 184 185 /** User controlled variables that will be installed in a newly created 186 * token. 187 */ 188 ANTLR3_UINT32 user1, user2, user3; 189 void * custom; 190 191 /** Input stream stack, which allows the C programmer to switch input streams 192 * easily and allow the standard nextToken() implementation to deal with it 193 * as this is a common requirement. 194 */ 195 pANTLR3_STACK streams; 196 197 /// A stack of token/tree rewrite streams that are available for use 198 /// by a parser or tree parser that is using rewrites to generate 199 /// an AST. This saves each rule in the recongizer from having to 200 /// allocate and deallocate rewtire streams on entry and exit. As 201 /// the parser recurses throgh the rules it will reach a steady state 202 /// of the maximum number of allocated streams, which instead of 203 /// deallocating them at rule exit, it will place on this stack for 204 /// reuse. The streams are then all finally freed when this stack 205 /// is freed. 206 /// 207 pANTLR3_VECTOR rStreams; 208 209 } 210 ANTLR3_RECOGNIZER_SHARED_STATE; 211 212 #ifdef __cplusplus 213 } 214 #endif 215 216 #endif 217 218 219