1 /** \file 2 * Defines the interface for an ANTLR3 common token stream. Custom token streams should create 3 * one of these and then override any functions by installing their own pointers 4 * to implement the various functions. 5 */ 6 #ifndef _ANTLR3_TOKENSTREAM_HPP 7 #define _ANTLR3_TOKENSTREAM_HPP 8 9 // [The "BSD licence"] 10 // Copyright (c) 2005-2009 Gokulakannan Somasundaram, ElectronDB 11 12 // 13 // All rights reserved. 14 // 15 // Redistribution and use in source and binary forms, with or without 16 // modification, are permitted provided that the following conditions 17 // are met: 18 // 1. Redistributions of source code must retain the above copyright 19 // notice, this list of conditions and the following disclaimer. 20 // 2. Redistributions in binary form must reproduce the above copyright 21 // notice, this list of conditions and the following disclaimer in the 22 // documentation and/or other materials provided with the distribution. 23 // 3. The name of the author may not be used to endorse or promote products 24 // derived from this software without specific prior written permission. 25 // 26 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 27 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 28 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 29 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 30 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 31 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 35 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 37 #include "antlr3defs.hpp" 38 39 /** Definition of a token source, which has a pointer to a function that 40 * returns the next token (using a token factory if it is going to be 41 * efficient) and a pointer to an ANTLR3_INPUT_STREAM. This is slightly 42 * different to the Java interface because we have no way to implement 43 * multiple interfaces without defining them in the interface structure 44 * or casting (void *), which is too convoluted. 45 */ 46 ANTLR_BEGIN_NAMESPACE() 47 48 //We are not making it subclass AllocPolicy, as this will always be a base class 49 template<class ImplTraits> 50 class TokenSource 51 { 52 public: 53 typedef typename ImplTraits::CommonTokenType TokenType; 54 typedef TokenType CommonTokenType; 55 typedef typename ImplTraits::StringType StringType; 56 typedef typename ImplTraits::LexerType LexerType; 57 58 private: 59 /** A special pre-allocated token, which signifies End Of Tokens. Because this must 60 * be set up with the current input index and so on, we embed the structure and 61 * return the address of it. It is marked as factoryMade, so that it is never 62 * attempted to be freed. 63 */ 64 TokenType m_eofToken; 65 66 /// A special pre-allocated token, which is returned by mTokens() if the 67 /// lexer rule said to just skip the generated token altogether. 68 /// Having this single token stops us wasting memory by have the token factory 69 /// actually create something that we are going to SKIP(); anyway. 70 /// 71 TokenType m_skipToken; 72 73 /** When the token source is constructed, it is populated with the file 74 * name from whence the tokens were produced by the lexer. This pointer is a 75 * copy of the one supplied by the CharStream (and may be NULL) so should 76 * not be manipulated other than to copy or print it. 77 */ 78 StringType m_fileName; 79 80 public: 81 TokenType& get_eofToken(); 82 const TokenType& get_eofToken() const; 83 TokenType& get_skipToken(); 84 StringType& get_fileName(); 85 LexerType* get_super(); 86 87 void set_fileName( const StringType& fileName ); 88 89 /** 90 * \brief 91 * Default implementation of the nextToken() call for a lexer. 92 * 93 * \param toksource 94 * Points to the implementation of a token source. The lexer is 95 * addressed by the super structure pointer. 96 * 97 * \returns 98 * The next token in the current input stream or the EOF token 99 * if there are no more tokens in any input stream in the stack. 100 * 101 * Write detailed description for nextToken here. 102 * 103 * \remarks 104 * Write remarks for nextToken here. 105 * 106 * \see nextTokenStr 107 */ 108 TokenType* nextToken(); 109 CommonTokenType* nextToken( BoolForwarder<true> /*isFiltered*/ ); 110 CommonTokenType* nextToken( BoolForwarder<false> /*isFiltered*/ ); 111 112 /// 113 /// \brief 114 /// Returns the next available token from the current input stream. 115 /// 116 /// \param toksource 117 /// Points to the implementation of a token source. The lexer is 118 /// addressed by the super structure pointer. 119 /// 120 /// \returns 121 /// The next token in the current input stream or the EOF token 122 /// if there are no more tokens. 123 /// 124 /// \remarks 125 /// Write remarks for nextToken here. 126 /// 127 /// \see nextToken 128 /// 129 TokenType* nextTokenStr(); 130 131 protected: 132 TokenSource(); 133 }; 134 135 /** Definition of the ANTLR3 common token stream interface. 136 * \remark 137 * Much of the documentation for this interface is stolen from Ter's Java implementation. 138 */ 139 template<class ImplTraits> 140 class TokenStream : public ImplTraits::TokenIntStreamType 141 { 142 public: 143 typedef typename ImplTraits::TokenSourceType TokenSourceType; 144 typedef typename ImplTraits::TokenIntStreamType IntStreamType; 145 typedef typename ImplTraits::CommonTokenType TokenType; 146 typedef TokenType UnitType; 147 typedef typename ImplTraits::StringType StringType; 148 typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType; 149 typedef typename ImplTraits::TokenStreamType TokenStreamType; 150 typedef typename ImplTraits::ParserType ComponentType; 151 152 protected: 153 /** Pointer to the token source for this stream 154 */ 155 TokenSourceType* m_tokenSource; 156 157 /// Debugger interface, is this is a debugging token stream 158 /// 159 DebugEventListenerType* m_debugger; 160 161 /// Indicates the initial stream state for dbgConsume() 162 /// 163 bool m_initialStreamState; 164 165 public: 166 TokenStream(TokenSourceType* source, DebugEventListenerType* debugger); 167 IntStreamType* get_istream(); 168 TokenSourceType* get_tokenSource() const; 169 void set_tokenSource( TokenSourceType* tokenSource ); 170 171 /** Get Token at current input pointer + i ahead where i=1 is next Token. 172 * i<0 indicates tokens in the past. So -1 is previous token and -2 is 173 * two tokens ago. LT(0) is undefined. For i>=n, return Token.EOFToken. 174 * Return null for LT(0) and any index that results in an absolute address 175 * that is negative. 176 */ 177 const TokenType* _LT(ANTLR_INT32 k); 178 179 /** Where is this stream pulling tokens from? This is not the name, but 180 * a pointer into an interface that contains a ANTLR3_TOKEN_SOURCE interface. 181 * The Token Source interface contains a pointer to the input stream and a pointer 182 * to a function that returns the next token. 183 */ 184 TokenSourceType* getTokenSource(); 185 186 /** Function that installs a token source for teh stream 187 */ 188 void setTokenSource(TokenSourceType* tokenSource); 189 190 /** Return the text of all the tokens in the stream, as the old tramp in 191 * Leeds market used to say; "Get the lot!" 192 */ 193 StringType toString(); 194 195 /** Return the text of all tokens from start to stop, inclusive. 196 * If the stream does not buffer all the tokens then it can just 197 * return an empty ANTLR3_STRING or NULL; Grammars should not access $ruleLabel.text in 198 * an action in that case. 199 */ 200 StringType toStringSS(ANTLR_MARKER start, ANTLR_MARKER stop); 201 202 /** Because the user is not required to use a token with an index stored 203 * in it, we must provide a means for two token objects themselves to 204 * indicate the start/end location. Most often this will just delegate 205 * to the other toString(int,int). This is also parallel with 206 * the pTREENODE_STREAM->toString(Object,Object). 207 */ 208 StringType toStringTT(const TokenType* start, const TokenType* stop); 209 210 211 /** Function that sets the token stream into debugging mode 212 */ 213 void setDebugListener(DebugEventListenerType* debugger); 214 215 TokenStream(); 216 217 }; 218 219 /** Common token stream is an implementation of ANTLR_TOKEN_STREAM for the default 220 * parsers and recognizers. You may of course build your own implementation if 221 * you are so inclined. 222 */ 223 template<bool TOKENS_ACCESSED_FROM_OWNING_RULE, class ListType, class MapType> 224 class TokenStoreSelector 225 { 226 public: 227 typedef ListType TokensType; 228 }; 229 230 template<class ListType, class MapType> 231 class TokenStoreSelector<true, ListType, MapType> 232 { 233 public: 234 typedef MapType TokensType; 235 }; 236 237 template<class ImplTraits> 238 class CommonTokenStream : public TokenStream<ImplTraits> 239 { 240 public: 241 typedef typename ImplTraits::AllocPolicyType AllocPolicyType; 242 typedef typename ImplTraits::BitsetType BitsetType; 243 typedef typename ImplTraits::CommonTokenType TokenType; 244 typedef typename ImplTraits::TokenSourceType TokenSourceType; 245 typedef typename ImplTraits::DebugEventListenerType DebugEventListenerType; 246 typedef typename AllocPolicyType::template ListType<TokenType> TokensListType; 247 typedef typename AllocPolicyType::template OrderedMapType<ANTLR_MARKER, TokenType> TokensMapType; 248 typedef typename TokenStoreSelector< ImplTraits::TOKENS_ACCESSED_FROM_OWNING_RULE, 249 TokensListType, TokensMapType >::TokensType TokensType; 250 251 typedef typename AllocPolicyType::template UnOrderedMapType<ANTLR_UINT32, ANTLR_UINT32> ChannelOverridesType; 252 typedef typename AllocPolicyType::template OrderedSetType<ANTLR_UINT32> DiscardSetType; 253 typedef typename AllocPolicyType::template ListType<ANTLR_UINT32> IntListType; 254 typedef TokenStream<ImplTraits> BaseType; 255 256 private: 257 /** Records every single token pulled from the source indexed by the token index. 258 * There might be more efficient ways to do this, such as referencing directly in to 259 * the token factory pools, but for now this is convenient and the ANTLR3_LIST is not 260 * a huge overhead as it only stores pointers anyway, but allows for iterations and 261 * so on. 262 */ 263 TokensType m_tokens; 264 265 /** Override map of tokens. If a token type has an entry in here, then 266 * the pointer in the table points to an int, being the override channel number 267 * that should always be used for this token type. 268 */ 269 ChannelOverridesType m_channelOverrides; 270 271 /** Discared set. If a token has an entry in this table, then it is thrown 272 * away (data pointer is always NULL). 273 */ 274 DiscardSetType m_discardSet; 275 276 /* The channel number that this token stream is tuned to. For instance, whitespace 277 * is usually tuned to channel 99, which no token stream would normally tune to and 278 * so it is thrown away. 279 */ 280 ANTLR_UINT32 m_channel; 281 282 /** The index into the tokens list of the current token (the next one that will be 283 * consumed. p = -1 indicates that the token list is empty. 284 */ 285 ANTLR_INT32 m_p; 286 287 /* The total number of tokens issued till now. For streams that delete tokens, 288 this helps in issuing the index 289 */ 290 ANTLR_UINT32 m_nissued; 291 292 /** If this flag is set to true, then tokens that the stream sees that are not 293 * in the channel that this stream is tuned to, are not tracked in the 294 * tokens table. When set to false, ALL tokens are added to the tracking. 295 */ 296 bool m_discardOffChannel; 297 298 public: 299 CommonTokenStream(ANTLR_UINT32 hint, TokenSourceType* source = NULL, 300 DebugEventListenerType* debugger = NULL); 301 ~CommonTokenStream(); 302 TokensType& get_tokens(); 303 const TokensType& get_tokens() const; 304 DiscardSetType& get_discardSet(); 305 const DiscardSetType& get_discardSet() const; 306 ANTLR_INT32 get_p() const; 307 void set_p( ANTLR_INT32 p ); 308 void inc_p(); 309 void dec_p(); 310 311 /** A simple filter mechanism whereby you can tell this token stream 312 * to force all tokens of type ttype to be on channel. For example, 313 * when interpreting, we cannot exec actions so we need to tell 314 * the stream to force all WS and NEWLINE to be a different, ignored 315 * channel. 316 */ 317 void setTokenTypeChannel(ANTLR_UINT32 ttype, ANTLR_UINT32 channel); 318 319 /** Add a particular token type to the discard set. If a token is found to belong 320 * to this set, then it is skipped/thrown away 321 */ 322 void discardTokenType(ANTLR_INT32 ttype); 323 324 //This will discard tokens of a particular rule after the rule execution completion 325 void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop ); 326 void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, 327 BoolForwarder<true> tokens_accessed_from_owning_rule ); 328 void discardTokens( ANTLR_MARKER start, ANTLR_MARKER stop, 329 BoolForwarder<false> tokens_accessed_from_owning_rule ); 330 331 void insertToken( const TokenType& tok ); 332 void insertToken( const TokenType& tok, BoolForwarder<true> tokens_accessed_from_owning_rule ); 333 void insertToken( const TokenType& tok, BoolForwarder<false> tokens_accessed_from_owning_rule ); 334 335 /** Get a token at an absolute index i; 0..n-1. This is really only 336 * needed for profiling and debugging and token stream rewriting. 337 * If you don't want to buffer up tokens, then this method makes no 338 * sense for you. Naturally you can't use the rewrite stream feature. 339 * I believe DebugTokenStream can easily be altered to not use 340 * this method, removing the dependency. 341 */ 342 const TokenType* get(ANTLR_MARKER i); 343 const TokenType* getToken(ANTLR_MARKER i); 344 const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder<true> tokens_accessed_from_owning_rule ); 345 const TokenType* getToken( ANTLR_MARKER tok_idx, BoolForwarder<false> tokens_accessed_from_owning_rule ); 346 347 /** Signal to discard off channel tokens from here on in. 348 */ 349 void discardOffChannelToks(bool discard); 350 351 /** Function that returns a pointer to the ANTLR3_LIST of all tokens 352 * in the stream (this causes the buffer to fill if we have not get any yet) 353 */ 354 TokensType* getTokens(); 355 356 /** Function that returns all the tokens between a start and a stop index. 357 */ 358 void getTokenRange(ANTLR_UINT32 start, ANTLR_UINT32 stop, TokensListType& tokenRange); 359 360 /** Function that returns all the tokens indicated by the specified bitset, within a range of tokens 361 */ 362 void getTokensSet(ANTLR_UINT32 start, ANTLR_UINT32 stop, BitsetType* types, TokensListType& tokenSet); 363 364 /** Function that returns all the tokens indicated by being a member of the supplied List 365 */ 366 void getTokensList(ANTLR_UINT32 start, ANTLR_UINT32 stop, 367 const IntListType& list, TokensListType& tokenList); 368 369 /** Function that returns all tokens of a certain type within a range. 370 */ 371 void getTokensType(ANTLR_UINT32 start, ANTLR_UINT32 stop, ANTLR_UINT32 type, TokensListType& tokens); 372 373 /** Function that resets the token stream so that it can be reused, but 374 * but that does not free up any resources, such as the token factory 375 * the factory pool and so on. This prevents the need to keep freeing 376 * and reallocating the token pools if the thing you are building is 377 * a multi-shot dameon or somethign like that. It is much faster to 378 * just reuse all the vectors. 379 */ 380 void reset(); 381 382 const TokenType* LB(ANTLR_INT32 k); 383 384 385 void fillBufferExt(); 386 void fillBuffer(); 387 388 bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<true> tokens_accessed_from_owning_rule ); 389 bool hasReachedFillbufferTarget( ANTLR_UINT32 cnt, BoolForwarder<false> tokens_accessed_from_owning_rule ); 390 391 ANTLR_UINT32 skipOffTokenChannels(ANTLR_INT32 i); 392 ANTLR_UINT32 skipOffTokenChannelsReverse(ANTLR_INT32 x); 393 ANTLR_MARKER index_impl(); 394 }; 395 396 class TokenAccessException : public std::exception 397 { what() const398 virtual const char* what() const throw() 399 { 400 return " Attempted access on Deleted Token"; 401 } 402 }; 403 404 ANTLR_END_NAMESPACE() 405 406 #include "antlr3tokenstream.inl" 407 408 #endif 409