1ANTLR_BEGIN_NAMESPACE() 2 3template<class ImplTraits> 4InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* fileName, ANTLR_UINT32 encoding) 5{ 6 // First order of business is to read the file into some buffer space 7 // as just straight 8 bit bytes. Then we will work out the encoding and 8 // byte order and adjust the API functions that are installed for the 9 // default 8Bit stream accordingly. 10 // 11 this->createFileStream(fileName); 12 13 // We have the data in memory now so we can deal with it according to 14 // the encoding scheme we were given by the user. 15 // 16 m_encoding = encoding; 17 18 // Now we need to work out the endian type and install any 19 // API functions that differ from 8Bit 20 // 21 this->setupInputStream(); 22 23 // Now we can set up the file name 24 // 25 BaseType::m_streamName = (const char* )fileName; 26 m_fileName = BaseType::m_streamName; 27} 28 29template<class ImplTraits> 30InputStream<ImplTraits>::InputStream(const ANTLR_UINT8* data, ANTLR_UINT32 encoding, ANTLR_UINT32 size, ANTLR_UINT8* name) 31{ 32 // First order of business is to set up the stream and install the data pointer. 33 // Then we will work out the encoding and byte order and adjust the API functions that are installed for the 34 // default 8Bit stream accordingly. 35 // 36 this->createStringStream(data); 37 38 // Size (in bytes) of the given 'string' 39 // 40 m_sizeBuf = size; 41 42 // We have the data in memory now so we can deal with it according to 43 // the encoding scheme we were given by the user. 44 // 45 m_encoding = encoding; 46 47 // Now we need to work out the endian type and install any 48 // API functions that differ from 8Bit 49 // 50 this->setupInputStream(); 51 52 // Now we can set up the file name 53 // 54 BaseType::m_streamName = (name == NULL ) ? "" : (const char*)name; 55 m_fileName = BaseType::m_streamName; 56 57} 58 59template<class ImplTraits> 60void InputStream<ImplTraits>::createStringStream(const ANTLR_UINT8* data) 61{ 62 if (data == NULL) 63 { 64 ParseNullStringException ex; 65 throw ex; 66 } 67 68 // Structure was allocated correctly, now we can install the pointer 69 // 70 m_data = data; 71 m_isAllocated = false; 72 73 // Call the common 8 bit input stream handler 74 // initialization. 75 // 76 this->genericSetupStream(); 77} 78 79template<class ImplTraits> 80void InputStream<ImplTraits>::createFileStream(const ANTLR_UINT8* fileName) 81{ 82 if (fileName == NULL) 83 { 84 ParseFileAbsentException ex; 85 throw ex; 86 } 87 88 // Structure was allocated correctly, now we can read the file. 89 // 90 FileUtils<ImplTraits>::AntlrRead8Bit(this, fileName); 91 92 // Call the common 8 bit input stream handler 93 // initialization. 94 // 95 this->genericSetupStream(); 96} 97 98template<class ImplTraits> 99void InputStream<ImplTraits>::genericSetupStream() 100{ 101 this->set_charByteSize(1); 102 103 /* Set up the input stream brand new 104 */ 105 this->reset(); 106 107 /* Install default line separator character (it can be replaced 108 * by the grammar programmer later) 109 */ 110 this->set_newLineChar((ANTLR_UCHAR)'\n'); 111} 112 113template<class ImplTraits> 114InputStream<ImplTraits>::~InputStream() 115{ 116 // Free the input stream buffer if we allocated it 117 // 118 if (m_isAllocated && (m_data != NULL)) 119 AllocPolicyType::free((void*)m_data); //const_cast is required 120} 121 122template<class ImplTraits> 123ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_data() const 124{ 125 return m_data; 126} 127template<class ImplTraits> 128ANTLR_INLINE bool InputStream<ImplTraits>::get_isAllocated() const 129{ 130 return m_isAllocated; 131} 132template<class ImplTraits> 133ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_nextChar() const 134{ 135 return m_nextChar; 136} 137template<class ImplTraits> 138ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_sizeBuf() const 139{ 140 return m_sizeBuf; 141} 142template<class ImplTraits> 143ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_line() const 144{ 145 return m_line; 146} 147template<class ImplTraits> 148ANTLR_INLINE const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::get_currentLine() const 149{ 150 return m_currentLine; 151} 152template<class ImplTraits> 153ANTLR_INLINE ANTLR_INT32 InputStream<ImplTraits>::get_charPositionInLine() const 154{ 155 return m_charPositionInLine; 156} 157template<class ImplTraits> 158ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_markDepth() const 159{ 160 return m_markDepth; 161} 162template<class ImplTraits> 163ANTLR_INLINE typename InputStream<ImplTraits>::MarkersType& InputStream<ImplTraits>::get_markers() 164{ 165 return m_markers; 166} 167template<class ImplTraits> 168ANTLR_INLINE const typename InputStream<ImplTraits>::StringType& InputStream<ImplTraits>::get_fileName() const 169{ 170 return m_fileName; 171} 172template<class ImplTraits> 173ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_fileNo() const 174{ 175 return m_fileNo; 176} 177template<class ImplTraits> 178ANTLR_INLINE ANTLR_UCHAR InputStream<ImplTraits>::get_newlineChar() const 179{ 180 return m_newlineChar; 181} 182template<class ImplTraits> 183ANTLR_INLINE ANTLR_UINT8 InputStream<ImplTraits>::get_charByteSize() const 184{ 185 return m_charByteSize; 186} 187template<class ImplTraits> 188ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_encoding() const 189{ 190 return m_encoding; 191} 192template<class ImplTraits> 193ANTLR_INLINE void InputStream<ImplTraits>::set_data( DataType* data ) 194{ 195 m_data = data; 196} 197template<class ImplTraits> 198ANTLR_INLINE void InputStream<ImplTraits>::set_isAllocated( bool isAllocated ) 199{ 200 m_isAllocated = isAllocated; 201} 202template<class ImplTraits> 203ANTLR_INLINE void InputStream<ImplTraits>::set_nextChar( const DataType* nextChar ) 204{ 205 m_nextChar = nextChar; 206} 207template<class ImplTraits> 208ANTLR_INLINE void InputStream<ImplTraits>::set_sizeBuf( ANTLR_UINT32 sizeBuf ) 209{ 210 m_sizeBuf = sizeBuf; 211} 212template<class ImplTraits> 213ANTLR_INLINE void InputStream<ImplTraits>::set_line( ANTLR_UINT32 line ) 214{ 215 m_line = line; 216} 217template<class ImplTraits> 218ANTLR_INLINE void InputStream<ImplTraits>::set_currentLine( const DataType* currentLine ) 219{ 220 m_currentLine = currentLine; 221} 222template<class ImplTraits> 223ANTLR_INLINE void InputStream<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine ) 224{ 225 m_charPositionInLine = charPositionInLine; 226} 227template<class ImplTraits> 228ANTLR_INLINE void InputStream<ImplTraits>::set_markDepth( ANTLR_UINT32 markDepth ) 229{ 230 m_markDepth = markDepth; 231} 232template<class ImplTraits> 233ANTLR_INLINE void InputStream<ImplTraits>::set_markers( const MarkersType& markers ) 234{ 235 m_markers = markers; 236} 237template<class ImplTraits> 238ANTLR_INLINE void InputStream<ImplTraits>::set_fileName( const StringType& fileName ) 239{ 240 m_fileName = fileName; 241} 242template<class ImplTraits> 243ANTLR_INLINE void InputStream<ImplTraits>::set_fileNo( ANTLR_UINT32 fileNo ) 244{ 245 m_fileNo = fileNo; 246} 247template<class ImplTraits> 248ANTLR_INLINE void InputStream<ImplTraits>::set_newlineChar( ANTLR_UCHAR newlineChar ) 249{ 250 m_newlineChar = newlineChar; 251} 252template<class ImplTraits> 253ANTLR_INLINE void InputStream<ImplTraits>::set_charByteSize( ANTLR_UINT8 charByteSize ) 254{ 255 m_charByteSize = charByteSize; 256} 257template<class ImplTraits> 258ANTLR_INLINE void InputStream<ImplTraits>::set_encoding( ANTLR_UINT32 encoding ) 259{ 260 m_encoding = encoding; 261} 262 263template<class ImplTraits> 264ANTLR_INLINE void InputStream<ImplTraits>::inc_charPositionInLine() 265{ 266 ++m_charPositionInLine; 267} 268 269template<class ImplTraits> 270ANTLR_INLINE void InputStream<ImplTraits>::inc_line() 271{ 272 ++m_line; 273} 274 275template<class ImplTraits> 276ANTLR_INLINE void InputStream<ImplTraits>::inc_markDepth() 277{ 278 ++m_markDepth; 279} 280 281template<class ImplTraits> 282ANTLR_INLINE void InputStream<ImplTraits>::reset() 283{ 284 m_nextChar = m_data; /* Input at first character */ 285 m_line = 1; /* starts at line 1 */ 286 m_charPositionInLine = 0; 287 m_currentLine = m_data; 288 m_markDepth = 0; /* Reset markers */ 289 290 /* Clear out up the markers table if it is there 291 */ 292 m_markers.clear(); 293} 294 295template<class ImplTraits> 296void InputStream<ImplTraits>::reuse(ANTLR_UINT8* inString, ANTLR_UINT32 size, ANTLR_UINT8* name) 297{ 298 m_isAllocated = false; 299 m_data = inString; 300 m_sizeBuf = size; 301 302 // Now we can set up the file name. As we are reusing the stream, there may already 303 // be a string that we can reuse for holding the filename. 304 // 305 if ( BaseType::m_streamName.empty() ) 306 { 307 BaseType::m_streamName = ((name == NULL) ? "-memory-" : (const char *)name); 308 m_fileName = BaseType::m_streamName; 309 } 310 else 311 { 312 BaseType::m_streamName = ((name == NULL) ? "-memory-" : (const char *)name); 313 } 314 315 this->reset(); 316} 317 318/* 319template<class ImplTraits> 320typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::_LT(ANTLR_INT32 lt) 321{ 322 return this->_LA(lt); 323} 324*/ 325 326template<class ImplTraits> 327ANTLR_UINT32 InputStream<ImplTraits>::size() 328{ 329 return m_sizeBuf; 330} 331 332template<class ImplTraits> 333ANTLR_MARKER InputStream<ImplTraits>::index_impl() 334{ 335 return (ANTLR_MARKER)m_nextChar; 336} 337 338 339template<class ImplTraits> 340typename InputStream<ImplTraits>::StringType InputStream<ImplTraits>::substr(ANTLR_MARKER start, ANTLR_MARKER stop) 341{ 342 std::size_t len = static_cast<std::size_t>( (stop-start)/sizeof(DataType) + 1 ); 343 StringType str( (const char*)start, len ); 344 return str; 345} 346 347template<class ImplTraits> 348ANTLR_UINT32 InputStream<ImplTraits>::get_line() 349{ 350 return m_line; 351} 352 353template<class ImplTraits> 354const typename InputStream<ImplTraits>::DataType* InputStream<ImplTraits>::getLineBuf() 355{ 356 return m_currentLine; 357} 358 359template<class ImplTraits> 360ANTLR_INLINE ANTLR_UINT32 InputStream<ImplTraits>::get_charPositionInLine() 361{ 362 return m_charPositionInLine; 363} 364 365template<class ImplTraits> 366ANTLR_INLINE void InputStream<ImplTraits>::set_charPositionInLine(ANTLR_UINT32 position) 367{ 368 m_charPositionInLine = position; 369} 370 371template<class ImplTraits> 372void InputStream<ImplTraits>::set_newLineChar(ANTLR_UINT32 newlineChar) 373{ 374 m_newlineChar = newlineChar; 375} 376 377template<class ImplTraits> 378ANTLR_INLINE LexState<ImplTraits>::LexState() 379{ 380 m_nextChar = NULL; 381 m_line = 0; 382 m_currentLine = NULL; 383 m_charPositionInLine = 0; 384} 385 386template<class ImplTraits> 387ANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_nextChar() const 388{ 389 return m_nextChar; 390} 391 392template<class ImplTraits> 393ANTLR_INLINE ANTLR_UINT32 LexState<ImplTraits>::get_line() const 394{ 395 return m_line; 396} 397 398template<class ImplTraits> 399ANTLR_INLINE const typename LexState<ImplTraits>::DataType* LexState<ImplTraits>::get_currentLine() const 400{ 401 return m_currentLine; 402} 403 404template<class ImplTraits> 405ANTLR_INLINE ANTLR_INT32 LexState<ImplTraits>::get_charPositionInLine() const 406{ 407 return m_charPositionInLine; 408} 409 410template<class ImplTraits> 411ANTLR_INLINE void LexState<ImplTraits>::set_nextChar( const DataType* nextChar ) 412{ 413 m_nextChar = nextChar; 414} 415 416template<class ImplTraits> 417ANTLR_INLINE void LexState<ImplTraits>::set_line( ANTLR_UINT32 line ) 418{ 419 m_line = line; 420} 421 422template<class ImplTraits> 423ANTLR_INLINE void LexState<ImplTraits>::set_currentLine( const DataType* currentLine ) 424{ 425 m_currentLine = currentLine; 426} 427 428template<class ImplTraits> 429ANTLR_INLINE void LexState<ImplTraits>::set_charPositionInLine( ANTLR_INT32 charPositionInLine ) 430{ 431 m_charPositionInLine = charPositionInLine; 432} 433 434template<class ImplTraits> 435ANTLR_INLINE typename InputStream<ImplTraits>::IntStreamType* InputStream<ImplTraits>::get_istream() 436{ 437 return this; 438} 439 440template<class ImplTraits> 441void InputStream<ImplTraits>::setupInputStream() 442{ 443 bool isBigEndian; 444 445 // Used to determine the endianness of the machine we are currently 446 // running on. 447 // 448 ANTLR_UINT16 bomTest = 0xFEFF; 449 450 // What endianess is the machine we are running on? If the incoming 451 // encoding endianess is the same as this machine's natural byte order 452 // then we can use more efficient API calls. 453 // 454 if (*((ANTLR_UINT8*)(&bomTest)) == 0xFE) 455 { 456 isBigEndian = true; 457 } 458 else 459 { 460 isBigEndian = false; 461 } 462 463 // What encoding did the user tell us {s}he thought it was? I am going 464 // to get sick of the questions on antlr-interest, I know I am. 465 // 466 switch (m_encoding) 467 { 468 case ANTLR_ENC_UTF8: 469 470 // See if there is a BOM at the start of this UTF-8 sequence 471 // and just eat it if there is. Windows .TXT files have this for instance 472 // as it identifies UTF-8 even though it is of no consequence for byte order 473 // as UTF-8 does not have a byte order. 474 // 475 if ( (*(m_nextChar)) == 0xEF 476 && (*(m_nextChar+1)) == 0xBB 477 && (*(m_nextChar+2)) == 0xBF 478 ) 479 { 480 // The UTF8 BOM is present so skip it 481 // 482 m_nextChar += 3; 483 } 484 485 // Install the UTF8 input routines 486 // 487 this->setupIntStream( isBigEndian, isBigEndian ); 488 this->set_charByteSize(0); 489 break; 490 491 case ANTLR_ENC_UTF16: 492 493 // See if there is a BOM at the start of the input. If not then 494 // we assume that the byte order is the natural order of this 495 // machine (or it is really UCS2). If there is a BOM we determine if the encoding 496 // is the same as the natural order of this machine. 497 // 498 if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFE 499 && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFF 500 ) 501 { 502 // BOM Present, indicates Big Endian 503 // 504 m_nextChar += 1; 505 506 this->setupIntStream( isBigEndian, true ); 507 } 508 else if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFF 509 && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFE 510 ) 511 { 512 // BOM present, indicates Little Endian 513 // 514 m_nextChar += 1; 515 516 this->setupIntStream( isBigEndian, false ); 517 } 518 else 519 { 520 // No BOM present, assume local computer byte order 521 // 522 this->setupIntStream(isBigEndian, isBigEndian); 523 } 524 this->set_charByteSize(2); 525 break; 526 527 case ANTLR_ENC_UTF32: 528 529 // See if there is a BOM at the start of the input. If not then 530 // we assume that the byte order is the natural order of this 531 // machine. If there is we determine if the encoding 532 // is the same as the natural order of this machine. 533 // 534 if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0x00 535 && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 536 && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+2)) == 0xFE 537 && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+3)) == 0xFF 538 ) 539 { 540 // BOM Present, indicates Big Endian 541 // 542 m_nextChar += 1; 543 544 this->setupIntStream(isBigEndian, true); 545 } 546 else if ( (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar)) == 0xFF 547 && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0xFE 548 && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 549 && (ANTLR_UINT8)(*((ANTLR_UINT8*)m_nextChar+1)) == 0x00 550 ) 551 { 552 // BOM present, indicates Little Endian 553 // 554 m_nextChar += 1; 555 556 this->setupIntStream( isBigEndian, false ); 557 } 558 else 559 { 560 // No BOM present, assume local computer byte order 561 // 562 this->setupIntStream( isBigEndian, isBigEndian ); 563 } 564 this->set_charByteSize(4); 565 break; 566 567 case ANTLR_ENC_UTF16BE: 568 569 // Encoding is definately Big Endian with no BOM 570 // 571 this->setupIntStream( isBigEndian, true ); 572 this->set_charByteSize(2); 573 break; 574 575 case ANTLR_ENC_UTF16LE: 576 577 // Encoding is definately Little Endian with no BOM 578 // 579 this->setupIntStream( isBigEndian, false ); 580 this->set_charByteSize(2); 581 break; 582 583 case ANTLR_ENC_UTF32BE: 584 585 // Encoding is definately Big Endian with no BOM 586 // 587 this->setupIntStream( isBigEndian, true ); 588 this->set_charByteSize(4); 589 break; 590 591 case ANTLR_ENC_UTF32LE: 592 593 // Encoding is definately Little Endian with no BOM 594 // 595 this->setupIntStream( isBigEndian, false ); 596 this->set_charByteSize(4); 597 break; 598 599 case ANTLR_ENC_EBCDIC: 600 601 // EBCDIC is basically the same as ASCII but with an on the 602 // fly translation to ASCII 603 // 604 this->setupIntStream( isBigEndian, isBigEndian ); 605 this->set_charByteSize(1); 606 break; 607 608 case ANTLR_ENC_8BIT: 609 default: 610 611 // Standard 8bit/ASCII 612 // 613 this->setupIntStream( isBigEndian, isBigEndian ); 614 this->set_charByteSize(1); 615 break; 616 } 617} 618 619ANTLR_END_NAMESPACE() 620