/*
 * Conditions Of Use
 *
 * This software was developed by employees of the National Institute of
 * Standards and Technology (NIST), an agency of the Federal Government.
 * Pursuant to title 15 Untied States Code Section 105, works of NIST
 * employees are not subject to copyright protection in the United States
 * and are considered to be in the public domain.  As a result, a formal
 * license is not needed to use the software.
 *
 * This software is provided by NIST as a service and is expressly
 * provided "AS IS."  NIST MAKES NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED
 * OR STATUTORY, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT
 * AND DATA ACCURACY.  NIST does not warrant or make any representations
 * regarding the use of the software or the results thereof, including but
 * not limited to the correctness, accuracy, reliability or usefulness of
 * the software.
 *
 * Permission to use this software is contingent upon your acceptance
 * of the terms of this agreement
 *
 * .
 *
 */
package gov.nist.core;

import java.text.ParseException;
import java.util.Hashtable;
import java.util.Locale;

/**
 * A lexical analyzer that is used by all parsers in our implementation.
 *
 * <p>The read position ({@code ptr}), the saved position ({@code savedPtr})
 * and the input ({@code buffer}) are inherited from {@link StringTokenizer};
 * this class layers keyword tables and token matching on top of them.
 *
 * @version 1.2
 * @since 1.1
 *
 * @author M. Ranganathan
 */
public class LexerCore extends StringTokenizer {

    // IMPORTANT - All keyword matches should be between START and END
    public static final int START = 2048;
    public static final int END = START + 2048;
    // IMPORTANT -- This should be < END
    public static final int ID = END - 1;
    public static final int SAFE = END - 2;

    // Individual token classes.
    public static final int WHITESPACE = END + 1;
    public static final int DIGIT = END + 2;
    public static final int ALPHA = END + 3;

    // Single-character tokens: the token type is the character code itself.
    public static final int BACKSLASH = '\\';
    public static final int QUOTE = '\'';
    public static final int AT = '@';
    public static final int SP = ' ';
    public static final int HT = '\t';
    public static final int COLON = ':';
    public static final int STAR = '*';
    public static final int DOLLAR = '$';
    public static final int PLUS = '+';
    public static final int POUND = '#';
    public static final int MINUS = '-';
    public static final int DOUBLEQUOTE = '\"';
    public static final int TILDE = '~';
    public static final int BACK_QUOTE = '`';
    public static final int NULL = '\0';
    public static final int EQUALS = '=';
    public static final int SEMICOLON = ';';
    public static final int SLASH = '/';
    public static final int L_SQUARE_BRACKET = '[';
    public static final int R_SQUARE_BRACKET = ']';
    public static final int R_CURLY = '}';
    public static final int L_CURLY = '{';
    public static final int HAT = '^';
    public static final int BAR = '|';
    public static final int DOT = '.';
    public static final int EXCLAMATION = '!';
    public static final int LPAREN = '(';
    public static final int RPAREN = ')';
    public static final int GREATER_THAN = '>';
    public static final int LESS_THAN = '<';
    public static final int PERCENT = '%';
    public static final int QUESTION = '?';
    public static final int AND = '&';
    public static final int UNDERSCORE = '_';

    /** Maps a keyword's Integer token type to its name (first registration wins). */
    protected static final Hashtable globalSymbolTable = new Hashtable();
    /** Maps a lexer name to that lexer's keyword table. */
    protected static final Hashtable lexerTables = new Hashtable();
    /** Keyword table in use: maps a keyword name to its Integer token type. */
    protected Hashtable currentLexer;
    protected String currentLexerName;
    /** The token produced by the most recent token-producing {@link #match(int)}. */
    protected Token currentMatch;

    /**
     * Registers a keyword in the current lexer table and, if its value is not
     * yet known, in the global value-to-name table.
     *
     * @param name keyword text, stored as given; lookups upper-case the input,
     *     so the name is expected to be upper case
     * @param value token type for the keyword
     */
    protected void addKeyword(String name, int value) {
        Integer val = Integer.valueOf(value);
        currentLexer.put(name, val);
        if (!globalSymbolTable.containsKey(val)) {
            globalSymbolTable.put(val, name);
        }
    }

    /**
     * Returns the printable name of a token type.
     *
     * @param value a token type
     * @return the registered keyword name when {@code value > START} (or
     *     {@code null} if none is registered), otherwise the one-character
     *     string for {@code value} interpreted as a character code
     */
    public String lookupToken(int value) {
        if (value > START) {
            return (String) globalSymbolTable.get(Integer.valueOf(value));
        }
        return String.valueOf((char) value);
    }

    /**
     * Makes the named keyword table current, creating and registering an
     * empty one if it does not exist yet.
     *
     * @param lexerName name of the keyword table
     * @return the (possibly new) table, which is now {@link #currentLexer}
     */
    protected Hashtable addLexer(String lexerName) {
        currentLexer = (Hashtable) lexerTables.get(lexerName);
        if (currentLexer == null) {
            currentLexer = new Hashtable();
            lexerTables.put(lexerName, currentLexer);
        }
        return currentLexer;
    }

    /**
     * Records the name of the lexer to use. This base implementation only
     * stores the name; it does not change {@link #currentLexer}.
     *
     * @param lexerName name of the lexer
     */
    public void selectLexer(String lexerName) {
        this.currentLexerName = lexerName;
    }

    /** Creates a lexer with an empty keyword table named "charLexer". */
    protected LexerCore() {
        this.currentLexer = new Hashtable();
        this.currentLexerName = "charLexer";
    }

    /**
     * Initialize the lexer with a buffer.
     *
     * <p>NOTE(review): this constructor does not assign {@link #currentLexer};
     * keyword lookups dereference it, so it presumably has to be set (e.g. via
     * {@link #addLexer(String)}) before tokens are matched - confirm against
     * subclasses.
     *
     * @param lexerName name of the lexer
     * @param buffer the input to tokenize
     */
    public LexerCore(String lexerName, String buffer) {
        super(buffer);
        this.currentLexerName = lexerName;
    }

    /**
     * Peek the next id but do not move the buffer pointer forward. The
     * position just past the id is remembered in {@code savedPtr}.
     *
     * @return the next id, as produced by {@link #ttoken()}
     */
    public String peekNextId() {
        int oldPtr = ptr;
        String retval = ttoken();
        savedPtr = ptr;
        ptr = oldPtr;
        return retval;
    }

    /**
     * Get the next id, consuming it.
     *
     * @return the next id, as produced by {@link #ttoken()}
     */
    public String getNextId() {
        return ttoken();
    }

    /**
     * Returns the token recorded by the last successful {@link #match(int)}.
     * Call this after you call match.
     *
     * @return the current match, or {@code null} if nothing was recorded yet
     */
    public Token getNextToken() {
        return this.currentMatch;
    }

    /**
     * Look ahead for one token.
     *
     * @return the next token; the read pointer is not advanced
     * @throws ParseException if the underlying tokenizer reports an error
     */
    public Token peekNextToken() throws ParseException {
        return peekNextToken(1)[0];
    }

    /**
     * Look ahead for several tokens without consuming input. The position
     * just past the last peeked token is remembered in {@code savedPtr}.
     *
     * @param ntokens number of tokens to peek
     * @return the peeked tokens, in input order
     * @throws ParseException if the underlying tokenizer reports an error
     */
    public Token[] peekNextToken(int ntokens) throws ParseException {
        int old = ptr;
        Token[] retval = new Token[ntokens];
        for (int i = 0; i < ntokens; i++) {
            Token tok = new Token();
            if (startsId()) {
                String id = ttoken();
                tok.tokenValue = id;
                Integer type = keywordType(id);
                tok.tokenType = (type != null) ? type.intValue() : ID;
            } else {
                char nextChar = getNextChar();
                tok.tokenValue = String.valueOf(nextChar);
                if (isAlpha(nextChar)) {
                    tok.tokenType = ALPHA;
                } else if (isDigit(nextChar)) {
                    tok.tokenType = DIGIT;
                } else {
                    tok.tokenType = nextChar;
                }
            }
            retval[i] = tok;
        }
        savedPtr = ptr;
        ptr = old;
        return retval;
    }

    /**
     * Match the given token or throw an exception if no such token
     * can be matched.
     *
     * <p>Keyword, {@link #ID}, {@link #SAFE}, {@link #DIGIT} and
     * {@link #ALPHA} matches record a new {@link #currentMatch}. A direct
     * character match only consumes the character, and any other class above
     * {@link #END} (e.g. {@link #WHITESPACE}) consumes nothing; in both cases
     * the previously recorded token is returned unchanged.
     *
     * @param tok the expected token type
     * @return the current match after this call
     * @throws ParseException if the input does not start with the expected token
     */
    public Token match(int tok) throws ParseException {
        if (Debug.parserDebug) {
            Debug.println("match " + tok);
        }
        if (tok > START && tok < END) {
            matchIdOrKeyword(tok);
        } else if (tok > END) {
            matchCharacterClass(tok);
        } else {
            matchCharacter(tok);
        }
        return this.currentMatch;
    }

    /** Matches a generic id, a safe token or a specific keyword. */
    private void matchIdOrKeyword(int tok) throws ParseException {
        if (tok == ID) {
            // Generic ID sought.
            if (!startsId()) {
                throw new ParseException(buffer + "\nID expected", ptr);
            }
            recordMatch(getNextId(), ID);
        } else if (tok == SAFE) {
            if (!startsSafeToken()) {
                throw new ParseException(buffer + "\nID expected", ptr);
            }
            recordMatch(ttokenSafe(), SAFE);
        } else {
            String nexttok = getNextId();
            // keywordType tolerates a null id, so a failed scan is reported as
            // a parse error rather than a NullPointerException.
            Integer cur = keywordType(nexttok);
            if (cur == null || cur.intValue() != tok) {
                throw new ParseException(
                    buffer + "\nUnexpected Token : " + nexttok, ptr);
            }
            recordMatch(nexttok, tok);
        }
    }

    /** Matches one character of class DIGIT or ALPHA; other classes are a no-op. */
    private void matchCharacterClass(int tok) throws ParseException {
        char next = lookAhead(0);
        if (tok == DIGIT) {
            if (!isDigit(next)) {
                throw new ParseException(buffer + "\nExpecting DIGIT", ptr);
            }
            recordMatch(String.valueOf(next), tok);
            consume(1);
        } else if (tok == ALPHA) {
            if (!isAlpha(next)) {
                throw new ParseException(buffer + "\nExpecting ALPHA", ptr);
            }
            recordMatch(String.valueOf(next), tok);
            consume(1);
        }
    }

    /** Matches a direct character spec; deliberately leaves currentMatch untouched. */
    private void matchCharacter(int tok) throws ParseException {
        char ch = (char) tok;
        char next = lookAhead(0);
        if (next != ch) {
            throw new ParseException(
                buffer + "\nExpecting  >>>" + ch + "<<< got >>>" + next + "<<<",
                ptr);
        }
        consume(1);
    }

    /** Stores a fresh token with the given value and type as the current match. */
    private void recordMatch(String value, int type) {
        this.currentMatch = new Token();
        this.currentMatch.tokenValue = value;
        this.currentMatch.tokenType = type;
    }

    /**
     * Looks up the keyword type for an id in the current lexer table.
     *
     * <p>The id is upper-cased with a fixed locale first so that, for example,
     * "invite" still maps to "INVITE" under a Turkish default locale. The
     * default-locale upper-casing the lexer historically used is tried as a
     * fallback so every key that matched before still matches.
     *
     * @param id the scanned id, may be {@code null}
     * @return the keyword's token type, or {@code null} if it is not a keyword
     */
    private Integer keywordType(String id) {
        if (id == null) {
            return null;
        }
        Object type = currentLexer.get(id.toUpperCase(Locale.ENGLISH));
        if (type == null) {
            type = currentLexer.get(id.toUpperCase());
        }
        return (Integer) type;
    }

    /** Skips any run of spaces and horizontal tabs at the read pointer. */
    public void SPorHT() {
        try {
            char c = lookAhead(0);
            while (c == ' ' || c == '\t') {
                consume(1);
                c = lookAhead(0);
            }
        } catch (ParseException ex) {
            // Ignore: skipping optional whitespace is best effort.
        }
    }

    /**
     * JvB: utility function added to validate tokens.
     *
     * <p>See RFC 3261 section 25.1:
     * <pre>
     * token = 1*(alphanum / "-" / "." / "!" / "%" / "*" / "_" / "+" / "`" / "'" / "~" )
     * </pre>
     *
     * @param c - character to check
     * @return true iff character c is a valid token character as per RFC3261
     */
    public static final boolean isTokenChar(char c) {
        if (isAlphaDigit(c)) {
            return true;
        }
        switch (c) {
            case '-':
            case '.':
            case '!':
            case '%':
            case '*':
            case '_':
            case '+':
            case '`':
            case '\'':
            case '~':
                return true;
            default:
                return false;
        }
    }

    /**
     * Tells whether the next character can start an id.
     *
     * @return true if the next character is a token character
     */
    public boolean startsId() {
        try {
            return isTokenChar(lookAhead(0));
        } catch (ParseException ex) {
            return false;
        }
    }

    /**
     * Tells whether the next character can start a "safe" token.
     *
     * <p>NOTE(review): '=' is accepted here (Issue 155 on java.net) but is not
     * consumed by {@link #ttokenSafe()}, so a safe token "starting" with '='
     * scans as the empty string. The asymmetry is preserved as found; confirm
     * which side is intended.
     *
     * @return true if the next character is accepted as a safe-token start
     */
    public boolean startsSafeToken() {
        try {
            char nextChar = lookAhead(0);
            return isSafeTokenChar(nextChar) || nextChar == '=';
        } catch (ParseException ex) {
            return false;
        }
    }

    /** Tells whether {@code c} is one of the characters consumed by ttokenSafe. */
    private static boolean isSafeTokenChar(char c) {
        if (isAlphaDigit(c)) {
            return true;
        }
        switch (c) {
            case '_':
            case '+':
            case '-':
            case '!':
            case '`':
            case '\'':
            case '.':
            case '/':
            case '}':
            case '{':
            case ']':
            case '[':
            case '^':
            case '|':
            case '~':
            case '%': // bug fix by Bruno Konik, JvB copied here
            case '#':
            case '@':
            case '$':
            case ':':
            case ';':
            case '?':
            case '\"':
            case '*':
                return true;
            default:
                return false;
        }
    }

    /**
     * Consumes the longest run of RFC 3261 token characters.
     *
     * @return the consumed text (possibly empty), or {@code null} if the
     *     tokenizer raised a ParseException while scanning
     */
    public String ttoken() {
        int startIdx = ptr;
        try {
            while (hasMoreChars() && isTokenChar(lookAhead(0))) {
                consume(1);
            }
            return buffer.substring(startIdx, ptr);
        } catch (ParseException ex) {
            return null;
        }
    }

    /**
     * Consumes the longest run of "safe" token characters: alphanumerics plus
     * an extended punctuation set.
     *
     * @return the consumed text (possibly empty), or {@code null} if the
     *     tokenizer raised a ParseException while scanning
     */
    public String ttokenSafe() {
        int startIdx = ptr;
        try {
            while (hasMoreChars() && isSafeTokenChar(lookAhead(0))) {
                consume(1);
            }
            return buffer.substring(startIdx, ptr);
        } catch (ParseException ex) {
            return null;
        }
    }

    // Sentinels usable inside the validChars array of consumeValidChars to
    // stand for a whole character class instead of a literal character.
    static final char ALPHA_VALID_CHARS = Character.MAX_VALUE;
    static final char DIGIT_VALID_CHARS = Character.MAX_VALUE - 1;
    static final char ALPHADIGIT_VALID_CHARS = Character.MAX_VALUE - 2;

    /**
     * Consumes input for as long as the next character is accepted by at
     * least one entry of {@code validChars}.
     *
     * @param validChars literal characters and/or the class sentinels
     *     {@code ALPHA_VALID_CHARS}, {@code DIGIT_VALID_CHARS} and
     *     {@code ALPHADIGIT_VALID_CHARS}
     */
    public void consumeValidChars(char[] validChars) {
        int validCharsLength = validChars.length;
        try {
            while (hasMoreChars()) {
                char nextChar = lookAhead(0);
                boolean isValid = false;
                for (int i = 0; i < validCharsLength && !isValid; i++) {
                    char validChar = validChars[i];
                    switch (validChar) {
                        case ALPHA_VALID_CHARS:
                            isValid = isAlpha(nextChar);
                            break;
                        case DIGIT_VALID_CHARS:
                            isValid = isDigit(nextChar);
                            break;
                        case ALPHADIGIT_VALID_CHARS:
                            isValid = isAlphaDigit(nextChar);
                            break;
                        default:
                            isValid = nextChar == validChar;
                    }
                }
                if (!isValid) {
                    break;
                }
                consume(1);
            }
        } catch (ParseException ex) {
            // Best effort: stop consuming at the first tokenizer error.
        }
    }

    /**
     * Parse a quoted string; the cursor is at the opening double quote and is
     * left just past the closing one. A backslash causes the character after
     * it to be skipped, so an escaped quote does not end the string.
     *
     * @return the text between the quotes (backslashes retained), or
     *     {@code null} if the cursor is not at a double quote
     * @throws ParseException if a NUL character (end of input) is read before
     *     the closing quote
     */
    public String quotedString() throws ParseException {
        int startIdx = ptr + 1;
        if (lookAhead(0) != '\"') {
            return null;
        }
        consume(1);
        while (true) {
            char next = getNextChar();
            if (next == '\"') {
                // Got to the terminating quote.
                break;
            } else if (next == '\0') {
                throw new ParseException(
                    this.buffer + " :unexpected EOL", this.ptr);
            } else if (next == '\\') {
                consume(1);
            }
        }
        return buffer.substring(startIdx, ptr - 1);
    }

    /**
     * Parse a comment string; the cursor is at a "(". Characters are read up
     * to and including the first unescaped ")".
     *
     * @return the comment text excluding the enclosing parentheses (escape
     *     backslashes retained), or {@code null} if the cursor is not at "("
     * @throws ParseException if a NUL character (end of input) is read before
     *     the closing parenthesis
     */
    public String comment() throws ParseException {
        StringBuilder retval = new StringBuilder();
        if (lookAhead(0) != '(') {
            return null;
        }
        consume(1);
        while (true) {
            char next = getNextChar();
            if (next == ')') {
                break;
            } else if (next == '\0') {
                throw new ParseException(
                    this.buffer + " :unexpected EOL", this.ptr);
            } else if (next == '\\') {
                retval.append(next);
                next = getNextChar();
                if (next == '\0') {
                    throw new ParseException(
                        this.buffer + " : unexpected EOL", this.ptr);
                }
                retval.append(next);
            } else {
                retval.append(next);
            }
        }
        return retval.toString();
    }

    /**
     * Consumes and returns input up to, but not including, the next
     * semicolon, comma, newline or NUL character.
     *
     * @return a substring containing no semicolons.
     */
    public String byteStringNoSemicolon() {
        StringBuilder retval = new StringBuilder();
        try {
            while (true) {
                char next = lookAhead(0);
                // bug fix from Ben Evans.
                if (next == '\0' || next == '\n' || next == ';' || next == ',') {
                    break;
                }
                consume(1);
                retval.append(next);
            }
        } catch (ParseException ex) {
            // Best effort: return what was collected so far.
        }
        return retval.toString();
    }

    /**
     * Scan until you see a slash or an EOL (newline or NUL character).
     *
     * @return substring containing no slash.
     */
    public String byteStringNoSlash() {
        StringBuilder retval = new StringBuilder();
        try {
            while (true) {
                char next = lookAhead(0);
                // bug fix from Ben Evans.
                if (next == '\0' || next == '\n' || next == '/') {
                    break;
                }
                consume(1);
                retval.append(next);
            }
        } catch (ParseException ex) {
            // Best effort: return what was collected so far.
        }
        return retval.toString();
    }

    /**
     * Consumes and returns input up to, but not including, the next comma,
     * newline or NUL character.
     *
     * @return a substring containing no commas.
     */
    public String byteStringNoComma() {
        StringBuilder retval = new StringBuilder();
        try {
            while (true) {
                char next = lookAhead(0);
                // Stop on NUL as the sibling byteString methods do; the other
                // scanners in this class treat NUL from lookAhead as end of
                // input, and without this check the loop would never end there.
                if (next == '\0' || next == '\n' || next == ',') {
                    break;
                }
                consume(1);
                retval.append(next);
            }
        } catch (ParseException ex) {
            // Best effort: return what was collected so far.
        }
        return retval.toString();
    }

    /**
     * Converts a character to a one-character string.
     *
     * @param ch the character
     * @return {@code ch} as a string
     */
    public static String charAsString(char ch) {
        return String.valueOf(ch);
    }

    /**
     * Lookahead in the inputBuffer for n chars and return as a string.
     * Do not consume the input.
     *
     * @param nchars number of characters to return
     * @return the next {@code nchars} characters of the buffer
     * @throws IndexOutOfBoundsException if fewer than {@code nchars}
     *     characters remain
     */
    public String charAsString(int nchars) {
        return buffer.substring(ptr, ptr + nchars);
    }

    /**
     * Get and consume the next number.
     *
     * @return a substring corresponding to a number
     *     (i.e. a non-empty sequence of digits).
     * @throws ParseException if the next character is not a digit
     */
    public String number() throws ParseException {
        int startIdx = ptr;
        // The leading-digit check sits outside the try block on purpose:
        // inside it, the catch below swallowed this very exception.
        char first = lookAhead(0);
        if (!isDigit(first)) {
            throw new ParseException(
                buffer + ": Unexpected token at " + first, ptr);
        }
        try {
            consume(1);
            while (isDigit(lookAhead(0))) {
                consume(1);
            }
        } catch (ParseException ex) {
            // Best effort: return the digits consumed so far.
        }
        return buffer.substring(startIdx, ptr);
    }

    /**
     * Mark the position for backtracking.
     *
     * @return the current location of the pointer.
     */
    public int markInputPosition() {
        return ptr;
    }

    /**
     * Rewind the input ptr to the marked position.
     *
     * @param position - the position to rewind the parser to.
     */
    public void rewindInputPosition(int position) {
        this.ptr = position;
    }

    /**
     * Get the rest of the String.
     *
     * @return rest of the buffer, or {@code null} if the read pointer is at
     *     or past the end of the buffer.
     */
    public String getRest() {
        if (ptr >= buffer.length()) {
            return null;
        }
        return buffer.substring(ptr);
    }

    /**
     * Get the sub-String until the character is encountered. The terminating
     * character is consumed but not returned. A backslash is dropped and the
     * character after it is taken literally.
     *
     * @param c the character to match
     * @return the substring that matches.
     * @throws ParseException if a NUL character (end of input) is seen before
     *     {@code c}
     */
    public String getString(char c) throws ParseException {
        StringBuilder retval = new StringBuilder();
        while (true) {
            char next = lookAhead(0);
            if (next == '\0') {
                throw new ParseException(
                    this.buffer + "unexpected EOL", this.ptr);
            } else if (next == c) {
                consume(1);
                break;
            } else if (next == '\\') {
                consume(1);
                char nextchar = lookAhead(0);
                if (nextchar == '\0') {
                    throw new ParseException(
                        this.buffer + "unexpected EOL", this.ptr);
                }
                consume(1);
                retval.append(nextchar);
            } else {
                consume(1);
                retval.append(next);
            }
        }
        return retval.toString();
    }

    /**
     * Get the read pointer.
     *
     * @return the current read position
     */
    public int getPtr() {
        return this.ptr;
    }

    /**
     * Get the buffer.
     *
     * @return the input being tokenized
     */
    public String getBuffer() {
        return this.buffer;
    }

    /**
     * Create a parse exception.
     *
     * @return a ParseException whose message is the buffer and whose error
     *     offset is the current read position
     */
    public ParseException createParseException() {
        return new ParseException(this.buffer, this.ptr);
    }
}