1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 31 package com.google.protobuf; 32 33 import com.google.protobuf.Descriptors.Descriptor; 34 import com.google.protobuf.Descriptors.EnumDescriptor; 35 import com.google.protobuf.Descriptors.EnumValueDescriptor; 36 import com.google.protobuf.Descriptors.FieldDescriptor; 37 38 import java.io.IOException; 39 import java.math.BigInteger; 40 import java.nio.CharBuffer; 41 import java.util.ArrayList; 42 import java.util.List; 43 import java.util.Locale; 44 import java.util.Map; 45 import java.util.logging.Logger; 46 import java.util.regex.Matcher; 47 import java.util.regex.Pattern; 48 49 /** 50 * Provide text parsing and formatting support for proto2 instances. 51 * The implementation largely follows google/protobuf/text_format.cc. 52 * 53 * @author wenboz@google.com Wenbo Zhu 54 * @author kenton@google.com Kenton Varda 55 */ 56 public final class TextFormat { TextFormat()57 private TextFormat() {} 58 59 private static final Logger logger = 60 Logger.getLogger(TextFormat.class.getName()); 61 62 private static final Printer DEFAULT_PRINTER = new Printer(); 63 private static final Printer SINGLE_LINE_PRINTER = 64 (new Printer()).setSingleLineMode(true); 65 private static final Printer UNICODE_PRINTER = 66 (new Printer()).setEscapeNonAscii(false); 67 68 /** 69 * Outputs a textual representation of the Protocol Message supplied into 70 * the parameter output. (This representation is the new version of the 71 * classic "ProtocolPrinter" output from the original Protocol Buffer system) 72 */ print( final MessageOrBuilder message, final Appendable output)73 public static void print( 74 final MessageOrBuilder message, final Appendable output) 75 throws IOException { 76 DEFAULT_PRINTER.print(message, new TextGenerator(output)); 77 } 78 79 /** Outputs a textual representation of {@code fields} to {@code output}. 
*/ print(final UnknownFieldSet fields, final Appendable output)80 public static void print(final UnknownFieldSet fields, 81 final Appendable output) 82 throws IOException { 83 DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output)); 84 } 85 86 /** 87 * Same as {@code print()}, except that non-ASCII characters are not 88 * escaped. 89 */ printUnicode( final MessageOrBuilder message, final Appendable output)90 public static void printUnicode( 91 final MessageOrBuilder message, final Appendable output) 92 throws IOException { 93 UNICODE_PRINTER.print(message, new TextGenerator(output)); 94 } 95 96 /** 97 * Same as {@code print()}, except that non-ASCII characters are not 98 * escaped. 99 */ printUnicode(final UnknownFieldSet fields, final Appendable output)100 public static void printUnicode(final UnknownFieldSet fields, 101 final Appendable output) 102 throws IOException { 103 UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(output)); 104 } 105 106 /** 107 * Generates a human readable form of this message, useful for debugging and 108 * other purposes, with no newline characters. 109 */ shortDebugString(final MessageOrBuilder message)110 public static String shortDebugString(final MessageOrBuilder message) { 111 try { 112 final StringBuilder sb = new StringBuilder(); 113 SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb)); 114 // Single line mode currently might have an extra space at the end. 115 return sb.toString().trim(); 116 } catch (IOException e) { 117 throw new IllegalStateException(e); 118 } 119 } 120 121 /** 122 * Generates a human readable form of the field, useful for debugging 123 * and other purposes, with no newline characters. 
124 */ shortDebugString(final FieldDescriptor field, final Object value)125 public static String shortDebugString(final FieldDescriptor field, 126 final Object value) { 127 try { 128 final StringBuilder sb = new StringBuilder(); 129 SINGLE_LINE_PRINTER.printField(field, value, new TextGenerator(sb)); 130 return sb.toString().trim(); 131 } catch (IOException e) { 132 throw new IllegalStateException(e); 133 } 134 } 135 136 /** 137 * Generates a human readable form of the unknown fields, useful for debugging 138 * and other purposes, with no newline characters. 139 */ shortDebugString(final UnknownFieldSet fields)140 public static String shortDebugString(final UnknownFieldSet fields) { 141 try { 142 final StringBuilder sb = new StringBuilder(); 143 SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb)); 144 // Single line mode currently might have an extra space at the end. 145 return sb.toString().trim(); 146 } catch (IOException e) { 147 throw new IllegalStateException(e); 148 } 149 } 150 151 /** 152 * Like {@code print()}, but writes directly to a {@code String} and 153 * returns it. 154 */ printToString(final MessageOrBuilder message)155 public static String printToString(final MessageOrBuilder message) { 156 try { 157 final StringBuilder text = new StringBuilder(); 158 print(message, text); 159 return text.toString(); 160 } catch (IOException e) { 161 throw new IllegalStateException(e); 162 } 163 } 164 165 /** 166 * Like {@code print()}, but writes directly to a {@code String} and 167 * returns it. 
168 */ printToString(final UnknownFieldSet fields)169 public static String printToString(final UnknownFieldSet fields) { 170 try { 171 final StringBuilder text = new StringBuilder(); 172 print(fields, text); 173 return text.toString(); 174 } catch (IOException e) { 175 throw new IllegalStateException(e); 176 } 177 } 178 179 /** 180 * Same as {@code printToString()}, except that non-ASCII characters 181 * in string type fields are not escaped in backslash+octals. 182 */ printToUnicodeString(final MessageOrBuilder message)183 public static String printToUnicodeString(final MessageOrBuilder message) { 184 try { 185 final StringBuilder text = new StringBuilder(); 186 UNICODE_PRINTER.print(message, new TextGenerator(text)); 187 return text.toString(); 188 } catch (IOException e) { 189 throw new IllegalStateException(e); 190 } 191 } 192 193 /** 194 * Same as {@code printToString()}, except that non-ASCII characters 195 * in string type fields are not escaped in backslash+octals. 196 */ printToUnicodeString(final UnknownFieldSet fields)197 public static String printToUnicodeString(final UnknownFieldSet fields) { 198 try { 199 final StringBuilder text = new StringBuilder(); 200 UNICODE_PRINTER.printUnknownFields(fields, new TextGenerator(text)); 201 return text.toString(); 202 } catch (IOException e) { 203 throw new IllegalStateException(e); 204 } 205 } 206 printField(final FieldDescriptor field, final Object value, final Appendable output)207 public static void printField(final FieldDescriptor field, 208 final Object value, 209 final Appendable output) 210 throws IOException { 211 DEFAULT_PRINTER.printField(field, value, new TextGenerator(output)); 212 } 213 printFieldToString(final FieldDescriptor field, final Object value)214 public static String printFieldToString(final FieldDescriptor field, 215 final Object value) { 216 try { 217 final StringBuilder text = new StringBuilder(); 218 printField(field, value, text); 219 return text.toString(); 220 } catch (IOException e) 
{ 221 throw new IllegalStateException(e); 222 } 223 } 224 225 /** 226 * Outputs a textual representation of the value of given field value. 227 * 228 * @param field the descriptor of the field 229 * @param value the value of the field 230 * @param output the output to which to append the formatted value 231 * @throws ClassCastException if the value is not appropriate for the 232 * given field descriptor 233 * @throws IOException if there is an exception writing to the output 234 */ printFieldValue(final FieldDescriptor field, final Object value, final Appendable output)235 public static void printFieldValue(final FieldDescriptor field, 236 final Object value, 237 final Appendable output) 238 throws IOException { 239 DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output)); 240 } 241 242 /** 243 * Outputs a textual representation of the value of an unknown field. 244 * 245 * @param tag the field's tag number 246 * @param value the value of the field 247 * @param output the output to which to append the formatted value 248 * @throws ClassCastException if the value is not appropriate for the 249 * given field descriptor 250 * @throws IOException if there is an exception writing to the output 251 */ printUnknownFieldValue(final int tag, final Object value, final Appendable output)252 public static void printUnknownFieldValue(final int tag, 253 final Object value, 254 final Appendable output) 255 throws IOException { 256 printUnknownFieldValue(tag, value, new TextGenerator(output)); 257 } 258 printUnknownFieldValue(final int tag, final Object value, final TextGenerator generator)259 private static void printUnknownFieldValue(final int tag, 260 final Object value, 261 final TextGenerator generator) 262 throws IOException { 263 switch (WireFormat.getTagWireType(tag)) { 264 case WireFormat.WIRETYPE_VARINT: 265 generator.print(unsignedToString((Long) value)); 266 break; 267 case WireFormat.WIRETYPE_FIXED32: 268 generator.print( 269 String.format((Locale) 
null, "0x%08x", (Integer) value)); 270 break; 271 case WireFormat.WIRETYPE_FIXED64: 272 generator.print(String.format((Locale) null, "0x%016x", (Long) value)); 273 break; 274 case WireFormat.WIRETYPE_LENGTH_DELIMITED: 275 generator.print("\""); 276 generator.print(escapeBytes((ByteString) value)); 277 generator.print("\""); 278 break; 279 case WireFormat.WIRETYPE_START_GROUP: 280 DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator); 281 break; 282 default: 283 throw new IllegalArgumentException("Bad tag: " + tag); 284 } 285 } 286 287 /** Helper class for converting protobufs to text. */ 288 private static final class Printer { 289 /** Whether to omit newlines from the output. */ 290 boolean singleLineMode = false; 291 292 /** Whether to escape non ASCII characters with backslash and octal. */ 293 boolean escapeNonAscii = true; 294 Printer()295 private Printer() {} 296 297 /** Setter of singleLineMode */ setSingleLineMode(boolean singleLineMode)298 private Printer setSingleLineMode(boolean singleLineMode) { 299 this.singleLineMode = singleLineMode; 300 return this; 301 } 302 303 /** Setter of escapeNonAscii */ setEscapeNonAscii(boolean escapeNonAscii)304 private Printer setEscapeNonAscii(boolean escapeNonAscii) { 305 this.escapeNonAscii = escapeNonAscii; 306 return this; 307 } 308 print( final MessageOrBuilder message, final TextGenerator generator)309 private void print( 310 final MessageOrBuilder message, final TextGenerator generator) 311 throws IOException { 312 for (Map.Entry<FieldDescriptor, Object> field 313 : message.getAllFields().entrySet()) { 314 printField(field.getKey(), field.getValue(), generator); 315 } 316 printUnknownFields(message.getUnknownFields(), generator); 317 } 318 printField(final FieldDescriptor field, final Object value, final TextGenerator generator)319 private void printField(final FieldDescriptor field, final Object value, 320 final TextGenerator generator) throws IOException { 321 if (field.isRepeated()) { 322 // 
Repeated field. Print each element. 323 for (Object element : (List<?>) value) { 324 printSingleField(field, element, generator); 325 } 326 } else { 327 printSingleField(field, value, generator); 328 } 329 } 330 printSingleField(final FieldDescriptor field, final Object value, final TextGenerator generator)331 private void printSingleField(final FieldDescriptor field, 332 final Object value, 333 final TextGenerator generator) 334 throws IOException { 335 if (field.isExtension()) { 336 generator.print("["); 337 // We special-case MessageSet elements for compatibility with proto1. 338 if (field.getContainingType().getOptions().getMessageSetWireFormat() 339 && (field.getType() == FieldDescriptor.Type.MESSAGE) 340 && (field.isOptional()) 341 // object equality 342 && (field.getExtensionScope() == field.getMessageType())) { 343 generator.print(field.getMessageType().getFullName()); 344 } else { 345 generator.print(field.getFullName()); 346 } 347 generator.print("]"); 348 } else { 349 if (field.getType() == FieldDescriptor.Type.GROUP) { 350 // Groups must be serialized with their original capitalization. 
351 generator.print(field.getMessageType().getName()); 352 } else { 353 generator.print(field.getName()); 354 } 355 } 356 357 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 358 if (singleLineMode) { 359 generator.print(" { "); 360 } else { 361 generator.print(" {\n"); 362 generator.indent(); 363 } 364 } else { 365 generator.print(": "); 366 } 367 368 printFieldValue(field, value, generator); 369 370 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 371 if (singleLineMode) { 372 generator.print("} "); 373 } else { 374 generator.outdent(); 375 generator.print("}\n"); 376 } 377 } else { 378 if (singleLineMode) { 379 generator.print(" "); 380 } else { 381 generator.print("\n"); 382 } 383 } 384 } 385 printFieldValue(final FieldDescriptor field, final Object value, final TextGenerator generator)386 private void printFieldValue(final FieldDescriptor field, 387 final Object value, 388 final TextGenerator generator) 389 throws IOException { 390 switch (field.getType()) { 391 case INT32: 392 case SINT32: 393 case SFIXED32: 394 generator.print(((Integer) value).toString()); 395 break; 396 397 case INT64: 398 case SINT64: 399 case SFIXED64: 400 generator.print(((Long) value).toString()); 401 break; 402 403 case BOOL: 404 generator.print(((Boolean) value).toString()); 405 break; 406 407 case FLOAT: 408 generator.print(((Float) value).toString()); 409 break; 410 411 case DOUBLE: 412 generator.print(((Double) value).toString()); 413 break; 414 415 case UINT32: 416 case FIXED32: 417 generator.print(unsignedToString((Integer) value)); 418 break; 419 420 case UINT64: 421 case FIXED64: 422 generator.print(unsignedToString((Long) value)); 423 break; 424 425 case STRING: 426 generator.print("\""); 427 generator.print(escapeNonAscii 428 ? 
TextFormatEscaper.escapeText((String) value) 429 : escapeDoubleQuotesAndBackslashes((String) value) 430 .replace("\n", "\\n")); 431 generator.print("\""); 432 break; 433 434 case BYTES: 435 generator.print("\""); 436 if (value instanceof ByteString) { 437 generator.print(escapeBytes((ByteString) value)); 438 } else { 439 generator.print(escapeBytes((byte[]) value)); 440 } 441 generator.print("\""); 442 break; 443 444 case ENUM: 445 generator.print(((EnumValueDescriptor) value).getName()); 446 break; 447 448 case MESSAGE: 449 case GROUP: 450 print((Message) value, generator); 451 break; 452 } 453 } 454 printUnknownFields(final UnknownFieldSet unknownFields, final TextGenerator generator)455 private void printUnknownFields(final UnknownFieldSet unknownFields, 456 final TextGenerator generator) 457 throws IOException { 458 for (Map.Entry<Integer, UnknownFieldSet.Field> entry : 459 unknownFields.asMap().entrySet()) { 460 final int number = entry.getKey(); 461 final UnknownFieldSet.Field field = entry.getValue(); 462 printUnknownField(number, WireFormat.WIRETYPE_VARINT, 463 field.getVarintList(), generator); 464 printUnknownField(number, WireFormat.WIRETYPE_FIXED32, 465 field.getFixed32List(), generator); 466 printUnknownField(number, WireFormat.WIRETYPE_FIXED64, 467 field.getFixed64List(), generator); 468 printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED, 469 field.getLengthDelimitedList(), generator); 470 for (final UnknownFieldSet value : field.getGroupList()) { 471 generator.print(entry.getKey().toString()); 472 if (singleLineMode) { 473 generator.print(" { "); 474 } else { 475 generator.print(" {\n"); 476 generator.indent(); 477 } 478 printUnknownFields(value, generator); 479 if (singleLineMode) { 480 generator.print("} "); 481 } else { 482 generator.outdent(); 483 generator.print("}\n"); 484 } 485 } 486 } 487 } 488 printUnknownField(final int number, final int wireType, final List<?> values, final TextGenerator generator)489 private void 
printUnknownField(final int number, 490 final int wireType, 491 final List<?> values, 492 final TextGenerator generator) 493 throws IOException { 494 for (final Object value : values) { 495 generator.print(String.valueOf(number)); 496 generator.print(": "); 497 printUnknownFieldValue(wireType, value, generator); 498 generator.print(singleLineMode ? " " : "\n"); 499 } 500 } 501 } 502 503 /** Convert an unsigned 32-bit integer to a string. */ unsignedToString(final int value)504 public static String unsignedToString(final int value) { 505 if (value >= 0) { 506 return Integer.toString(value); 507 } else { 508 return Long.toString(value & 0x00000000FFFFFFFFL); 509 } 510 } 511 512 /** Convert an unsigned 64-bit integer to a string. */ unsignedToString(final long value)513 public static String unsignedToString(final long value) { 514 if (value >= 0) { 515 return Long.toString(value); 516 } else { 517 // Pull off the most-significant bit so that BigInteger doesn't think 518 // the number is negative, then set it again using setBit(). 519 return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL) 520 .setBit(63).toString(); 521 } 522 } 523 524 /** 525 * An inner class for writing text to the output stream. 526 */ 527 private static final class TextGenerator { 528 private final Appendable output; 529 private final StringBuilder indent = new StringBuilder(); 530 private boolean atStartOfLine = true; 531 TextGenerator(final Appendable output)532 private TextGenerator(final Appendable output) { 533 this.output = output; 534 } 535 536 /** 537 * Indent text by two spaces. After calling Indent(), two spaces will be 538 * inserted at the beginning of each line of text. Indent() may be called 539 * multiple times to produce deeper indents. 540 */ indent()541 public void indent() { 542 indent.append(" "); 543 } 544 545 /** 546 * Reduces the current indent level by two spaces, or crashes if the indent 547 * level is zero. 
548 */ outdent()549 public void outdent() { 550 final int length = indent.length(); 551 if (length == 0) { 552 throw new IllegalArgumentException( 553 " Outdent() without matching Indent()."); 554 } 555 indent.delete(length - 2, length); 556 } 557 558 /** 559 * Print text to the output stream. 560 */ print(final CharSequence text)561 public void print(final CharSequence text) throws IOException { 562 final int size = text.length(); 563 int pos = 0; 564 565 for (int i = 0; i < size; i++) { 566 if (text.charAt(i) == '\n') { 567 write(text.subSequence(pos, i + 1)); 568 pos = i + 1; 569 atStartOfLine = true; 570 } 571 } 572 write(text.subSequence(pos, size)); 573 } 574 write(final CharSequence data)575 private void write(final CharSequence data) throws IOException { 576 if (data.length() == 0) { 577 return; 578 } 579 if (atStartOfLine) { 580 atStartOfLine = false; 581 output.append(indent); 582 } 583 output.append(data); 584 } 585 } 586 587 // ================================================================= 588 // Parsing 589 590 /** 591 * Represents a stream of tokens parsed from a {@code String}. 592 * 593 * <p>The Java standard library provides many classes that you might think 594 * would be useful for implementing this, but aren't. For example: 595 * 596 * <ul> 597 * <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or, 598 * at least, something that would get us close to what we want -- except 599 * for one fatal flaw: It automatically un-escapes strings using Java 600 * escape sequences, which do not include all the escape sequences we 601 * need to support (e.g. '\x'). 602 * <li>{@code java.util.Scanner}: This seems like a great way at least to 603 * parse regular expressions out of a stream (so we wouldn't have to load 604 * the entire input into a single string before parsing). Sadly, 605 * {@code Scanner} requires that tokens be delimited with some delimiter. 
606 * Thus, although the text "foo:" should parse to two tokens ("foo" and 607 * ":"), {@code Scanner} would recognize it only as a single token. 608 * Furthermore, {@code Scanner} provides no way to inspect the contents 609 * of delimiters, making it impossible to keep track of line and column 610 * numbers. 611 * </ul> 612 * 613 * <p>Luckily, Java's regular expression support does manage to be useful to 614 * us. (Barely: We need {@code Matcher.usePattern()}, which is new in 615 * Java 1.5.) So, we can use that, at least. Unfortunately, this implies 616 * that we need to have the entire input in one contiguous string. 617 */ 618 private static final class Tokenizer { 619 private final CharSequence text; 620 private final Matcher matcher; 621 private String currentToken; 622 623 // The character index within this.text at which the current token begins. 624 private int pos = 0; 625 626 // The line and column numbers of the current token. 627 private int line = 0; 628 private int column = 0; 629 630 // The line and column numbers of the previous token (allows throwing 631 // errors *after* consuming). 632 private int previousLine = 0; 633 private int previousColumn = 0; 634 635 // We use possessive quantifiers (*+ and ++) because otherwise the Java 636 // regex matcher has stack overflows on large inputs. 
637 private static final Pattern WHITESPACE = 638 Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE); 639 private static final Pattern TOKEN = Pattern.compile( 640 "[a-zA-Z_][0-9a-zA-Z_+-]*+|" + // an identifier 641 "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" + // a number 642 "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" + // a double-quoted string 643 "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)", // a single-quoted string 644 Pattern.MULTILINE); 645 646 private static final Pattern DOUBLE_INFINITY = Pattern.compile( 647 "-?inf(inity)?", 648 Pattern.CASE_INSENSITIVE); 649 private static final Pattern FLOAT_INFINITY = Pattern.compile( 650 "-?inf(inity)?f?", 651 Pattern.CASE_INSENSITIVE); 652 private static final Pattern FLOAT_NAN = Pattern.compile( 653 "nanf?", 654 Pattern.CASE_INSENSITIVE); 655 656 /** Construct a tokenizer that parses tokens from the given text. */ Tokenizer(final CharSequence text)657 private Tokenizer(final CharSequence text) { 658 this.text = text; 659 this.matcher = WHITESPACE.matcher(text); 660 skipWhitespace(); 661 nextToken(); 662 } 663 getPreviousLine()664 int getPreviousLine() { 665 return previousLine; 666 } 667 getPreviousColumn()668 int getPreviousColumn() { 669 return previousColumn; 670 } 671 getLine()672 int getLine() { 673 return line; 674 } 675 getColumn()676 int getColumn() { 677 return column; 678 } 679 680 /** Are we at the end of the input? */ atEnd()681 public boolean atEnd() { 682 return currentToken.length() == 0; 683 } 684 685 /** Advance to the next token. */ nextToken()686 public void nextToken() { 687 previousLine = line; 688 previousColumn = column; 689 690 // Advance the line counter to the current position. 691 while (pos < matcher.regionStart()) { 692 if (text.charAt(pos) == '\n') { 693 ++line; 694 column = 0; 695 } else { 696 ++column; 697 } 698 ++pos; 699 } 700 701 // Match the next token. 
702 if (matcher.regionStart() == matcher.regionEnd()) { 703 // EOF 704 currentToken = ""; 705 } else { 706 matcher.usePattern(TOKEN); 707 if (matcher.lookingAt()) { 708 currentToken = matcher.group(); 709 matcher.region(matcher.end(), matcher.regionEnd()); 710 } else { 711 // Take one character. 712 currentToken = String.valueOf(text.charAt(pos)); 713 matcher.region(pos + 1, matcher.regionEnd()); 714 } 715 716 skipWhitespace(); 717 } 718 } 719 720 /** 721 * Skip over any whitespace so that the matcher region starts at the next 722 * token. 723 */ skipWhitespace()724 private void skipWhitespace() { 725 matcher.usePattern(WHITESPACE); 726 if (matcher.lookingAt()) { 727 matcher.region(matcher.end(), matcher.regionEnd()); 728 } 729 } 730 731 /** 732 * If the next token exactly matches {@code token}, consume it and return 733 * {@code true}. Otherwise, return {@code false} without doing anything. 734 */ tryConsume(final String token)735 public boolean tryConsume(final String token) { 736 if (currentToken.equals(token)) { 737 nextToken(); 738 return true; 739 } else { 740 return false; 741 } 742 } 743 744 /** 745 * If the next token exactly matches {@code token}, consume it. Otherwise, 746 * throw a {@link ParseException}. 747 */ consume(final String token)748 public void consume(final String token) throws ParseException { 749 if (!tryConsume(token)) { 750 throw parseException("Expected \"" + token + "\"."); 751 } 752 } 753 754 /** 755 * Returns {@code true} if the next token is an integer, but does 756 * not consume it. 757 */ lookingAtInteger()758 public boolean lookingAtInteger() { 759 if (currentToken.length() == 0) { 760 return false; 761 } 762 763 final char c = currentToken.charAt(0); 764 return ('0' <= c && c <= '9') 765 || c == '-' || c == '+'; 766 } 767 768 /** 769 * Returns {@code true} if the current token's text is equal to that 770 * specified. 
771 */ lookingAt(String text)772 public boolean lookingAt(String text) { 773 return currentToken.equals(text); 774 } 775 776 /** 777 * If the next token is an identifier, consume it and return its value. 778 * Otherwise, throw a {@link ParseException}. 779 */ consumeIdentifier()780 public String consumeIdentifier() throws ParseException { 781 for (int i = 0; i < currentToken.length(); i++) { 782 final char c = currentToken.charAt(i); 783 if (('a' <= c && c <= 'z') 784 || ('A' <= c && c <= 'Z') 785 || ('0' <= c && c <= '9') 786 || (c == '_') || (c == '.')) { 787 // OK 788 } else { 789 throw parseException( 790 "Expected identifier. Found '" + currentToken + "'"); 791 } 792 } 793 794 final String result = currentToken; 795 nextToken(); 796 return result; 797 } 798 799 /** 800 * If the next token is an identifier, consume it and return {@code true}. 801 * Otherwise, return {@code false} without doing anything. 802 */ tryConsumeIdentifier()803 public boolean tryConsumeIdentifier() { 804 try { 805 consumeIdentifier(); 806 return true; 807 } catch (ParseException e) { 808 return false; 809 } 810 } 811 812 /** 813 * If the next token is a 32-bit signed integer, consume it and return its 814 * value. Otherwise, throw a {@link ParseException}. 815 */ consumeInt32()816 public int consumeInt32() throws ParseException { 817 try { 818 final int result = parseInt32(currentToken); 819 nextToken(); 820 return result; 821 } catch (NumberFormatException e) { 822 throw integerParseException(e); 823 } 824 } 825 826 /** 827 * If the next token is a 32-bit unsigned integer, consume it and return its 828 * value. Otherwise, throw a {@link ParseException}. 
829 */ consumeUInt32()830 public int consumeUInt32() throws ParseException { 831 try { 832 final int result = parseUInt32(currentToken); 833 nextToken(); 834 return result; 835 } catch (NumberFormatException e) { 836 throw integerParseException(e); 837 } 838 } 839 840 /** 841 * If the next token is a 64-bit signed integer, consume it and return its 842 * value. Otherwise, throw a {@link ParseException}. 843 */ consumeInt64()844 public long consumeInt64() throws ParseException { 845 try { 846 final long result = parseInt64(currentToken); 847 nextToken(); 848 return result; 849 } catch (NumberFormatException e) { 850 throw integerParseException(e); 851 } 852 } 853 854 /** 855 * If the next token is a 64-bit signed integer, consume it and return 856 * {@code true}. Otherwise, return {@code false} without doing anything. 857 */ tryConsumeInt64()858 public boolean tryConsumeInt64() { 859 try { 860 consumeInt64(); 861 return true; 862 } catch (ParseException e) { 863 return false; 864 } 865 } 866 867 /** 868 * If the next token is a 64-bit unsigned integer, consume it and return its 869 * value. Otherwise, throw a {@link ParseException}. 870 */ consumeUInt64()871 public long consumeUInt64() throws ParseException { 872 try { 873 final long result = parseUInt64(currentToken); 874 nextToken(); 875 return result; 876 } catch (NumberFormatException e) { 877 throw integerParseException(e); 878 } 879 } 880 881 /** 882 * If the next token is a 64-bit unsigned integer, consume it and return 883 * {@code true}. Otherwise, return {@code false} without doing anything. 884 */ tryConsumeUInt64()885 public boolean tryConsumeUInt64() { 886 try { 887 consumeUInt64(); 888 return true; 889 } catch (ParseException e) { 890 return false; 891 } 892 } 893 894 /** 895 * If the next token is a double, consume it and return its value. 896 * Otherwise, throw a {@link ParseException}. 
897 */ consumeDouble()898 public double consumeDouble() throws ParseException { 899 // We need to parse infinity and nan separately because 900 // Double.parseDouble() does not accept "inf", "infinity", or "nan". 901 if (DOUBLE_INFINITY.matcher(currentToken).matches()) { 902 final boolean negative = currentToken.startsWith("-"); 903 nextToken(); 904 return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; 905 } 906 if (currentToken.equalsIgnoreCase("nan")) { 907 nextToken(); 908 return Double.NaN; 909 } 910 try { 911 final double result = Double.parseDouble(currentToken); 912 nextToken(); 913 return result; 914 } catch (NumberFormatException e) { 915 throw floatParseException(e); 916 } 917 } 918 919 /** 920 * If the next token is a double, consume it and return {@code true}. 921 * Otherwise, return {@code false} without doing anything. 922 */ tryConsumeDouble()923 public boolean tryConsumeDouble() { 924 try { 925 consumeDouble(); 926 return true; 927 } catch (ParseException e) { 928 return false; 929 } 930 } 931 932 /** 933 * If the next token is a float, consume it and return its value. 934 * Otherwise, throw a {@link ParseException}. 935 */ consumeFloat()936 public float consumeFloat() throws ParseException { 937 // We need to parse infinity and nan separately because 938 // Float.parseFloat() does not accept "inf", "infinity", or "nan". 939 if (FLOAT_INFINITY.matcher(currentToken).matches()) { 940 final boolean negative = currentToken.startsWith("-"); 941 nextToken(); 942 return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY; 943 } 944 if (FLOAT_NAN.matcher(currentToken).matches()) { 945 nextToken(); 946 return Float.NaN; 947 } 948 try { 949 final float result = Float.parseFloat(currentToken); 950 nextToken(); 951 return result; 952 } catch (NumberFormatException e) { 953 throw floatParseException(e); 954 } 955 } 956 957 /** 958 * If the next token is a float, consume it and return {@code true}. 
959 * Otherwise, return {@code false} without doing anything. 960 */ tryConsumeFloat()961 public boolean tryConsumeFloat() { 962 try { 963 consumeFloat(); 964 return true; 965 } catch (ParseException e) { 966 return false; 967 } 968 } 969 970 /** 971 * If the next token is a boolean, consume it and return its value. 972 * Otherwise, throw a {@link ParseException}. 973 */ consumeBoolean()974 public boolean consumeBoolean() throws ParseException { 975 if (currentToken.equals("true") 976 || currentToken.equals("True") 977 || currentToken.equals("t") 978 || currentToken.equals("1")) { 979 nextToken(); 980 return true; 981 } else if (currentToken.equals("false") 982 || currentToken.equals("False") 983 || currentToken.equals("f") 984 || currentToken.equals("0")) { 985 nextToken(); 986 return false; 987 } else { 988 throw parseException("Expected \"true\" or \"false\"."); 989 } 990 } 991 992 /** 993 * If the next token is a string, consume it and return its (unescaped) 994 * value. Otherwise, throw a {@link ParseException}. 995 */ consumeString()996 public String consumeString() throws ParseException { 997 return consumeByteString().toStringUtf8(); 998 } 999 1000 /** 1001 * If the next token is a string, consume it and return true. Otherwise, 1002 * return false. 1003 */ tryConsumeString()1004 public boolean tryConsumeString() { 1005 try { 1006 consumeString(); 1007 return true; 1008 } catch (ParseException e) { 1009 return false; 1010 } 1011 } 1012 1013 /** 1014 * If the next token is a string, consume it, unescape it as a 1015 * {@link ByteString}, and return it. Otherwise, throw a 1016 * {@link ParseException}. 
1017 */ consumeByteString()1018 public ByteString consumeByteString() throws ParseException { 1019 List<ByteString> list = new ArrayList<ByteString>(); 1020 consumeByteString(list); 1021 while (currentToken.startsWith("'") || currentToken.startsWith("\"")) { 1022 consumeByteString(list); 1023 } 1024 return ByteString.copyFrom(list); 1025 } 1026 1027 /** 1028 * Like {@link #consumeByteString()} but adds each token of the string to 1029 * the given list. String literals (whether bytes or text) may come in 1030 * multiple adjacent tokens which are automatically concatenated, like in 1031 * C or Python. 1032 */ consumeByteString(List<ByteString> list)1033 private void consumeByteString(List<ByteString> list) 1034 throws ParseException { 1035 final char quote = currentToken.length() > 0 1036 ? currentToken.charAt(0) 1037 : '\0'; 1038 if (quote != '\"' && quote != '\'') { 1039 throw parseException("Expected string."); 1040 } 1041 1042 if (currentToken.length() < 2 1043 || currentToken.charAt(currentToken.length() - 1) != quote) { 1044 throw parseException("String missing ending quote."); 1045 } 1046 1047 try { 1048 final String escaped = 1049 currentToken.substring(1, currentToken.length() - 1); 1050 final ByteString result = unescapeBytes(escaped); 1051 nextToken(); 1052 list.add(result); 1053 } catch (InvalidEscapeSequenceException e) { 1054 throw parseException(e.getMessage()); 1055 } 1056 } 1057 1058 /** 1059 * Returns a {@link ParseException} with the current line and column 1060 * numbers in the description, suitable for throwing. 1061 */ parseException(final String description)1062 public ParseException parseException(final String description) { 1063 // Note: People generally prefer one-based line and column numbers. 1064 return new ParseException( 1065 line + 1, column + 1, description); 1066 } 1067 1068 /** 1069 * Returns a {@link ParseException} with the line and column numbers of 1070 * the previous token in the description, suitable for throwing. 
1071 */ parseExceptionPreviousToken( final String description)1072 public ParseException parseExceptionPreviousToken( 1073 final String description) { 1074 // Note: People generally prefer one-based line and column numbers. 1075 return new ParseException( 1076 previousLine + 1, previousColumn + 1, description); 1077 } 1078 1079 /** 1080 * Constructs an appropriate {@link ParseException} for the given 1081 * {@code NumberFormatException} when trying to parse an integer. 1082 */ integerParseException( final NumberFormatException e)1083 private ParseException integerParseException( 1084 final NumberFormatException e) { 1085 return parseException("Couldn't parse integer: " + e.getMessage()); 1086 } 1087 1088 /** 1089 * Constructs an appropriate {@link ParseException} for the given 1090 * {@code NumberFormatException} when trying to parse a float or double. 1091 */ floatParseException(final NumberFormatException e)1092 private ParseException floatParseException(final NumberFormatException e) { 1093 return parseException("Couldn't parse number: " + e.getMessage()); 1094 } 1095 1096 /** 1097 * Returns a {@link UnknownFieldParseException} with the line and column 1098 * numbers of the previous token in the description, and the unknown field 1099 * name, suitable for throwing. 1100 */ unknownFieldParseExceptionPreviousToken( final String unknownField, final String description)1101 public UnknownFieldParseException unknownFieldParseExceptionPreviousToken( 1102 final String unknownField, final String description) { 1103 // Note: People generally prefer one-based line and column numbers. 1104 return new UnknownFieldParseException( 1105 previousLine + 1, previousColumn + 1, unknownField, description); 1106 } 1107 } 1108 1109 /** Thrown when parsing an invalid text format message. 
*/ 1110 public static class ParseException extends IOException { 1111 private static final long serialVersionUID = 3196188060225107702L; 1112 1113 private final int line; 1114 private final int column; 1115 1116 /** Create a new instance, with -1 as the line and column numbers. */ ParseException(final String message)1117 public ParseException(final String message) { 1118 this(-1, -1, message); 1119 } 1120 1121 /** 1122 * Create a new instance 1123 * 1124 * @param line the line number where the parse error occurred, 1125 * using 1-offset. 1126 * @param column the column number where the parser error occurred, 1127 * using 1-offset. 1128 */ ParseException(final int line, final int column, final String message)1129 public ParseException(final int line, final int column, 1130 final String message) { 1131 super(Integer.toString(line) + ":" + column + ": " + message); 1132 this.line = line; 1133 this.column = column; 1134 } 1135 1136 /** 1137 * Return the line where the parse exception occurred, or -1 when 1138 * none is provided. The value is specified as 1-offset, so the first 1139 * line is line 1. 1140 */ getLine()1141 public int getLine() { 1142 return line; 1143 } 1144 1145 /** 1146 * Return the column where the parse exception occurred, or -1 when 1147 * none is provided. The value is specified as 1-offset, so the first 1148 * line is line 1. 1149 */ getColumn()1150 public int getColumn() { 1151 return column; 1152 } 1153 } 1154 1155 /** 1156 * Thrown when encountering an unknown field while parsing 1157 * a text format message. 1158 */ 1159 public static class UnknownFieldParseException extends ParseException { 1160 private final String unknownField; 1161 1162 /** 1163 * Create a new instance, with -1 as the line and column numbers, and an 1164 * empty unknown field name. 
1165 */ UnknownFieldParseException(final String message)1166 public UnknownFieldParseException(final String message) { 1167 this(-1, -1, "", message); 1168 } 1169 1170 /** 1171 * Create a new instance 1172 * 1173 * @param line the line number where the parse error occurred, 1174 * using 1-offset. 1175 * @param column the column number where the parser error occurred, 1176 * using 1-offset. 1177 * @param unknownField the name of the unknown field found while parsing. 1178 */ UnknownFieldParseException(final int line, final int column, final String unknownField, final String message)1179 public UnknownFieldParseException(final int line, final int column, 1180 final String unknownField, final String message) { 1181 super(line, column, message); 1182 this.unknownField = unknownField; 1183 } 1184 1185 /** 1186 * Return the name of the unknown field encountered while parsing the 1187 * protocol buffer string. 1188 */ getUnknownField()1189 public String getUnknownField() { 1190 return unknownField; 1191 } 1192 } 1193 1194 private static final Parser PARSER = Parser.newBuilder().build(); 1195 1196 /** 1197 * Return a {@link Parser} instance which can parse text-format 1198 * messages. The returned instance is thread-safe. 1199 */ getParser()1200 public static Parser getParser() { 1201 return PARSER; 1202 } 1203 1204 /** 1205 * Parse a text-format message from {@code input} and merge the contents 1206 * into {@code builder}. 1207 */ merge(final Readable input, final Message.Builder builder)1208 public static void merge(final Readable input, 1209 final Message.Builder builder) 1210 throws IOException { 1211 PARSER.merge(input, builder); 1212 } 1213 1214 /** 1215 * Parse a text-format message from {@code input} and merge the contents 1216 * into {@code builder}. 
1217 */ merge(final CharSequence input, final Message.Builder builder)1218 public static void merge(final CharSequence input, 1219 final Message.Builder builder) 1220 throws ParseException { 1221 PARSER.merge(input, builder); 1222 } 1223 1224 /** 1225 * Parse a text-format message from {@code input} and merge the contents 1226 * into {@code builder}. Extensions will be recognized if they are 1227 * registered in {@code extensionRegistry}. 1228 */ merge(final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1229 public static void merge(final Readable input, 1230 final ExtensionRegistry extensionRegistry, 1231 final Message.Builder builder) 1232 throws IOException { 1233 PARSER.merge(input, extensionRegistry, builder); 1234 } 1235 1236 1237 /** 1238 * Parse a text-format message from {@code input} and merge the contents 1239 * into {@code builder}. Extensions will be recognized if they are 1240 * registered in {@code extensionRegistry}. 1241 */ merge(final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1242 public static void merge(final CharSequence input, 1243 final ExtensionRegistry extensionRegistry, 1244 final Message.Builder builder) 1245 throws ParseException { 1246 PARSER.merge(input, extensionRegistry, builder); 1247 } 1248 1249 1250 /** 1251 * Parser for text-format proto2 instances. This class is thread-safe. 1252 * The implementation largely follows google/protobuf/text_format.cc. 1253 * 1254 * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or 1255 * {@link Builder} to control the parser behavior. 1256 */ 1257 public static class Parser { 1258 /** 1259 * Determines if repeated values for non-repeated fields and 1260 * oneofs are permitted. 
For example, given required/optional field "foo" 1261 * and a oneof containing "baz" and "qux": 1262 * <ul> 1263 * <li>"foo: 1 foo: 2" 1264 * <li>"baz: 1 qux: 2" 1265 * <li>merging "foo: 2" into a proto in which foo is already set, or 1266 * <li>merging "qux: 2" into a proto in which baz is already set. 1267 * </ul> 1268 */ 1269 public enum SingularOverwritePolicy { 1270 /** The last value is retained. */ 1271 ALLOW_SINGULAR_OVERWRITES, 1272 /** An error is issued. */ 1273 FORBID_SINGULAR_OVERWRITES 1274 } 1275 1276 private final boolean allowUnknownFields; 1277 private final SingularOverwritePolicy singularOverwritePolicy; 1278 private TextFormatParseInfoTree.Builder parseInfoTreeBuilder; 1279 Parser( boolean allowUnknownFields, SingularOverwritePolicy singularOverwritePolicy, TextFormatParseInfoTree.Builder parseInfoTreeBuilder)1280 private Parser( 1281 boolean allowUnknownFields, SingularOverwritePolicy singularOverwritePolicy, 1282 TextFormatParseInfoTree.Builder parseInfoTreeBuilder) { 1283 this.allowUnknownFields = allowUnknownFields; 1284 this.singularOverwritePolicy = singularOverwritePolicy; 1285 this.parseInfoTreeBuilder = parseInfoTreeBuilder; 1286 } 1287 1288 /** 1289 * Returns a new instance of {@link Builder}. 1290 */ newBuilder()1291 public static Builder newBuilder() { 1292 return new Builder(); 1293 } 1294 1295 /** 1296 * Builder that can be used to obtain new instances of {@link Parser}. 1297 */ 1298 public static class Builder { 1299 private boolean allowUnknownFields = false; 1300 private SingularOverwritePolicy singularOverwritePolicy = 1301 SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES; 1302 private TextFormatParseInfoTree.Builder parseInfoTreeBuilder = null; 1303 1304 1305 /** 1306 * Sets parser behavior when a non-repeated field appears more than once. 
1307 */ setSingularOverwritePolicy(SingularOverwritePolicy p)1308 public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) { 1309 this.singularOverwritePolicy = p; 1310 return this; 1311 } 1312 setParseInfoTreeBuilder( TextFormatParseInfoTree.Builder parseInfoTreeBuilder)1313 public Builder setParseInfoTreeBuilder( 1314 TextFormatParseInfoTree.Builder parseInfoTreeBuilder) { 1315 this.parseInfoTreeBuilder = parseInfoTreeBuilder; 1316 return this; 1317 } 1318 build()1319 public Parser build() { 1320 return new Parser( 1321 allowUnknownFields, singularOverwritePolicy, parseInfoTreeBuilder); 1322 } 1323 } 1324 1325 /** 1326 * Parse a text-format message from {@code input} and merge the contents 1327 * into {@code builder}. 1328 */ merge(final Readable input, final Message.Builder builder)1329 public void merge(final Readable input, 1330 final Message.Builder builder) 1331 throws IOException { 1332 merge(input, ExtensionRegistry.getEmptyRegistry(), builder); 1333 } 1334 1335 /** 1336 * Parse a text-format message from {@code input} and merge the contents 1337 * into {@code builder}. 1338 */ merge(final CharSequence input, final Message.Builder builder)1339 public void merge(final CharSequence input, 1340 final Message.Builder builder) 1341 throws ParseException { 1342 merge(input, ExtensionRegistry.getEmptyRegistry(), builder); 1343 } 1344 1345 /** 1346 * Parse a text-format message from {@code input} and merge the contents 1347 * into {@code builder}. Extensions will be recognized if they are 1348 * registered in {@code extensionRegistry}. 1349 */ merge(final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1350 public void merge(final Readable input, 1351 final ExtensionRegistry extensionRegistry, 1352 final Message.Builder builder) 1353 throws IOException { 1354 // Read the entire input to a String then parse that. 
1355 1356 // If StreamTokenizer were not quite so crippled, or if there were a kind 1357 // of Reader that could read in chunks that match some particular regex, 1358 // or if we wanted to write a custom Reader to tokenize our stream, then 1359 // we would not have to read to one big String. Alas, none of these is 1360 // the case. Oh well. 1361 1362 merge(toStringBuilder(input), extensionRegistry, builder); 1363 } 1364 1365 1366 private static final int BUFFER_SIZE = 4096; 1367 1368 // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer) 1369 // overhead is worthwhile toStringBuilder(final Readable input)1370 private static StringBuilder toStringBuilder(final Readable input) 1371 throws IOException { 1372 final StringBuilder text = new StringBuilder(); 1373 final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE); 1374 while (true) { 1375 final int n = input.read(buffer); 1376 if (n == -1) { 1377 break; 1378 } 1379 buffer.flip(); 1380 text.append(buffer, 0, n); 1381 } 1382 return text; 1383 } 1384 1385 // Check both unknown fields and unknown extensions and log warming messages 1386 // or throw exceptions according to the flag. checkUnknownFields(final List<String> unknownFields)1387 private void checkUnknownFields(final List<String> unknownFields) 1388 throws ParseException { 1389 if (unknownFields.isEmpty()) { 1390 return; 1391 } 1392 1393 StringBuilder msg = new StringBuilder("Input contains unknown fields and/or extensions:"); 1394 for (String field : unknownFields) { 1395 msg.append('\n').append(field); 1396 } 1397 1398 if (allowUnknownFields) { 1399 logger.warning(msg.toString()); 1400 } else { 1401 String[] lineColumn = unknownFields.get(0).split(":"); 1402 throw new ParseException(Integer.valueOf(lineColumn[0]), 1403 Integer.valueOf(lineColumn[1]), msg.toString()); 1404 } 1405 } 1406 1407 /** 1408 * Parse a text-format message from {@code input} and merge the contents 1409 * into {@code builder}. 
Extensions will be recognized if they are 1410 * registered in {@code extensionRegistry}. 1411 */ merge(final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1412 public void merge(final CharSequence input, 1413 final ExtensionRegistry extensionRegistry, 1414 final Message.Builder builder) 1415 throws ParseException { 1416 final Tokenizer tokenizer = new Tokenizer(input); 1417 MessageReflection.BuilderAdapter target = 1418 new MessageReflection.BuilderAdapter(builder); 1419 1420 List<String> unknownFields = new ArrayList<String>(); 1421 1422 while (!tokenizer.atEnd()) { 1423 mergeField(tokenizer, extensionRegistry, target, unknownFields); 1424 } 1425 1426 checkUnknownFields(unknownFields); 1427 } 1428 1429 1430 /** 1431 * Parse a single field from {@code tokenizer} and merge it into 1432 * {@code builder}. 1433 */ mergeField(final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, List<String> unknownFields)1434 private void mergeField(final Tokenizer tokenizer, 1435 final ExtensionRegistry extensionRegistry, 1436 final MessageReflection.MergeTarget target, 1437 List<String> unknownFields) 1438 throws ParseException { 1439 mergeField(tokenizer, extensionRegistry, target, parseInfoTreeBuilder, 1440 unknownFields); 1441 } 1442 1443 /** 1444 * Parse a single field from {@code tokenizer} and merge it into 1445 * {@code builder}. 
*/
    private void mergeField(final Tokenizer tokenizer,
                            final ExtensionRegistry extensionRegistry,
                            final MessageReflection.MergeTarget target,
                            TextFormatParseInfoTree.Builder parseTreeBuilder,
                            List<String> unknownFields)
                            throws ParseException {
      FieldDescriptor field = null;
      // Remember where the field starts so its location can be recorded in
      // the parse tree after its value has been consumed.
      int startLine = tokenizer.getLine();
      int startColumn = tokenizer.getColumn();
      final Descriptor type = target.getDescriptorForType();
      ExtensionRegistry.ExtensionInfo extension = null;

      if (tokenizer.tryConsume("[")) {
        // An extension: a dot-separated name enclosed in square brackets.
        final StringBuilder name =
            new StringBuilder(tokenizer.consumeIdentifier());
        while (tokenizer.tryConsume(".")) {
          name.append('.');
          name.append(tokenizer.consumeIdentifier());
        }

        extension = target.findExtensionByName(
            extensionRegistry, name.toString());

        if (extension == null) {
          // Record "line:column:<TAB>name"; checkUnknownFields() decides later
          // whether this is an error.
          unknownFields.add((tokenizer.getPreviousLine() + 1) + ":" +
              (tokenizer.getPreviousColumn() + 1) + ":\t" +
              type.getFullName() + ".[" + name + "]");
        } else {
          if (extension.descriptor.getContainingType() != type) {
            throw tokenizer.parseExceptionPreviousToken(
              "Extension \"" + name + "\" does not extend message type \""
              + type.getFullName() + "\".");
          }
          field = extension.descriptor;
        }

        tokenizer.consume("]");
      } else {
        final String name = tokenizer.consumeIdentifier();
        field = type.findFieldByName(name);

        // Group names are expected to be capitalized as they appear in the
        // .proto file, which actually matches their type names, not their field
        // names.
        if (field == null) {
          // Explicitly specify US locale so that this code does not break when
          // executing in Turkey.
          final String lowerName = name.toLowerCase(Locale.US);
          field = type.findFieldByName(lowerName);
          // If the case-insensitive match worked but the field is NOT a group,
          // discard the match.
          if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
            field = null;
          }
        }
        // Again, special-case group names as described above: a group only
        // matches when the given name equals its message type name.
        if (field != null && field.getType() == FieldDescriptor.Type.GROUP
            && !field.getMessageType().getName().equals(name)) {
          field = null;
        }

        if (field == null) {
          // Record "line:column:<TAB>name"; checkUnknownFields() decides later
          // whether this is an error.
          unknownFields.add((tokenizer.getPreviousLine() + 1) + ":" +
              (tokenizer.getPreviousColumn() + 1) + ":\t" +
              type.getFullName() + "." + name);
        }
      }

      // Skips unknown fields.
      if (field == null) {
        // Try to guess the type of this field.
        // If this field is not a message, there should be a ":" between the
        // field name and the field value and also the field value should not
        // start with "{" or "<" which indicates the beginning of a message body.
        // If there is no ":" or there is a "{" or "<" after ":", this field has
        // to be a message or the input is ill-formed.
        if (tokenizer.tryConsume(":")
            && !tokenizer.lookingAt("{")
            && !tokenizer.lookingAt("<")) {
          skipFieldValue(tokenizer);
        } else {
          skipFieldMessage(tokenizer);
        }
        return;
      }

      // Handle potential ':'.
      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
        tokenizer.tryConsume(":");  // optional
        if (parseTreeBuilder != null) {
          // Sub-message contents are recorded under a child parse tree builder.
          TextFormatParseInfoTree.Builder childParseTreeBuilder =
              parseTreeBuilder.getBuilderForSubMessageField(field);
          consumeFieldValues(tokenizer, extensionRegistry, target, field, extension,
              childParseTreeBuilder, unknownFields);
        } else {
          consumeFieldValues(tokenizer, extensionRegistry, target, field, extension,
              parseTreeBuilder, unknownFields);
        }
      } else {
        tokenizer.consume(":");  // required
        consumeFieldValues(tokenizer, extensionRegistry, target, field,
            extension, parseTreeBuilder, unknownFields);
      }

      if (parseTreeBuilder != null) {
        parseTreeBuilder.setLocation(
            field, TextFormatParseLocation.create(startLine, startColumn));
      }

      // For historical reasons, fields may optionally be separated by commas or
      // semicolons.
      if (!tokenizer.tryConsume(";")) {
        tokenizer.tryConsume(",");
      }
    }

    /**
     * Parse one or more field values from {@code tokenizer} and merge them
     * into {@code target}. A repeated field may be written as a bracketed,
     * comma-separated list; any other input is handled as a single value.
     */
    private void consumeFieldValues(
        final Tokenizer tokenizer,
        final ExtensionRegistry extensionRegistry,
        final MessageReflection.MergeTarget target,
        final FieldDescriptor field,
        final ExtensionRegistry.ExtensionInfo extension,
        final TextFormatParseInfoTree.Builder parseTreeBuilder,
        List<String> unknownFields)
        throws ParseException {
      // Support specifying repeated field values as a comma-separated list.
      // Ex."foo: [1, 2, 3]"
      if (field.isRepeated() && tokenizer.tryConsume("[")) {
        while (true) {
          consumeFieldValue(tokenizer, extensionRegistry, target, field, extension,
              parseTreeBuilder, unknownFields);
          if (tokenizer.tryConsume("]")) {
            // End of list.
            break;
          }
          tokenizer.consume(",");
        }
      } else {
        consumeFieldValue(tokenizer, extensionRegistry, target, field,
            extension, parseTreeBuilder, unknownFields);
      }
    }

    /**
     * Parse a single field value from {@code tokenizer} and merge it into
     * {@code target}. Message-typed values are parsed recursively field by
     * field; all other values are read with the tokenizer method matching the
     * field's declared type. The value is then added or set on the target,
     * subject to the configured {@link SingularOverwritePolicy}.
     */
    private void consumeFieldValue(
        final Tokenizer tokenizer,
        final ExtensionRegistry extensionRegistry,
        final MessageReflection.MergeTarget target,
        final FieldDescriptor field,
        final ExtensionRegistry.ExtensionInfo extension,
        final TextFormatParseInfoTree.Builder parseTreeBuilder,
        List<String> unknownFields)
        throws ParseException {
      Object value = null;

      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
        // A message body is delimited either by "<" ... ">" or "{" ... "}".
        final String endToken;
        if (tokenizer.tryConsume("<")) {
          endToken = ">";
        } else {
          tokenizer.consume("{");
          endToken = "}";
        }

        final MessageReflection.MergeTarget subField;
        subField = target.newMergeTargetForField(field,
            (extension == null) ? null : extension.defaultInstance);

        while (!tokenizer.tryConsume(endToken)) {
          if (tokenizer.atEnd()) {
            throw tokenizer.parseException(
              "Expected \"" + endToken + "\".");
          }
          mergeField(tokenizer, extensionRegistry, subField, parseTreeBuilder,
              unknownFields);
        }

        value = subField.finish();

      } else {
        switch (field.getType()) {
          case INT32:
          case SINT32:
          case SFIXED32:
            value = tokenizer.consumeInt32();
            break;

          case INT64:
          case SINT64:
          case SFIXED64:
            value = tokenizer.consumeInt64();
            break;

          case UINT32:
          case FIXED32:
            value = tokenizer.consumeUInt32();
            break;

          case UINT64:
          case FIXED64:
            value = tokenizer.consumeUInt64();
            break;

          case FLOAT:
            value = tokenizer.consumeFloat();
            break;

          case DOUBLE:
            value = tokenizer.consumeDouble();
            break;

          case BOOL:
            value = tokenizer.consumeBoolean();
            break;

          case STRING:
            value = tokenizer.consumeString();
            break;

          case BYTES:
            value = tokenizer.consumeByteString();
            break;

          case ENUM:
            final EnumDescriptor enumType = field.getEnumType();

            // Enum values may be given either by number or by name.
            if (tokenizer.lookingAtInteger()) {
              final int number = tokenizer.consumeInt32();
              value = enumType.findValueByNumber(number);
              if (value == null) {
                throw tokenizer.parseExceptionPreviousToken(
                  "Enum type \"" + enumType.getFullName()
                  + "\" has no value with number " + number + '.');
              }
            } else {
              final String id = tokenizer.consumeIdentifier();
              value = enumType.findValueByName(id);
              if (value == null) {
                throw tokenizer.parseExceptionPreviousToken(
                  "Enum type \"" + enumType.getFullName()
                  + "\" has no value named \"" + id + "\".");
              }
            }

            break;

          case MESSAGE:
          case GROUP:
            // Both were handled by the JavaType.MESSAGE branch above.
            throw new RuntimeException("Can't get here.");
        }
      }

      if (field.isRepeated()) {
        target.addRepeatedField(field, value);
      } else if ((singularOverwritePolicy
              == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES)
          && target.hasField(field)) {
        // The singular field already has a value and overwrites are forbidden.
        throw tokenizer.parseExceptionPreviousToken("Non-repeated field \""
            + field.getFullName() + "\" cannot be overwritten.");
      } else if ((singularOverwritePolicy
              == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES)
          && field.getContainingOneof() != null
          && target.hasOneof(field.getContainingOneof())) {
        // Another member of the same oneof is already set and overwrites are
        // forbidden.
        Descriptors.OneofDescriptor oneof = field.getContainingOneof();
        throw tokenizer.parseExceptionPreviousToken("Field \""
            + field.getFullName() + "\" is specified along with field \""
            + target.getOneofFieldDescriptor(oneof).getFullName()
            + "\", another member of oneof \"" + oneof.getName() + "\".");
      } else {
        target.setField(field, value);
      }
    }

    /**
     * Skips the next field including the field's name and value, plus an
     * optional trailing ";" or ",".
     */
    private void skipField(Tokenizer tokenizer) throws ParseException {
      if (tokenizer.tryConsume("[")) {
        // Extension name.
        do {
          tokenizer.consumeIdentifier();
        } while (tokenizer.tryConsume("."));
        tokenizer.consume("]");
      } else {
        tokenizer.consumeIdentifier();
      }

      // Try to guess the type of this field.
      // If this field is not a message, there should be a ":" between the
      // field name and the field value and also the field value should not
      // start with "{" or "<" which indicates the beginning of a message body.
      // If there is no ":" or there is a "{" or "<" after ":", this field has
      // to be a message or the input is ill-formed.
      if (tokenizer.tryConsume(":")
          && !tokenizer.lookingAt("<")
          && !tokenizer.lookingAt("{")) {
        skipFieldValue(tokenizer);
      } else {
        skipFieldMessage(tokenizer);
      }
      // For historical reasons, fields may optionally be separated by commas or
      // semicolons.
      if (!tokenizer.tryConsume(";")) {
        tokenizer.tryConsume(",");
      }
    }

    /**
     * Skips the whole body of a message including the beginning delimiter and
     * the ending delimiter.
     */
    private void skipFieldMessage(Tokenizer tokenizer) throws ParseException {
      final String delimiter;
      if (tokenizer.tryConsume("<")) {
        delimiter = ">";
      } else {
        tokenizer.consume("{");
        delimiter = "}";
      }
      // Skip fields until either closing token shows up; the consume() below
      // then requires it to be the one matching the opening delimiter.
      while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) {
        skipField(tokenizer);
      }
      tokenizer.consume(delimiter);
    }

    /**
     * Skips a field value: either a run of adjacent string tokens, or a
     * single identifier or numeric token.
     *
     * @throws ParseException if the current token is none of those
     */
    private void skipFieldValue(Tokenizer tokenizer) throws ParseException {
      if (tokenizer.tryConsumeString()) {
        // Adjacent string tokens form one value; swallow all of them.
        while (tokenizer.tryConsumeString()) {}
        return;
      }
      if (!tokenizer.tryConsumeIdentifier()   // includes enum & boolean
          && !tokenizer.tryConsumeInt64()     // includes int32
          && !tokenizer.tryConsumeUInt64()    // includes uint32
          && !tokenizer.tryConsumeDouble()
          && !tokenizer.tryConsumeFloat()) {
        throw tokenizer.parseException(
            "Invalid field value: " + tokenizer.currentToken);
      }
    }
  }

  // =================================================================
  // Utility functions
  //
  // Some of these methods are package-private because Descriptors.java uses
  // them.

  /**
   * Escapes bytes in the format used in protocol buffer text format, which
   * is the same as the format used for C string literals.
* All bytes
   * that are not printable 7-bit ASCII characters are escaped, as well as
   * backslash, single-quote, and double-quote characters.  Characters for
   * which no defined short-hand escape sequence is defined will be escaped
   * using 3-digit octal sequences.
   *
   * <p>Delegates to {@code TextFormatEscaper.escapeBytes}.
   */
  public static String escapeBytes(ByteString input) {
    return TextFormatEscaper.escapeBytes(input);
  }

  /**
   * Like {@link #escapeBytes(ByteString)}, but used for byte array.
   */
  public static String escapeBytes(byte[] input) {
    return TextFormatEscaper.escapeBytes(input);
  }

  /**
   * Un-escape a byte sequence as escaped using
   * {@link #escapeBytes(ByteString)}.  Hex escapes (starting with "\x" and
   * followed by one or two hex digits) are also recognized, as are octal
   * escapes of one to three digits.
   *
   * @param charString the escaped text; it is encoded as UTF-8 before the
   *     escapes are processed
   * @return the unescaped bytes
   * @throws InvalidEscapeSequenceException if a backslash ends the input, is
   *     followed by an unsupported character, or "\x" has no hex digit
   */
  public static ByteString unescapeBytes(final CharSequence charString)
      throws InvalidEscapeSequenceException {
    // First convert the Java character sequence to UTF-8 bytes.
    ByteString input = ByteString.copyFromUtf8(charString.toString());
    // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
    // escapes can all be expressed with ASCII characters, so it is safe to
    // operate on bytes here.
    //
    // Unescaping the input byte array will result in a byte sequence that's no
    // longer than the input.  That's because each escape sequence is between
    // two and four bytes long and stands for a single byte.
    final byte[] result = new byte[input.size()];
    int pos = 0;
    for (int i = 0; i < input.size(); i++) {
      byte c = input.byteAt(i);
      if (c == '\\') {
        if (i + 1 < input.size()) {
          ++i;
          c = input.byteAt(i);
          if (isOctal(c)) {
            // Octal escape: one mandatory digit plus up to two more.
            int code = digitValue(c);
            if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
              ++i;
              code = code * 8 + digitValue(input.byteAt(i));
            }
            if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
              ++i;
              code = code * 8 + digitValue(input.byteAt(i));
            }
            // TODO: Check that 0 <= code && code <= 0xFF.
            // Until then, the cast below keeps only the low 8 bits of code.
            result[pos++] = (byte) code;
          } else {
            switch (c) {
              case 'a' : result[pos++] = 0x07; break;
              case 'b' : result[pos++] = '\b'; break;
              case 'f' : result[pos++] = '\f'; break;
              case 'n' : result[pos++] = '\n'; break;
              case 'r' : result[pos++] = '\r'; break;
              case 't' : result[pos++] = '\t'; break;
              case 'v' : result[pos++] = 0x0b; break;
              case '\\': result[pos++] = '\\'; break;
              case '\'': result[pos++] = '\''; break;
              case '"' : result[pos++] = '\"'; break;

              case 'x':
                // Hex escape: one mandatory digit plus an optional second one.
                int code = 0;
                if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
                  ++i;
                  code = digitValue(input.byteAt(i));
                } else {
                  throw new InvalidEscapeSequenceException(
                      "Invalid escape sequence: '\\x' with no digits");
                }
                if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
                  ++i;
                  code = code * 16 + digitValue(input.byteAt(i));
                }
                result[pos++] = (byte) code;
                break;

              default:
                throw new InvalidEscapeSequenceException(
                    "Invalid escape sequence: '\\" + (char) c + '\'');
            }
          }
        } else {
          throw new InvalidEscapeSequenceException(
              "Invalid escape sequence: '\\' at end of string.");
        }
      } else {
        // Not part of an escape: copy the byte through unchanged.
        result[pos++] = c;
      }
    }

    // Copy only when the output is shorter than the scratch array.
    return result.length == pos
        ? ByteString.wrap(result)  // This reference has not been out of our control.
        : ByteString.copyFrom(result, 0, pos);
  }

  /**
   * Thrown by {@link TextFormat#unescapeBytes} and
   * {@link TextFormat#unescapeText} when an invalid escape sequence is seen.
1912 */ 1913 public static class InvalidEscapeSequenceException extends IOException { 1914 private static final long serialVersionUID = -8164033650142593304L; 1915 InvalidEscapeSequenceException(final String description)1916 InvalidEscapeSequenceException(final String description) { 1917 super(description); 1918 } 1919 } 1920 1921 /** 1922 * Like {@link #escapeBytes(ByteString)}, but escapes a text string. 1923 * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped 1924 * individually as a 3-digit octal escape. Yes, it's weird. 1925 */ escapeText(final String input)1926 static String escapeText(final String input) { 1927 return escapeBytes(ByteString.copyFromUtf8(input)); 1928 } 1929 1930 /** 1931 * Escape double quotes and backslashes in a String for unicode output of a message. 1932 */ escapeDoubleQuotesAndBackslashes(final String input)1933 public static String escapeDoubleQuotesAndBackslashes(final String input) { 1934 return TextFormatEscaper.escapeDoubleQuotesAndBackslashes(input); 1935 } 1936 1937 /** 1938 * Un-escape a text string as escaped using {@link #escapeText(String)}. 1939 * Two-digit hex escapes (starting with "\x") are also recognized. 1940 */ unescapeText(final String input)1941 static String unescapeText(final String input) 1942 throws InvalidEscapeSequenceException { 1943 return unescapeBytes(input).toStringUtf8(); 1944 } 1945 1946 /** Is this an octal digit? */ isOctal(final byte c)1947 private static boolean isOctal(final byte c) { 1948 return '0' <= c && c <= '7'; 1949 } 1950 1951 /** Is this a hex digit? */ isHex(final byte c)1952 private static boolean isHex(final byte c) { 1953 return ('0' <= c && c <= '9') 1954 || ('a' <= c && c <= 'f') 1955 || ('A' <= c && c <= 'F'); 1956 } 1957 1958 /** 1959 * Interpret a character as a digit (in any base up to 36) and return the 1960 * numeric value. This is like {@code Character.digit()} but we don't accept 1961 * non-ASCII digits. 
1962 */ digitValue(final byte c)1963 private static int digitValue(final byte c) { 1964 if ('0' <= c && c <= '9') { 1965 return c - '0'; 1966 } else if ('a' <= c && c <= 'z') { 1967 return c - 'a' + 10; 1968 } else { 1969 return c - 'A' + 10; 1970 } 1971 } 1972 1973 /** 1974 * Parse a 32-bit signed integer from the text. Unlike the Java standard 1975 * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" 1976 * and "0" to signify hexadecimal and octal numbers, respectively. 1977 */ parseInt32(final String text)1978 static int parseInt32(final String text) throws NumberFormatException { 1979 return (int) parseInteger(text, true, false); 1980 } 1981 1982 /** 1983 * Parse a 32-bit unsigned integer from the text. Unlike the Java standard 1984 * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" 1985 * and "0" to signify hexadecimal and octal numbers, respectively. The 1986 * result is coerced to a (signed) {@code int} when returned since Java has 1987 * no unsigned integer type. 1988 */ parseUInt32(final String text)1989 static int parseUInt32(final String text) throws NumberFormatException { 1990 return (int) parseInteger(text, false, false); 1991 } 1992 1993 /** 1994 * Parse a 64-bit signed integer from the text. Unlike the Java standard 1995 * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" 1996 * and "0" to signify hexadecimal and octal numbers, respectively. 1997 */ parseInt64(final String text)1998 static long parseInt64(final String text) throws NumberFormatException { 1999 return parseInteger(text, true, true); 2000 } 2001 2002 /** 2003 * Parse a 64-bit unsigned integer from the text. Unlike the Java standard 2004 * {@code Integer.parseInt()}, this function recognizes the prefixes "0x" 2005 * and "0" to signify hexadecimal and octal numbers, respectively. The 2006 * result is coerced to a (signed) {@code long} when returned since Java has 2007 * no unsigned long type. 
2008 */ parseUInt64(final String text)2009 static long parseUInt64(final String text) throws NumberFormatException { 2010 return parseInteger(text, false, true); 2011 } 2012 parseInteger(final String text, final boolean isSigned, final boolean isLong)2013 private static long parseInteger(final String text, 2014 final boolean isSigned, 2015 final boolean isLong) 2016 throws NumberFormatException { 2017 int pos = 0; 2018 2019 boolean negative = false; 2020 if (text.startsWith("-", pos)) { 2021 if (!isSigned) { 2022 throw new NumberFormatException("Number must be positive: " + text); 2023 } 2024 ++pos; 2025 negative = true; 2026 } 2027 2028 int radix = 10; 2029 if (text.startsWith("0x", pos)) { 2030 pos += 2; 2031 radix = 16; 2032 } else if (text.startsWith("0", pos)) { 2033 radix = 8; 2034 } 2035 2036 final String numberText = text.substring(pos); 2037 2038 long result = 0; 2039 if (numberText.length() < 16) { 2040 // Can safely assume no overflow. 2041 result = Long.parseLong(numberText, radix); 2042 if (negative) { 2043 result = -result; 2044 } 2045 2046 // Check bounds. 2047 // No need to check for 64-bit numbers since they'd have to be 16 chars 2048 // or longer to overflow. 2049 if (!isLong) { 2050 if (isSigned) { 2051 if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) { 2052 throw new NumberFormatException( 2053 "Number out of range for 32-bit signed integer: " + text); 2054 } 2055 } else { 2056 if (result >= (1L << 32) || result < 0) { 2057 throw new NumberFormatException( 2058 "Number out of range for 32-bit unsigned integer: " + text); 2059 } 2060 } 2061 } 2062 } else { 2063 BigInteger bigValue = new BigInteger(numberText, radix); 2064 if (negative) { 2065 bigValue = bigValue.negate(); 2066 } 2067 2068 // Check bounds. 
2069 if (!isLong) { 2070 if (isSigned) { 2071 if (bigValue.bitLength() > 31) { 2072 throw new NumberFormatException( 2073 "Number out of range for 32-bit signed integer: " + text); 2074 } 2075 } else { 2076 if (bigValue.bitLength() > 32) { 2077 throw new NumberFormatException( 2078 "Number out of range for 32-bit unsigned integer: " + text); 2079 } 2080 } 2081 } else { 2082 if (isSigned) { 2083 if (bigValue.bitLength() > 63) { 2084 throw new NumberFormatException( 2085 "Number out of range for 64-bit signed integer: " + text); 2086 } 2087 } else { 2088 if (bigValue.bitLength() > 64) { 2089 throw new NumberFormatException( 2090 "Number out of range for 64-bit unsigned integer: " + text); 2091 } 2092 } 2093 } 2094 2095 result = bigValue.longValue(); 2096 } 2097 2098 return result; 2099 } 2100 } 2101