1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 package com.google.protobuf; 32 33 import static java.nio.charset.StandardCharsets.UTF_8; 34 35 import com.google.protobuf.Descriptors.Descriptor; 36 import com.google.protobuf.Descriptors.EnumDescriptor; 37 import com.google.protobuf.Descriptors.EnumValueDescriptor; 38 import com.google.protobuf.Descriptors.FieldDescriptor; 39 import com.google.protobuf.MessageReflection.MergeTarget; 40 import java.io.IOException; 41 import java.math.BigInteger; 42 import java.nio.CharBuffer; 43 import java.util.ArrayList; 44 import java.util.Collections; 45 import java.util.List; 46 import java.util.Locale; 47 import java.util.Map; 48 import java.util.logging.Logger; 49 import java.util.regex.Matcher; 50 import java.util.regex.Pattern; 51 52 /** 53 * Provide text parsing and formatting support for proto2 instances. The implementation largely 54 * follows google/protobuf/text_format.cc. 55 * 56 * @author wenboz@google.com Wenbo Zhu 57 * @author kenton@google.com Kenton Varda 58 */ 59 public final class TextFormat { TextFormat()60 private TextFormat() {} 61 62 private static final Logger logger = Logger.getLogger(TextFormat.class.getName()); 63 64 /** 65 * Outputs a textual representation of the Protocol Message supplied into the parameter output. 66 * (This representation is the new version of the classic "ProtocolPrinter" output from the 67 * original Protocol Buffer system) 68 * 69 * @deprecated Use {@code printer().print(MessageOrBuilder, Appendable)} 70 */ 71 @Deprecated print(final MessageOrBuilder message, final Appendable output)72 public static void print(final MessageOrBuilder message, final Appendable output) 73 throws IOException { 74 printer().print(message, output); 75 } 76 77 /** 78 * Outputs a textual representation of {@code fields} to {@code output}. 79 * 80 * @deprecated Use {@code printer().print(UnknownFieldSet, Appendable)} 81 */ 82 @Deprecated print(final UnknownFieldSet fields, final Appendable output)83 public static void print(final UnknownFieldSet fields, final Appendable output) 84 throws IOException { 85 printer().print(fields, output); 86 } 87 88 /** 89 * Same as {@code print()}, except that non-ASCII characters are not escaped. 90 * 91 * @deprecated Use {@code printer().escapingNonAscii(false).print(MessageOrBuilder, Appendable)} 92 */ 93 @Deprecated printUnicode(final MessageOrBuilder message, final Appendable output)94 public static void printUnicode(final MessageOrBuilder message, final Appendable output) 95 throws IOException { 96 printer().escapingNonAscii(false).print(message, output); 97 } 98 99 /** 100 * Same as {@code print()}, except that non-ASCII characters are not escaped. 101 * 102 * @deprecated Use {@code printer().escapingNonAscii(false).print(UnknownFieldSet, Appendable)} 103 */ 104 @Deprecated printUnicode(final UnknownFieldSet fields, final Appendable output)105 public static void printUnicode(final UnknownFieldSet fields, final Appendable output) 106 throws IOException { 107 printer().escapingNonAscii(false).print(fields, output); 108 } 109 110 /** 111 * Generates a human readable form of this message, useful for debugging and other purposes, with 112 * no newline characters. This is just a trivial wrapper around 113 * {@link TextFormat.Printer#shortDebugString(MessageOrBuilder)}. 114 */ shortDebugString(final MessageOrBuilder message)115 public static String shortDebugString(final MessageOrBuilder message) { 116 return printer().shortDebugString(message); 117 } 118 119 /** 120 * Generates a human readable form of the field, useful for debugging and other purposes, with 121 * no newline characters. 122 * 123 * @deprecated Use {@code printer().shortDebugString(FieldDescriptor, Object)} 124 */ 125 @Deprecated shortDebugString(final FieldDescriptor field, final Object value)126 public static String shortDebugString(final FieldDescriptor field, final Object value) { 127 return printer().shortDebugString(field, value); 128 } 129 // 130 /** 131 * Generates a human readable form of the unknown fields, useful for debugging and other 132 * purposes, with no newline characters. 133 * 134 * @deprecated Use {@code printer().shortDebugString(UnknownFieldSet)} 135 */ 136 @Deprecated shortDebugString(final UnknownFieldSet fields)137 public static String shortDebugString(final UnknownFieldSet fields) { 138 return printer().shortDebugString(fields); 139 } 140 141 /** 142 * Like {@code print()}, but writes directly to a {@code String} and returns it. 143 * 144 * @deprecated Use {@code message.toString()} 145 */ 146 @Deprecated printToString(final MessageOrBuilder message)147 public static String printToString(final MessageOrBuilder message) { 148 return printer().printToString(message); 149 } 150 151 /** 152 * Like {@code print()}, but writes directly to a {@code String} and returns it. 153 * 154 * @deprecated Use {@link UnknownFieldSet#toString()} 155 */ 156 @Deprecated printToString(final UnknownFieldSet fields)157 public static String printToString(final UnknownFieldSet fields) { 158 return printer().printToString(fields); 159 } 160 161 /** 162 * Same as {@code printToString()}, except that non-ASCII characters in string type fields are not 163 * escaped in backslash+octals. 164 * 165 * @deprecated Use {@code printer().escapingNonAscii(false).printToString(MessageOrBuilder)} 166 */ 167 @Deprecated printToUnicodeString(final MessageOrBuilder message)168 public static String printToUnicodeString(final MessageOrBuilder message) { 169 return printer().escapingNonAscii(false).printToString(message); 170 } 171 172 /** 173 * Same as {@code printToString()}, except that non-ASCII characters in string type fields are 174 * not escaped in backslash+octals. 175 * 176 * @deprecated Use {@code printer().escapingNonAscii(false).printToString(UnknownFieldSet)} 177 */ 178 @Deprecated printToUnicodeString(final UnknownFieldSet fields)179 public static String printToUnicodeString(final UnknownFieldSet fields) { 180 return printer().escapingNonAscii(false).printToString(fields); 181 } 182 // 183 /** @deprecated Use {@code printer().printField(FieldDescriptor, Object, Appendable)} */ 184 @Deprecated printField( final FieldDescriptor field, final Object value, final Appendable output)185 public static void printField( 186 final FieldDescriptor field, final Object value, final Appendable output) 187 throws IOException { 188 printer().printField(field, value, output); 189 } 190 // 191 /** @deprecated Use {@code printer().printFieldToString(FieldDescriptor, Object)} */ 192 @Deprecated printFieldToString(final FieldDescriptor field, final Object value)193 public static String printFieldToString(final FieldDescriptor field, final Object value) { 194 return printer().printFieldToString(field, value); 195 } 196 // 197 /** 198 * Outputs a unicode textual representation of the value of given field value. 199 * 200 * <p>Same as {@code printFieldValue()}, except that non-ASCII characters in string type fields 201 * are not escaped in backslash+octals. 202 * 203 * @deprecated Use {@code printer().escapingNonAscii(false).printFieldValue(FieldDescriptor, 204 * Object, Appendable)} 205 * @param field the descriptor of the field 206 * @param value the value of the field 207 * @param output the output to which to append the formatted value 208 * @throws ClassCastException if the value is not appropriate for the given field descriptor 209 * @throws IOException if there is an exception writing to the output 210 */ 211 @Deprecated printUnicodeFieldValue( final FieldDescriptor field, final Object value, final Appendable output)212 public static void printUnicodeFieldValue( 213 final FieldDescriptor field, final Object value, final Appendable output) 214 throws IOException { 215 printer().escapingNonAscii(false).printFieldValue(field, value, output); 216 } 217 218 /** 219 * Outputs a textual representation of the value of given field value. 220 * 221 * @deprecated Use {@code printer().printFieldValue(FieldDescriptor, Object, Appendable)} 222 * @param field the descriptor of the field 223 * @param value the value of the field 224 * @param output the output to which to append the formatted value 225 * @throws ClassCastException if the value is not appropriate for the given field descriptor 226 * @throws IOException if there is an exception writing to the output 227 */ 228 @Deprecated printFieldValue( final FieldDescriptor field, final Object value, final Appendable output)229 public static void printFieldValue( 230 final FieldDescriptor field, final Object value, final Appendable output) throws IOException { 231 printer().printFieldValue(field, value, output); 232 } 233 234 /** 235 * Outputs a textual representation of the value of an unknown field. 236 * 237 * @param tag the field's tag number 238 * @param value the value of the field 239 * @param output the output to which to append the formatted value 240 * @throws ClassCastException if the value is not appropriate for the given field descriptor 241 * @throws IOException if there is an exception writing to the output 242 */ printUnknownFieldValue( final int tag, final Object value, final Appendable output)243 public static void printUnknownFieldValue( 244 final int tag, final Object value, final Appendable output) throws IOException { 245 printUnknownFieldValue(tag, value, multiLineOutput(output)); 246 } 247 printUnknownFieldValue( final int tag, final Object value, final TextGenerator generator)248 private static void printUnknownFieldValue( 249 final int tag, final Object value, final TextGenerator generator) throws IOException { 250 switch (WireFormat.getTagWireType(tag)) { 251 case WireFormat.WIRETYPE_VARINT: 252 generator.print(unsignedToString((Long) value)); 253 break; 254 case WireFormat.WIRETYPE_FIXED32: 255 generator.print(String.format((Locale) null, "0x%08x", (Integer) value)); 256 break; 257 case WireFormat.WIRETYPE_FIXED64: 258 generator.print(String.format((Locale) null, "0x%016x", (Long) value)); 259 break; 260 case WireFormat.WIRETYPE_LENGTH_DELIMITED: 261 try { 262 // Try to parse and print the field as an embedded message 263 UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value); 264 generator.print("{"); 265 generator.eol(); 266 generator.indent(); 267 Printer.printUnknownFields(message, generator); 268 generator.outdent(); 269 generator.print("}"); 270 } catch (InvalidProtocolBufferException e) { 271 // If not parseable as a message, print as a String 272 generator.print("\""); 273 generator.print(escapeBytes((ByteString) value)); 274 generator.print("\""); 275 } 276 break; 277 case WireFormat.WIRETYPE_START_GROUP: 278 Printer.printUnknownFields((UnknownFieldSet) value, generator); 279 break; 280 default: 281 throw new IllegalArgumentException("Bad tag: " + tag); 282 } 283 } 284 285 /** Printer instance which escapes non-ASCII characters. */ printer()286 public static Printer printer() { 287 return Printer.DEFAULT; 288 } 289 290 /** Helper class for converting protobufs to text. */ 291 public static final class Printer { 292 293 // Printer instance which escapes non-ASCII characters. 294 private static final Printer DEFAULT = new Printer(true, TypeRegistry.getEmptyTypeRegistry()); 295 296 /** Whether to escape non ASCII characters with backslash and octal. */ 297 private final boolean escapeNonAscii; 298 299 private final TypeRegistry typeRegistry; 300 Printer(boolean escapeNonAscii, TypeRegistry typeRegistry)301 private Printer(boolean escapeNonAscii, TypeRegistry typeRegistry) { 302 this.escapeNonAscii = escapeNonAscii; 303 this.typeRegistry = typeRegistry; 304 } 305 306 /** 307 * Return a new Printer instance with the specified escape mode. 308 * 309 * @param escapeNonAscii If true, the new Printer will escape non-ASCII characters (this is the 310 * default behavior. If false, the new Printer will print non-ASCII characters as is. In 311 * either case, the new Printer still escapes newlines and quotes in strings. 312 * @return a new Printer that clones all other configurations from the current {@link Printer}, 313 * with the escape mode set to the given parameter. 314 */ escapingNonAscii(boolean escapeNonAscii)315 public Printer escapingNonAscii(boolean escapeNonAscii) { 316 return new Printer(escapeNonAscii, typeRegistry); 317 } 318 319 /** 320 * Creates a new {@link Printer} using the given typeRegistry. The new Printer clones all other 321 * configurations from the current {@link Printer}. 322 * 323 * @throws IllegalArgumentException if a registry is already set. 324 */ usingTypeRegistry(TypeRegistry typeRegistry)325 public Printer usingTypeRegistry(TypeRegistry typeRegistry) { 326 if (this.typeRegistry != TypeRegistry.getEmptyTypeRegistry()) { 327 throw new IllegalArgumentException("Only one typeRegistry is allowed."); 328 } 329 return new Printer(escapeNonAscii, typeRegistry); 330 } 331 332 /** 333 * Outputs a textual representation of the Protocol Message supplied into the parameter output. 334 * (This representation is the new version of the classic "ProtocolPrinter" output from the 335 * original Protocol Buffer system) 336 */ print(final MessageOrBuilder message, final Appendable output)337 public void print(final MessageOrBuilder message, final Appendable output) throws IOException { 338 print(message, multiLineOutput(output)); 339 } 340 341 /** Outputs a textual representation of {@code fields} to {@code output}. */ print(final UnknownFieldSet fields, final Appendable output)342 public void print(final UnknownFieldSet fields, final Appendable output) throws IOException { 343 printUnknownFields(fields, multiLineOutput(output)); 344 } 345 print(final MessageOrBuilder message, final TextGenerator generator)346 private void print(final MessageOrBuilder message, final TextGenerator generator) 347 throws IOException { 348 if (message.getDescriptorForType().getFullName().equals("google.protobuf.Any") 349 && printAny(message, generator)) { 350 return; 351 } 352 printMessage(message, generator); 353 } 354 355 /** 356 * Attempt to print the 'google.protobuf.Any' message in a human-friendly format. Returns false 357 * if the message isn't a valid 'google.protobuf.Any' message (in which case the message should 358 * be rendered just like a regular message to help debugging). 359 */ printAny(final MessageOrBuilder message, final TextGenerator generator)360 private boolean printAny(final MessageOrBuilder message, final TextGenerator generator) 361 throws IOException { 362 Descriptor messageType = message.getDescriptorForType(); 363 FieldDescriptor typeUrlField = messageType.findFieldByNumber(1); 364 FieldDescriptor valueField = messageType.findFieldByNumber(2); 365 if (typeUrlField == null 366 || typeUrlField.getType() != FieldDescriptor.Type.STRING 367 || valueField == null 368 || valueField.getType() != FieldDescriptor.Type.BYTES) { 369 // The message may look like an Any but isn't actually an Any message (might happen if the 370 // user tries to use DynamicMessage to construct an Any from incomplete Descriptor). 371 return false; 372 } 373 String typeUrl = (String) message.getField(typeUrlField); 374 // If type_url is not set, we will not be able to decode the content of the value, so just 375 // print out the Any like a regular message. 376 if (typeUrl.isEmpty()) { 377 return false; 378 } 379 Object value = message.getField(valueField); 380 381 Message.Builder contentBuilder = null; 382 try { 383 Descriptor contentType = typeRegistry.getDescriptorForTypeUrl(typeUrl); 384 if (contentType == null) { 385 return false; 386 } 387 contentBuilder = DynamicMessage.getDefaultInstance(contentType).newBuilderForType(); 388 contentBuilder.mergeFrom((ByteString) value); 389 } catch (InvalidProtocolBufferException e) { 390 // The value of Any is malformed. We cannot print it out nicely, so fallback to printing out 391 // the type_url and value as bytes. Note that we fail open here to be consistent with 392 // text_format.cc, and also to allow a way for users to inspect the content of the broken 393 // message. 394 return false; 395 } 396 generator.print("["); 397 generator.print(typeUrl); 398 generator.print("] {"); 399 generator.eol(); 400 generator.indent(); 401 print(contentBuilder, generator); 402 generator.outdent(); 403 generator.print("}"); 404 generator.eol(); 405 return true; 406 } 407 printFieldToString(final FieldDescriptor field, final Object value)408 public String printFieldToString(final FieldDescriptor field, final Object value) { 409 try { 410 final StringBuilder text = new StringBuilder(); 411 printField(field, value, text); 412 return text.toString(); 413 } catch (IOException e) { 414 throw new IllegalStateException(e); 415 } 416 } 417 printField(final FieldDescriptor field, final Object value, final Appendable output)418 public void printField(final FieldDescriptor field, final Object value, final Appendable output) 419 throws IOException { 420 printField(field, value, multiLineOutput(output)); 421 } 422 printField( final FieldDescriptor field, final Object value, final TextGenerator generator)423 private void printField( 424 final FieldDescriptor field, final Object value, final TextGenerator generator) 425 throws IOException { 426 // Sort map field entries by key 427 if (field.isMapField()) { 428 List<MapEntryAdapter> adapters = new ArrayList<>(); 429 for (Object entry : (List<?>) value) { 430 adapters.add(new MapEntryAdapter(entry, field)); 431 } 432 Collections.sort(adapters); 433 for (MapEntryAdapter adapter : adapters) { 434 printSingleField(field, adapter.getEntry(), generator); 435 } 436 } else if (field.isRepeated()) { 437 // Repeated field. Print each element. 438 for (Object element : (List<?>) value) { 439 printSingleField(field, element, generator); 440 } 441 } else { 442 printSingleField(field, value, generator); 443 } 444 } 445 446 /** 447 * An adapter class that can take a MapEntry or a MutableMapEntry and returns its key and entry. 448 * This class is created solely for the purpose of sorting map entries by its key and prevent 449 * duplicated logic by having a separate comparator for MapEntry and MutableMapEntry. 450 */ 451 private static class MapEntryAdapter implements Comparable<MapEntryAdapter> { 452 private Object entry; 453 454 @SuppressWarnings({"rawtypes"}) 455 private MapEntry mapEntry; 456 457 458 private final FieldDescriptor.JavaType fieldType; 459 MapEntryAdapter(Object entry, FieldDescriptor fieldDescriptor)460 public MapEntryAdapter(Object entry, FieldDescriptor fieldDescriptor) { 461 if (entry instanceof MapEntry) { 462 this.mapEntry = (MapEntry) entry; 463 } else { 464 this.entry = entry; 465 } 466 this.fieldType = extractFieldType(fieldDescriptor); 467 } 468 extractFieldType(FieldDescriptor fieldDescriptor)469 private static FieldDescriptor.JavaType extractFieldType(FieldDescriptor fieldDescriptor) { 470 return fieldDescriptor.getMessageType().getFields().get(0).getJavaType(); 471 } 472 getKey()473 public Object getKey() { 474 if (mapEntry != null) { 475 return mapEntry.getKey(); 476 } 477 return null; 478 } 479 getEntry()480 public Object getEntry() { 481 if (mapEntry != null) { 482 return mapEntry; 483 } 484 return entry; 485 } 486 487 @Override compareTo(MapEntryAdapter b)488 public int compareTo(MapEntryAdapter b) { 489 if (getKey() == null || b.getKey() == null) { 490 logger.info("Invalid key for map field."); 491 return -1; 492 } 493 switch (fieldType) { 494 case BOOLEAN: 495 return Boolean.compare((boolean) getKey(), (boolean) b.getKey()); 496 case LONG: 497 return Long.compare((long) getKey(), (long) b.getKey()); 498 case INT: 499 return Integer.compare((int) getKey(), (int) b.getKey()); 500 case STRING: 501 String aString = (String) getKey(); 502 String bString = (String) b.getKey(); 503 if (aString == null && bString == null) { 504 return 0; 505 } else if (aString == null && bString != null) { 506 return -1; 507 } else if (aString != null && bString == null) { 508 return 1; 509 } else { 510 return aString.compareTo(bString); 511 } 512 default: 513 return 0; 514 } 515 } 516 } 517 518 /** 519 * Outputs a textual representation of the value of given field value. 520 * 521 * @param field the descriptor of the field 522 * @param value the value of the field 523 * @param output the output to which to append the formatted value 524 * @throws ClassCastException if the value is not appropriate for the given field descriptor 525 * @throws IOException if there is an exception writing to the output 526 */ printFieldValue( final FieldDescriptor field, final Object value, final Appendable output)527 public void printFieldValue( 528 final FieldDescriptor field, final Object value, final Appendable output) 529 throws IOException { 530 printFieldValue(field, value, multiLineOutput(output)); 531 } 532 printFieldValue( final FieldDescriptor field, final Object value, final TextGenerator generator)533 private void printFieldValue( 534 final FieldDescriptor field, final Object value, final TextGenerator generator) 535 throws IOException { 536 switch (field.getType()) { 537 case INT32: 538 case SINT32: 539 case SFIXED32: 540 generator.print(((Integer) value).toString()); 541 break; 542 543 case INT64: 544 case SINT64: 545 case SFIXED64: 546 generator.print(((Long) value).toString()); 547 break; 548 549 case BOOL: 550 generator.print(((Boolean) value).toString()); 551 break; 552 553 case FLOAT: 554 generator.print(((Float) value).toString()); 555 break; 556 557 case DOUBLE: 558 generator.print(((Double) value).toString()); 559 break; 560 561 case UINT32: 562 case FIXED32: 563 generator.print(unsignedToString((Integer) value)); 564 break; 565 566 case UINT64: 567 case FIXED64: 568 generator.print(unsignedToString((Long) value)); 569 break; 570 571 case STRING: 572 generator.print("\""); 573 generator.print( 574 escapeNonAscii 575 ? TextFormatEscaper.escapeText((String) value) 576 : escapeDoubleQuotesAndBackslashes((String) value).replace("\n", "\\n")); 577 generator.print("\""); 578 break; 579 580 case BYTES: 581 generator.print("\""); 582 if (value instanceof ByteString) { 583 generator.print(escapeBytes((ByteString) value)); 584 } else { 585 generator.print(escapeBytes((byte[]) value)); 586 } 587 generator.print("\""); 588 break; 589 590 case ENUM: 591 generator.print(((EnumValueDescriptor) value).getName()); 592 break; 593 594 case MESSAGE: 595 case GROUP: 596 print((MessageOrBuilder) value, generator); 597 break; 598 } 599 } 600 601 /** Like {@code print()}, but writes directly to a {@code String} and returns it. */ printToString(final MessageOrBuilder message)602 public String printToString(final MessageOrBuilder message) { 603 try { 604 final StringBuilder text = new StringBuilder(); 605 print(message, text); 606 return text.toString(); 607 } catch (IOException e) { 608 throw new IllegalStateException(e); 609 } 610 } 611 /** Like {@code print()}, but writes directly to a {@code String} and returns it. */ printToString(final UnknownFieldSet fields)612 public String printToString(final UnknownFieldSet fields) { 613 try { 614 final StringBuilder text = new StringBuilder(); 615 print(fields, text); 616 return text.toString(); 617 } catch (IOException e) { 618 throw new IllegalStateException(e); 619 } 620 } 621 622 /** 623 * Generates a human readable form of this message, useful for debugging and other purposes, 624 * with no newline characters. 625 */ shortDebugString(final MessageOrBuilder message)626 public String shortDebugString(final MessageOrBuilder message) { 627 try { 628 final StringBuilder text = new StringBuilder(); 629 print(message, singleLineOutput(text)); 630 return text.toString(); 631 } catch (IOException e) { 632 throw new IllegalStateException(e); 633 } 634 } 635 636 /** 637 * Generates a human readable form of the field, useful for debugging and other purposes, with 638 * no newline characters. 639 */ shortDebugString(final FieldDescriptor field, final Object value)640 public String shortDebugString(final FieldDescriptor field, final Object value) { 641 try { 642 final StringBuilder text = new StringBuilder(); 643 printField(field, value, singleLineOutput(text)); 644 return text.toString(); 645 } catch (IOException e) { 646 throw new IllegalStateException(e); 647 } 648 } 649 650 /** 651 * Generates a human readable form of the unknown fields, useful for debugging and other 652 * purposes, with no newline characters. 653 */ shortDebugString(final UnknownFieldSet fields)654 public String shortDebugString(final UnknownFieldSet fields) { 655 try { 656 final StringBuilder text = new StringBuilder(); 657 printUnknownFields(fields, singleLineOutput(text)); 658 return text.toString(); 659 } catch (IOException e) { 660 throw new IllegalStateException(e); 661 } 662 } 663 printUnknownFieldValue( final int tag, final Object value, final TextGenerator generator)664 private static void printUnknownFieldValue( 665 final int tag, final Object value, final TextGenerator generator) throws IOException { 666 switch (WireFormat.getTagWireType(tag)) { 667 case WireFormat.WIRETYPE_VARINT: 668 generator.print(unsignedToString((Long) value)); 669 break; 670 case WireFormat.WIRETYPE_FIXED32: 671 generator.print(String.format((Locale) null, "0x%08x", (Integer) value)); 672 break; 673 case WireFormat.WIRETYPE_FIXED64: 674 generator.print(String.format((Locale) null, "0x%016x", (Long) value)); 675 break; 676 case WireFormat.WIRETYPE_LENGTH_DELIMITED: 677 try { 678 // Try to parse and print the field as an embedded message 679 UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value); 680 generator.print("{"); 681 generator.eol(); 682 generator.indent(); 683 printUnknownFields(message, generator); 684 generator.outdent(); 685 generator.print("}"); 686 } catch (InvalidProtocolBufferException e) { 687 // If not parseable as a message, print as a String 688 generator.print("\""); 689 generator.print(escapeBytes((ByteString) value)); 690 generator.print("\""); 691 } 692 break; 693 case WireFormat.WIRETYPE_START_GROUP: 694 printUnknownFields((UnknownFieldSet) value, generator); 695 break; 696 default: 697 throw new IllegalArgumentException("Bad tag: " + tag); 698 } 699 } 700 printMessage(final MessageOrBuilder message, final TextGenerator generator)701 private void printMessage(final MessageOrBuilder message, final TextGenerator generator) 702 throws IOException { 703 for (Map.Entry<FieldDescriptor, Object> field : message.getAllFields().entrySet()) { 704 printField(field.getKey(), field.getValue(), generator); 705 } 706 printUnknownFields(message.getUnknownFields(), generator); 707 } 708 printSingleField( final FieldDescriptor field, final Object value, final TextGenerator generator)709 private void printSingleField( 710 final FieldDescriptor field, final Object value, final TextGenerator generator) 711 throws IOException { 712 if (field.isExtension()) { 713 generator.print("["); 714 // We special-case MessageSet elements for compatibility with proto1. 715 if (field.getContainingType().getOptions().getMessageSetWireFormat() 716 && (field.getType() == FieldDescriptor.Type.MESSAGE) 717 && (field.isOptional()) 718 // object equality 719 && (field.getExtensionScope() == field.getMessageType())) { 720 generator.print(field.getMessageType().getFullName()); 721 } else { 722 generator.print(field.getFullName()); 723 } 724 generator.print("]"); 725 } else { 726 if (field.getType() == FieldDescriptor.Type.GROUP) { 727 // Groups must be serialized with their original capitalization. 728 generator.print(field.getMessageType().getName()); 729 } else { 730 generator.print(field.getName()); 731 } 732 } 733 734 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 735 generator.print(" {"); 736 generator.eol(); 737 generator.indent(); 738 } else { 739 generator.print(": "); 740 } 741 742 printFieldValue(field, value, generator); 743 744 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 745 generator.outdent(); 746 generator.print("}"); 747 } 748 generator.eol(); 749 } 750 printUnknownFields( final UnknownFieldSet unknownFields, final TextGenerator generator)751 private static void printUnknownFields( 752 final UnknownFieldSet unknownFields, final TextGenerator generator) throws IOException { 753 for (Map.Entry<Integer, UnknownFieldSet.Field> entry : unknownFields.asMap().entrySet()) { 754 final int number = entry.getKey(); 755 final UnknownFieldSet.Field field = entry.getValue(); 756 printUnknownField(number, WireFormat.WIRETYPE_VARINT, field.getVarintList(), generator); 757 printUnknownField(number, WireFormat.WIRETYPE_FIXED32, field.getFixed32List(), generator); 758 printUnknownField(number, WireFormat.WIRETYPE_FIXED64, field.getFixed64List(), generator); 759 printUnknownField( 760 number, 761 WireFormat.WIRETYPE_LENGTH_DELIMITED, 762 field.getLengthDelimitedList(), 763 generator); 764 for (final UnknownFieldSet value : field.getGroupList()) { 765 generator.print(entry.getKey().toString()); 766 generator.print(" {"); 767 generator.eol(); 768 generator.indent(); 769 printUnknownFields(value, generator); 770 generator.outdent(); 771 generator.print("}"); 772 generator.eol(); 773 } 774 } 775 } 776 printUnknownField( final int number, final int wireType, final List<?> values, final TextGenerator generator)777 private static void printUnknownField( 778 final int number, final int wireType, final List<?> values, final TextGenerator generator) 779 throws IOException { 780 for (final Object value : values) { 781 generator.print(String.valueOf(number)); 782 generator.print(": "); 783 printUnknownFieldValue(wireType, value, generator); 784 generator.eol(); 785 } 786 } 787 } 788 789 /** Convert an unsigned 32-bit integer to a string. */ unsignedToString(final int value)790 public static String unsignedToString(final int value) { 791 if (value >= 0) { 792 return Integer.toString(value); 793 } else { 794 return Long.toString(value & 0x00000000FFFFFFFFL); 795 } 796 } 797 798 /** Convert an unsigned 64-bit integer to a string. */ unsignedToString(final long value)799 public static String unsignedToString(final long value) { 800 if (value >= 0) { 801 return Long.toString(value); 802 } else { 803 // Pull off the most-significant bit so that BigInteger doesn't think 804 // the number is negative, then set it again using setBit(). 805 return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL).setBit(63).toString(); 806 } 807 } 808 multiLineOutput(Appendable output)809 private static TextGenerator multiLineOutput(Appendable output) { 810 return new TextGenerator(output, false); 811 } 812 singleLineOutput(Appendable output)813 private static TextGenerator singleLineOutput(Appendable output) { 814 return new TextGenerator(output, true); 815 } 816 817 /** An inner class for writing text to the output stream. */ 818 private static final class TextGenerator { 819 private final Appendable output; 820 private final StringBuilder indent = new StringBuilder(); 821 private final boolean singleLineMode; 822 // While technically we are "at the start of a line" at the very beginning of the output, all 823 // we would do in response to this is emit the (zero length) indentation, so it has no effect. 824 // Setting it false here does however suppress an unwanted leading space in single-line mode. 825 private boolean atStartOfLine = false; 826 TextGenerator(final Appendable output, boolean singleLineMode)827 private TextGenerator(final Appendable output, boolean singleLineMode) { 828 this.output = output; 829 this.singleLineMode = singleLineMode; 830 } 831 832 /** 833 * Indent text by two spaces. After calling Indent(), two spaces will be inserted at the 834 * beginning of each line of text. Indent() may be called multiple times to produce deeper 835 * indents. 836 */ indent()837 public void indent() { 838 indent.append(" "); 839 } 840 841 /** Reduces the current indent level by two spaces, or crashes if the indent level is zero. */ outdent()842 public void outdent() { 843 final int length = indent.length(); 844 if (length == 0) { 845 throw new IllegalArgumentException(" Outdent() without matching Indent()."); 846 } 847 indent.setLength(length - 2); 848 } 849 850 /** 851 * Print text to the output stream. Bare newlines are never expected to be passed to this 852 * method; to indicate the end of a line, call "eol()". 853 */ print(final CharSequence text)854 public void print(final CharSequence text) throws IOException { 855 if (atStartOfLine) { 856 atStartOfLine = false; 857 output.append(singleLineMode ? " " : indent); 858 } 859 output.append(text); 860 } 861 862 /** 863 * Signifies reaching the "end of the current line" in the output. In single-line mode, this 864 * does not result in a newline being emitted, but ensures that a separating space is written 865 * before the next output. 866 */ eol()867 public void eol() throws IOException { 868 if (!singleLineMode) { 869 output.append("\n"); 870 } 871 atStartOfLine = true; 872 } 873 } 874 875 // ================================================================= 876 // Parsing 877 878 /** 879 * Represents a stream of tokens parsed from a {@code String}. 880 * 881 * <p>The Java standard library provides many classes that you might think would be useful for 882 * implementing this, but aren't. For example: 883 * 884 * <ul> 885 * <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or, at least, something 886 * that would get us close to what we want -- except for one fatal flaw: It automatically 887 * un-escapes strings using Java escape sequences, which do not include all the escape 888 * sequences we need to support (e.g. '\x'). 889 * <li>{@code java.util.Scanner}: This seems like a great way at least to parse regular 890 * expressions out of a stream (so we wouldn't have to load the entire input into a single 891 * string before parsing). Sadly, {@code Scanner} requires that tokens be delimited with 892 * some delimiter. Thus, although the text "foo:" should parse to two tokens ("foo" and 893 * ":"), {@code Scanner} would recognize it only as a single token. Furthermore, {@code 894 * Scanner} provides no way to inspect the contents of delimiters, making it impossible to 895 * keep track of line and column numbers. 896 * </ul> 897 * 898 * <p>Luckily, Java's regular expression support does manage to be useful to us. (Barely: We need 899 * {@code Matcher.usePattern()}, which is new in Java 1.5.) So, we can use that, at least. 900 * Unfortunately, this implies that we need to have the entire input in one contiguous string. 901 */ 902 private static final class Tokenizer { 903 private final CharSequence text; 904 private final Matcher matcher; 905 private String currentToken; 906 907 // The character index within this.text at which the current token begins. 908 private int pos = 0; 909 910 // The line and column numbers of the current token. 911 private int line = 0; 912 private int column = 0; 913 914 // The line and column numbers of the previous token (allows throwing 915 // errors *after* consuming). 916 private int previousLine = 0; 917 private int previousColumn = 0; 918 919 // We use possessive quantifiers (*+ and ++) because otherwise the Java 920 // regex matcher has stack overflows on large inputs. 921 private static final Pattern WHITESPACE = Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE); 922 private static final Pattern TOKEN = 923 Pattern.compile( 924 "[a-zA-Z_][0-9a-zA-Z_+-]*+|" // an identifier 925 + "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" // a number 926 + "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" // a double-quoted string 927 + "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)", // a single-quoted string 928 Pattern.MULTILINE); 929 930 private static final Pattern DOUBLE_INFINITY = 931 Pattern.compile("-?inf(inity)?", Pattern.CASE_INSENSITIVE); 932 private static final Pattern FLOAT_INFINITY = 933 Pattern.compile("-?inf(inity)?f?", Pattern.CASE_INSENSITIVE); 934 private static final Pattern FLOAT_NAN = Pattern.compile("nanf?", Pattern.CASE_INSENSITIVE); 935 936 /** Construct a tokenizer that parses tokens from the given text. */ Tokenizer(final CharSequence text)937 private Tokenizer(final CharSequence text) { 938 this.text = text; 939 this.matcher = WHITESPACE.matcher(text); 940 skipWhitespace(); 941 nextToken(); 942 } 943 getPreviousLine()944 int getPreviousLine() { 945 return previousLine; 946 } 947 getPreviousColumn()948 int getPreviousColumn() { 949 return previousColumn; 950 } 951 getLine()952 int getLine() { 953 return line; 954 } 955 getColumn()956 int getColumn() { 957 return column; 958 } 959 960 /** Are we at the end of the input? */ atEnd()961 public boolean atEnd() { 962 return currentToken.length() == 0; 963 } 964 965 /** Advance to the next token. */ nextToken()966 public void nextToken() { 967 previousLine = line; 968 previousColumn = column; 969 970 // Advance the line counter to the current position. 971 while (pos < matcher.regionStart()) { 972 if (text.charAt(pos) == '\n') { 973 ++line; 974 column = 0; 975 } else { 976 ++column; 977 } 978 ++pos; 979 } 980 981 // Match the next token. 982 if (matcher.regionStart() == matcher.regionEnd()) { 983 // EOF 984 currentToken = ""; 985 } else { 986 matcher.usePattern(TOKEN); 987 if (matcher.lookingAt()) { 988 currentToken = matcher.group(); 989 matcher.region(matcher.end(), matcher.regionEnd()); 990 } else { 991 // Take one character. 992 currentToken = String.valueOf(text.charAt(pos)); 993 matcher.region(pos + 1, matcher.regionEnd()); 994 } 995 996 skipWhitespace(); 997 } 998 } 999 1000 /** Skip over any whitespace so that the matcher region starts at the next token. */ skipWhitespace()1001 private void skipWhitespace() { 1002 matcher.usePattern(WHITESPACE); 1003 if (matcher.lookingAt()) { 1004 matcher.region(matcher.end(), matcher.regionEnd()); 1005 } 1006 } 1007 1008 /** 1009 * If the next token exactly matches {@code token}, consume it and return {@code true}. 1010 * Otherwise, return {@code false} without doing anything. 1011 */ tryConsume(final String token)1012 public boolean tryConsume(final String token) { 1013 if (currentToken.equals(token)) { 1014 nextToken(); 1015 return true; 1016 } else { 1017 return false; 1018 } 1019 } 1020 1021 /** 1022 * If the next token exactly matches {@code token}, consume it. Otherwise, throw a {@link 1023 * ParseException}. 1024 */ consume(final String token)1025 public void consume(final String token) throws ParseException { 1026 if (!tryConsume(token)) { 1027 throw parseException("Expected \"" + token + "\"."); 1028 } 1029 } 1030 1031 /** Returns {@code true} if the next token is an integer, but does not consume it. */ lookingAtInteger()1032 public boolean lookingAtInteger() { 1033 if (currentToken.length() == 0) { 1034 return false; 1035 } 1036 1037 final char c = currentToken.charAt(0); 1038 return ('0' <= c && c <= '9') || c == '-' || c == '+'; 1039 } 1040 1041 /** Returns {@code true} if the current token's text is equal to that specified. */ lookingAt(String text)1042 public boolean lookingAt(String text) { 1043 return currentToken.equals(text); 1044 } 1045 1046 /** 1047 * If the next token is an identifier, consume it and return its value. Otherwise, throw a 1048 * {@link ParseException}. 1049 */ consumeIdentifier()1050 public String consumeIdentifier() throws ParseException { 1051 for (int i = 0; i < currentToken.length(); i++) { 1052 final char c = currentToken.charAt(i); 1053 if (('a' <= c && c <= 'z') 1054 || ('A' <= c && c <= 'Z') 1055 || ('0' <= c && c <= '9') 1056 || (c == '_') 1057 || (c == '.')) { 1058 // OK 1059 } else { 1060 throw parseException("Expected identifier. Found '" + currentToken + "'"); 1061 } 1062 } 1063 1064 final String result = currentToken; 1065 nextToken(); 1066 return result; 1067 } 1068 1069 /** 1070 * If the next token is an identifier, consume it and return {@code true}. Otherwise, return 1071 * {@code false} without doing anything. 1072 */ tryConsumeIdentifier()1073 public boolean tryConsumeIdentifier() { 1074 try { 1075 consumeIdentifier(); 1076 return true; 1077 } catch (ParseException e) { 1078 return false; 1079 } 1080 } 1081 1082 /** 1083 * If the next token is a 32-bit signed integer, consume it and return its value. Otherwise, 1084 * throw a {@link ParseException}. 1085 */ consumeInt32()1086 public int consumeInt32() throws ParseException { 1087 try { 1088 final int result = parseInt32(currentToken); 1089 nextToken(); 1090 return result; 1091 } catch (NumberFormatException e) { 1092 throw integerParseException(e); 1093 } 1094 } 1095 1096 /** 1097 * If the next token is a 32-bit unsigned integer, consume it and return its value. Otherwise, 1098 * throw a {@link ParseException}. 1099 */ consumeUInt32()1100 public int consumeUInt32() throws ParseException { 1101 try { 1102 final int result = parseUInt32(currentToken); 1103 nextToken(); 1104 return result; 1105 } catch (NumberFormatException e) { 1106 throw integerParseException(e); 1107 } 1108 } 1109 1110 /** 1111 * If the next token is a 64-bit signed integer, consume it and return its value. Otherwise, 1112 * throw a {@link ParseException}. 1113 */ consumeInt64()1114 public long consumeInt64() throws ParseException { 1115 try { 1116 final long result = parseInt64(currentToken); 1117 nextToken(); 1118 return result; 1119 } catch (NumberFormatException e) { 1120 throw integerParseException(e); 1121 } 1122 } 1123 1124 /** 1125 * If the next token is a 64-bit signed integer, consume it and return {@code true}. Otherwise, 1126 * return {@code false} without doing anything. 1127 */ tryConsumeInt64()1128 public boolean tryConsumeInt64() { 1129 try { 1130 consumeInt64(); 1131 return true; 1132 } catch (ParseException e) { 1133 return false; 1134 } 1135 } 1136 1137 /** 1138 * If the next token is a 64-bit unsigned integer, consume it and return its value. Otherwise, 1139 * throw a {@link ParseException}. 1140 */ consumeUInt64()1141 public long consumeUInt64() throws ParseException { 1142 try { 1143 final long result = parseUInt64(currentToken); 1144 nextToken(); 1145 return result; 1146 } catch (NumberFormatException e) { 1147 throw integerParseException(e); 1148 } 1149 } 1150 1151 /** 1152 * If the next token is a 64-bit unsigned integer, consume it and return {@code true}. 1153 * Otherwise, return {@code false} without doing anything. 1154 */ tryConsumeUInt64()1155 public boolean tryConsumeUInt64() { 1156 try { 1157 consumeUInt64(); 1158 return true; 1159 } catch (ParseException e) { 1160 return false; 1161 } 1162 } 1163 1164 /** 1165 * If the next token is a double, consume it and return its value. Otherwise, throw a {@link 1166 * ParseException}. 1167 */ consumeDouble()1168 public double consumeDouble() throws ParseException { 1169 // We need to parse infinity and nan separately because 1170 // Double.parseDouble() does not accept "inf", "infinity", or "nan". 1171 if (DOUBLE_INFINITY.matcher(currentToken).matches()) { 1172 final boolean negative = currentToken.startsWith("-"); 1173 nextToken(); 1174 return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; 1175 } 1176 if (currentToken.equalsIgnoreCase("nan")) { 1177 nextToken(); 1178 return Double.NaN; 1179 } 1180 try { 1181 final double result = Double.parseDouble(currentToken); 1182 nextToken(); 1183 return result; 1184 } catch (NumberFormatException e) { 1185 throw floatParseException(e); 1186 } 1187 } 1188 1189 /** 1190 * If the next token is a double, consume it and return {@code true}. Otherwise, return {@code 1191 * false} without doing anything. 1192 */ tryConsumeDouble()1193 public boolean tryConsumeDouble() { 1194 try { 1195 consumeDouble(); 1196 return true; 1197 } catch (ParseException e) { 1198 return false; 1199 } 1200 } 1201 1202 /** 1203 * If the next token is a float, consume it and return its value. Otherwise, throw a {@link 1204 * ParseException}. 1205 */ consumeFloat()1206 public float consumeFloat() throws ParseException { 1207 // We need to parse infinity and nan separately because 1208 // Float.parseFloat() does not accept "inf", "infinity", or "nan". 1209 if (FLOAT_INFINITY.matcher(currentToken).matches()) { 1210 final boolean negative = currentToken.startsWith("-"); 1211 nextToken(); 1212 return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY; 1213 } 1214 if (FLOAT_NAN.matcher(currentToken).matches()) { 1215 nextToken(); 1216 return Float.NaN; 1217 } 1218 try { 1219 final float result = Float.parseFloat(currentToken); 1220 nextToken(); 1221 return result; 1222 } catch (NumberFormatException e) { 1223 throw floatParseException(e); 1224 } 1225 } 1226 1227 /** 1228 * If the next token is a float, consume it and return {@code true}. Otherwise, return {@code 1229 * false} without doing anything. 1230 */ tryConsumeFloat()1231 public boolean tryConsumeFloat() { 1232 try { 1233 consumeFloat(); 1234 return true; 1235 } catch (ParseException e) { 1236 return false; 1237 } 1238 } 1239 1240 /** 1241 * If the next token is a boolean, consume it and return its value. Otherwise, throw a {@link 1242 * ParseException}. 1243 */ consumeBoolean()1244 public boolean consumeBoolean() throws ParseException { 1245 if (currentToken.equals("true") 1246 || currentToken.equals("True") 1247 || currentToken.equals("t") 1248 || currentToken.equals("1")) { 1249 nextToken(); 1250 return true; 1251 } else if (currentToken.equals("false") 1252 || currentToken.equals("False") 1253 || currentToken.equals("f") 1254 || currentToken.equals("0")) { 1255 nextToken(); 1256 return false; 1257 } else { 1258 throw parseException("Expected \"true\" or \"false\". Found \"" + currentToken + "\"."); 1259 } 1260 } 1261 1262 /** 1263 * If the next token is a string, consume it and return its (unescaped) value. Otherwise, throw 1264 * a {@link ParseException}. 1265 */ consumeString()1266 public String consumeString() throws ParseException { 1267 return consumeByteString().toStringUtf8(); 1268 } 1269 1270 /** If the next token is a string, consume it and return true. Otherwise, return false. */ tryConsumeString()1271 public boolean tryConsumeString() { 1272 try { 1273 consumeString(); 1274 return true; 1275 } catch (ParseException e) { 1276 return false; 1277 } 1278 } 1279 1280 /** 1281 * If the next token is a string, consume it, unescape it as a {@link ByteString}, and return 1282 * it. Otherwise, throw a {@link ParseException}. 1283 */ consumeByteString()1284 public ByteString consumeByteString() throws ParseException { 1285 List<ByteString> list = new ArrayList<ByteString>(); 1286 consumeByteString(list); 1287 while (currentToken.startsWith("'") || currentToken.startsWith("\"")) { 1288 consumeByteString(list); 1289 } 1290 return ByteString.copyFrom(list); 1291 } 1292 1293 /** 1294 * Like {@link #consumeByteString()} but adds each token of the string to the given list. String 1295 * literals (whether bytes or text) may come in multiple adjacent tokens which are automatically 1296 * concatenated, like in C or Python. 1297 */ consumeByteString(List<ByteString> list)1298 private void consumeByteString(List<ByteString> list) throws ParseException { 1299 final char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0'; 1300 if (quote != '\"' && quote != '\'') { 1301 throw parseException("Expected string."); 1302 } 1303 1304 if (currentToken.length() < 2 || currentToken.charAt(currentToken.length() - 1) != quote) { 1305 throw parseException("String missing ending quote."); 1306 } 1307 1308 try { 1309 final String escaped = currentToken.substring(1, currentToken.length() - 1); 1310 final ByteString result = unescapeBytes(escaped); 1311 nextToken(); 1312 list.add(result); 1313 } catch (InvalidEscapeSequenceException e) { 1314 throw parseException(e.getMessage()); 1315 } 1316 } 1317 1318 /** 1319 * Returns a {@link ParseException} with the current line and column numbers in the description, 1320 * suitable for throwing. 1321 */ parseException(final String description)1322 public ParseException parseException(final String description) { 1323 // Note: People generally prefer one-based line and column numbers. 1324 return new ParseException(line + 1, column + 1, description); 1325 } 1326 1327 /** 1328 * Returns a {@link ParseException} with the line and column numbers of the previous token in 1329 * the description, suitable for throwing. 1330 */ parseExceptionPreviousToken(final String description)1331 public ParseException parseExceptionPreviousToken(final String description) { 1332 // Note: People generally prefer one-based line and column numbers. 1333 return new ParseException(previousLine + 1, previousColumn + 1, description); 1334 } 1335 1336 /** 1337 * Constructs an appropriate {@link ParseException} for the given {@code NumberFormatException} 1338 * when trying to parse an integer. 1339 */ integerParseException(final NumberFormatException e)1340 private ParseException integerParseException(final NumberFormatException e) { 1341 return parseException("Couldn't parse integer: " + e.getMessage()); 1342 } 1343 1344 /** 1345 * Constructs an appropriate {@link ParseException} for the given {@code NumberFormatException} 1346 * when trying to parse a float or double. 1347 */ floatParseException(final NumberFormatException e)1348 private ParseException floatParseException(final NumberFormatException e) { 1349 return parseException("Couldn't parse number: " + e.getMessage()); 1350 } 1351 1352 /** 1353 * Returns a {@link UnknownFieldParseException} with the line and column numbers of the previous 1354 * token in the description, and the unknown field name, suitable for throwing. 1355 */ unknownFieldParseExceptionPreviousToken( final String unknownField, final String description)1356 public UnknownFieldParseException unknownFieldParseExceptionPreviousToken( 1357 final String unknownField, final String description) { 1358 // Note: People generally prefer one-based line and column numbers. 1359 return new UnknownFieldParseException( 1360 previousLine + 1, previousColumn + 1, unknownField, description); 1361 } 1362 } 1363 1364 /** Thrown when parsing an invalid text format message. */ 1365 public static class ParseException extends IOException { 1366 private static final long serialVersionUID = 3196188060225107702L; 1367 1368 private final int line; 1369 private final int column; 1370 1371 /** Create a new instance, with -1 as the line and column numbers. */ ParseException(final String message)1372 public ParseException(final String message) { 1373 this(-1, -1, message); 1374 } 1375 1376 /** 1377 * Create a new instance 1378 * 1379 * @param line the line number where the parse error occurred, using 1-offset. 1380 * @param column the column number where the parser error occurred, using 1-offset. 1381 */ ParseException(final int line, final int column, final String message)1382 public ParseException(final int line, final int column, final String message) { 1383 super(Integer.toString(line) + ":" + column + ": " + message); 1384 this.line = line; 1385 this.column = column; 1386 } 1387 1388 /** 1389 * Return the line where the parse exception occurred, or -1 when none is provided. The value is 1390 * specified as 1-offset, so the first line is line 1. 1391 */ getLine()1392 public int getLine() { 1393 return line; 1394 } 1395 1396 /** 1397 * Return the column where the parse exception occurred, or -1 when none is provided. The value 1398 * is specified as 1-offset, so the first line is line 1. 1399 */ getColumn()1400 public int getColumn() { 1401 return column; 1402 } 1403 } 1404 1405 /** Thrown when encountering an unknown field while parsing a text format message. */ 1406 public static class UnknownFieldParseException extends ParseException { 1407 private final String unknownField; 1408 1409 /** 1410 * Create a new instance, with -1 as the line and column numbers, and an empty unknown field 1411 * name. 1412 */ UnknownFieldParseException(final String message)1413 public UnknownFieldParseException(final String message) { 1414 this(-1, -1, "", message); 1415 } 1416 1417 /** 1418 * Create a new instance 1419 * 1420 * @param line the line number where the parse error occurred, using 1-offset. 1421 * @param column the column number where the parser error occurred, using 1-offset. 1422 * @param unknownField the name of the unknown field found while parsing. 1423 */ UnknownFieldParseException( final int line, final int column, final String unknownField, final String message)1424 public UnknownFieldParseException( 1425 final int line, final int column, final String unknownField, final String message) { 1426 super(line, column, message); 1427 this.unknownField = unknownField; 1428 } 1429 1430 /** 1431 * Return the name of the unknown field encountered while parsing the protocol buffer string. 1432 */ getUnknownField()1433 public String getUnknownField() { 1434 return unknownField; 1435 } 1436 } 1437 1438 private static final Parser PARSER = Parser.newBuilder().build(); 1439 1440 /** 1441 * Return a {@link Parser} instance which can parse text-format messages. The returned instance is 1442 * thread-safe. 1443 */ getParser()1444 public static Parser getParser() { 1445 return PARSER; 1446 } 1447 1448 /** Parse a text-format message from {@code input} and merge the contents into {@code builder}. */ merge(final Readable input, final Message.Builder builder)1449 public static void merge(final Readable input, final Message.Builder builder) throws IOException { 1450 PARSER.merge(input, builder); 1451 } 1452 1453 /** Parse a text-format message from {@code input} and merge the contents into {@code builder}. */ merge(final CharSequence input, final Message.Builder builder)1454 public static void merge(final CharSequence input, final Message.Builder builder) 1455 throws ParseException { 1456 PARSER.merge(input, builder); 1457 } 1458 1459 /** 1460 * Parse a text-format message from {@code input}. 1461 * 1462 * @return the parsed message, guaranteed initialized 1463 */ parse(final CharSequence input, final Class<T> protoClass)1464 public static <T extends Message> T parse(final CharSequence input, final Class<T> protoClass) 1465 throws ParseException { 1466 Message.Builder builder = Internal.getDefaultInstance(protoClass).newBuilderForType(); 1467 merge(input, builder); 1468 @SuppressWarnings("unchecked") 1469 T output = (T) builder.build(); 1470 return output; 1471 } 1472 1473 /** 1474 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1475 * Extensions will be recognized if they are registered in {@code extensionRegistry}. 1476 */ merge( final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1477 public static void merge( 1478 final Readable input, 1479 final ExtensionRegistry extensionRegistry, 1480 final Message.Builder builder) 1481 throws IOException { 1482 PARSER.merge(input, extensionRegistry, builder); 1483 } 1484 1485 1486 /** 1487 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1488 * Extensions will be recognized if they are registered in {@code extensionRegistry}. 1489 */ merge( final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1490 public static void merge( 1491 final CharSequence input, 1492 final ExtensionRegistry extensionRegistry, 1493 final Message.Builder builder) 1494 throws ParseException { 1495 PARSER.merge(input, extensionRegistry, builder); 1496 } 1497 1498 /** 1499 * Parse a text-format message from {@code input}. Extensions will be recognized if they are 1500 * registered in {@code extensionRegistry}. 1501 * 1502 * @return the parsed message, guaranteed initialized 1503 */ parse( final CharSequence input, final ExtensionRegistry extensionRegistry, final Class<T> protoClass)1504 public static <T extends Message> T parse( 1505 final CharSequence input, 1506 final ExtensionRegistry extensionRegistry, 1507 final Class<T> protoClass) 1508 throws ParseException { 1509 Message.Builder builder = Internal.getDefaultInstance(protoClass).newBuilderForType(); 1510 merge(input, extensionRegistry, builder); 1511 @SuppressWarnings("unchecked") 1512 T output = (T) builder.build(); 1513 return output; 1514 } 1515 1516 1517 /** 1518 * Parser for text-format proto2 instances. This class is thread-safe. The implementation largely 1519 * follows google/protobuf/text_format.cc. 1520 * 1521 * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or {@link Builder} to 1522 * control the parser behavior. 1523 */ 1524 public static class Parser { 1525 /** 1526 * Determines if repeated values for non-repeated fields and oneofs are permitted. For example, 1527 * given required/optional field "foo" and a oneof containing "baz" and "qux": 1528 * 1529 * <ul> 1530 * <li>"foo: 1 foo: 2" 1531 * <li>"baz: 1 qux: 2" 1532 * <li>merging "foo: 2" into a proto in which foo is already set, or 1533 * <li>merging "qux: 2" into a proto in which baz is already set. 1534 * </ul> 1535 */ 1536 public enum SingularOverwritePolicy { 1537 /** 1538 * Later values are merged with earlier values. For primitive fields or conflicting oneofs, 1539 * the last value is retained. 1540 */ 1541 ALLOW_SINGULAR_OVERWRITES, 1542 /** An error is issued. */ 1543 FORBID_SINGULAR_OVERWRITES 1544 } 1545 1546 private final TypeRegistry typeRegistry; 1547 private final boolean allowUnknownFields; 1548 private final boolean allowUnknownEnumValues; 1549 private final boolean allowUnknownExtensions; 1550 private final SingularOverwritePolicy singularOverwritePolicy; 1551 private TextFormatParseInfoTree.Builder parseInfoTreeBuilder; 1552 Parser( TypeRegistry typeRegistry, boolean allowUnknownFields, boolean allowUnknownEnumValues, boolean allowUnknownExtensions, SingularOverwritePolicy singularOverwritePolicy, TextFormatParseInfoTree.Builder parseInfoTreeBuilder)1553 private Parser( 1554 TypeRegistry typeRegistry, 1555 boolean allowUnknownFields, 1556 boolean allowUnknownEnumValues, 1557 boolean allowUnknownExtensions, 1558 SingularOverwritePolicy singularOverwritePolicy, 1559 TextFormatParseInfoTree.Builder parseInfoTreeBuilder) { 1560 this.typeRegistry = typeRegistry; 1561 this.allowUnknownFields = allowUnknownFields; 1562 this.allowUnknownEnumValues = allowUnknownEnumValues; 1563 this.allowUnknownExtensions = allowUnknownExtensions; 1564 this.singularOverwritePolicy = singularOverwritePolicy; 1565 this.parseInfoTreeBuilder = parseInfoTreeBuilder; 1566 } 1567 1568 /** Returns a new instance of {@link Builder}. */ newBuilder()1569 public static Builder newBuilder() { 1570 return new Builder(); 1571 } 1572 1573 /** Builder that can be used to obtain new instances of {@link Parser}. */ 1574 public static class Builder { 1575 private boolean allowUnknownFields = false; 1576 private boolean allowUnknownEnumValues = false; 1577 private boolean allowUnknownExtensions = false; 1578 private SingularOverwritePolicy singularOverwritePolicy = 1579 SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES; 1580 private TextFormatParseInfoTree.Builder parseInfoTreeBuilder = null; 1581 private TypeRegistry typeRegistry = TypeRegistry.getEmptyTypeRegistry(); 1582 1583 /** 1584 * Sets the TypeRegistry for resolving Any. If this is not set, TextFormat will not be able to 1585 * parse Any unless Any is write as bytes. 1586 * 1587 * @throws IllegalArgumentException if a registry is already set. 1588 */ setTypeRegistry(TypeRegistry typeRegistry)1589 public Builder setTypeRegistry(TypeRegistry typeRegistry) { 1590 this.typeRegistry = typeRegistry; 1591 return this; 1592 } 1593 1594 /** 1595 * Set whether this parser will allow unknown fields. By default, an exception is thrown if an 1596 * unknown field is encountered. If this is set, the parser will only log a warning. Allow 1597 * unknown fields will also allow unknown extensions. 1598 * 1599 * <p>Use of this parameter is discouraged which may hide some errors (e.g. 1600 * spelling error on field name). 1601 */ setAllowUnknownFields(boolean allowUnknownFields)1602 public Builder setAllowUnknownFields(boolean allowUnknownFields) { 1603 this.allowUnknownFields = allowUnknownFields; 1604 return this; 1605 } 1606 1607 /** 1608 * Set whether this parser will allow unknown extensions. By default, an 1609 * exception is thrown if unknown extension is encountered. If this is set true, 1610 * the parser will only log a warning. Allow unknown extensions does not mean 1611 * allow normal unknown fields. 1612 */ setAllowUnknownExtensions(boolean allowUnknownExtensions)1613 public Builder setAllowUnknownExtensions(boolean allowUnknownExtensions) { 1614 this.allowUnknownExtensions = allowUnknownExtensions; 1615 return this; 1616 } 1617 1618 /** Sets parser behavior when a non-repeated field appears more than once. */ setSingularOverwritePolicy(SingularOverwritePolicy p)1619 public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) { 1620 this.singularOverwritePolicy = p; 1621 return this; 1622 } 1623 setParseInfoTreeBuilder(TextFormatParseInfoTree.Builder parseInfoTreeBuilder)1624 public Builder setParseInfoTreeBuilder(TextFormatParseInfoTree.Builder parseInfoTreeBuilder) { 1625 this.parseInfoTreeBuilder = parseInfoTreeBuilder; 1626 return this; 1627 } 1628 build()1629 public Parser build() { 1630 return new Parser( 1631 typeRegistry, 1632 allowUnknownFields, 1633 allowUnknownEnumValues, 1634 allowUnknownExtensions, 1635 singularOverwritePolicy, 1636 parseInfoTreeBuilder); 1637 } 1638 } 1639 1640 /** 1641 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1642 */ merge(final Readable input, final Message.Builder builder)1643 public void merge(final Readable input, final Message.Builder builder) throws IOException { 1644 merge(input, ExtensionRegistry.getEmptyRegistry(), builder); 1645 } 1646 1647 /** 1648 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1649 */ merge(final CharSequence input, final Message.Builder builder)1650 public void merge(final CharSequence input, final Message.Builder builder) 1651 throws ParseException { 1652 merge(input, ExtensionRegistry.getEmptyRegistry(), builder); 1653 } 1654 1655 /** 1656 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1657 * Extensions will be recognized if they are registered in {@code extensionRegistry}. 1658 */ merge( final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1659 public void merge( 1660 final Readable input, 1661 final ExtensionRegistry extensionRegistry, 1662 final Message.Builder builder) 1663 throws IOException { 1664 // Read the entire input to a String then parse that. 1665 1666 // If StreamTokenizer was not so limited, or if there were a kind 1667 // of Reader that could read in chunks that match some particular regex, 1668 // or if we wanted to write a custom Reader to tokenize our stream, then 1669 // we would not have to read to one big String. Alas, none of these is 1670 // the case. Oh well. 1671 1672 merge(toStringBuilder(input), extensionRegistry, builder); 1673 } 1674 1675 1676 private static final int BUFFER_SIZE = 4096; 1677 1678 // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer) 1679 // overhead is worthwhile toStringBuilder(final Readable input)1680 private static StringBuilder toStringBuilder(final Readable input) throws IOException { 1681 final StringBuilder text = new StringBuilder(); 1682 final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE); 1683 while (true) { 1684 final int n = input.read(buffer); 1685 if (n == -1) { 1686 break; 1687 } 1688 buffer.flip(); 1689 text.append(buffer, 0, n); 1690 } 1691 return text; 1692 } 1693 1694 static final class UnknownField { 1695 static enum Type { 1696 FIELD, EXTENSION; 1697 } 1698 1699 final String message; 1700 final Type type; 1701 UnknownField(String message, Type type)1702 UnknownField(String message, Type type) { 1703 this.message = message; 1704 this.type = type; 1705 } 1706 } 1707 1708 // Check both unknown fields and unknown extensions and log warning messages 1709 // or throw exceptions according to the flag. checkUnknownFields(final List<UnknownField> unknownFields)1710 private void checkUnknownFields(final List<UnknownField> unknownFields) throws ParseException { 1711 if (unknownFields.isEmpty()) { 1712 return; 1713 } 1714 1715 StringBuilder msg = new StringBuilder("Input contains unknown fields and/or extensions:"); 1716 for (UnknownField field : unknownFields) { 1717 msg.append('\n').append(field.message); 1718 } 1719 1720 if (allowUnknownFields) { 1721 logger.warning(msg.toString()); 1722 return; 1723 } 1724 1725 int firstErrorIndex = 0; 1726 if (allowUnknownExtensions) { 1727 boolean allUnknownExtensions = true; 1728 for (UnknownField field : unknownFields) { 1729 if (field.type == UnknownField.Type.FIELD) { 1730 allUnknownExtensions = false; 1731 break; 1732 } 1733 ++firstErrorIndex; 1734 } 1735 if (allUnknownExtensions) { 1736 logger.warning(msg.toString()); 1737 return; 1738 } 1739 } 1740 1741 String[] lineColumn = unknownFields.get(firstErrorIndex).message.split(":"); 1742 throw new ParseException( 1743 Integer.parseInt(lineColumn[0]), Integer.parseInt(lineColumn[1]), msg.toString()); 1744 } 1745 1746 /** 1747 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1748 * Extensions will be recognized if they are registered in {@code extensionRegistry}. 1749 */ merge( final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1750 public void merge( 1751 final CharSequence input, 1752 final ExtensionRegistry extensionRegistry, 1753 final Message.Builder builder) 1754 throws ParseException { 1755 final Tokenizer tokenizer = new Tokenizer(input); 1756 MessageReflection.BuilderAdapter target = new MessageReflection.BuilderAdapter(builder); 1757 1758 List<UnknownField> unknownFields = new ArrayList<UnknownField>(); 1759 1760 while (!tokenizer.atEnd()) { 1761 mergeField(tokenizer, extensionRegistry, target, unknownFields); 1762 } 1763 1764 checkUnknownFields(unknownFields); 1765 } 1766 1767 1768 /** Parse a single field from {@code tokenizer} and merge it into {@code builder}. */ mergeField( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, List<UnknownField> unknownFields)1769 private void mergeField( 1770 final Tokenizer tokenizer, 1771 final ExtensionRegistry extensionRegistry, 1772 final MessageReflection.MergeTarget target, 1773 List<UnknownField> unknownFields) 1774 throws ParseException { 1775 mergeField( 1776 tokenizer, 1777 extensionRegistry, 1778 target, 1779 parseInfoTreeBuilder, 1780 unknownFields); 1781 } 1782 1783 /** Parse a single field from {@code tokenizer} and merge it into {@code target}. */ mergeField( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1784 private void mergeField( 1785 final Tokenizer tokenizer, 1786 final ExtensionRegistry extensionRegistry, 1787 final MessageReflection.MergeTarget target, 1788 TextFormatParseInfoTree.Builder parseTreeBuilder, 1789 List<UnknownField> unknownFields) 1790 throws ParseException { 1791 FieldDescriptor field = null; 1792 int startLine = tokenizer.getLine(); 1793 int startColumn = tokenizer.getColumn(); 1794 final Descriptor type = target.getDescriptorForType(); 1795 ExtensionRegistry.ExtensionInfo extension = null; 1796 1797 if ("google.protobuf.Any".equals(type.getFullName()) && tokenizer.tryConsume("[")) { 1798 mergeAnyFieldValue(tokenizer, extensionRegistry, target, parseTreeBuilder, unknownFields, 1799 type); 1800 return; 1801 } 1802 1803 if (tokenizer.tryConsume("[")) { 1804 // An extension. 1805 final StringBuilder name = new StringBuilder(tokenizer.consumeIdentifier()); 1806 while (tokenizer.tryConsume(".")) { 1807 name.append('.'); 1808 name.append(tokenizer.consumeIdentifier()); 1809 } 1810 1811 extension = target.findExtensionByName(extensionRegistry, name.toString()); 1812 1813 if (extension == null) { 1814 String message = 1815 (tokenizer.getPreviousLine() + 1) 1816 + ":" 1817 + (tokenizer.getPreviousColumn() + 1) 1818 + ":\t" 1819 + type.getFullName() 1820 + ".[" 1821 + name 1822 + "]"; 1823 unknownFields.add(new UnknownField(message, UnknownField.Type.EXTENSION)); 1824 } else { 1825 if (extension.descriptor.getContainingType() != type) { 1826 throw tokenizer.parseExceptionPreviousToken( 1827 "Extension \"" 1828 + name 1829 + "\" does not extend message type \"" 1830 + type.getFullName() 1831 + "\"."); 1832 } 1833 field = extension.descriptor; 1834 } 1835 1836 tokenizer.consume("]"); 1837 } else { 1838 final String name = tokenizer.consumeIdentifier(); 1839 field = type.findFieldByName(name); 1840 1841 // Group names are expected to be capitalized as they appear in the 1842 // .proto file, which actually matches their type names, not their field 1843 // names. 1844 if (field == null) { 1845 // Explicitly specify US locale so that this code does not break when 1846 // executing in Turkey. 1847 final String lowerName = name.toLowerCase(Locale.US); 1848 field = type.findFieldByName(lowerName); 1849 // If the case-insensitive match worked but the field is NOT a group, 1850 if (field != null && field.getType() != FieldDescriptor.Type.GROUP) { 1851 field = null; 1852 } 1853 } 1854 // Again, special-case group names as described above. 1855 if (field != null 1856 && field.getType() == FieldDescriptor.Type.GROUP 1857 && !field.getMessageType().getName().equals(name)) { 1858 field = null; 1859 } 1860 1861 if (field == null) { 1862 String message = (tokenizer.getPreviousLine() + 1) 1863 + ":" 1864 + (tokenizer.getPreviousColumn() + 1) 1865 + ":\t" 1866 + type.getFullName() 1867 + "." 1868 + name; 1869 unknownFields.add(new UnknownField(message, UnknownField.Type.FIELD)); 1870 } 1871 } 1872 1873 // Skips unknown fields. 1874 if (field == null) { 1875 // Try to guess the type of this field. 1876 // If this field is not a message, there should be a ":" between the 1877 // field name and the field value and also the field value should not 1878 // start with "{" or "<" which indicates the beginning of a message body. 1879 // If there is no ":" or there is a "{" or "<" after ":", this field has 1880 // to be a message or the input is ill-formed. 1881 if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) { 1882 skipFieldValue(tokenizer); 1883 } else { 1884 skipFieldMessage(tokenizer); 1885 } 1886 return; 1887 } 1888 1889 // Handle potential ':'. 1890 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 1891 tokenizer.tryConsume(":"); // optional 1892 if (parseTreeBuilder != null) { 1893 TextFormatParseInfoTree.Builder childParseTreeBuilder = 1894 parseTreeBuilder.getBuilderForSubMessageField(field); 1895 consumeFieldValues( 1896 tokenizer, 1897 extensionRegistry, 1898 target, 1899 field, 1900 extension, 1901 childParseTreeBuilder, 1902 unknownFields); 1903 } else { 1904 consumeFieldValues( 1905 tokenizer, 1906 extensionRegistry, 1907 target, 1908 field, 1909 extension, 1910 parseTreeBuilder, 1911 unknownFields); 1912 } 1913 } else { 1914 tokenizer.consume(":"); // required 1915 consumeFieldValues( 1916 tokenizer, 1917 extensionRegistry, 1918 target, 1919 field, 1920 extension, 1921 parseTreeBuilder, 1922 unknownFields); 1923 } 1924 1925 if (parseTreeBuilder != null) { 1926 parseTreeBuilder.setLocation(field, TextFormatParseLocation.create(startLine, startColumn)); 1927 } 1928 1929 // For historical reasons, fields may optionally be separated by commas or 1930 // semicolons. 1931 if (!tokenizer.tryConsume(";")) { 1932 tokenizer.tryConsume(","); 1933 } 1934 } 1935 1936 /** 1937 * Parse a one or more field values from {@code tokenizer} and merge it into {@code builder}. 1938 */ consumeFieldValues( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1939 private void consumeFieldValues( 1940 final Tokenizer tokenizer, 1941 final ExtensionRegistry extensionRegistry, 1942 final MessageReflection.MergeTarget target, 1943 final FieldDescriptor field, 1944 final ExtensionRegistry.ExtensionInfo extension, 1945 final TextFormatParseInfoTree.Builder parseTreeBuilder, 1946 List<UnknownField> unknownFields) 1947 throws ParseException { 1948 // Support specifying repeated field values as a comma-separated list. 1949 // Ex."foo: [1, 2, 3]" 1950 if (field.isRepeated() && tokenizer.tryConsume("[")) { 1951 if (!tokenizer.tryConsume("]")) { // Allow "foo: []" to be treated as empty. 1952 while (true) { 1953 consumeFieldValue( 1954 tokenizer, 1955 extensionRegistry, 1956 target, 1957 field, 1958 extension, 1959 parseTreeBuilder, 1960 unknownFields); 1961 if (tokenizer.tryConsume("]")) { 1962 // End of list. 1963 break; 1964 } 1965 tokenizer.consume(","); 1966 } 1967 } 1968 } else { 1969 consumeFieldValue( 1970 tokenizer, 1971 extensionRegistry, 1972 target, 1973 field, 1974 extension, 1975 parseTreeBuilder, 1976 unknownFields); 1977 } 1978 } 1979 1980 /** Parse a single field value from {@code tokenizer} and merge it into {@code builder}. */ consumeFieldValue( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1981 private void consumeFieldValue( 1982 final Tokenizer tokenizer, 1983 final ExtensionRegistry extensionRegistry, 1984 final MessageReflection.MergeTarget target, 1985 final FieldDescriptor field, 1986 final ExtensionRegistry.ExtensionInfo extension, 1987 final TextFormatParseInfoTree.Builder parseTreeBuilder, 1988 List<UnknownField> unknownFields) 1989 throws ParseException { 1990 if (singularOverwritePolicy == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES 1991 && !field.isRepeated()) { 1992 if (target.hasField(field)) { 1993 throw tokenizer.parseExceptionPreviousToken( 1994 "Non-repeated field \"" + field.getFullName() + "\" cannot be overwritten."); 1995 } else if (field.getContainingOneof() != null 1996 && target.hasOneof(field.getContainingOneof())) { 1997 Descriptors.OneofDescriptor oneof = field.getContainingOneof(); 1998 throw tokenizer.parseExceptionPreviousToken( 1999 "Field \"" 2000 + field.getFullName() 2001 + "\" is specified along with field \"" 2002 + target.getOneofFieldDescriptor(oneof).getFullName() 2003 + "\", another member of oneof \"" 2004 + oneof.getName() 2005 + "\"."); 2006 } 2007 } 2008 2009 Object value = null; 2010 2011 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 2012 final String endToken; 2013 if (tokenizer.tryConsume("<")) { 2014 endToken = ">"; 2015 } else { 2016 tokenizer.consume("{"); 2017 endToken = "}"; 2018 } 2019 2020 // Try to parse human readable format of Any in the form: [type_url]: { ... } 2021 if (field.getMessageType().getFullName().equals("google.protobuf.Any") 2022 && tokenizer.tryConsume("[")) { 2023 // Use Proto reflection here since depending on Any would intoduce a cyclic dependency 2024 // (java_proto_library for any_java_proto depends on the protobuf_impl). 2025 Message anyBuilder = DynamicMessage.getDefaultInstance(field.getMessageType()); 2026 MessageReflection.MergeTarget anyField = target.newMergeTargetForField(field, anyBuilder); 2027 mergeAnyFieldValue(tokenizer, extensionRegistry, anyField, parseTreeBuilder, 2028 unknownFields, field.getMessageType()); 2029 value = anyField.finish(); 2030 tokenizer.consume(endToken); 2031 } else { 2032 Message defaultInstance = (extension == null) ? null : extension.defaultInstance; 2033 MessageReflection.MergeTarget subField = 2034 target.newMergeTargetForField(field, defaultInstance); 2035 2036 while (!tokenizer.tryConsume(endToken)) { 2037 if (tokenizer.atEnd()) { 2038 throw tokenizer.parseException("Expected \"" + endToken + "\"."); 2039 } 2040 mergeField(tokenizer, extensionRegistry, subField, parseTreeBuilder, unknownFields); 2041 } 2042 2043 value = subField.finish(); 2044 } 2045 2046 } else { 2047 switch (field.getType()) { 2048 case INT32: 2049 case SINT32: 2050 case SFIXED32: 2051 value = tokenizer.consumeInt32(); 2052 break; 2053 2054 case INT64: 2055 case SINT64: 2056 case SFIXED64: 2057 value = tokenizer.consumeInt64(); 2058 break; 2059 2060 case UINT32: 2061 case FIXED32: 2062 value = tokenizer.consumeUInt32(); 2063 break; 2064 2065 case UINT64: 2066 case FIXED64: 2067 value = tokenizer.consumeUInt64(); 2068 break; 2069 2070 case FLOAT: 2071 value = tokenizer.consumeFloat(); 2072 break; 2073 2074 case DOUBLE: 2075 value = tokenizer.consumeDouble(); 2076 break; 2077 2078 case BOOL: 2079 value = tokenizer.consumeBoolean(); 2080 break; 2081 2082 case STRING: 2083 value = tokenizer.consumeString(); 2084 break; 2085 2086 case BYTES: 2087 value = tokenizer.consumeByteString(); 2088 break; 2089 2090 case ENUM: 2091 final EnumDescriptor enumType = field.getEnumType(); 2092 2093 if (tokenizer.lookingAtInteger()) { 2094 final int number = tokenizer.consumeInt32(); 2095 value = enumType.findValueByNumber(number); 2096 if (value == null) { 2097 String unknownValueMsg = 2098 "Enum type \"" 2099 + enumType.getFullName() 2100 + "\" has no value with number " 2101 + number 2102 + '.'; 2103 if (allowUnknownEnumValues) { 2104 logger.warning(unknownValueMsg); 2105 return; 2106 } else { 2107 throw tokenizer.parseExceptionPreviousToken( 2108 "Enum type \"" 2109 + enumType.getFullName() 2110 + "\" has no value with number " 2111 + number 2112 + '.'); 2113 } 2114 } 2115 } else { 2116 final String id = tokenizer.consumeIdentifier(); 2117 value = enumType.findValueByName(id); 2118 if (value == null) { 2119 String unknownValueMsg = 2120 "Enum type \"" 2121 + enumType.getFullName() 2122 + "\" has no value named \"" 2123 + id 2124 + "\"."; 2125 if (allowUnknownEnumValues) { 2126 logger.warning(unknownValueMsg); 2127 return; 2128 } else { 2129 throw tokenizer.parseExceptionPreviousToken(unknownValueMsg); 2130 } 2131 } 2132 } 2133 2134 break; 2135 2136 case MESSAGE: 2137 case GROUP: 2138 throw new RuntimeException("Can't get here."); 2139 } 2140 } 2141 2142 if (field.isRepeated()) { 2143 // TODO(b/29122459): If field.isMapField() and FORBID_SINGULAR_OVERWRITES mode, 2144 // check for duplicate map keys here. 2145 target.addRepeatedField(field, value); 2146 } else { 2147 target.setField(field, value); 2148 } 2149 } 2150 mergeAnyFieldValue( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, MergeTarget target, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields, Descriptor anyDescriptor)2151 private void mergeAnyFieldValue( 2152 final Tokenizer tokenizer, 2153 final ExtensionRegistry extensionRegistry, 2154 MergeTarget target, 2155 final TextFormatParseInfoTree.Builder parseTreeBuilder, 2156 List<UnknownField> unknownFields, 2157 Descriptor anyDescriptor) 2158 throws ParseException { 2159 // Try to parse human readable format of Any in the form: [type_url]: { ... } 2160 StringBuilder typeUrlBuilder = new StringBuilder(); 2161 // Parse the type_url inside []. 2162 while (true) { 2163 typeUrlBuilder.append(tokenizer.consumeIdentifier()); 2164 if (tokenizer.tryConsume("]")) { 2165 break; 2166 } 2167 if (tokenizer.tryConsume("/")) { 2168 typeUrlBuilder.append("/"); 2169 } else if (tokenizer.tryConsume(".")) { 2170 typeUrlBuilder.append("."); 2171 } else { 2172 throw tokenizer.parseExceptionPreviousToken("Expected a valid type URL."); 2173 } 2174 } 2175 tokenizer.tryConsume(":"); 2176 final String anyEndToken; 2177 if (tokenizer.tryConsume("<")) { 2178 anyEndToken = ">"; 2179 } else { 2180 tokenizer.consume("{"); 2181 anyEndToken = "}"; 2182 } 2183 String typeUrl = typeUrlBuilder.toString(); 2184 Descriptor contentType = null; 2185 try { 2186 contentType = typeRegistry.getDescriptorForTypeUrl(typeUrl); 2187 } catch (InvalidProtocolBufferException e) { 2188 throw tokenizer.parseException("Invalid valid type URL. Found: " + typeUrl); 2189 } 2190 if (contentType == null) { 2191 throw tokenizer.parseException( 2192 "Unable to parse Any of type: " 2193 + typeUrl 2194 + ". Please make sure that the TypeRegistry contains the descriptors for the given" 2195 + " types."); 2196 } 2197 Message.Builder contentBuilder = 2198 DynamicMessage.getDefaultInstance(contentType).newBuilderForType(); 2199 MessageReflection.BuilderAdapter contentTarget = 2200 new MessageReflection.BuilderAdapter(contentBuilder); 2201 while (!tokenizer.tryConsume(anyEndToken)) { 2202 mergeField(tokenizer, extensionRegistry, contentTarget, parseTreeBuilder, unknownFields); 2203 } 2204 2205 target.setField(anyDescriptor.findFieldByName("type_url"), typeUrlBuilder.toString()); 2206 target.setField( 2207 anyDescriptor.findFieldByName("value"), contentBuilder.build().toByteString()); 2208 } 2209 2210 /** Skips the next field including the field's name and value. */ skipField(Tokenizer tokenizer)2211 private static void skipField(Tokenizer tokenizer) throws ParseException { 2212 if (tokenizer.tryConsume("[")) { 2213 // Extension name. 2214 do { 2215 tokenizer.consumeIdentifier(); 2216 } while (tokenizer.tryConsume(".")); 2217 tokenizer.consume("]"); 2218 } else { 2219 tokenizer.consumeIdentifier(); 2220 } 2221 2222 // Try to guess the type of this field. 2223 // If this field is not a message, there should be a ":" between the 2224 // field name and the field value and also the field value should not 2225 // start with "{" or "<" which indicates the beginning of a message body. 2226 // If there is no ":" or there is a "{" or "<" after ":", this field has 2227 // to be a message or the input is ill-formed. 2228 if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") && !tokenizer.lookingAt("{")) { 2229 skipFieldValue(tokenizer); 2230 } else { 2231 skipFieldMessage(tokenizer); 2232 } 2233 // For historical reasons, fields may optionally be separated by commas or 2234 // semicolons. 2235 if (!tokenizer.tryConsume(";")) { 2236 tokenizer.tryConsume(","); 2237 } 2238 } 2239 2240 /** 2241 * Skips the whole body of a message including the beginning delimiter and the ending delimiter. 2242 */ skipFieldMessage(Tokenizer tokenizer)2243 private static void skipFieldMessage(Tokenizer tokenizer) throws ParseException { 2244 final String delimiter; 2245 if (tokenizer.tryConsume("<")) { 2246 delimiter = ">"; 2247 } else { 2248 tokenizer.consume("{"); 2249 delimiter = "}"; 2250 } 2251 while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) { 2252 skipField(tokenizer); 2253 } 2254 tokenizer.consume(delimiter); 2255 } 2256 2257 /** Skips a field value. */ skipFieldValue(Tokenizer tokenizer)2258 private static void skipFieldValue(Tokenizer tokenizer) throws ParseException { 2259 if (tokenizer.tryConsumeString()) { 2260 while (tokenizer.tryConsumeString()) {} 2261 return; 2262 } 2263 if (!tokenizer.tryConsumeIdentifier() // includes enum & boolean 2264 && !tokenizer.tryConsumeInt64() // includes int32 2265 && !tokenizer.tryConsumeUInt64() // includes uint32 2266 && !tokenizer.tryConsumeDouble() 2267 && !tokenizer.tryConsumeFloat()) { 2268 throw tokenizer.parseException("Invalid field value: " + tokenizer.currentToken); 2269 } 2270 } 2271 } 2272 2273 // ================================================================= 2274 // Utility functions 2275 // 2276 // Some of these methods are package-private because Descriptors.java uses 2277 // them. 2278 2279 /** 2280 * Escapes bytes in the format used in protocol buffer text format, which is the same as the 2281 * format used for C string literals. All bytes that are not printable 7-bit ASCII characters are 2282 * escaped, as well as backslash, single-quote, and double-quote characters. Characters for which 2283 * no defined short-hand escape sequence is defined will be escaped using 3-digit octal sequences. 2284 */ escapeBytes(ByteString input)2285 public static String escapeBytes(ByteString input) { 2286 return TextFormatEscaper.escapeBytes(input); 2287 } 2288 2289 /** Like {@link #escapeBytes(ByteString)}, but used for byte array. */ escapeBytes(byte[] input)2290 public static String escapeBytes(byte[] input) { 2291 return TextFormatEscaper.escapeBytes(input); 2292 } 2293 2294 /** 2295 * Un-escape a byte sequence as escaped using {@link #escapeBytes(ByteString)}. Two-digit hex 2296 * escapes (starting with "\x") are also recognized. 2297 */ unescapeBytes(final CharSequence charString)2298 public static ByteString unescapeBytes(final CharSequence charString) 2299 throws InvalidEscapeSequenceException { 2300 // First convert the Java character sequence to UTF-8 bytes. 2301 ByteString input = ByteString.copyFromUtf8(charString.toString()); 2302 // Then unescape certain byte sequences introduced by ASCII '\\'. The valid 2303 // escapes can all be expressed with ASCII characters, so it is safe to 2304 // operate on bytes here. 2305 // 2306 // Unescaping the input byte array will result in a byte sequence that's no 2307 // longer than the input. That's because each escape sequence is between 2308 // two and four bytes long and stands for a single byte. 2309 final byte[] result = new byte[input.size()]; 2310 int pos = 0; 2311 for (int i = 0; i < input.size(); i++) { 2312 byte c = input.byteAt(i); 2313 if (c == '\\') { 2314 if (i + 1 < input.size()) { 2315 ++i; 2316 c = input.byteAt(i); 2317 if (isOctal(c)) { 2318 // Octal escape. 2319 int code = digitValue(c); 2320 if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) { 2321 ++i; 2322 code = code * 8 + digitValue(input.byteAt(i)); 2323 } 2324 if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) { 2325 ++i; 2326 code = code * 8 + digitValue(input.byteAt(i)); 2327 } 2328 // TODO: Check that 0 <= code && code <= 0xFF. 2329 result[pos++] = (byte) code; 2330 } else { 2331 switch (c) { 2332 case 'a': 2333 result[pos++] = 0x07; 2334 break; 2335 case 'b': 2336 result[pos++] = '\b'; 2337 break; 2338 case 'f': 2339 result[pos++] = '\f'; 2340 break; 2341 case 'n': 2342 result[pos++] = '\n'; 2343 break; 2344 case 'r': 2345 result[pos++] = '\r'; 2346 break; 2347 case 't': 2348 result[pos++] = '\t'; 2349 break; 2350 case 'v': 2351 result[pos++] = 0x0b; 2352 break; 2353 case '\\': 2354 result[pos++] = '\\'; 2355 break; 2356 case '\'': 2357 result[pos++] = '\''; 2358 break; 2359 case '"': 2360 result[pos++] = '\"'; 2361 break; 2362 2363 case 'x': 2364 // hex escape 2365 int code = 0; 2366 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) { 2367 ++i; 2368 code = digitValue(input.byteAt(i)); 2369 } else { 2370 throw new InvalidEscapeSequenceException( 2371 "Invalid escape sequence: '\\x' with no digits"); 2372 } 2373 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) { 2374 ++i; 2375 code = code * 16 + digitValue(input.byteAt(i)); 2376 } 2377 result[pos++] = (byte) code; 2378 break; 2379 2380 case 'u': 2381 // Unicode escape 2382 ++i; 2383 if (i + 3 < input.size() 2384 && isHex(input.byteAt(i)) 2385 && isHex(input.byteAt(i + 1)) 2386 && isHex(input.byteAt(i + 2)) 2387 && isHex(input.byteAt(i + 3))) { 2388 char ch = 2389 (char) 2390 (digitValue(input.byteAt(i)) << 12 2391 | digitValue(input.byteAt(i + 1)) << 8 2392 | digitValue(input.byteAt(i + 2)) << 4 2393 | digitValue(input.byteAt(i + 3))); 2394 if (Character.isSurrogate(ch)) { 2395 throw new InvalidEscapeSequenceException( 2396 "Invalid escape sequence: '\\u' refers to a surrogate"); 2397 } 2398 byte[] chUtf8 = Character.toString(ch).getBytes(UTF_8); 2399 System.arraycopy(chUtf8, 0, result, pos, chUtf8.length); 2400 pos += chUtf8.length; 2401 i += 3; 2402 } else { 2403 throw new InvalidEscapeSequenceException( 2404 "Invalid escape sequence: '\\u' with too few hex chars"); 2405 } 2406 break; 2407 2408 case 'U': 2409 // Unicode escape 2410 ++i; 2411 if (i + 7 >= input.size()) { 2412 throw new InvalidEscapeSequenceException( 2413 "Invalid escape sequence: '\\U' with too few hex chars"); 2414 } 2415 int codepoint = 0; 2416 for (int offset = i; offset < i + 8; offset++) { 2417 byte b = input.byteAt(offset); 2418 if (!isHex(b)) { 2419 throw new InvalidEscapeSequenceException( 2420 "Invalid escape sequence: '\\U' with too few hex chars"); 2421 } 2422 codepoint = (codepoint << 4) | digitValue(b); 2423 } 2424 if (!Character.isValidCodePoint(codepoint)) { 2425 throw new InvalidEscapeSequenceException( 2426 "Invalid escape sequence: '\\U" 2427 + input.substring(i, i + 8).toStringUtf8() 2428 + "' is not a valid code point value"); 2429 } 2430 Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(codepoint); 2431 if (unicodeBlock.equals(Character.UnicodeBlock.LOW_SURROGATES) 2432 || unicodeBlock.equals(Character.UnicodeBlock.HIGH_SURROGATES) 2433 || unicodeBlock.equals(Character.UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES)) { 2434 throw new InvalidEscapeSequenceException( 2435 "Invalid escape sequence: '\\U" 2436 + input.substring(i, i + 8).toStringUtf8() 2437 + "' refers to a surrogate code unit"); 2438 } 2439 int[] codepoints = new int[1]; 2440 codepoints[0] = codepoint; 2441 byte[] chUtf8 = new String(codepoints, 0, 1).getBytes(UTF_8); 2442 System.arraycopy(chUtf8, 0, result, pos, chUtf8.length); 2443 pos += chUtf8.length; 2444 i += 7; 2445 break; 2446 2447 default: 2448 throw new InvalidEscapeSequenceException( 2449 "Invalid escape sequence: '\\" + (char) c + '\''); 2450 } 2451 } 2452 } else { 2453 throw new InvalidEscapeSequenceException( 2454 "Invalid escape sequence: '\\' at end of string."); 2455 } 2456 } else { 2457 result[pos++] = c; 2458 } 2459 } 2460 2461 return result.length == pos 2462 ? ByteString.wrap(result) // This reference has not been out of our control. 2463 : ByteString.copyFrom(result, 0, pos); 2464 } 2465 2466 /** 2467 * Thrown by {@link TextFormat#unescapeBytes} and {@link TextFormat#unescapeText} when an invalid 2468 * escape sequence is seen. 2469 */ 2470 public static class InvalidEscapeSequenceException extends IOException { 2471 private static final long serialVersionUID = -8164033650142593304L; 2472 InvalidEscapeSequenceException(final String description)2473 InvalidEscapeSequenceException(final String description) { 2474 super(description); 2475 } 2476 } 2477 2478 /** 2479 * Like {@link #escapeBytes(ByteString)}, but escapes a text string. Non-ASCII characters are 2480 * first encoded as UTF-8, then each byte is escaped individually as a 3-digit octal escape. Yes, 2481 * it's weird. 2482 */ escapeText(final String input)2483 static String escapeText(final String input) { 2484 return escapeBytes(ByteString.copyFromUtf8(input)); 2485 } 2486 2487 /** Escape double quotes and backslashes in a String for emittingUnicode output of a message. */ escapeDoubleQuotesAndBackslashes(final String input)2488 public static String escapeDoubleQuotesAndBackslashes(final String input) { 2489 return TextFormatEscaper.escapeDoubleQuotesAndBackslashes(input); 2490 } 2491 2492 /** 2493 * Un-escape a text string as escaped using {@link #escapeText(String)}. Two-digit hex escapes 2494 * (starting with "\x") are also recognized. 2495 */ unescapeText(final String input)2496 static String unescapeText(final String input) throws InvalidEscapeSequenceException { 2497 return unescapeBytes(input).toStringUtf8(); 2498 } 2499 2500 /** Is this an octal digit? */ isOctal(final byte c)2501 private static boolean isOctal(final byte c) { 2502 return '0' <= c && c <= '7'; 2503 } 2504 2505 /** Is this a hex digit? */ isHex(final byte c)2506 private static boolean isHex(final byte c) { 2507 return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'); 2508 } 2509 2510 /** 2511 * Interpret a character as a digit (in any base up to 36) and return the numeric value. This is 2512 * like {@code Character.digit()} but we don't accept non-ASCII digits. 2513 */ digitValue(final byte c)2514 private static int digitValue(final byte c) { 2515 if ('0' <= c && c <= '9') { 2516 return c - '0'; 2517 } else if ('a' <= c && c <= 'z') { 2518 return c - 'a' + 10; 2519 } else { 2520 return c - 'A' + 10; 2521 } 2522 } 2523 2524 /** 2525 * Parse a 32-bit signed integer from the text. Unlike the Java standard {@code 2526 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2527 * and octal numbers, respectively. 2528 */ parseInt32(final String text)2529 static int parseInt32(final String text) throws NumberFormatException { 2530 return (int) parseInteger(text, true, false); 2531 } 2532 2533 /** 2534 * Parse a 32-bit unsigned integer from the text. Unlike the Java standard {@code 2535 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2536 * and octal numbers, respectively. The result is coerced to a (signed) {@code int} when returned 2537 * since Java has no unsigned integer type. 2538 */ parseUInt32(final String text)2539 static int parseUInt32(final String text) throws NumberFormatException { 2540 return (int) parseInteger(text, false, false); 2541 } 2542 2543 /** 2544 * Parse a 64-bit signed integer from the text. Unlike the Java standard {@code 2545 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2546 * and octal numbers, respectively. 2547 */ parseInt64(final String text)2548 static long parseInt64(final String text) throws NumberFormatException { 2549 return parseInteger(text, true, true); 2550 } 2551 2552 /** 2553 * Parse a 64-bit unsigned integer from the text. Unlike the Java standard {@code 2554 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2555 * and octal numbers, respectively. The result is coerced to a (signed) {@code long} when returned 2556 * since Java has no unsigned long type. 2557 */ parseUInt64(final String text)2558 static long parseUInt64(final String text) throws NumberFormatException { 2559 return parseInteger(text, false, true); 2560 } 2561 parseInteger(final String text, final boolean isSigned, final boolean isLong)2562 private static long parseInteger(final String text, final boolean isSigned, final boolean isLong) 2563 throws NumberFormatException { 2564 int pos = 0; 2565 2566 boolean negative = false; 2567 if (text.startsWith("-", pos)) { 2568 if (!isSigned) { 2569 throw new NumberFormatException("Number must be positive: " + text); 2570 } 2571 ++pos; 2572 negative = true; 2573 } 2574 2575 int radix = 10; 2576 if (text.startsWith("0x", pos)) { 2577 pos += 2; 2578 radix = 16; 2579 } else if (text.startsWith("0", pos)) { 2580 radix = 8; 2581 } 2582 2583 final String numberText = text.substring(pos); 2584 2585 long result = 0; 2586 if (numberText.length() < 16) { 2587 // Can safely assume no overflow. 2588 result = Long.parseLong(numberText, radix); 2589 if (negative) { 2590 result = -result; 2591 } 2592 2593 // Check bounds. 2594 // No need to check for 64-bit numbers since they'd have to be 16 chars 2595 // or longer to overflow. 2596 if (!isLong) { 2597 if (isSigned) { 2598 if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) { 2599 throw new NumberFormatException( 2600 "Number out of range for 32-bit signed integer: " + text); 2601 } 2602 } else { 2603 if (result >= (1L << 32) || result < 0) { 2604 throw new NumberFormatException( 2605 "Number out of range for 32-bit unsigned integer: " + text); 2606 } 2607 } 2608 } 2609 } else { 2610 BigInteger bigValue = new BigInteger(numberText, radix); 2611 if (negative) { 2612 bigValue = bigValue.negate(); 2613 } 2614 2615 // Check bounds. 2616 if (!isLong) { 2617 if (isSigned) { 2618 if (bigValue.bitLength() > 31) { 2619 throw new NumberFormatException( 2620 "Number out of range for 32-bit signed integer: " + text); 2621 } 2622 } else { 2623 if (bigValue.bitLength() > 32) { 2624 throw new NumberFormatException( 2625 "Number out of range for 32-bit unsigned integer: " + text); 2626 } 2627 } 2628 } else { 2629 if (isSigned) { 2630 if (bigValue.bitLength() > 63) { 2631 throw new NumberFormatException( 2632 "Number out of range for 64-bit signed integer: " + text); 2633 } 2634 } else { 2635 if (bigValue.bitLength() > 64) { 2636 throw new NumberFormatException( 2637 "Number out of range for 64-bit unsigned integer: " + text); 2638 } 2639 } 2640 } 2641 2642 result = bigValue.longValue(); 2643 } 2644 2645 return result; 2646 } 2647 } 2648