// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 31 package com.google.protobuf; 32 33 import com.google.protobuf.Descriptors.Descriptor; 34 import com.google.protobuf.Descriptors.EnumDescriptor; 35 import com.google.protobuf.Descriptors.EnumValueDescriptor; 36 import com.google.protobuf.Descriptors.FieldDescriptor; 37 import com.google.protobuf.MessageReflection.MergeTarget; 38 import java.io.IOException; 39 import java.math.BigInteger; 40 import java.nio.CharBuffer; 41 import java.util.ArrayList; 42 import java.util.Collections; 43 import java.util.List; 44 import java.util.Locale; 45 import java.util.Map; 46 import java.util.logging.Logger; 47 import java.util.regex.Matcher; 48 import java.util.regex.Pattern; 49 50 /** 51 * Provide text parsing and formatting support for proto2 instances. The implementation largely 52 * follows google/protobuf/text_format.cc. 53 * 54 * @author wenboz@google.com Wenbo Zhu 55 * @author kenton@google.com Kenton Varda 56 */ 57 public final class TextFormat { TextFormat()58 private TextFormat() {} 59 60 private static final Logger logger = Logger.getLogger(TextFormat.class.getName()); 61 62 /** 63 * Outputs a textual representation of the Protocol Message supplied into the parameter output. 64 * (This representation is the new version of the classic "ProtocolPrinter" output from the 65 * original Protocol Buffer system) 66 * 67 * @deprecated Use {@code printer().print(MessageOrBuilder, Appendable)} 68 */ 69 @Deprecated print(final MessageOrBuilder message, final Appendable output)70 public static void print(final MessageOrBuilder message, final Appendable output) 71 throws IOException { 72 printer().print(message, output); 73 } 74 75 /** 76 * Outputs a textual representation of {@code fields} to {@code output}. 
77 * 78 * @deprecated Use {@code printer().print(UnknownFieldSet, Appendable)} 79 */ 80 @Deprecated print(final UnknownFieldSet fields, final Appendable output)81 public static void print(final UnknownFieldSet fields, final Appendable output) 82 throws IOException { 83 printer().print(fields, output); 84 } 85 86 /** 87 * Same as {@code print()}, except that non-ASCII characters are not escaped. 88 * 89 * @deprecated Use {@code printer().escapingNonAscii(false).print(MessageOrBuilder, Appendable)} 90 */ 91 @Deprecated printUnicode(final MessageOrBuilder message, final Appendable output)92 public static void printUnicode(final MessageOrBuilder message, final Appendable output) 93 throws IOException { 94 printer().escapingNonAscii(false).print(message, output); 95 } 96 97 /** 98 * Same as {@code print()}, except that non-ASCII characters are not escaped. 99 * 100 * @deprecated Use {@code printer().escapingNonAscii(false).print(UnknownFieldSet, Appendable)} 101 */ 102 @Deprecated printUnicode(final UnknownFieldSet fields, final Appendable output)103 public static void printUnicode(final UnknownFieldSet fields, final Appendable output) 104 throws IOException { 105 printer().escapingNonAscii(false).print(fields, output); 106 } 107 108 /** 109 * Generates a human readable form of this message, useful for debugging and other purposes, with 110 * no newline characters. This is just a trivial wrapper around 111 * {@link TextFormat.Printer#shortDebugString(MessageOrBuilder)}. 112 */ shortDebugString(final MessageOrBuilder message)113 public static String shortDebugString(final MessageOrBuilder message) { 114 return printer().shortDebugString(message); 115 } 116 117 /** 118 * Generates a human readable form of the field, useful for debugging and other purposes, with 119 * no newline characters. 
120 * 121 * @deprecated Use {@code printer().shortDebugString(FieldDescriptor, Object)} 122 */ 123 @Deprecated shortDebugString(final FieldDescriptor field, final Object value)124 public static String shortDebugString(final FieldDescriptor field, final Object value) { 125 return printer().shortDebugString(field, value); 126 } 127 // 128 /** 129 * Generates a human readable form of the unknown fields, useful for debugging and other 130 * purposes, with no newline characters. 131 * 132 * @deprecated Use {@code printer().shortDebugString(UnknownFieldSet)} 133 */ 134 @Deprecated shortDebugString(final UnknownFieldSet fields)135 public static String shortDebugString(final UnknownFieldSet fields) { 136 return printer().shortDebugString(fields); 137 } 138 139 /** 140 * Like {@code print()}, but writes directly to a {@code String} and returns it. 141 * 142 * @deprecated Use {@code message.toString()} 143 */ 144 @Deprecated printToString(final MessageOrBuilder message)145 public static String printToString(final MessageOrBuilder message) { 146 return printer().printToString(message); 147 } 148 149 /** 150 * Like {@code print()}, but writes directly to a {@code String} and returns it. 151 * 152 * @deprecated Use {@link UnknownFieldSet#toString()} 153 */ 154 @Deprecated printToString(final UnknownFieldSet fields)155 public static String printToString(final UnknownFieldSet fields) { 156 return printer().printToString(fields); 157 } 158 159 /** 160 * Same as {@code printToString()}, except that non-ASCII characters in string type fields are not 161 * escaped in backslash+octals. 
162 * 163 * @deprecated Use {@code printer().escapingNonAscii(false).printToString(MessageOrBuilder)} 164 */ 165 @Deprecated printToUnicodeString(final MessageOrBuilder message)166 public static String printToUnicodeString(final MessageOrBuilder message) { 167 return printer().escapingNonAscii(false).printToString(message); 168 } 169 170 /** 171 * Same as {@code printToString()}, except that non-ASCII characters in string type fields are 172 * not escaped in backslash+octals. 173 * 174 * @deprecated Use {@code printer().escapingNonAscii(false).printToString(UnknownFieldSet)} 175 */ 176 @Deprecated printToUnicodeString(final UnknownFieldSet fields)177 public static String printToUnicodeString(final UnknownFieldSet fields) { 178 return printer().escapingNonAscii(false).printToString(fields); 179 } 180 // 181 /** @deprecated Use {@code printer().printField(FieldDescriptor, Object, Appendable)} */ 182 @Deprecated printField( final FieldDescriptor field, final Object value, final Appendable output)183 public static void printField( 184 final FieldDescriptor field, final Object value, final Appendable output) 185 throws IOException { 186 printer().printField(field, value, output); 187 } 188 // 189 /** @deprecated Use {@code printer().printFieldToString(FieldDescriptor, Object)} */ 190 @Deprecated printFieldToString(final FieldDescriptor field, final Object value)191 public static String printFieldToString(final FieldDescriptor field, final Object value) { 192 return printer().printFieldToString(field, value); 193 } 194 // 195 /** 196 * Outputs a unicode textual representation of the value of given field value. 197 * 198 * <p>Same as {@code printFieldValue()}, except that non-ASCII characters in string type fields 199 * are not escaped in backslash+octals. 
200 * 201 * @deprecated Use {@code printer().escapingNonAscii(false).printFieldValue(FieldDescriptor, 202 * Object, Appendable)} 203 * @param field the descriptor of the field 204 * @param value the value of the field 205 * @param output the output to which to append the formatted value 206 * @throws ClassCastException if the value is not appropriate for the given field descriptor 207 * @throws IOException if there is an exception writing to the output 208 */ 209 @Deprecated printUnicodeFieldValue( final FieldDescriptor field, final Object value, final Appendable output)210 public static void printUnicodeFieldValue( 211 final FieldDescriptor field, final Object value, final Appendable output) 212 throws IOException { 213 printer().escapingNonAscii(false).printFieldValue(field, value, output); 214 } 215 216 /** 217 * Outputs a textual representation of the value of given field value. 218 * 219 * @deprecated Use {@code printer().printFieldValue(FieldDescriptor, Object, Appendable)} 220 * @param field the descriptor of the field 221 * @param value the value of the field 222 * @param output the output to which to append the formatted value 223 * @throws ClassCastException if the value is not appropriate for the given field descriptor 224 * @throws IOException if there is an exception writing to the output 225 */ 226 @Deprecated printFieldValue( final FieldDescriptor field, final Object value, final Appendable output)227 public static void printFieldValue( 228 final FieldDescriptor field, final Object value, final Appendable output) throws IOException { 229 printer().printFieldValue(field, value, output); 230 } 231 232 /** 233 * Outputs a textual representation of the value of an unknown field. 
234 * 235 * @param tag the field's tag number 236 * @param value the value of the field 237 * @param output the output to which to append the formatted value 238 * @throws ClassCastException if the value is not appropriate for the given field descriptor 239 * @throws IOException if there is an exception writing to the output 240 */ printUnknownFieldValue( final int tag, final Object value, final Appendable output)241 public static void printUnknownFieldValue( 242 final int tag, final Object value, final Appendable output) throws IOException { 243 printUnknownFieldValue(tag, value, multiLineOutput(output)); 244 } 245 printUnknownFieldValue( final int tag, final Object value, final TextGenerator generator)246 private static void printUnknownFieldValue( 247 final int tag, final Object value, final TextGenerator generator) throws IOException { 248 switch (WireFormat.getTagWireType(tag)) { 249 case WireFormat.WIRETYPE_VARINT: 250 generator.print(unsignedToString((Long) value)); 251 break; 252 case WireFormat.WIRETYPE_FIXED32: 253 generator.print(String.format((Locale) null, "0x%08x", (Integer) value)); 254 break; 255 case WireFormat.WIRETYPE_FIXED64: 256 generator.print(String.format((Locale) null, "0x%016x", (Long) value)); 257 break; 258 case WireFormat.WIRETYPE_LENGTH_DELIMITED: 259 try { 260 // Try to parse and print the field as an embedded message 261 UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value); 262 generator.print("{"); 263 generator.eol(); 264 generator.indent(); 265 Printer.printUnknownFields(message, generator); 266 generator.outdent(); 267 generator.print("}"); 268 } catch (InvalidProtocolBufferException e) { 269 // If not parseable as a message, print as a String 270 generator.print("\""); 271 generator.print(escapeBytes((ByteString) value)); 272 generator.print("\""); 273 } 274 break; 275 case WireFormat.WIRETYPE_START_GROUP: 276 Printer.printUnknownFields((UnknownFieldSet) value, generator); 277 break; 278 default: 279 throw new 
IllegalArgumentException("Bad tag: " + tag); 280 } 281 } 282 283 /** Printer instance which escapes non-ASCII characters. */ printer()284 public static Printer printer() { 285 return Printer.DEFAULT; 286 } 287 288 /** Helper class for converting protobufs to text. */ 289 public static final class Printer { 290 291 // Printer instance which escapes non-ASCII characters. 292 private static final Printer DEFAULT = new Printer(true, TypeRegistry.getEmptyTypeRegistry()); 293 294 /** Whether to escape non ASCII characters with backslash and octal. */ 295 private final boolean escapeNonAscii; 296 297 private final TypeRegistry typeRegistry; 298 Printer(boolean escapeNonAscii, TypeRegistry typeRegistry)299 private Printer(boolean escapeNonAscii, TypeRegistry typeRegistry) { 300 this.escapeNonAscii = escapeNonAscii; 301 this.typeRegistry = typeRegistry; 302 } 303 304 /** 305 * Return a new Printer instance with the specified escape mode. 306 * 307 * @param escapeNonAscii If true, the new Printer will escape non-ASCII characters (this is the 308 * default behavior. If false, the new Printer will print non-ASCII characters as is. In 309 * either case, the new Printer still escapes newlines and quotes in strings. 310 * @return a new Printer that clones all other configurations from the current {@link Printer}, 311 * with the escape mode set to the given parameter. 312 */ escapingNonAscii(boolean escapeNonAscii)313 public Printer escapingNonAscii(boolean escapeNonAscii) { 314 return new Printer(escapeNonAscii, typeRegistry); 315 } 316 317 /** 318 * Creates a new {@link Printer} using the given typeRegistry. The new Printer clones all other 319 * configurations from the current {@link Printer}. 320 * 321 * @throws IllegalArgumentException if a registry is already set. 
322 */ usingTypeRegistry(TypeRegistry typeRegistry)323 public Printer usingTypeRegistry(TypeRegistry typeRegistry) { 324 if (this.typeRegistry != TypeRegistry.getEmptyTypeRegistry()) { 325 throw new IllegalArgumentException("Only one typeRegistry is allowed."); 326 } 327 return new Printer(escapeNonAscii, typeRegistry); 328 } 329 330 /** 331 * Outputs a textual representation of the Protocol Message supplied into the parameter output. 332 * (This representation is the new version of the classic "ProtocolPrinter" output from the 333 * original Protocol Buffer system) 334 */ print(final MessageOrBuilder message, final Appendable output)335 public void print(final MessageOrBuilder message, final Appendable output) throws IOException { 336 print(message, multiLineOutput(output)); 337 } 338 339 /** Outputs a textual representation of {@code fields} to {@code output}. */ print(final UnknownFieldSet fields, final Appendable output)340 public void print(final UnknownFieldSet fields, final Appendable output) throws IOException { 341 printUnknownFields(fields, multiLineOutput(output)); 342 } 343 print(final MessageOrBuilder message, final TextGenerator generator)344 private void print(final MessageOrBuilder message, final TextGenerator generator) 345 throws IOException { 346 if (message.getDescriptorForType().getFullName().equals("google.protobuf.Any") 347 && printAny(message, generator)) { 348 return; 349 } 350 printMessage(message, generator); 351 } 352 353 /** 354 * Attempt to print the 'google.protobuf.Any' message in a human-friendly format. Returns false 355 * if the message isn't a valid 'google.protobuf.Any' message (in which case the message should 356 * be rendered just like a regular message to help debugging). 
357 */ printAny(final MessageOrBuilder message, final TextGenerator generator)358 private boolean printAny(final MessageOrBuilder message, final TextGenerator generator) 359 throws IOException { 360 Descriptor messageType = message.getDescriptorForType(); 361 FieldDescriptor typeUrlField = messageType.findFieldByNumber(1); 362 FieldDescriptor valueField = messageType.findFieldByNumber(2); 363 if (typeUrlField == null 364 || typeUrlField.getType() != FieldDescriptor.Type.STRING 365 || valueField == null 366 || valueField.getType() != FieldDescriptor.Type.BYTES) { 367 // The message may look like an Any but isn't actually an Any message (might happen if the 368 // user tries to use DynamicMessage to construct an Any from incomplete Descriptor). 369 return false; 370 } 371 String typeUrl = (String) message.getField(typeUrlField); 372 // If type_url is not set, we will not be able to decode the content of the value, so just 373 // print out the Any like a regular message. 374 if (typeUrl.isEmpty()) { 375 return false; 376 } 377 Object value = message.getField(valueField); 378 379 Message.Builder contentBuilder = null; 380 try { 381 Descriptor contentType = typeRegistry.getDescriptorForTypeUrl(typeUrl); 382 if (contentType == null) { 383 return false; 384 } 385 contentBuilder = DynamicMessage.getDefaultInstance(contentType).newBuilderForType(); 386 contentBuilder.mergeFrom((ByteString) value); 387 } catch (InvalidProtocolBufferException e) { 388 // The value of Any is malformed. We cannot print it out nicely, so fallback to printing out 389 // the type_url and value as bytes. Note that we fail open here to be consistent with 390 // text_format.cc, and also to allow a way for users to inspect the content of the broken 391 // message. 
392 return false; 393 } 394 generator.print("["); 395 generator.print(typeUrl); 396 generator.print("] {"); 397 generator.eol(); 398 generator.indent(); 399 print(contentBuilder, generator); 400 generator.outdent(); 401 generator.print("}"); 402 generator.eol(); 403 return true; 404 } 405 printFieldToString(final FieldDescriptor field, final Object value)406 public String printFieldToString(final FieldDescriptor field, final Object value) { 407 try { 408 final StringBuilder text = new StringBuilder(); 409 printField(field, value, text); 410 return text.toString(); 411 } catch (IOException e) { 412 throw new IllegalStateException(e); 413 } 414 } 415 printField(final FieldDescriptor field, final Object value, final Appendable output)416 public void printField(final FieldDescriptor field, final Object value, final Appendable output) 417 throws IOException { 418 printField(field, value, multiLineOutput(output)); 419 } 420 printField( final FieldDescriptor field, final Object value, final TextGenerator generator)421 private void printField( 422 final FieldDescriptor field, final Object value, final TextGenerator generator) 423 throws IOException { 424 // Sort map field entries by key 425 if (field.isMapField()) { 426 List<MapEntryAdapter> adapters = new ArrayList<>(); 427 for (Object entry : (List<?>) value) { 428 adapters.add(new MapEntryAdapter(entry, field)); 429 } 430 Collections.sort(adapters); 431 for (MapEntryAdapter adapter : adapters) { 432 printSingleField(field, adapter.getEntry(), generator); 433 } 434 } else if (field.isRepeated()) { 435 // Repeated field. Print each element. 436 for (Object element : (List<?>) value) { 437 printSingleField(field, element, generator); 438 } 439 } else { 440 printSingleField(field, value, generator); 441 } 442 } 443 444 /** 445 * An adapter class that can take a MapEntry or a MutableMapEntry and returns its key and entry. 
446 * This class is created solely for the purpose of sorting map entries by its key and prevent 447 * duplicated logic by having a separate comparator for MapEntry and MutableMapEntry. 448 */ 449 private static class MapEntryAdapter implements Comparable<MapEntryAdapter> { 450 private Object entry; 451 452 @SuppressWarnings({"rawtypes"}) 453 private MapEntry mapEntry; 454 455 456 private final FieldDescriptor.JavaType fieldType; 457 MapEntryAdapter(Object entry, FieldDescriptor fieldDescriptor)458 public MapEntryAdapter(Object entry, FieldDescriptor fieldDescriptor) { 459 if (entry instanceof MapEntry) { 460 this.mapEntry = (MapEntry) entry; 461 } else { 462 this.entry = entry; 463 } 464 this.fieldType = extractFieldType(fieldDescriptor); 465 } 466 extractFieldType(FieldDescriptor fieldDescriptor)467 private static FieldDescriptor.JavaType extractFieldType(FieldDescriptor fieldDescriptor) { 468 return fieldDescriptor.getMessageType().getFields().get(0).getJavaType(); 469 } 470 getKey()471 public Object getKey() { 472 if (mapEntry != null) { 473 return mapEntry.getKey(); 474 } 475 return null; 476 } 477 getEntry()478 public Object getEntry() { 479 if (mapEntry != null) { 480 return mapEntry; 481 } 482 return entry; 483 } 484 485 @Override compareTo(MapEntryAdapter b)486 public int compareTo(MapEntryAdapter b) { 487 if (getKey() == null || b.getKey() == null) { 488 logger.info("Invalid key for map field."); 489 return -1; 490 } 491 switch (fieldType) { 492 case BOOLEAN: 493 return Boolean.compare((boolean) getKey(), (boolean) b.getKey()); 494 case LONG: 495 return Long.compare((long) getKey(), (long) b.getKey()); 496 case INT: 497 return Integer.compare((int) getKey(), (int) b.getKey()); 498 case STRING: 499 String aString = (String) getKey(); 500 String bString = (String) b.getKey(); 501 if (aString == null && bString == null) { 502 return 0; 503 } else if (aString == null && bString != null) { 504 return -1; 505 } else if (aString != null && bString == null) { 506 
return 1; 507 } else { 508 return aString.compareTo(bString); 509 } 510 default: 511 return 0; 512 } 513 } 514 } 515 516 /** 517 * Outputs a textual representation of the value of given field value. 518 * 519 * @param field the descriptor of the field 520 * @param value the value of the field 521 * @param output the output to which to append the formatted value 522 * @throws ClassCastException if the value is not appropriate for the given field descriptor 523 * @throws IOException if there is an exception writing to the output 524 */ printFieldValue( final FieldDescriptor field, final Object value, final Appendable output)525 public void printFieldValue( 526 final FieldDescriptor field, final Object value, final Appendable output) 527 throws IOException { 528 printFieldValue(field, value, multiLineOutput(output)); 529 } 530 printFieldValue( final FieldDescriptor field, final Object value, final TextGenerator generator)531 private void printFieldValue( 532 final FieldDescriptor field, final Object value, final TextGenerator generator) 533 throws IOException { 534 switch (field.getType()) { 535 case INT32: 536 case SINT32: 537 case SFIXED32: 538 generator.print(((Integer) value).toString()); 539 break; 540 541 case INT64: 542 case SINT64: 543 case SFIXED64: 544 generator.print(((Long) value).toString()); 545 break; 546 547 case BOOL: 548 generator.print(((Boolean) value).toString()); 549 break; 550 551 case FLOAT: 552 generator.print(((Float) value).toString()); 553 break; 554 555 case DOUBLE: 556 generator.print(((Double) value).toString()); 557 break; 558 559 case UINT32: 560 case FIXED32: 561 generator.print(unsignedToString((Integer) value)); 562 break; 563 564 case UINT64: 565 case FIXED64: 566 generator.print(unsignedToString((Long) value)); 567 break; 568 569 case STRING: 570 generator.print("\""); 571 generator.print( 572 escapeNonAscii 573 ? 
TextFormatEscaper.escapeText((String) value) 574 : escapeDoubleQuotesAndBackslashes((String) value).replace("\n", "\\n")); 575 generator.print("\""); 576 break; 577 578 case BYTES: 579 generator.print("\""); 580 if (value instanceof ByteString) { 581 generator.print(escapeBytes((ByteString) value)); 582 } else { 583 generator.print(escapeBytes((byte[]) value)); 584 } 585 generator.print("\""); 586 break; 587 588 case ENUM: 589 generator.print(((EnumValueDescriptor) value).getName()); 590 break; 591 592 case MESSAGE: 593 case GROUP: 594 print((Message) value, generator); 595 break; 596 } 597 } 598 599 /** Like {@code print()}, but writes directly to a {@code String} and returns it. */ printToString(final MessageOrBuilder message)600 public String printToString(final MessageOrBuilder message) { 601 try { 602 final StringBuilder text = new StringBuilder(); 603 print(message, text); 604 return text.toString(); 605 } catch (IOException e) { 606 throw new IllegalStateException(e); 607 } 608 } 609 /** Like {@code print()}, but writes directly to a {@code String} and returns it. */ printToString(final UnknownFieldSet fields)610 public String printToString(final UnknownFieldSet fields) { 611 try { 612 final StringBuilder text = new StringBuilder(); 613 print(fields, text); 614 return text.toString(); 615 } catch (IOException e) { 616 throw new IllegalStateException(e); 617 } 618 } 619 620 /** 621 * Generates a human readable form of this message, useful for debugging and other purposes, 622 * with no newline characters. 
623 */ shortDebugString(final MessageOrBuilder message)624 public String shortDebugString(final MessageOrBuilder message) { 625 try { 626 final StringBuilder text = new StringBuilder(); 627 print(message, singleLineOutput(text)); 628 return text.toString(); 629 } catch (IOException e) { 630 throw new IllegalStateException(e); 631 } 632 } 633 634 /** 635 * Generates a human readable form of the field, useful for debugging and other purposes, with 636 * no newline characters. 637 */ shortDebugString(final FieldDescriptor field, final Object value)638 public String shortDebugString(final FieldDescriptor field, final Object value) { 639 try { 640 final StringBuilder text = new StringBuilder(); 641 printField(field, value, singleLineOutput(text)); 642 return text.toString(); 643 } catch (IOException e) { 644 throw new IllegalStateException(e); 645 } 646 } 647 648 /** 649 * Generates a human readable form of the unknown fields, useful for debugging and other 650 * purposes, with no newline characters. 
651 */ shortDebugString(final UnknownFieldSet fields)652 public String shortDebugString(final UnknownFieldSet fields) { 653 try { 654 final StringBuilder text = new StringBuilder(); 655 printUnknownFields(fields, singleLineOutput(text)); 656 return text.toString(); 657 } catch (IOException e) { 658 throw new IllegalStateException(e); 659 } 660 } 661 printUnknownFieldValue( final int tag, final Object value, final TextGenerator generator)662 private static void printUnknownFieldValue( 663 final int tag, final Object value, final TextGenerator generator) throws IOException { 664 switch (WireFormat.getTagWireType(tag)) { 665 case WireFormat.WIRETYPE_VARINT: 666 generator.print(unsignedToString((Long) value)); 667 break; 668 case WireFormat.WIRETYPE_FIXED32: 669 generator.print(String.format((Locale) null, "0x%08x", (Integer) value)); 670 break; 671 case WireFormat.WIRETYPE_FIXED64: 672 generator.print(String.format((Locale) null, "0x%016x", (Long) value)); 673 break; 674 case WireFormat.WIRETYPE_LENGTH_DELIMITED: 675 try { 676 // Try to parse and print the field as an embedded message 677 UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value); 678 generator.print("{"); 679 generator.eol(); 680 generator.indent(); 681 printUnknownFields(message, generator); 682 generator.outdent(); 683 generator.print("}"); 684 } catch (InvalidProtocolBufferException e) { 685 // If not parseable as a message, print as a String 686 generator.print("\""); 687 generator.print(escapeBytes((ByteString) value)); 688 generator.print("\""); 689 } 690 break; 691 case WireFormat.WIRETYPE_START_GROUP: 692 printUnknownFields((UnknownFieldSet) value, generator); 693 break; 694 default: 695 throw new IllegalArgumentException("Bad tag: " + tag); 696 } 697 } 698 printMessage(final MessageOrBuilder message, final TextGenerator generator)699 private void printMessage(final MessageOrBuilder message, final TextGenerator generator) 700 throws IOException { 701 for 
(Map.Entry<FieldDescriptor, Object> field : message.getAllFields().entrySet()) { 702 printField(field.getKey(), field.getValue(), generator); 703 } 704 printUnknownFields(message.getUnknownFields(), generator); 705 } 706 printSingleField( final FieldDescriptor field, final Object value, final TextGenerator generator)707 private void printSingleField( 708 final FieldDescriptor field, final Object value, final TextGenerator generator) 709 throws IOException { 710 if (field.isExtension()) { 711 generator.print("["); 712 // We special-case MessageSet elements for compatibility with proto1. 713 if (field.getContainingType().getOptions().getMessageSetWireFormat() 714 && (field.getType() == FieldDescriptor.Type.MESSAGE) 715 && (field.isOptional()) 716 // object equality 717 && (field.getExtensionScope() == field.getMessageType())) { 718 generator.print(field.getMessageType().getFullName()); 719 } else { 720 generator.print(field.getFullName()); 721 } 722 generator.print("]"); 723 } else { 724 if (field.getType() == FieldDescriptor.Type.GROUP) { 725 // Groups must be serialized with their original capitalization. 
726 generator.print(field.getMessageType().getName()); 727 } else { 728 generator.print(field.getName()); 729 } 730 } 731 732 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 733 generator.print(" {"); 734 generator.eol(); 735 generator.indent(); 736 } else { 737 generator.print(": "); 738 } 739 740 printFieldValue(field, value, generator); 741 742 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 743 generator.outdent(); 744 generator.print("}"); 745 } 746 generator.eol(); 747 } 748 printUnknownFields( final UnknownFieldSet unknownFields, final TextGenerator generator)749 private static void printUnknownFields( 750 final UnknownFieldSet unknownFields, final TextGenerator generator) throws IOException { 751 for (Map.Entry<Integer, UnknownFieldSet.Field> entry : unknownFields.asMap().entrySet()) { 752 final int number = entry.getKey(); 753 final UnknownFieldSet.Field field = entry.getValue(); 754 printUnknownField(number, WireFormat.WIRETYPE_VARINT, field.getVarintList(), generator); 755 printUnknownField(number, WireFormat.WIRETYPE_FIXED32, field.getFixed32List(), generator); 756 printUnknownField(number, WireFormat.WIRETYPE_FIXED64, field.getFixed64List(), generator); 757 printUnknownField( 758 number, 759 WireFormat.WIRETYPE_LENGTH_DELIMITED, 760 field.getLengthDelimitedList(), 761 generator); 762 for (final UnknownFieldSet value : field.getGroupList()) { 763 generator.print(entry.getKey().toString()); 764 generator.print(" {"); 765 generator.eol(); 766 generator.indent(); 767 printUnknownFields(value, generator); 768 generator.outdent(); 769 generator.print("}"); 770 generator.eol(); 771 } 772 } 773 } 774 printUnknownField( final int number, final int wireType, final List<?> values, final TextGenerator generator)775 private static void printUnknownField( 776 final int number, final int wireType, final List<?> values, final TextGenerator generator) 777 throws IOException { 778 for (final Object value : values) { 779 
generator.print(String.valueOf(number)); 780 generator.print(": "); 781 printUnknownFieldValue(wireType, value, generator); 782 generator.eol(); 783 } 784 } 785 } 786 787 /** Convert an unsigned 32-bit integer to a string. */ unsignedToString(final int value)788 public static String unsignedToString(final int value) { 789 if (value >= 0) { 790 return Integer.toString(value); 791 } else { 792 return Long.toString(value & 0x00000000FFFFFFFFL); 793 } 794 } 795 796 /** Convert an unsigned 64-bit integer to a string. */ unsignedToString(final long value)797 public static String unsignedToString(final long value) { 798 if (value >= 0) { 799 return Long.toString(value); 800 } else { 801 // Pull off the most-significant bit so that BigInteger doesn't think 802 // the number is negative, then set it again using setBit(). 803 return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL).setBit(63).toString(); 804 } 805 } 806 multiLineOutput(Appendable output)807 private static TextGenerator multiLineOutput(Appendable output) { 808 return new TextGenerator(output, false); 809 } 810 singleLineOutput(Appendable output)811 private static TextGenerator singleLineOutput(Appendable output) { 812 return new TextGenerator(output, true); 813 } 814 815 /** An inner class for writing text to the output stream. */ 816 private static final class TextGenerator { 817 private final Appendable output; 818 private final StringBuilder indent = new StringBuilder(); 819 private final boolean singleLineMode; 820 // While technically we are "at the start of a line" at the very beginning of the output, all 821 // we would do in response to this is emit the (zero length) indentation, so it has no effect. 822 // Setting it false here does however suppress an unwanted leading space in single-line mode. 
823 private boolean atStartOfLine = false; 824 TextGenerator(final Appendable output, boolean singleLineMode)825 private TextGenerator(final Appendable output, boolean singleLineMode) { 826 this.output = output; 827 this.singleLineMode = singleLineMode; 828 } 829 830 /** 831 * Indent text by two spaces. After calling Indent(), two spaces will be inserted at the 832 * beginning of each line of text. Indent() may be called multiple times to produce deeper 833 * indents. 834 */ indent()835 public void indent() { 836 indent.append(" "); 837 } 838 839 /** Reduces the current indent level by two spaces, or crashes if the indent level is zero. */ outdent()840 public void outdent() { 841 final int length = indent.length(); 842 if (length == 0) { 843 throw new IllegalArgumentException(" Outdent() without matching Indent()."); 844 } 845 indent.setLength(length - 2); 846 } 847 848 /** 849 * Print text to the output stream. Bare newlines are never expected to be passed to this 850 * method; to indicate the end of a line, call "eol()". 851 */ print(final CharSequence text)852 public void print(final CharSequence text) throws IOException { 853 if (atStartOfLine) { 854 atStartOfLine = false; 855 output.append(singleLineMode ? " " : indent); 856 } 857 output.append(text); 858 } 859 860 /** 861 * Signifies reaching the "end of the current line" in the output. In single-line mode, this 862 * does not result in a newline being emitted, but ensures that a separating space is written 863 * before the next output. 864 */ eol()865 public void eol() throws IOException { 866 if (!singleLineMode) { 867 output.append("\n"); 868 } 869 atStartOfLine = true; 870 } 871 } 872 873 // ================================================================= 874 // Parsing 875 876 /** 877 * Represents a stream of tokens parsed from a {@code String}. 878 * 879 * <p>The Java standard library provides many classes that you might think would be useful for 880 * implementing this, but aren't. 
For example: 881 * 882 * <ul> 883 * <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or, at least, something 884 * that would get us close to what we want -- except for one fatal flaw: It automatically 885 * un-escapes strings using Java escape sequences, which do not include all the escape 886 * sequences we need to support (e.g. '\x'). 887 * <li>{@code java.util.Scanner}: This seems like a great way at least to parse regular 888 * expressions out of a stream (so we wouldn't have to load the entire input into a single 889 * string before parsing). Sadly, {@code Scanner} requires that tokens be delimited with 890 * some delimiter. Thus, although the text "foo:" should parse to two tokens ("foo" and 891 * ":"), {@code Scanner} would recognize it only as a single token. Furthermore, {@code 892 * Scanner} provides no way to inspect the contents of delimiters, making it impossible to 893 * keep track of line and column numbers. 894 * </ul> 895 * 896 * <p>Luckily, Java's regular expression support does manage to be useful to us. (Barely: We need 897 * {@code Matcher.usePattern()}, which is new in Java 1.5.) So, we can use that, at least. 898 * Unfortunately, this implies that we need to have the entire input in one contiguous string. 899 */ 900 private static final class Tokenizer { 901 private final CharSequence text; 902 private final Matcher matcher; 903 private String currentToken; 904 905 // The character index within this.text at which the current token begins. 906 private int pos = 0; 907 908 // The line and column numbers of the current token. 909 private int line = 0; 910 private int column = 0; 911 912 // The line and column numbers of the previous token (allows throwing 913 // errors *after* consuming). 914 private int previousLine = 0; 915 private int previousColumn = 0; 916 917 // We use possessive quantifiers (*+ and ++) because otherwise the Java 918 // regex matcher has stack overflows on large inputs. 
919 private static final Pattern WHITESPACE = Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE); 920 private static final Pattern TOKEN = 921 Pattern.compile( 922 "[a-zA-Z_][0-9a-zA-Z_+-]*+|" // an identifier 923 + "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" // a number 924 + "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" // a double-quoted string 925 + "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)", // a single-quoted string 926 Pattern.MULTILINE); 927 928 private static final Pattern DOUBLE_INFINITY = 929 Pattern.compile("-?inf(inity)?", Pattern.CASE_INSENSITIVE); 930 private static final Pattern FLOAT_INFINITY = 931 Pattern.compile("-?inf(inity)?f?", Pattern.CASE_INSENSITIVE); 932 private static final Pattern FLOAT_NAN = Pattern.compile("nanf?", Pattern.CASE_INSENSITIVE); 933 934 /** Construct a tokenizer that parses tokens from the given text. */ Tokenizer(final CharSequence text)935 private Tokenizer(final CharSequence text) { 936 this.text = text; 937 this.matcher = WHITESPACE.matcher(text); 938 skipWhitespace(); 939 nextToken(); 940 } 941 getPreviousLine()942 int getPreviousLine() { 943 return previousLine; 944 } 945 getPreviousColumn()946 int getPreviousColumn() { 947 return previousColumn; 948 } 949 getLine()950 int getLine() { 951 return line; 952 } 953 getColumn()954 int getColumn() { 955 return column; 956 } 957 958 /** Are we at the end of the input? */ atEnd()959 public boolean atEnd() { 960 return currentToken.length() == 0; 961 } 962 963 /** Advance to the next token. */ nextToken()964 public void nextToken() { 965 previousLine = line; 966 previousColumn = column; 967 968 // Advance the line counter to the current position. 969 while (pos < matcher.regionStart()) { 970 if (text.charAt(pos) == '\n') { 971 ++line; 972 column = 0; 973 } else { 974 ++column; 975 } 976 ++pos; 977 } 978 979 // Match the next token. 
980 if (matcher.regionStart() == matcher.regionEnd()) { 981 // EOF 982 currentToken = ""; 983 } else { 984 matcher.usePattern(TOKEN); 985 if (matcher.lookingAt()) { 986 currentToken = matcher.group(); 987 matcher.region(matcher.end(), matcher.regionEnd()); 988 } else { 989 // Take one character. 990 currentToken = String.valueOf(text.charAt(pos)); 991 matcher.region(pos + 1, matcher.regionEnd()); 992 } 993 994 skipWhitespace(); 995 } 996 } 997 998 /** Skip over any whitespace so that the matcher region starts at the next token. */ skipWhitespace()999 private void skipWhitespace() { 1000 matcher.usePattern(WHITESPACE); 1001 if (matcher.lookingAt()) { 1002 matcher.region(matcher.end(), matcher.regionEnd()); 1003 } 1004 } 1005 1006 /** 1007 * If the next token exactly matches {@code token}, consume it and return {@code true}. 1008 * Otherwise, return {@code false} without doing anything. 1009 */ tryConsume(final String token)1010 public boolean tryConsume(final String token) { 1011 if (currentToken.equals(token)) { 1012 nextToken(); 1013 return true; 1014 } else { 1015 return false; 1016 } 1017 } 1018 1019 /** 1020 * If the next token exactly matches {@code token}, consume it. Otherwise, throw a {@link 1021 * ParseException}. 1022 */ consume(final String token)1023 public void consume(final String token) throws ParseException { 1024 if (!tryConsume(token)) { 1025 throw parseException("Expected \"" + token + "\"."); 1026 } 1027 } 1028 1029 /** Returns {@code true} if the next token is an integer, but does not consume it. */ lookingAtInteger()1030 public boolean lookingAtInteger() { 1031 if (currentToken.length() == 0) { 1032 return false; 1033 } 1034 1035 final char c = currentToken.charAt(0); 1036 return ('0' <= c && c <= '9') || c == '-' || c == '+'; 1037 } 1038 1039 /** Returns {@code true} if the current token's text is equal to that specified. 
*/ lookingAt(String text)1040 public boolean lookingAt(String text) { 1041 return currentToken.equals(text); 1042 } 1043 1044 /** 1045 * If the next token is an identifier, consume it and return its value. Otherwise, throw a 1046 * {@link ParseException}. 1047 */ consumeIdentifier()1048 public String consumeIdentifier() throws ParseException { 1049 for (int i = 0; i < currentToken.length(); i++) { 1050 final char c = currentToken.charAt(i); 1051 if (('a' <= c && c <= 'z') 1052 || ('A' <= c && c <= 'Z') 1053 || ('0' <= c && c <= '9') 1054 || (c == '_') 1055 || (c == '.')) { 1056 // OK 1057 } else { 1058 throw parseException("Expected identifier. Found '" + currentToken + "'"); 1059 } 1060 } 1061 1062 final String result = currentToken; 1063 nextToken(); 1064 return result; 1065 } 1066 1067 /** 1068 * If the next token is an identifier, consume it and return {@code true}. Otherwise, return 1069 * {@code false} without doing anything. 1070 */ tryConsumeIdentifier()1071 public boolean tryConsumeIdentifier() { 1072 try { 1073 consumeIdentifier(); 1074 return true; 1075 } catch (ParseException e) { 1076 return false; 1077 } 1078 } 1079 1080 /** 1081 * If the next token is a 32-bit signed integer, consume it and return its value. Otherwise, 1082 * throw a {@link ParseException}. 1083 */ consumeInt32()1084 public int consumeInt32() throws ParseException { 1085 try { 1086 final int result = parseInt32(currentToken); 1087 nextToken(); 1088 return result; 1089 } catch (NumberFormatException e) { 1090 throw integerParseException(e); 1091 } 1092 } 1093 1094 /** 1095 * If the next token is a 32-bit unsigned integer, consume it and return its value. Otherwise, 1096 * throw a {@link ParseException}. 
1097 */ consumeUInt32()1098 public int consumeUInt32() throws ParseException { 1099 try { 1100 final int result = parseUInt32(currentToken); 1101 nextToken(); 1102 return result; 1103 } catch (NumberFormatException e) { 1104 throw integerParseException(e); 1105 } 1106 } 1107 1108 /** 1109 * If the next token is a 64-bit signed integer, consume it and return its value. Otherwise, 1110 * throw a {@link ParseException}. 1111 */ consumeInt64()1112 public long consumeInt64() throws ParseException { 1113 try { 1114 final long result = parseInt64(currentToken); 1115 nextToken(); 1116 return result; 1117 } catch (NumberFormatException e) { 1118 throw integerParseException(e); 1119 } 1120 } 1121 1122 /** 1123 * If the next token is a 64-bit signed integer, consume it and return {@code true}. Otherwise, 1124 * return {@code false} without doing anything. 1125 */ tryConsumeInt64()1126 public boolean tryConsumeInt64() { 1127 try { 1128 consumeInt64(); 1129 return true; 1130 } catch (ParseException e) { 1131 return false; 1132 } 1133 } 1134 1135 /** 1136 * If the next token is a 64-bit unsigned integer, consume it and return its value. Otherwise, 1137 * throw a {@link ParseException}. 1138 */ consumeUInt64()1139 public long consumeUInt64() throws ParseException { 1140 try { 1141 final long result = parseUInt64(currentToken); 1142 nextToken(); 1143 return result; 1144 } catch (NumberFormatException e) { 1145 throw integerParseException(e); 1146 } 1147 } 1148 1149 /** 1150 * If the next token is a 64-bit unsigned integer, consume it and return {@code true}. 1151 * Otherwise, return {@code false} without doing anything. 1152 */ tryConsumeUInt64()1153 public boolean tryConsumeUInt64() { 1154 try { 1155 consumeUInt64(); 1156 return true; 1157 } catch (ParseException e) { 1158 return false; 1159 } 1160 } 1161 1162 /** 1163 * If the next token is a double, consume it and return its value. Otherwise, throw a {@link 1164 * ParseException}. 
1165 */ consumeDouble()1166 public double consumeDouble() throws ParseException { 1167 // We need to parse infinity and nan separately because 1168 // Double.parseDouble() does not accept "inf", "infinity", or "nan". 1169 if (DOUBLE_INFINITY.matcher(currentToken).matches()) { 1170 final boolean negative = currentToken.startsWith("-"); 1171 nextToken(); 1172 return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; 1173 } 1174 if (currentToken.equalsIgnoreCase("nan")) { 1175 nextToken(); 1176 return Double.NaN; 1177 } 1178 try { 1179 final double result = Double.parseDouble(currentToken); 1180 nextToken(); 1181 return result; 1182 } catch (NumberFormatException e) { 1183 throw floatParseException(e); 1184 } 1185 } 1186 1187 /** 1188 * If the next token is a double, consume it and return {@code true}. Otherwise, return {@code 1189 * false} without doing anything. 1190 */ tryConsumeDouble()1191 public boolean tryConsumeDouble() { 1192 try { 1193 consumeDouble(); 1194 return true; 1195 } catch (ParseException e) { 1196 return false; 1197 } 1198 } 1199 1200 /** 1201 * If the next token is a float, consume it and return its value. Otherwise, throw a {@link 1202 * ParseException}. 1203 */ consumeFloat()1204 public float consumeFloat() throws ParseException { 1205 // We need to parse infinity and nan separately because 1206 // Float.parseFloat() does not accept "inf", "infinity", or "nan". 1207 if (FLOAT_INFINITY.matcher(currentToken).matches()) { 1208 final boolean negative = currentToken.startsWith("-"); 1209 nextToken(); 1210 return negative ? 
Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY; 1211 } 1212 if (FLOAT_NAN.matcher(currentToken).matches()) { 1213 nextToken(); 1214 return Float.NaN; 1215 } 1216 try { 1217 final float result = Float.parseFloat(currentToken); 1218 nextToken(); 1219 return result; 1220 } catch (NumberFormatException e) { 1221 throw floatParseException(e); 1222 } 1223 } 1224 1225 /** 1226 * If the next token is a float, consume it and return {@code true}. Otherwise, return {@code 1227 * false} without doing anything. 1228 */ tryConsumeFloat()1229 public boolean tryConsumeFloat() { 1230 try { 1231 consumeFloat(); 1232 return true; 1233 } catch (ParseException e) { 1234 return false; 1235 } 1236 } 1237 1238 /** 1239 * If the next token is a boolean, consume it and return its value. Otherwise, throw a {@link 1240 * ParseException}. 1241 */ consumeBoolean()1242 public boolean consumeBoolean() throws ParseException { 1243 if (currentToken.equals("true") 1244 || currentToken.equals("True") 1245 || currentToken.equals("t") 1246 || currentToken.equals("1")) { 1247 nextToken(); 1248 return true; 1249 } else if (currentToken.equals("false") 1250 || currentToken.equals("False") 1251 || currentToken.equals("f") 1252 || currentToken.equals("0")) { 1253 nextToken(); 1254 return false; 1255 } else { 1256 throw parseException("Expected \"true\" or \"false\". Found \"" + currentToken + "\"."); 1257 } 1258 } 1259 1260 /** 1261 * If the next token is a string, consume it and return its (unescaped) value. Otherwise, throw 1262 * a {@link ParseException}. 1263 */ consumeString()1264 public String consumeString() throws ParseException { 1265 return consumeByteString().toStringUtf8(); 1266 } 1267 1268 /** If the next token is a string, consume it and return true. Otherwise, return false. 
*/ tryConsumeString()1269 public boolean tryConsumeString() { 1270 try { 1271 consumeString(); 1272 return true; 1273 } catch (ParseException e) { 1274 return false; 1275 } 1276 } 1277 1278 /** 1279 * If the next token is a string, consume it, unescape it as a {@link ByteString}, and return 1280 * it. Otherwise, throw a {@link ParseException}. 1281 */ consumeByteString()1282 public ByteString consumeByteString() throws ParseException { 1283 List<ByteString> list = new ArrayList<ByteString>(); 1284 consumeByteString(list); 1285 while (currentToken.startsWith("'") || currentToken.startsWith("\"")) { 1286 consumeByteString(list); 1287 } 1288 return ByteString.copyFrom(list); 1289 } 1290 1291 /** 1292 * Like {@link #consumeByteString()} but adds each token of the string to the given list. String 1293 * literals (whether bytes or text) may come in multiple adjacent tokens which are automatically 1294 * concatenated, like in C or Python. 1295 */ consumeByteString(List<ByteString> list)1296 private void consumeByteString(List<ByteString> list) throws ParseException { 1297 final char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0'; 1298 if (quote != '\"' && quote != '\'') { 1299 throw parseException("Expected string."); 1300 } 1301 1302 if (currentToken.length() < 2 || currentToken.charAt(currentToken.length() - 1) != quote) { 1303 throw parseException("String missing ending quote."); 1304 } 1305 1306 try { 1307 final String escaped = currentToken.substring(1, currentToken.length() - 1); 1308 final ByteString result = unescapeBytes(escaped); 1309 nextToken(); 1310 list.add(result); 1311 } catch (InvalidEscapeSequenceException e) { 1312 throw parseException(e.getMessage()); 1313 } 1314 } 1315 1316 /** 1317 * Returns a {@link ParseException} with the current line and column numbers in the description, 1318 * suitable for throwing. 
1319 */ parseException(final String description)1320 public ParseException parseException(final String description) { 1321 // Note: People generally prefer one-based line and column numbers. 1322 return new ParseException(line + 1, column + 1, description); 1323 } 1324 1325 /** 1326 * Returns a {@link ParseException} with the line and column numbers of the previous token in 1327 * the description, suitable for throwing. 1328 */ parseExceptionPreviousToken(final String description)1329 public ParseException parseExceptionPreviousToken(final String description) { 1330 // Note: People generally prefer one-based line and column numbers. 1331 return new ParseException(previousLine + 1, previousColumn + 1, description); 1332 } 1333 1334 /** 1335 * Constructs an appropriate {@link ParseException} for the given {@code NumberFormatException} 1336 * when trying to parse an integer. 1337 */ integerParseException(final NumberFormatException e)1338 private ParseException integerParseException(final NumberFormatException e) { 1339 return parseException("Couldn't parse integer: " + e.getMessage()); 1340 } 1341 1342 /** 1343 * Constructs an appropriate {@link ParseException} for the given {@code NumberFormatException} 1344 * when trying to parse a float or double. 1345 */ floatParseException(final NumberFormatException e)1346 private ParseException floatParseException(final NumberFormatException e) { 1347 return parseException("Couldn't parse number: " + e.getMessage()); 1348 } 1349 1350 /** 1351 * Returns a {@link UnknownFieldParseException} with the line and column numbers of the previous 1352 * token in the description, and the unknown field name, suitable for throwing. 1353 */ unknownFieldParseExceptionPreviousToken( final String unknownField, final String description)1354 public UnknownFieldParseException unknownFieldParseExceptionPreviousToken( 1355 final String unknownField, final String description) { 1356 // Note: People generally prefer one-based line and column numbers. 
1357 return new UnknownFieldParseException( 1358 previousLine + 1, previousColumn + 1, unknownField, description); 1359 } 1360 } 1361 1362 /** Thrown when parsing an invalid text format message. */ 1363 public static class ParseException extends IOException { 1364 private static final long serialVersionUID = 3196188060225107702L; 1365 1366 private final int line; 1367 private final int column; 1368 1369 /** Create a new instance, with -1 as the line and column numbers. */ ParseException(final String message)1370 public ParseException(final String message) { 1371 this(-1, -1, message); 1372 } 1373 1374 /** 1375 * Create a new instance 1376 * 1377 * @param line the line number where the parse error occurred, using 1-offset. 1378 * @param column the column number where the parser error occurred, using 1-offset. 1379 */ ParseException(final int line, final int column, final String message)1380 public ParseException(final int line, final int column, final String message) { 1381 super(Integer.toString(line) + ":" + column + ": " + message); 1382 this.line = line; 1383 this.column = column; 1384 } 1385 1386 /** 1387 * Return the line where the parse exception occurred, or -1 when none is provided. The value is 1388 * specified as 1-offset, so the first line is line 1. 1389 */ getLine()1390 public int getLine() { 1391 return line; 1392 } 1393 1394 /** 1395 * Return the column where the parse exception occurred, or -1 when none is provided. The value 1396 * is specified as 1-offset, so the first line is line 1. 1397 */ getColumn()1398 public int getColumn() { 1399 return column; 1400 } 1401 } 1402 1403 /** Thrown when encountering an unknown field while parsing a text format message. */ 1404 public static class UnknownFieldParseException extends ParseException { 1405 private final String unknownField; 1406 1407 /** 1408 * Create a new instance, with -1 as the line and column numbers, and an empty unknown field 1409 * name. 
1410 */ UnknownFieldParseException(final String message)1411 public UnknownFieldParseException(final String message) { 1412 this(-1, -1, "", message); 1413 } 1414 1415 /** 1416 * Create a new instance 1417 * 1418 * @param line the line number where the parse error occurred, using 1-offset. 1419 * @param column the column number where the parser error occurred, using 1-offset. 1420 * @param unknownField the name of the unknown field found while parsing. 1421 */ UnknownFieldParseException( final int line, final int column, final String unknownField, final String message)1422 public UnknownFieldParseException( 1423 final int line, final int column, final String unknownField, final String message) { 1424 super(line, column, message); 1425 this.unknownField = unknownField; 1426 } 1427 1428 /** 1429 * Return the name of the unknown field encountered while parsing the protocol buffer string. 1430 */ getUnknownField()1431 public String getUnknownField() { 1432 return unknownField; 1433 } 1434 } 1435 1436 private static final Parser PARSER = Parser.newBuilder().build(); 1437 1438 /** 1439 * Return a {@link Parser} instance which can parse text-format messages. The returned instance is 1440 * thread-safe. 1441 */ getParser()1442 public static Parser getParser() { 1443 return PARSER; 1444 } 1445 1446 /** Parse a text-format message from {@code input} and merge the contents into {@code builder}. */ merge(final Readable input, final Message.Builder builder)1447 public static void merge(final Readable input, final Message.Builder builder) throws IOException { 1448 PARSER.merge(input, builder); 1449 } 1450 1451 /** Parse a text-format message from {@code input} and merge the contents into {@code builder}. 
*/ merge(final CharSequence input, final Message.Builder builder)1452 public static void merge(final CharSequence input, final Message.Builder builder) 1453 throws ParseException { 1454 PARSER.merge(input, builder); 1455 } 1456 1457 /** 1458 * Parse a text-format message from {@code input}. 1459 * 1460 * @return the parsed message, guaranteed initialized 1461 */ parse(final CharSequence input, final Class<T> protoClass)1462 public static <T extends Message> T parse(final CharSequence input, final Class<T> protoClass) 1463 throws ParseException { 1464 Message.Builder builder = Internal.getDefaultInstance(protoClass).newBuilderForType(); 1465 merge(input, builder); 1466 @SuppressWarnings("unchecked") 1467 T output = (T) builder.build(); 1468 return output; 1469 } 1470 1471 /** 1472 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1473 * Extensions will be recognized if they are registered in {@code extensionRegistry}. 1474 */ merge( final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1475 public static void merge( 1476 final Readable input, 1477 final ExtensionRegistry extensionRegistry, 1478 final Message.Builder builder) 1479 throws IOException { 1480 PARSER.merge(input, extensionRegistry, builder); 1481 } 1482 1483 1484 /** 1485 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1486 * Extensions will be recognized if they are registered in {@code extensionRegistry}. 1487 */ merge( final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1488 public static void merge( 1489 final CharSequence input, 1490 final ExtensionRegistry extensionRegistry, 1491 final Message.Builder builder) 1492 throws ParseException { 1493 PARSER.merge(input, extensionRegistry, builder); 1494 } 1495 1496 /** 1497 * Parse a text-format message from {@code input}. 
Extensions will be recognized if they are 1498 * registered in {@code extensionRegistry}. 1499 * 1500 * @return the parsed message, guaranteed initialized 1501 */ parse( final CharSequence input, final ExtensionRegistry extensionRegistry, final Class<T> protoClass)1502 public static <T extends Message> T parse( 1503 final CharSequence input, 1504 final ExtensionRegistry extensionRegistry, 1505 final Class<T> protoClass) 1506 throws ParseException { 1507 Message.Builder builder = Internal.getDefaultInstance(protoClass).newBuilderForType(); 1508 merge(input, extensionRegistry, builder); 1509 @SuppressWarnings("unchecked") 1510 T output = (T) builder.build(); 1511 return output; 1512 } 1513 1514 1515 /** 1516 * Parser for text-format proto2 instances. This class is thread-safe. The implementation largely 1517 * follows google/protobuf/text_format.cc. 1518 * 1519 * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or {@link Builder} to 1520 * control the parser behavior. 1521 */ 1522 public static class Parser { 1523 /** 1524 * Determines if repeated values for non-repeated fields and oneofs are permitted. For example, 1525 * given required/optional field "foo" and a oneof containing "baz" and "qux": 1526 * 1527 * <ul> 1528 * <li>"foo: 1 foo: 2" 1529 * <li>"baz: 1 qux: 2" 1530 * <li>merging "foo: 2" into a proto in which foo is already set, or 1531 * <li>merging "qux: 2" into a proto in which baz is already set. 1532 * </ul> 1533 */ 1534 public enum SingularOverwritePolicy { 1535 /** 1536 * Later values are merged with earlier values. For primitive fields or conflicting oneofs, 1537 * the last value is retained. 1538 */ 1539 ALLOW_SINGULAR_OVERWRITES, 1540 /** An error is issued. 
private final TypeRegistry typeRegistry;
private final boolean allowUnknownFields;
private final boolean allowUnknownEnumValues;
private final boolean allowUnknownExtensions;
private final SingularOverwritePolicy singularOverwritePolicy;
private TextFormatParseInfoTree.Builder parseInfoTreeBuilder;

/** Stores the configuration collected by {@link Builder}; use {@link #newBuilder()} instead. */
private Parser(
    TypeRegistry typeRegistry,
    boolean allowUnknownFields,
    boolean allowUnknownEnumValues,
    boolean allowUnknownExtensions,
    SingularOverwritePolicy singularOverwritePolicy,
    TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
  this.typeRegistry = typeRegistry;
  this.singularOverwritePolicy = singularOverwritePolicy;
  this.parseInfoTreeBuilder = parseInfoTreeBuilder;
  this.allowUnknownFields = allowUnknownFields;
  this.allowUnknownEnumValues = allowUnknownEnumValues;
  this.allowUnknownExtensions = allowUnknownExtensions;
}

/** Returns a new instance of {@link Builder}. */
public static Builder newBuilder() {
  return new Builder();
}

/** Builder that can be used to obtain new instances of {@link Parser}. */
public static class Builder {
  private TypeRegistry typeRegistry = TypeRegistry.getEmptyTypeRegistry();
  private SingularOverwritePolicy singularOverwritePolicy =
      SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES;
  private TextFormatParseInfoTree.Builder parseInfoTreeBuilder = null;
  private boolean allowUnknownFields = false;
  private boolean allowUnknownEnumValues = false;
  private boolean allowUnknownExtensions = false;

  /**
   * Sets the TypeRegistry for resolving Any. If this is not set, TextFormat will not be able to
   * parse Any unless Any is written as bytes. A later call replaces the registry set by an
   * earlier one.
   */
  public Builder setTypeRegistry(TypeRegistry typeRegistry) {
    this.typeRegistry = typeRegistry;
    return this;
  }

  /**
   * Set whether this parser will allow unknown fields. By default, an exception is thrown if an
   * unknown field is encountered. If this is set, the parser will only log a warning. Allowing
   * unknown fields also allows unknown extensions.
   *
   * <p>Use of this parameter is discouraged because it may hide some errors (e.g. a spelling
   * error in a field name).
   */
  public Builder setAllowUnknownFields(boolean allowUnknownFields) {
    this.allowUnknownFields = allowUnknownFields;
    return this;
  }

  /**
   * Set whether this parser will allow unknown extensions. By default, an exception is thrown
   * if an unknown extension is encountered. If this is set to true, the parser will only log a
   * warning. Allowing unknown extensions does not allow ordinary unknown fields.
   */
  public Builder setAllowUnknownExtensions(boolean allowUnknownExtensions) {
    this.allowUnknownExtensions = allowUnknownExtensions;
    return this;
  }

  /** Sets parser behavior when a non-repeated field appears more than once. */
  public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) {
    this.singularOverwritePolicy = p;
    return this;
  }

  /** Sets the parse-info tree builder that is handed to the parser; {@code null} by default. */
  public Builder setParseInfoTreeBuilder(TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
    this.parseInfoTreeBuilder = parseInfoTreeBuilder;
    return this;
  }

  /** Creates a {@link Parser} from the settings made on this builder so far. */
  public Parser build() {
    return new Parser(
        typeRegistry,
        allowUnknownFields,
        allowUnknownEnumValues,
        allowUnknownExtensions,
        singularOverwritePolicy,
        parseInfoTreeBuilder);
  }
}
*/ setSingularOverwritePolicy(SingularOverwritePolicy p)1617 public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) { 1618 this.singularOverwritePolicy = p; 1619 return this; 1620 } 1621 setParseInfoTreeBuilder(TextFormatParseInfoTree.Builder parseInfoTreeBuilder)1622 public Builder setParseInfoTreeBuilder(TextFormatParseInfoTree.Builder parseInfoTreeBuilder) { 1623 this.parseInfoTreeBuilder = parseInfoTreeBuilder; 1624 return this; 1625 } 1626 build()1627 public Parser build() { 1628 return new Parser( 1629 typeRegistry, 1630 allowUnknownFields, 1631 allowUnknownEnumValues, 1632 allowUnknownExtensions, 1633 singularOverwritePolicy, 1634 parseInfoTreeBuilder); 1635 } 1636 } 1637 1638 /** 1639 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1640 */ merge(final Readable input, final Message.Builder builder)1641 public void merge(final Readable input, final Message.Builder builder) throws IOException { 1642 merge(input, ExtensionRegistry.getEmptyRegistry(), builder); 1643 } 1644 1645 /** 1646 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1647 */ merge(final CharSequence input, final Message.Builder builder)1648 public void merge(final CharSequence input, final Message.Builder builder) 1649 throws ParseException { 1650 merge(input, ExtensionRegistry.getEmptyRegistry(), builder); 1651 } 1652 1653 /** 1654 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1655 * Extensions will be recognized if they are registered in {@code extensionRegistry}. 1656 */ merge( final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1657 public void merge( 1658 final Readable input, 1659 final ExtensionRegistry extensionRegistry, 1660 final Message.Builder builder) 1661 throws IOException { 1662 // Read the entire input to a String then parse that. 
1663 1664 // If StreamTokenizer were not quite so crippled, or if there were a kind 1665 // of Reader that could read in chunks that match some particular regex, 1666 // or if we wanted to write a custom Reader to tokenize our stream, then 1667 // we would not have to read to one big String. Alas, none of these is 1668 // the case. Oh well. 1669 1670 merge(toStringBuilder(input), extensionRegistry, builder); 1671 } 1672 1673 1674 private static final int BUFFER_SIZE = 4096; 1675 1676 // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer) 1677 // overhead is worthwhile toStringBuilder(final Readable input)1678 private static StringBuilder toStringBuilder(final Readable input) throws IOException { 1679 final StringBuilder text = new StringBuilder(); 1680 final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE); 1681 while (true) { 1682 final int n = input.read(buffer); 1683 if (n == -1) { 1684 break; 1685 } 1686 buffer.flip(); 1687 text.append(buffer, 0, n); 1688 } 1689 return text; 1690 } 1691 1692 static final class UnknownField { 1693 static enum Type { 1694 FIELD, EXTENSION; 1695 } 1696 1697 final String message; 1698 final Type type; 1699 UnknownField(String message, Type type)1700 UnknownField(String message, Type type) { 1701 this.message = message; 1702 this.type = type; 1703 } 1704 } 1705 1706 // Check both unknown fields and unknown extensions and log warning messages 1707 // or throw exceptions according to the flag. 
checkUnknownFields(final List<UnknownField> unknownFields)1708 private void checkUnknownFields(final List<UnknownField> unknownFields) throws ParseException { 1709 if (unknownFields.isEmpty()) { 1710 return; 1711 } 1712 1713 StringBuilder msg = new StringBuilder("Input contains unknown fields and/or extensions:"); 1714 for (UnknownField field : unknownFields) { 1715 msg.append('\n').append(field.message); 1716 } 1717 1718 if (allowUnknownFields) { 1719 logger.warning(msg.toString()); 1720 return; 1721 } 1722 1723 int firstErrorIndex = 0; 1724 if (allowUnknownExtensions) { 1725 boolean allUnknownExtensions = true; 1726 for (UnknownField field : unknownFields) { 1727 if (field.type == UnknownField.Type.FIELD) { 1728 allUnknownExtensions = false; 1729 break; 1730 } 1731 ++firstErrorIndex; 1732 } 1733 if (allUnknownExtensions) { 1734 logger.warning(msg.toString()); 1735 return; 1736 } 1737 } 1738 1739 String[] lineColumn = unknownFields.get(firstErrorIndex).message.split(":"); 1740 throw new ParseException( 1741 Integer.parseInt(lineColumn[0]), Integer.parseInt(lineColumn[1]), msg.toString()); 1742 } 1743 1744 /** 1745 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1746 * Extensions will be recognized if they are registered in {@code extensionRegistry}. 
1747 */ merge( final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1748 public void merge( 1749 final CharSequence input, 1750 final ExtensionRegistry extensionRegistry, 1751 final Message.Builder builder) 1752 throws ParseException { 1753 final Tokenizer tokenizer = new Tokenizer(input); 1754 MessageReflection.BuilderAdapter target = new MessageReflection.BuilderAdapter(builder); 1755 1756 List<UnknownField> unknownFields = new ArrayList<UnknownField>(); 1757 1758 while (!tokenizer.atEnd()) { 1759 mergeField(tokenizer, extensionRegistry, target, unknownFields); 1760 } 1761 1762 checkUnknownFields(unknownFields); 1763 } 1764 1765 1766 /** Parse a single field from {@code tokenizer} and merge it into {@code builder}. */ mergeField( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, List<UnknownField> unknownFields)1767 private void mergeField( 1768 final Tokenizer tokenizer, 1769 final ExtensionRegistry extensionRegistry, 1770 final MessageReflection.MergeTarget target, 1771 List<UnknownField> unknownFields) 1772 throws ParseException { 1773 mergeField( 1774 tokenizer, 1775 extensionRegistry, 1776 target, 1777 parseInfoTreeBuilder, 1778 unknownFields); 1779 } 1780 1781 /** Parse a single field from {@code tokenizer} and merge it into {@code target}. 
*/ mergeField( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1782 private void mergeField( 1783 final Tokenizer tokenizer, 1784 final ExtensionRegistry extensionRegistry, 1785 final MessageReflection.MergeTarget target, 1786 TextFormatParseInfoTree.Builder parseTreeBuilder, 1787 List<UnknownField> unknownFields) 1788 throws ParseException { 1789 FieldDescriptor field = null; 1790 int startLine = tokenizer.getLine(); 1791 int startColumn = tokenizer.getColumn(); 1792 final Descriptor type = target.getDescriptorForType(); 1793 ExtensionRegistry.ExtensionInfo extension = null; 1794 1795 if ("google.protobuf.Any".equals(type.getFullName()) && tokenizer.tryConsume("[")) { 1796 mergeAnyFieldValue(tokenizer, extensionRegistry, target, parseTreeBuilder, unknownFields, 1797 type); 1798 return; 1799 } 1800 1801 if (tokenizer.tryConsume("[")) { 1802 // An extension. 
1803 final StringBuilder name = new StringBuilder(tokenizer.consumeIdentifier()); 1804 while (tokenizer.tryConsume(".")) { 1805 name.append('.'); 1806 name.append(tokenizer.consumeIdentifier()); 1807 } 1808 1809 extension = target.findExtensionByName(extensionRegistry, name.toString()); 1810 1811 if (extension == null) { 1812 String message = 1813 (tokenizer.getPreviousLine() + 1) 1814 + ":" 1815 + (tokenizer.getPreviousColumn() + 1) 1816 + ":\t" 1817 + type.getFullName() 1818 + ".[" 1819 + name 1820 + "]"; 1821 unknownFields.add(new UnknownField(message, UnknownField.Type.EXTENSION)); 1822 } else { 1823 if (extension.descriptor.getContainingType() != type) { 1824 throw tokenizer.parseExceptionPreviousToken( 1825 "Extension \"" 1826 + name 1827 + "\" does not extend message type \"" 1828 + type.getFullName() 1829 + "\"."); 1830 } 1831 field = extension.descriptor; 1832 } 1833 1834 tokenizer.consume("]"); 1835 } else { 1836 final String name = tokenizer.consumeIdentifier(); 1837 field = type.findFieldByName(name); 1838 1839 // Group names are expected to be capitalized as they appear in the 1840 // .proto file, which actually matches their type names, not their field 1841 // names. 1842 if (field == null) { 1843 // Explicitly specify US locale so that this code does not break when 1844 // executing in Turkey. 1845 final String lowerName = name.toLowerCase(Locale.US); 1846 field = type.findFieldByName(lowerName); 1847 // If the case-insensitive match worked but the field is NOT a group, 1848 if (field != null && field.getType() != FieldDescriptor.Type.GROUP) { 1849 field = null; 1850 } 1851 } 1852 // Again, special-case group names as described above. 
1853 if (field != null 1854 && field.getType() == FieldDescriptor.Type.GROUP 1855 && !field.getMessageType().getName().equals(name)) { 1856 field = null; 1857 } 1858 1859 if (field == null) { 1860 String message = (tokenizer.getPreviousLine() + 1) 1861 + ":" 1862 + (tokenizer.getPreviousColumn() + 1) 1863 + ":\t" 1864 + type.getFullName() 1865 + "." 1866 + name; 1867 unknownFields.add(new UnknownField(message, UnknownField.Type.FIELD)); 1868 } 1869 } 1870 1871 // Skips unknown fields. 1872 if (field == null) { 1873 // Try to guess the type of this field. 1874 // If this field is not a message, there should be a ":" between the 1875 // field name and the field value and also the field value should not 1876 // start with "{" or "<" which indicates the beginning of a message body. 1877 // If there is no ":" or there is a "{" or "<" after ":", this field has 1878 // to be a message or the input is ill-formed. 1879 if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) { 1880 skipFieldValue(tokenizer); 1881 } else { 1882 skipFieldMessage(tokenizer); 1883 } 1884 return; 1885 } 1886 1887 // Handle potential ':'. 
1888 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 1889 tokenizer.tryConsume(":"); // optional 1890 if (parseTreeBuilder != null) { 1891 TextFormatParseInfoTree.Builder childParseTreeBuilder = 1892 parseTreeBuilder.getBuilderForSubMessageField(field); 1893 consumeFieldValues( 1894 tokenizer, 1895 extensionRegistry, 1896 target, 1897 field, 1898 extension, 1899 childParseTreeBuilder, 1900 unknownFields); 1901 } else { 1902 consumeFieldValues( 1903 tokenizer, 1904 extensionRegistry, 1905 target, 1906 field, 1907 extension, 1908 parseTreeBuilder, 1909 unknownFields); 1910 } 1911 } else { 1912 tokenizer.consume(":"); // required 1913 consumeFieldValues( 1914 tokenizer, 1915 extensionRegistry, 1916 target, 1917 field, 1918 extension, 1919 parseTreeBuilder, 1920 unknownFields); 1921 } 1922 1923 if (parseTreeBuilder != null) { 1924 parseTreeBuilder.setLocation(field, TextFormatParseLocation.create(startLine, startColumn)); 1925 } 1926 1927 // For historical reasons, fields may optionally be separated by commas or 1928 // semicolons. 1929 if (!tokenizer.tryConsume(";")) { 1930 tokenizer.tryConsume(","); 1931 } 1932 } 1933 1934 /** 1935 * Parse a one or more field values from {@code tokenizer} and merge it into {@code builder}. 
1936 */ consumeFieldValues( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1937 private void consumeFieldValues( 1938 final Tokenizer tokenizer, 1939 final ExtensionRegistry extensionRegistry, 1940 final MessageReflection.MergeTarget target, 1941 final FieldDescriptor field, 1942 final ExtensionRegistry.ExtensionInfo extension, 1943 final TextFormatParseInfoTree.Builder parseTreeBuilder, 1944 List<UnknownField> unknownFields) 1945 throws ParseException { 1946 // Support specifying repeated field values as a comma-separated list. 1947 // Ex."foo: [1, 2, 3]" 1948 if (field.isRepeated() && tokenizer.tryConsume("[")) { 1949 if (!tokenizer.tryConsume("]")) { // Allow "foo: []" to be treated as empty. 1950 while (true) { 1951 consumeFieldValue( 1952 tokenizer, 1953 extensionRegistry, 1954 target, 1955 field, 1956 extension, 1957 parseTreeBuilder, 1958 unknownFields); 1959 if (tokenizer.tryConsume("]")) { 1960 // End of list. 1961 break; 1962 } 1963 tokenizer.consume(","); 1964 } 1965 } 1966 } else { 1967 consumeFieldValue( 1968 tokenizer, 1969 extensionRegistry, 1970 target, 1971 field, 1972 extension, 1973 parseTreeBuilder, 1974 unknownFields); 1975 } 1976 } 1977 1978 /** Parse a single field value from {@code tokenizer} and merge it into {@code builder}. 
*/ consumeFieldValue( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1979 private void consumeFieldValue( 1980 final Tokenizer tokenizer, 1981 final ExtensionRegistry extensionRegistry, 1982 final MessageReflection.MergeTarget target, 1983 final FieldDescriptor field, 1984 final ExtensionRegistry.ExtensionInfo extension, 1985 final TextFormatParseInfoTree.Builder parseTreeBuilder, 1986 List<UnknownField> unknownFields) 1987 throws ParseException { 1988 if (singularOverwritePolicy == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES 1989 && !field.isRepeated()) { 1990 if (target.hasField(field)) { 1991 throw tokenizer.parseExceptionPreviousToken( 1992 "Non-repeated field \"" + field.getFullName() + "\" cannot be overwritten."); 1993 } else if (field.getContainingOneof() != null 1994 && target.hasOneof(field.getContainingOneof())) { 1995 Descriptors.OneofDescriptor oneof = field.getContainingOneof(); 1996 throw tokenizer.parseExceptionPreviousToken( 1997 "Field \"" 1998 + field.getFullName() 1999 + "\" is specified along with field \"" 2000 + target.getOneofFieldDescriptor(oneof).getFullName() 2001 + "\", another member of oneof \"" 2002 + oneof.getName() 2003 + "\"."); 2004 } 2005 } 2006 2007 Object value = null; 2008 2009 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 2010 final String endToken; 2011 if (tokenizer.tryConsume("<")) { 2012 endToken = ">"; 2013 } else { 2014 tokenizer.consume("{"); 2015 endToken = "}"; 2016 } 2017 2018 // Try to parse human readable format of Any in the form: [type_url]: { ... 
} 2019 if (field.getMessageType().getFullName().equals("google.protobuf.Any") 2020 && tokenizer.tryConsume("[")) { 2021 // Use Proto reflection here since depending on Any would intoduce a cyclic dependency 2022 // (java_proto_library for any_java_proto depends on the protobuf_impl). 2023 Message anyBuilder = DynamicMessage.getDefaultInstance(field.getMessageType()); 2024 MessageReflection.MergeTarget anyField = target.newMergeTargetForField(field, anyBuilder); 2025 mergeAnyFieldValue(tokenizer, extensionRegistry, anyField, parseTreeBuilder, 2026 unknownFields, field.getMessageType()); 2027 value = anyField.finish(); 2028 tokenizer.consume(endToken); 2029 } else { 2030 Message defaultInstance = (extension == null) ? null : extension.defaultInstance; 2031 MessageReflection.MergeTarget subField = 2032 target.newMergeTargetForField(field, defaultInstance); 2033 2034 while (!tokenizer.tryConsume(endToken)) { 2035 if (tokenizer.atEnd()) { 2036 throw tokenizer.parseException("Expected \"" + endToken + "\"."); 2037 } 2038 mergeField(tokenizer, extensionRegistry, subField, parseTreeBuilder, unknownFields); 2039 } 2040 2041 value = subField.finish(); 2042 } 2043 2044 } else { 2045 switch (field.getType()) { 2046 case INT32: 2047 case SINT32: 2048 case SFIXED32: 2049 value = tokenizer.consumeInt32(); 2050 break; 2051 2052 case INT64: 2053 case SINT64: 2054 case SFIXED64: 2055 value = tokenizer.consumeInt64(); 2056 break; 2057 2058 case UINT32: 2059 case FIXED32: 2060 value = tokenizer.consumeUInt32(); 2061 break; 2062 2063 case UINT64: 2064 case FIXED64: 2065 value = tokenizer.consumeUInt64(); 2066 break; 2067 2068 case FLOAT: 2069 value = tokenizer.consumeFloat(); 2070 break; 2071 2072 case DOUBLE: 2073 value = tokenizer.consumeDouble(); 2074 break; 2075 2076 case BOOL: 2077 value = tokenizer.consumeBoolean(); 2078 break; 2079 2080 case STRING: 2081 value = tokenizer.consumeString(); 2082 break; 2083 2084 case BYTES: 2085 value = tokenizer.consumeByteString(); 2086 break; 
2087 2088 case ENUM: 2089 final EnumDescriptor enumType = field.getEnumType(); 2090 2091 if (tokenizer.lookingAtInteger()) { 2092 final int number = tokenizer.consumeInt32(); 2093 value = enumType.findValueByNumber(number); 2094 if (value == null) { 2095 String unknownValueMsg = 2096 "Enum type \"" 2097 + enumType.getFullName() 2098 + "\" has no value with number " 2099 + number 2100 + '.'; 2101 if (allowUnknownEnumValues) { 2102 logger.warning(unknownValueMsg); 2103 return; 2104 } else { 2105 throw tokenizer.parseExceptionPreviousToken( 2106 "Enum type \"" 2107 + enumType.getFullName() 2108 + "\" has no value with number " 2109 + number 2110 + '.'); 2111 } 2112 } 2113 } else { 2114 final String id = tokenizer.consumeIdentifier(); 2115 value = enumType.findValueByName(id); 2116 if (value == null) { 2117 String unknownValueMsg = 2118 "Enum type \"" 2119 + enumType.getFullName() 2120 + "\" has no value named \"" 2121 + id 2122 + "\"."; 2123 if (allowUnknownEnumValues) { 2124 logger.warning(unknownValueMsg); 2125 return; 2126 } else { 2127 throw tokenizer.parseExceptionPreviousToken(unknownValueMsg); 2128 } 2129 } 2130 } 2131 2132 break; 2133 2134 case MESSAGE: 2135 case GROUP: 2136 throw new RuntimeException("Can't get here."); 2137 } 2138 } 2139 2140 if (field.isRepeated()) { 2141 // TODO(b/29122459): If field.isMapField() and FORBID_SINGULAR_OVERWRITES mode, 2142 // check for duplicate map keys here. 
2143 target.addRepeatedField(field, value); 2144 } else { 2145 target.setField(field, value); 2146 } 2147 } 2148 mergeAnyFieldValue( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, MergeTarget target, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields, Descriptor anyDescriptor)2149 private void mergeAnyFieldValue( 2150 final Tokenizer tokenizer, 2151 final ExtensionRegistry extensionRegistry, 2152 MergeTarget target, 2153 final TextFormatParseInfoTree.Builder parseTreeBuilder, 2154 List<UnknownField> unknownFields, 2155 Descriptor anyDescriptor) 2156 throws ParseException { 2157 // Try to parse human readable format of Any in the form: [type_url]: { ... } 2158 StringBuilder typeUrlBuilder = new StringBuilder(); 2159 // Parse the type_url inside []. 2160 while (true) { 2161 typeUrlBuilder.append(tokenizer.consumeIdentifier()); 2162 if (tokenizer.tryConsume("]")) { 2163 break; 2164 } 2165 if (tokenizer.tryConsume("/")) { 2166 typeUrlBuilder.append("/"); 2167 } else if (tokenizer.tryConsume(".")) { 2168 typeUrlBuilder.append("."); 2169 } else { 2170 throw tokenizer.parseExceptionPreviousToken("Expected a valid type URL."); 2171 } 2172 } 2173 tokenizer.tryConsume(":"); 2174 final String anyEndToken; 2175 if (tokenizer.tryConsume("<")) { 2176 anyEndToken = ">"; 2177 } else { 2178 tokenizer.consume("{"); 2179 anyEndToken = "}"; 2180 } 2181 String typeUrl = typeUrlBuilder.toString(); 2182 Descriptor contentType = null; 2183 try { 2184 contentType = typeRegistry.getDescriptorForTypeUrl(typeUrl); 2185 } catch (InvalidProtocolBufferException e) { 2186 throw tokenizer.parseException("Invalid valid type URL. Found: " + typeUrl); 2187 } 2188 if (contentType == null) { 2189 throw tokenizer.parseException( 2190 "Unable to parse Any of type: " 2191 + typeUrl 2192 + ". 
Please make sure that the TypeRegistry contains the descriptors for the given" 2193 + " types."); 2194 } 2195 Message.Builder contentBuilder = 2196 DynamicMessage.getDefaultInstance(contentType).newBuilderForType(); 2197 MessageReflection.BuilderAdapter contentTarget = 2198 new MessageReflection.BuilderAdapter(contentBuilder); 2199 while (!tokenizer.tryConsume(anyEndToken)) { 2200 mergeField(tokenizer, extensionRegistry, contentTarget, parseTreeBuilder, unknownFields); 2201 } 2202 2203 target.setField(anyDescriptor.findFieldByName("type_url"), typeUrlBuilder.toString()); 2204 target.setField( 2205 anyDescriptor.findFieldByName("value"), contentBuilder.build().toByteString()); 2206 } 2207 2208 /** Skips the next field including the field's name and value. */ skipField(Tokenizer tokenizer)2209 private static void skipField(Tokenizer tokenizer) throws ParseException { 2210 if (tokenizer.tryConsume("[")) { 2211 // Extension name. 2212 do { 2213 tokenizer.consumeIdentifier(); 2214 } while (tokenizer.tryConsume(".")); 2215 tokenizer.consume("]"); 2216 } else { 2217 tokenizer.consumeIdentifier(); 2218 } 2219 2220 // Try to guess the type of this field. 2221 // If this field is not a message, there should be a ":" between the 2222 // field name and the field value and also the field value should not 2223 // start with "{" or "<" which indicates the beginning of a message body. 2224 // If there is no ":" or there is a "{" or "<" after ":", this field has 2225 // to be a message or the input is ill-formed. 2226 if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") && !tokenizer.lookingAt("{")) { 2227 skipFieldValue(tokenizer); 2228 } else { 2229 skipFieldMessage(tokenizer); 2230 } 2231 // For historical reasons, fields may optionally be separated by commas or 2232 // semicolons. 
2233 if (!tokenizer.tryConsume(";")) { 2234 tokenizer.tryConsume(","); 2235 } 2236 } 2237 2238 /** 2239 * Skips the whole body of a message including the beginning delimiter and the ending delimiter. 2240 */ skipFieldMessage(Tokenizer tokenizer)2241 private static void skipFieldMessage(Tokenizer tokenizer) throws ParseException { 2242 final String delimiter; 2243 if (tokenizer.tryConsume("<")) { 2244 delimiter = ">"; 2245 } else { 2246 tokenizer.consume("{"); 2247 delimiter = "}"; 2248 } 2249 while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) { 2250 skipField(tokenizer); 2251 } 2252 tokenizer.consume(delimiter); 2253 } 2254 2255 /** Skips a field value. */ skipFieldValue(Tokenizer tokenizer)2256 private static void skipFieldValue(Tokenizer tokenizer) throws ParseException { 2257 if (tokenizer.tryConsumeString()) { 2258 while (tokenizer.tryConsumeString()) {} 2259 return; 2260 } 2261 if (!tokenizer.tryConsumeIdentifier() // includes enum & boolean 2262 && !tokenizer.tryConsumeInt64() // includes int32 2263 && !tokenizer.tryConsumeUInt64() // includes uint32 2264 && !tokenizer.tryConsumeDouble() 2265 && !tokenizer.tryConsumeFloat()) { 2266 throw tokenizer.parseException("Invalid field value: " + tokenizer.currentToken); 2267 } 2268 } 2269 } 2270 2271 // ================================================================= 2272 // Utility functions 2273 // 2274 // Some of these methods are package-private because Descriptors.java uses 2275 // them. 2276 2277 /** 2278 * Escapes bytes in the format used in protocol buffer text format, which is the same as the 2279 * format used for C string literals. All bytes that are not printable 7-bit ASCII characters are 2280 * escaped, as well as backslash, single-quote, and double-quote characters. Characters for which 2281 * no defined short-hand escape sequence is defined will be escaped using 3-digit octal sequences. 
2282 */ escapeBytes(ByteString input)2283 public static String escapeBytes(ByteString input) { 2284 return TextFormatEscaper.escapeBytes(input); 2285 } 2286 2287 /** Like {@link #escapeBytes(ByteString)}, but used for byte array. */ escapeBytes(byte[] input)2288 public static String escapeBytes(byte[] input) { 2289 return TextFormatEscaper.escapeBytes(input); 2290 } 2291 2292 /** 2293 * Un-escape a byte sequence as escaped using {@link #escapeBytes(ByteString)}. Two-digit hex 2294 * escapes (starting with "\x") are also recognized. 2295 */ unescapeBytes(final CharSequence charString)2296 public static ByteString unescapeBytes(final CharSequence charString) 2297 throws InvalidEscapeSequenceException { 2298 // First convert the Java character sequence to UTF-8 bytes. 2299 ByteString input = ByteString.copyFromUtf8(charString.toString()); 2300 // Then unescape certain byte sequences introduced by ASCII '\\'. The valid 2301 // escapes can all be expressed with ASCII characters, so it is safe to 2302 // operate on bytes here. 2303 // 2304 // Unescaping the input byte array will result in a byte sequence that's no 2305 // longer than the input. That's because each escape sequence is between 2306 // two and four bytes long and stands for a single byte. 2307 final byte[] result = new byte[input.size()]; 2308 int pos = 0; 2309 for (int i = 0; i < input.size(); i++) { 2310 byte c = input.byteAt(i); 2311 if (c == '\\') { 2312 if (i + 1 < input.size()) { 2313 ++i; 2314 c = input.byteAt(i); 2315 if (isOctal(c)) { 2316 // Octal escape. 2317 int code = digitValue(c); 2318 if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) { 2319 ++i; 2320 code = code * 8 + digitValue(input.byteAt(i)); 2321 } 2322 if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) { 2323 ++i; 2324 code = code * 8 + digitValue(input.byteAt(i)); 2325 } 2326 // TODO: Check that 0 <= code && code <= 0xFF. 
2327 result[pos++] = (byte) code; 2328 } else { 2329 switch (c) { 2330 case 'a': 2331 result[pos++] = 0x07; 2332 break; 2333 case 'b': 2334 result[pos++] = '\b'; 2335 break; 2336 case 'f': 2337 result[pos++] = '\f'; 2338 break; 2339 case 'n': 2340 result[pos++] = '\n'; 2341 break; 2342 case 'r': 2343 result[pos++] = '\r'; 2344 break; 2345 case 't': 2346 result[pos++] = '\t'; 2347 break; 2348 case 'v': 2349 result[pos++] = 0x0b; 2350 break; 2351 case '\\': 2352 result[pos++] = '\\'; 2353 break; 2354 case '\'': 2355 result[pos++] = '\''; 2356 break; 2357 case '"': 2358 result[pos++] = '\"'; 2359 break; 2360 2361 case 'x': 2362 // hex escape 2363 int code = 0; 2364 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) { 2365 ++i; 2366 code = digitValue(input.byteAt(i)); 2367 } else { 2368 throw new InvalidEscapeSequenceException( 2369 "Invalid escape sequence: '\\x' with no digits"); 2370 } 2371 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) { 2372 ++i; 2373 code = code * 16 + digitValue(input.byteAt(i)); 2374 } 2375 result[pos++] = (byte) code; 2376 break; 2377 2378 default: 2379 throw new InvalidEscapeSequenceException( 2380 "Invalid escape sequence: '\\" + (char) c + '\''); 2381 } 2382 } 2383 } else { 2384 throw new InvalidEscapeSequenceException( 2385 "Invalid escape sequence: '\\' at end of string."); 2386 } 2387 } else { 2388 result[pos++] = c; 2389 } 2390 } 2391 2392 return result.length == pos 2393 ? ByteString.wrap(result) // This reference has not been out of our control. 2394 : ByteString.copyFrom(result, 0, pos); 2395 } 2396 2397 /** 2398 * Thrown by {@link TextFormat#unescapeBytes} and {@link TextFormat#unescapeText} when an invalid 2399 * escape sequence is seen. 
2400 */ 2401 public static class InvalidEscapeSequenceException extends IOException { 2402 private static final long serialVersionUID = -8164033650142593304L; 2403 InvalidEscapeSequenceException(final String description)2404 InvalidEscapeSequenceException(final String description) { 2405 super(description); 2406 } 2407 } 2408 2409 /** 2410 * Like {@link #escapeBytes(ByteString)}, but escapes a text string. Non-ASCII characters are 2411 * first encoded as UTF-8, then each byte is escaped individually as a 3-digit octal escape. Yes, 2412 * it's weird. 2413 */ escapeText(final String input)2414 static String escapeText(final String input) { 2415 return escapeBytes(ByteString.copyFromUtf8(input)); 2416 } 2417 2418 /** Escape double quotes and backslashes in a String for emittingUnicode output of a message. */ escapeDoubleQuotesAndBackslashes(final String input)2419 public static String escapeDoubleQuotesAndBackslashes(final String input) { 2420 return TextFormatEscaper.escapeDoubleQuotesAndBackslashes(input); 2421 } 2422 2423 /** 2424 * Un-escape a text string as escaped using {@link #escapeText(String)}. Two-digit hex escapes 2425 * (starting with "\x") are also recognized. 2426 */ unescapeText(final String input)2427 static String unescapeText(final String input) throws InvalidEscapeSequenceException { 2428 return unescapeBytes(input).toStringUtf8(); 2429 } 2430 2431 /** Is this an octal digit? */ isOctal(final byte c)2432 private static boolean isOctal(final byte c) { 2433 return '0' <= c && c <= '7'; 2434 } 2435 2436 /** Is this a hex digit? */ isHex(final byte c)2437 private static boolean isHex(final byte c) { 2438 return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'); 2439 } 2440 2441 /** 2442 * Interpret a character as a digit (in any base up to 36) and return the numeric value. This is 2443 * like {@code Character.digit()} but we don't accept non-ASCII digits. 
2444 */ digitValue(final byte c)2445 private static int digitValue(final byte c) { 2446 if ('0' <= c && c <= '9') { 2447 return c - '0'; 2448 } else if ('a' <= c && c <= 'z') { 2449 return c - 'a' + 10; 2450 } else { 2451 return c - 'A' + 10; 2452 } 2453 } 2454 2455 /** 2456 * Parse a 32-bit signed integer from the text. Unlike the Java standard {@code 2457 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2458 * and octal numbers, respectively. 2459 */ parseInt32(final String text)2460 static int parseInt32(final String text) throws NumberFormatException { 2461 return (int) parseInteger(text, true, false); 2462 } 2463 2464 /** 2465 * Parse a 32-bit unsigned integer from the text. Unlike the Java standard {@code 2466 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2467 * and octal numbers, respectively. The result is coerced to a (signed) {@code int} when returned 2468 * since Java has no unsigned integer type. 2469 */ parseUInt32(final String text)2470 static int parseUInt32(final String text) throws NumberFormatException { 2471 return (int) parseInteger(text, false, false); 2472 } 2473 2474 /** 2475 * Parse a 64-bit signed integer from the text. Unlike the Java standard {@code 2476 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2477 * and octal numbers, respectively. 2478 */ parseInt64(final String text)2479 static long parseInt64(final String text) throws NumberFormatException { 2480 return parseInteger(text, true, true); 2481 } 2482 2483 /** 2484 * Parse a 64-bit unsigned integer from the text. Unlike the Java standard {@code 2485 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2486 * and octal numbers, respectively. The result is coerced to a (signed) {@code long} when returned 2487 * since Java has no unsigned long type. 
2488 */ parseUInt64(final String text)2489 static long parseUInt64(final String text) throws NumberFormatException { 2490 return parseInteger(text, false, true); 2491 } 2492 parseInteger(final String text, final boolean isSigned, final boolean isLong)2493 private static long parseInteger(final String text, final boolean isSigned, final boolean isLong) 2494 throws NumberFormatException { 2495 int pos = 0; 2496 2497 boolean negative = false; 2498 if (text.startsWith("-", pos)) { 2499 if (!isSigned) { 2500 throw new NumberFormatException("Number must be positive: " + text); 2501 } 2502 ++pos; 2503 negative = true; 2504 } 2505 2506 int radix = 10; 2507 if (text.startsWith("0x", pos)) { 2508 pos += 2; 2509 radix = 16; 2510 } else if (text.startsWith("0", pos)) { 2511 radix = 8; 2512 } 2513 2514 final String numberText = text.substring(pos); 2515 2516 long result = 0; 2517 if (numberText.length() < 16) { 2518 // Can safely assume no overflow. 2519 result = Long.parseLong(numberText, radix); 2520 if (negative) { 2521 result = -result; 2522 } 2523 2524 // Check bounds. 2525 // No need to check for 64-bit numbers since they'd have to be 16 chars 2526 // or longer to overflow. 2527 if (!isLong) { 2528 if (isSigned) { 2529 if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) { 2530 throw new NumberFormatException( 2531 "Number out of range for 32-bit signed integer: " + text); 2532 } 2533 } else { 2534 if (result >= (1L << 32) || result < 0) { 2535 throw new NumberFormatException( 2536 "Number out of range for 32-bit unsigned integer: " + text); 2537 } 2538 } 2539 } 2540 } else { 2541 BigInteger bigValue = new BigInteger(numberText, radix); 2542 if (negative) { 2543 bigValue = bigValue.negate(); 2544 } 2545 2546 // Check bounds. 
2547 if (!isLong) { 2548 if (isSigned) { 2549 if (bigValue.bitLength() > 31) { 2550 throw new NumberFormatException( 2551 "Number out of range for 32-bit signed integer: " + text); 2552 } 2553 } else { 2554 if (bigValue.bitLength() > 32) { 2555 throw new NumberFormatException( 2556 "Number out of range for 32-bit unsigned integer: " + text); 2557 } 2558 } 2559 } else { 2560 if (isSigned) { 2561 if (bigValue.bitLength() > 63) { 2562 throw new NumberFormatException( 2563 "Number out of range for 64-bit signed integer: " + text); 2564 } 2565 } else { 2566 if (bigValue.bitLength() > 64) { 2567 throw new NumberFormatException( 2568 "Number out of range for 64-bit unsigned integer: " + text); 2569 } 2570 } 2571 } 2572 2573 result = bigValue.longValue(); 2574 } 2575 2576 return result; 2577 } 2578 } 2579