1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 31 package com.google.protobuf; 32 33 import com.google.protobuf.Descriptors.Descriptor; 34 import com.google.protobuf.Descriptors.EnumDescriptor; 35 import com.google.protobuf.Descriptors.EnumValueDescriptor; 36 import com.google.protobuf.Descriptors.FieldDescriptor; 37 import java.io.IOException; 38 import java.math.BigInteger; 39 import java.nio.CharBuffer; 40 import java.util.ArrayList; 41 import java.util.List; 42 import java.util.Locale; 43 import java.util.Map; 44 import java.util.logging.Logger; 45 import java.util.regex.Matcher; 46 import java.util.regex.Pattern; 47 48 /** 49 * Provide text parsing and formatting support for proto2 instances. The implementation largely 50 * follows google/protobuf/text_format.cc. 51 * 52 * @author wenboz@google.com Wenbo Zhu 53 * @author kenton@google.com Kenton Varda 54 */ 55 public final class TextFormat { TextFormat()56 private TextFormat() {} 57 58 private static final Logger logger = Logger.getLogger(TextFormat.class.getName()); 59 60 /** 61 * Outputs a textual representation of the Protocol Message supplied into the parameter output. 62 * (This representation is the new version of the classic "ProtocolPrinter" output from the 63 * original Protocol Buffer system) 64 * 65 * @deprecated Use {@code printer().print(MessageOrBuilder, Appendable)} 66 */ 67 @Deprecated print(final MessageOrBuilder message, final Appendable output)68 public static void print(final MessageOrBuilder message, final Appendable output) 69 throws IOException { 70 printer().print(message, output); 71 } 72 73 /** 74 * Outputs a textual representation of {@code fields} to {@code output}. 
75 * 76 * @deprecated Use {@code printer().print(UnknownFieldSet, Appendable)} 77 */ 78 @Deprecated print(final UnknownFieldSet fields, final Appendable output)79 public static void print(final UnknownFieldSet fields, final Appendable output) 80 throws IOException { 81 printer().print(fields, output); 82 } 83 84 /** 85 * Same as {@code print()}, except that non-ASCII characters are not escaped. 86 * 87 * @deprecated Use {@code printer().escapingNonAscii(false).print(MessageOrBuilder, Appendable)} 88 */ 89 @Deprecated printUnicode(final MessageOrBuilder message, final Appendable output)90 public static void printUnicode(final MessageOrBuilder message, final Appendable output) 91 throws IOException { 92 printer().escapingNonAscii(false).print(message, output); 93 } 94 95 /** 96 * Same as {@code print()}, except that non-ASCII characters are not escaped. 97 * 98 * @deprecated Use {@code printer().escapingNonAscii(false).print(UnknownFieldSet, Appendable)} 99 */ 100 @Deprecated printUnicode(final UnknownFieldSet fields, final Appendable output)101 public static void printUnicode(final UnknownFieldSet fields, final Appendable output) 102 throws IOException { 103 printer().escapingNonAscii(false).print(fields, output); 104 } 105 106 /** 107 * Generates a human readable form of this message, useful for debugging and other purposes, with 108 * no newline characters. This is just a trivial wrapper around 109 * {@link TextFormat.Printer#shortDebugString(MessageOrBuilder)}. 110 */ shortDebugString(final MessageOrBuilder message)111 public static String shortDebugString(final MessageOrBuilder message) { 112 return printer().shortDebugString(message); 113 } 114 115 /** 116 * Generates a human readable form of the field, useful for debugging and other purposes, with no 117 * newline characters. 
118 * 119 * @deprecated Use {@code printer().shortDebugString(FieldDescriptor, Object)} 120 */ 121 @Deprecated shortDebugString(final FieldDescriptor field, final Object value)122 public static String shortDebugString(final FieldDescriptor field, final Object value) { 123 return printer().shortDebugString(field, value); 124 } 125 126 /** 127 * Generates a human readable form of the unknown fields, useful for debugging and other purposes, 128 * with no newline characters. 129 * 130 * @deprecated Use {@code printer().shortDebugString(UnknownFieldSet)} 131 */ 132 @Deprecated shortDebugString(final UnknownFieldSet fields)133 public static String shortDebugString(final UnknownFieldSet fields) { 134 return printer().shortDebugString(fields); 135 } 136 137 /** 138 * Like {@code print()}, but writes directly to a {@code String} and returns it. 139 * 140 * @deprecated Use {@link MessageOrBuilder#toString()} 141 */ 142 @Deprecated printToString(final MessageOrBuilder message)143 public static String printToString(final MessageOrBuilder message) { 144 return printer().printToString(message); 145 } 146 147 /** 148 * Like {@code print()}, but writes directly to a {@code String} and returns it. 149 * 150 * @deprecated Use {@link UnknownFieldSet#toString()} 151 */ 152 @Deprecated printToString(final UnknownFieldSet fields)153 public static String printToString(final UnknownFieldSet fields) { 154 return printer().printToString(fields); 155 } 156 157 /** 158 * Same as {@code printToString()}, except that non-ASCII characters in string type fields are not 159 * escaped in backslash+octals. 
160 * 161 * @deprecated Use {@code printer().escapingNonAscii(false).printToString(MessageOrBuilder)} 162 */ 163 @Deprecated printToUnicodeString(final MessageOrBuilder message)164 public static String printToUnicodeString(final MessageOrBuilder message) { 165 return printer().escapingNonAscii(false).printToString(message); 166 } 167 168 /** 169 * Same as {@code printToString()}, except that non-ASCII characters in string type fields are not 170 * escaped in backslash+octals. 171 * 172 * @deprecated Use {@code printer().escapingNonAscii(false).printToString(UnknownFieldSet)} 173 */ 174 @Deprecated printToUnicodeString(final UnknownFieldSet fields)175 public static String printToUnicodeString(final UnknownFieldSet fields) { 176 return printer().escapingNonAscii(false).printToString(fields); 177 } 178 179 /** @deprecated Use {@code printer().printField(FieldDescriptor, Object, Appendable)} */ 180 @Deprecated printField( final FieldDescriptor field, final Object value, final Appendable output)181 public static void printField( 182 final FieldDescriptor field, final Object value, final Appendable output) throws IOException { 183 printer().printField(field, value, output); 184 } 185 186 /** @deprecated Use {@code printer().printFieldToString(FieldDescriptor, Object)} */ 187 @Deprecated printFieldToString(final FieldDescriptor field, final Object value)188 public static String printFieldToString(final FieldDescriptor field, final Object value) { 189 return printer().printFieldToString(field, value); 190 } 191 192 /** 193 * Outputs a unicode textual representation of the value of given field value. 194 * 195 * <p>Same as {@code printFieldValue()}, except that non-ASCII characters in string type fields 196 * are not escaped in backslash+octals. 
197 * 198 * @deprecated Use {@code printer().escapingNonAscii(false).printFieldValue(FieldDescriptor, 199 * Object, Appendable)} 200 * @param field the descriptor of the field 201 * @param value the value of the field 202 * @param output the output to which to append the formatted value 203 * @throws ClassCastException if the value is not appropriate for the given field descriptor 204 * @throws IOException if there is an exception writing to the output 205 */ 206 @Deprecated printUnicodeFieldValue( final FieldDescriptor field, final Object value, final Appendable output)207 public static void printUnicodeFieldValue( 208 final FieldDescriptor field, final Object value, final Appendable output) throws IOException { 209 printer().escapingNonAscii(false).printFieldValue(field, value, output); 210 } 211 212 /** 213 * Outputs a textual representation of the value of given field value. 214 * 215 * @deprecated Use {@code printer().printFieldValue(FieldDescriptor, Object, Appendable)} 216 * @param field the descriptor of the field 217 * @param value the value of the field 218 * @param output the output to which to append the formatted value 219 * @throws ClassCastException if the value is not appropriate for the given field descriptor 220 * @throws IOException if there is an exception writing to the output 221 */ 222 @Deprecated printFieldValue( final FieldDescriptor field, final Object value, final Appendable output)223 public static void printFieldValue( 224 final FieldDescriptor field, final Object value, final Appendable output) throws IOException { 225 printer().printFieldValue(field, value, output); 226 } 227 228 /** 229 * Outputs a textual representation of the value of an unknown field. 
230 * 231 * @param tag the field's tag number 232 * @param value the value of the field 233 * @param output the output to which to append the formatted value 234 * @throws ClassCastException if the value is not appropriate for the given field descriptor 235 * @throws IOException if there is an exception writing to the output 236 */ printUnknownFieldValue( final int tag, final Object value, final Appendable output)237 public static void printUnknownFieldValue( 238 final int tag, final Object value, final Appendable output) throws IOException { 239 printUnknownFieldValue(tag, value, multiLineOutput(output)); 240 } 241 printUnknownFieldValue( final int tag, final Object value, final TextGenerator generator)242 private static void printUnknownFieldValue( 243 final int tag, final Object value, final TextGenerator generator) throws IOException { 244 switch (WireFormat.getTagWireType(tag)) { 245 case WireFormat.WIRETYPE_VARINT: 246 generator.print(unsignedToString((Long) value)); 247 break; 248 case WireFormat.WIRETYPE_FIXED32: 249 generator.print(String.format((Locale) null, "0x%08x", (Integer) value)); 250 break; 251 case WireFormat.WIRETYPE_FIXED64: 252 generator.print(String.format((Locale) null, "0x%016x", (Long) value)); 253 break; 254 case WireFormat.WIRETYPE_LENGTH_DELIMITED: 255 try { 256 // Try to parse and print the field as an embedded message 257 UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value); 258 generator.print("{"); 259 generator.eol(); 260 generator.indent(); 261 Printer.printUnknownFields(message, generator); 262 generator.outdent(); 263 generator.print("}"); 264 } catch (InvalidProtocolBufferException e) { 265 // If not parseable as a message, print as a String 266 generator.print("\""); 267 generator.print(escapeBytes((ByteString) value)); 268 generator.print("\""); 269 } 270 break; 271 case WireFormat.WIRETYPE_START_GROUP: 272 Printer.printUnknownFields((UnknownFieldSet) value, generator); 273 break; 274 default: 275 throw new 
IllegalArgumentException("Bad tag: " + tag); 276 } 277 } 278 279 /** Printer instance which escapes non-ASCII characters. */ printer()280 public static Printer printer() { 281 return Printer.DEFAULT; 282 } 283 284 /** Helper class for converting protobufs to text. */ 285 public static final class Printer { 286 287 // Printer instance which escapes non-ASCII characters. 288 private static final Printer DEFAULT = new Printer(true); 289 290 /** Whether to escape non ASCII characters with backslash and octal. */ 291 private final boolean escapeNonAscii; 292 Printer(boolean escapeNonAscii)293 private Printer(boolean escapeNonAscii) { 294 this.escapeNonAscii = escapeNonAscii; 295 } 296 297 /** 298 * Return a new Printer instance with the specified escape mode. 299 * 300 * @param escapeNonAscii If true, the new Printer will escape non-ASCII characters (this is the 301 * default behavior. If false, the new Printer will print non-ASCII characters as is. In 302 * either case, the new Printer still escapes newlines and quotes in strings. 303 * @return a new Printer that clones all other configurations from the current {@link Printer}, 304 * with the escape mode set to the given parameter. 305 */ escapingNonAscii(boolean escapeNonAscii)306 public Printer escapingNonAscii(boolean escapeNonAscii) { 307 return new Printer(escapeNonAscii); 308 } 309 310 /** 311 * Outputs a textual representation of the Protocol Message supplied into the parameter output. 312 * (This representation is the new version of the classic "ProtocolPrinter" output from the 313 * original Protocol Buffer system) 314 */ print(final MessageOrBuilder message, final Appendable output)315 public void print(final MessageOrBuilder message, final Appendable output) throws IOException { 316 print(message, multiLineOutput(output)); 317 } 318 319 /** Outputs a textual representation of {@code fields} to {@code output}. 
*/ print(final UnknownFieldSet fields, final Appendable output)320 public void print(final UnknownFieldSet fields, final Appendable output) throws IOException { 321 printUnknownFields(fields, multiLineOutput(output)); 322 } 323 print(final MessageOrBuilder message, final TextGenerator generator)324 private void print(final MessageOrBuilder message, final TextGenerator generator) 325 throws IOException { 326 printMessage(message, generator); 327 } 328 printFieldToString(final FieldDescriptor field, final Object value)329 public String printFieldToString(final FieldDescriptor field, final Object value) { 330 try { 331 final StringBuilder text = new StringBuilder(); 332 printField(field, value, text); 333 return text.toString(); 334 } catch (IOException e) { 335 throw new IllegalStateException(e); 336 } 337 } 338 printField(final FieldDescriptor field, final Object value, final Appendable output)339 public void printField(final FieldDescriptor field, final Object value, final Appendable output) 340 throws IOException { 341 printField(field, value, multiLineOutput(output)); 342 } 343 printField( final FieldDescriptor field, final Object value, final TextGenerator generator)344 private void printField( 345 final FieldDescriptor field, final Object value, final TextGenerator generator) 346 throws IOException { 347 if (field.isRepeated()) { 348 // Repeated field. Print each element. 349 for (Object element : (List<?>) value) { 350 printSingleField(field, element, generator); 351 } 352 } else { 353 printSingleField(field, value, generator); 354 } 355 } 356 357 /** 358 * Outputs a textual representation of the value of given field value. 
359 * 360 * @param field the descriptor of the field 361 * @param value the value of the field 362 * @param output the output to which to append the formatted value 363 * @throws ClassCastException if the value is not appropriate for the given field descriptor 364 * @throws IOException if there is an exception writing to the output 365 */ printFieldValue( final FieldDescriptor field, final Object value, final Appendable output)366 public void printFieldValue( 367 final FieldDescriptor field, final Object value, final Appendable output) 368 throws IOException { 369 printFieldValue(field, value, multiLineOutput(output)); 370 } 371 printFieldValue( final FieldDescriptor field, final Object value, final TextGenerator generator)372 private void printFieldValue( 373 final FieldDescriptor field, final Object value, final TextGenerator generator) 374 throws IOException { 375 switch (field.getType()) { 376 case INT32: 377 case SINT32: 378 case SFIXED32: 379 generator.print(((Integer) value).toString()); 380 break; 381 382 case INT64: 383 case SINT64: 384 case SFIXED64: 385 generator.print(((Long) value).toString()); 386 break; 387 388 case BOOL: 389 generator.print(((Boolean) value).toString()); 390 break; 391 392 case FLOAT: 393 generator.print(((Float) value).toString()); 394 break; 395 396 case DOUBLE: 397 generator.print(((Double) value).toString()); 398 break; 399 400 case UINT32: 401 case FIXED32: 402 generator.print(unsignedToString((Integer) value)); 403 break; 404 405 case UINT64: 406 case FIXED64: 407 generator.print(unsignedToString((Long) value)); 408 break; 409 410 case STRING: 411 generator.print("\""); 412 generator.print( 413 escapeNonAscii 414 ? 
TextFormatEscaper.escapeText((String) value) 415 : escapeDoubleQuotesAndBackslashes((String) value).replace("\n", "\\n")); 416 generator.print("\""); 417 break; 418 419 case BYTES: 420 generator.print("\""); 421 if (value instanceof ByteString) { 422 generator.print(escapeBytes((ByteString) value)); 423 } else { 424 generator.print(escapeBytes((byte[]) value)); 425 } 426 generator.print("\""); 427 break; 428 429 case ENUM: 430 generator.print(((EnumValueDescriptor) value).getName()); 431 break; 432 433 case MESSAGE: 434 case GROUP: 435 print((Message) value, generator); 436 break; 437 } 438 } 439 440 /** Like {@code print()}, but writes directly to a {@code String} and returns it. */ printToString(final MessageOrBuilder message)441 public String printToString(final MessageOrBuilder message) { 442 try { 443 final StringBuilder text = new StringBuilder(); 444 print(message, text); 445 return text.toString(); 446 } catch (IOException e) { 447 throw new IllegalStateException(e); 448 } 449 } 450 /** Like {@code print()}, but writes directly to a {@code String} and returns it. */ printToString(final UnknownFieldSet fields)451 public String printToString(final UnknownFieldSet fields) { 452 try { 453 final StringBuilder text = new StringBuilder(); 454 print(fields, text); 455 return text.toString(); 456 } catch (IOException e) { 457 throw new IllegalStateException(e); 458 } 459 } 460 461 /** 462 * Generates a human readable form of this message, useful for debugging and other purposes, 463 * with no newline characters. 
464 */ shortDebugString(final MessageOrBuilder message)465 public String shortDebugString(final MessageOrBuilder message) { 466 try { 467 final StringBuilder text = new StringBuilder(); 468 print(message, singleLineOutput(text)); 469 return text.toString(); 470 } catch (IOException e) { 471 throw new IllegalStateException(e); 472 } 473 } 474 475 /** 476 * Generates a human readable form of the field, useful for debugging and other purposes, with 477 * no newline characters. 478 */ shortDebugString(final FieldDescriptor field, final Object value)479 public String shortDebugString(final FieldDescriptor field, final Object value) { 480 try { 481 final StringBuilder text = new StringBuilder(); 482 printField(field, value, singleLineOutput(text)); 483 return text.toString(); 484 } catch (IOException e) { 485 throw new IllegalStateException(e); 486 } 487 } 488 489 /** 490 * Generates a human readable form of the unknown fields, useful for debugging and other 491 * purposes, with no newline characters. 
492 */ shortDebugString(final UnknownFieldSet fields)493 public String shortDebugString(final UnknownFieldSet fields) { 494 try { 495 final StringBuilder text = new StringBuilder(); 496 printUnknownFields(fields, singleLineOutput(text)); 497 return text.toString(); 498 } catch (IOException e) { 499 throw new IllegalStateException(e); 500 } 501 } 502 printUnknownFieldValue( final int tag, final Object value, final TextGenerator generator)503 private static void printUnknownFieldValue( 504 final int tag, final Object value, final TextGenerator generator) throws IOException { 505 switch (WireFormat.getTagWireType(tag)) { 506 case WireFormat.WIRETYPE_VARINT: 507 generator.print(unsignedToString((Long) value)); 508 break; 509 case WireFormat.WIRETYPE_FIXED32: 510 generator.print(String.format((Locale) null, "0x%08x", (Integer) value)); 511 break; 512 case WireFormat.WIRETYPE_FIXED64: 513 generator.print(String.format((Locale) null, "0x%016x", (Long) value)); 514 break; 515 case WireFormat.WIRETYPE_LENGTH_DELIMITED: 516 try { 517 // Try to parse and print the field as an embedded message 518 UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value); 519 generator.print("{"); 520 generator.eol(); 521 generator.indent(); 522 printUnknownFields(message, generator); 523 generator.outdent(); 524 generator.print("}"); 525 } catch (InvalidProtocolBufferException e) { 526 // If not parseable as a message, print as a String 527 generator.print("\""); 528 generator.print(escapeBytes((ByteString) value)); 529 generator.print("\""); 530 } 531 break; 532 case WireFormat.WIRETYPE_START_GROUP: 533 printUnknownFields((UnknownFieldSet) value, generator); 534 break; 535 default: 536 throw new IllegalArgumentException("Bad tag: " + tag); 537 } 538 } 539 printMessage(final MessageOrBuilder message, final TextGenerator generator)540 private void printMessage(final MessageOrBuilder message, final TextGenerator generator) 541 throws IOException { 542 for 
(Map.Entry<FieldDescriptor, Object> field : message.getAllFields().entrySet()) { 543 printField(field.getKey(), field.getValue(), generator); 544 } 545 printUnknownFields(message.getUnknownFields(), generator); 546 } 547 printSingleField( final FieldDescriptor field, final Object value, final TextGenerator generator)548 private void printSingleField( 549 final FieldDescriptor field, final Object value, final TextGenerator generator) 550 throws IOException { 551 if (field.isExtension()) { 552 generator.print("["); 553 // We special-case MessageSet elements for compatibility with proto1. 554 if (field.getContainingType().getOptions().getMessageSetWireFormat() 555 && (field.getType() == FieldDescriptor.Type.MESSAGE) 556 && (field.isOptional()) 557 // object equality 558 && (field.getExtensionScope() == field.getMessageType())) { 559 generator.print(field.getMessageType().getFullName()); 560 } else { 561 generator.print(field.getFullName()); 562 } 563 generator.print("]"); 564 } else { 565 if (field.getType() == FieldDescriptor.Type.GROUP) { 566 // Groups must be serialized with their original capitalization. 
567 generator.print(field.getMessageType().getName()); 568 } else { 569 generator.print(field.getName()); 570 } 571 } 572 573 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 574 generator.print(" {"); 575 generator.eol(); 576 generator.indent(); 577 } else { 578 generator.print(": "); 579 } 580 581 printFieldValue(field, value, generator); 582 583 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 584 generator.outdent(); 585 generator.print("}"); 586 } 587 generator.eol(); 588 } 589 printUnknownFields( final UnknownFieldSet unknownFields, final TextGenerator generator)590 private static void printUnknownFields( 591 final UnknownFieldSet unknownFields, final TextGenerator generator) throws IOException { 592 for (Map.Entry<Integer, UnknownFieldSet.Field> entry : unknownFields.asMap().entrySet()) { 593 final int number = entry.getKey(); 594 final UnknownFieldSet.Field field = entry.getValue(); 595 printUnknownField(number, WireFormat.WIRETYPE_VARINT, field.getVarintList(), generator); 596 printUnknownField(number, WireFormat.WIRETYPE_FIXED32, field.getFixed32List(), generator); 597 printUnknownField(number, WireFormat.WIRETYPE_FIXED64, field.getFixed64List(), generator); 598 printUnknownField( 599 number, 600 WireFormat.WIRETYPE_LENGTH_DELIMITED, 601 field.getLengthDelimitedList(), 602 generator); 603 for (final UnknownFieldSet value : field.getGroupList()) { 604 generator.print(entry.getKey().toString()); 605 generator.print(" {"); 606 generator.eol(); 607 generator.indent(); 608 printUnknownFields(value, generator); 609 generator.outdent(); 610 generator.print("}"); 611 generator.eol(); 612 } 613 } 614 } 615 printUnknownField( final int number, final int wireType, final List<?> values, final TextGenerator generator)616 private static void printUnknownField( 617 final int number, final int wireType, final List<?> values, final TextGenerator generator) 618 throws IOException { 619 for (final Object value : values) { 620 
generator.print(String.valueOf(number)); 621 generator.print(": "); 622 printUnknownFieldValue(wireType, value, generator); 623 generator.eol(); 624 } 625 } 626 } 627 628 /** Convert an unsigned 32-bit integer to a string. */ unsignedToString(final int value)629 public static String unsignedToString(final int value) { 630 if (value >= 0) { 631 return Integer.toString(value); 632 } else { 633 return Long.toString(value & 0x00000000FFFFFFFFL); 634 } 635 } 636 637 /** Convert an unsigned 64-bit integer to a string. */ unsignedToString(final long value)638 public static String unsignedToString(final long value) { 639 if (value >= 0) { 640 return Long.toString(value); 641 } else { 642 // Pull off the most-significant bit so that BigInteger doesn't think 643 // the number is negative, then set it again using setBit(). 644 return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL).setBit(63).toString(); 645 } 646 } 647 multiLineOutput(Appendable output)648 private static TextGenerator multiLineOutput(Appendable output) { 649 return new TextGenerator(output, false); 650 } 651 singleLineOutput(Appendable output)652 private static TextGenerator singleLineOutput(Appendable output) { 653 return new TextGenerator(output, true); 654 } 655 656 /** An inner class for writing text to the output stream. */ 657 private static final class TextGenerator { 658 private final Appendable output; 659 private final StringBuilder indent = new StringBuilder(); 660 private final boolean singleLineMode; 661 // While technically we are "at the start of a line" at the very beginning of the output, all 662 // we would do in response to this is emit the (zero length) indentation, so it has no effect. 663 // Setting it false here does however suppress an unwanted leading space in single-line mode. 
664 private boolean atStartOfLine = false; 665 TextGenerator(final Appendable output, boolean singleLineMode)666 private TextGenerator(final Appendable output, boolean singleLineMode) { 667 this.output = output; 668 this.singleLineMode = singleLineMode; 669 } 670 671 /** 672 * Indent text by two spaces. After calling Indent(), two spaces will be inserted at the 673 * beginning of each line of text. Indent() may be called multiple times to produce deeper 674 * indents. 675 */ indent()676 public void indent() { 677 indent.append(" "); 678 } 679 680 /** Reduces the current indent level by two spaces, or crashes if the indent level is zero. */ outdent()681 public void outdent() { 682 final int length = indent.length(); 683 if (length == 0) { 684 throw new IllegalArgumentException(" Outdent() without matching Indent()."); 685 } 686 indent.setLength(length - 2); 687 } 688 689 /** 690 * Print text to the output stream. Bare newlines are never expected to be passed to this 691 * method; to indicate the end of a line, call "eol()". 692 */ print(final CharSequence text)693 public void print(final CharSequence text) throws IOException { 694 if (atStartOfLine) { 695 atStartOfLine = false; 696 output.append(singleLineMode ? " " : indent); 697 } 698 output.append(text); 699 } 700 701 /** 702 * Signifies reaching the "end of the current line" in the output. In single-line mode, this 703 * does not result in a newline being emitted, but ensures that a separating space is written 704 * before the next output. 705 */ eol()706 public void eol() throws IOException { 707 if (!singleLineMode) { 708 output.append("\n"); 709 } 710 atStartOfLine = true; 711 } 712 } 713 714 // ================================================================= 715 // Parsing 716 717 /** 718 * Represents a stream of tokens parsed from a {@code String}. 719 * 720 * <p>The Java standard library provides many classes that you might think would be useful for 721 * implementing this, but aren't. 
* For example:
   *
   * <ul>
   *   <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or, at least, something
   *       that would get us close to what we want -- except for one fatal flaw: It automatically
   *       un-escapes strings using Java escape sequences, which do not include all the escape
   *       sequences we need to support (e.g. '\x').
   *   <li>{@code java.util.Scanner}: This seems like a great way at least to parse regular
   *       expressions out of a stream (so we wouldn't have to load the entire input into a single
   *       string before parsing). Sadly, {@code Scanner} requires that tokens be delimited with
   *       some delimiter. Thus, although the text "foo:" should parse to two tokens ("foo" and
   *       ":"), {@code Scanner} would recognize it only as a single token. Furthermore, {@code
   *       Scanner} provides no way to inspect the contents of delimiters, making it impossible to
   *       keep track of line and column numbers.
   * </ul>
   *
   * <p>Luckily, Java's regular expression support does manage to be useful to us. (Barely: We need
   * {@code Matcher.usePattern()}, which is new in Java 1.5.) So, we can use that, at least.
   * Unfortunately, this implies that we need to have the entire input in one contiguous string.
   */
  private static final class Tokenizer {
    // The complete input being tokenized.
    private final CharSequence text;
    // Single matcher over the input; it is switched between the whitespace and token patterns
    // with usePattern(), and its region start marks the beginning of the unconsumed input.
    private final Matcher matcher;
    // Text of the current token; the empty string once the end of the input has been reached.
    private String currentToken;

    // The character index within this.text at which the current token begins.
    private int pos = 0;

    // The line and column numbers of the current token.
    private int line = 0;
    private int column = 0;

    // The line and column numbers of the previous token (allows throwing
    // errors *after* consuming).
    private int previousLine = 0;
    private int previousColumn = 0;

    // We use possessive quantifiers (*+ and ++) because otherwise the Java
    // regex matcher has stack overflows on large inputs.
// Matches a run of whitespace characters and/or '#' comments; with MULTILINE, '$' lets each
// comment stop at the end of its own line.
private static final Pattern WHITESPACE = Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
// Matches one token at the start of the region. A quoted string may also end at end-of-line
// (optionally after a trailing backslash) without its closing quote, so an unterminated
// string still comes back as a single token.
private static final Pattern TOKEN =
    Pattern.compile(
        "[a-zA-Z_][0-9a-zA-Z_+-]*+|" // an identifier
            + "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" // a number
            + "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" // a double-quoted string
            + "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)", // a single-quoted string
        Pattern.MULTILINE);

// "inf" or "infinity" with an optional leading '-', compared case-insensitively.
private static final Pattern DOUBLE_INFINITY =
    Pattern.compile("-?inf(inity)?", Pattern.CASE_INSENSITIVE);
// Same as DOUBLE_INFINITY but also accepts an optional trailing 'f'.
private static final Pattern FLOAT_INFINITY =
    Pattern.compile("-?inf(inity)?f?", Pattern.CASE_INSENSITIVE);
// "nan" with an optional trailing 'f', compared case-insensitively.
private static final Pattern FLOAT_NAN = Pattern.compile("nanf?", Pattern.CASE_INSENSITIVE);

/**
 * Construct a tokenizer that parses tokens from the given text. Leading whitespace is skipped
 * and the first token is loaded immediately, so the tokenizer is ready to be queried.
 */
private Tokenizer(final CharSequence text) {
  this.text = text;
  this.matcher = WHITESPACE.matcher(text);
  skipWhitespace();
  nextToken();
}

/** Returns the zero-based line that was current before the most recent {@link #nextToken()}. */
int getPreviousLine() {
  return previousLine;
}

/** Returns the zero-based column that was current before the most recent {@link #nextToken()}. */
int getPreviousColumn() {
  return previousColumn;
}

/** Returns the zero-based line number of the current token. */
int getLine() {
  return line;
}

/** Returns the zero-based column number of the current token. */
int getColumn() {
  return column;
}

/** Are we at the end of the input? (The end is represented by an empty current token.) */
public boolean atEnd() {
  return currentToken.length() == 0;
}

/**
 * Advance to the next token. The position of the token being left behind is remembered as the
 * "previous" line/column before the counters are moved forward.
 */
public void nextToken() {
  previousLine = line;
  previousColumn = column;

  // Advance the line counter to the current position.
  // pos trails the matcher's region start; walk it forward one character at a time, counting
  // newlines, so that line/column describe where the upcoming token begins.
  while (pos < matcher.regionStart()) {
    if (text.charAt(pos) == '\n') {
      ++line;
      column = 0;
    } else {
      ++column;
    }
    ++pos;
  }

  // Match the next token.
  if (matcher.regionStart() == matcher.regionEnd()) {
    // EOF
    currentToken = "";
  } else {
    matcher.usePattern(TOKEN);
    if (matcher.lookingAt()) {
      currentToken = matcher.group();
      // Shrink the region so it starts just past the token that was matched.
      matcher.region(matcher.end(), matcher.regionEnd());
    } else {
      // Take one character.
      // Anything TOKEN does not recognize becomes a one-character token.
      currentToken = String.valueOf(text.charAt(pos));
      matcher.region(pos + 1, matcher.regionEnd());
    }

    skipWhitespace();
  }
}

/** Skip over any whitespace so that the matcher region starts at the next token. */
private void skipWhitespace() {
  matcher.usePattern(WHITESPACE);
  if (matcher.lookingAt()) {
    matcher.region(matcher.end(), matcher.regionEnd());
  }
}

/**
 * If the next token exactly matches {@code token}, consume it and return {@code true}.
 * Otherwise, return {@code false} without doing anything.
 */
public boolean tryConsume(final String token) {
  if (currentToken.equals(token)) {
    nextToken();
    return true;
  } else {
    return false;
  }
}

/**
 * If the next token exactly matches {@code token}, consume it. Otherwise, throw a {@link
 * ParseException}.
 */
public void consume(final String token) throws ParseException {
  if (!tryConsume(token)) {
    throw parseException("Expected \"" + token + "\".");
  }
}

/**
 * Returns {@code true} if the next token is an integer, but does not consume it. Only the first
 * character is inspected (a digit, '-' or '+'); the rest of the token is not validated here.
 */
public boolean lookingAtInteger() {
  if (currentToken.length() == 0) {
    return false;
  }

  final char c = currentToken.charAt(0);
  return ('0' <= c && c <= '9') || c == '-' || c == '+';
}

/** Returns {@code true} if the current token's text is equal to that specified.
*/ lookingAt(String text)881 public boolean lookingAt(String text) { 882 return currentToken.equals(text); 883 } 884 885 /** 886 * If the next token is an identifier, consume it and return its value. Otherwise, throw a 887 * {@link ParseException}. 888 */ consumeIdentifier()889 public String consumeIdentifier() throws ParseException { 890 for (int i = 0; i < currentToken.length(); i++) { 891 final char c = currentToken.charAt(i); 892 if (('a' <= c && c <= 'z') 893 || ('A' <= c && c <= 'Z') 894 || ('0' <= c && c <= '9') 895 || (c == '_') 896 || (c == '.')) { 897 // OK 898 } else { 899 throw parseException("Expected identifier. Found '" + currentToken + "'"); 900 } 901 } 902 903 final String result = currentToken; 904 nextToken(); 905 return result; 906 } 907 908 /** 909 * If the next token is an identifier, consume it and return {@code true}. Otherwise, return 910 * {@code false} without doing anything. 911 */ tryConsumeIdentifier()912 public boolean tryConsumeIdentifier() { 913 try { 914 consumeIdentifier(); 915 return true; 916 } catch (ParseException e) { 917 return false; 918 } 919 } 920 921 /** 922 * If the next token is a 32-bit signed integer, consume it and return its value. Otherwise, 923 * throw a {@link ParseException}. 924 */ consumeInt32()925 public int consumeInt32() throws ParseException { 926 try { 927 final int result = parseInt32(currentToken); 928 nextToken(); 929 return result; 930 } catch (NumberFormatException e) { 931 throw integerParseException(e); 932 } 933 } 934 935 /** 936 * If the next token is a 32-bit unsigned integer, consume it and return its value. Otherwise, 937 * throw a {@link ParseException}. 
938 */ consumeUInt32()939 public int consumeUInt32() throws ParseException { 940 try { 941 final int result = parseUInt32(currentToken); 942 nextToken(); 943 return result; 944 } catch (NumberFormatException e) { 945 throw integerParseException(e); 946 } 947 } 948 949 /** 950 * If the next token is a 64-bit signed integer, consume it and return its value. Otherwise, 951 * throw a {@link ParseException}. 952 */ consumeInt64()953 public long consumeInt64() throws ParseException { 954 try { 955 final long result = parseInt64(currentToken); 956 nextToken(); 957 return result; 958 } catch (NumberFormatException e) { 959 throw integerParseException(e); 960 } 961 } 962 963 /** 964 * If the next token is a 64-bit signed integer, consume it and return {@code true}. Otherwise, 965 * return {@code false} without doing anything. 966 */ tryConsumeInt64()967 public boolean tryConsumeInt64() { 968 try { 969 consumeInt64(); 970 return true; 971 } catch (ParseException e) { 972 return false; 973 } 974 } 975 976 /** 977 * If the next token is a 64-bit unsigned integer, consume it and return its value. Otherwise, 978 * throw a {@link ParseException}. 979 */ consumeUInt64()980 public long consumeUInt64() throws ParseException { 981 try { 982 final long result = parseUInt64(currentToken); 983 nextToken(); 984 return result; 985 } catch (NumberFormatException e) { 986 throw integerParseException(e); 987 } 988 } 989 990 /** 991 * If the next token is a 64-bit unsigned integer, consume it and return {@code true}. 992 * Otherwise, return {@code false} without doing anything. 993 */ tryConsumeUInt64()994 public boolean tryConsumeUInt64() { 995 try { 996 consumeUInt64(); 997 return true; 998 } catch (ParseException e) { 999 return false; 1000 } 1001 } 1002 1003 /** 1004 * If the next token is a double, consume it and return its value. Otherwise, throw a {@link 1005 * ParseException}. 
1006 */ consumeDouble()1007 public double consumeDouble() throws ParseException { 1008 // We need to parse infinity and nan separately because 1009 // Double.parseDouble() does not accept "inf", "infinity", or "nan". 1010 if (DOUBLE_INFINITY.matcher(currentToken).matches()) { 1011 final boolean negative = currentToken.startsWith("-"); 1012 nextToken(); 1013 return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; 1014 } 1015 if (currentToken.equalsIgnoreCase("nan")) { 1016 nextToken(); 1017 return Double.NaN; 1018 } 1019 try { 1020 final double result = Double.parseDouble(currentToken); 1021 nextToken(); 1022 return result; 1023 } catch (NumberFormatException e) { 1024 throw floatParseException(e); 1025 } 1026 } 1027 1028 /** 1029 * If the next token is a double, consume it and return {@code true}. Otherwise, return {@code 1030 * false} without doing anything. 1031 */ tryConsumeDouble()1032 public boolean tryConsumeDouble() { 1033 try { 1034 consumeDouble(); 1035 return true; 1036 } catch (ParseException e) { 1037 return false; 1038 } 1039 } 1040 1041 /** 1042 * If the next token is a float, consume it and return its value. Otherwise, throw a {@link 1043 * ParseException}. 1044 */ consumeFloat()1045 public float consumeFloat() throws ParseException { 1046 // We need to parse infinity and nan separately because 1047 // Float.parseFloat() does not accept "inf", "infinity", or "nan". 1048 if (FLOAT_INFINITY.matcher(currentToken).matches()) { 1049 final boolean negative = currentToken.startsWith("-"); 1050 nextToken(); 1051 return negative ? 
Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY; 1052 } 1053 if (FLOAT_NAN.matcher(currentToken).matches()) { 1054 nextToken(); 1055 return Float.NaN; 1056 } 1057 try { 1058 final float result = Float.parseFloat(currentToken); 1059 nextToken(); 1060 return result; 1061 } catch (NumberFormatException e) { 1062 throw floatParseException(e); 1063 } 1064 } 1065 1066 /** 1067 * If the next token is a float, consume it and return {@code true}. Otherwise, return {@code 1068 * false} without doing anything. 1069 */ tryConsumeFloat()1070 public boolean tryConsumeFloat() { 1071 try { 1072 consumeFloat(); 1073 return true; 1074 } catch (ParseException e) { 1075 return false; 1076 } 1077 } 1078 1079 /** 1080 * If the next token is a boolean, consume it and return its value. Otherwise, throw a {@link 1081 * ParseException}. 1082 */ consumeBoolean()1083 public boolean consumeBoolean() throws ParseException { 1084 if (currentToken.equals("true") 1085 || currentToken.equals("True") 1086 || currentToken.equals("t") 1087 || currentToken.equals("1")) { 1088 nextToken(); 1089 return true; 1090 } else if (currentToken.equals("false") 1091 || currentToken.equals("False") 1092 || currentToken.equals("f") 1093 || currentToken.equals("0")) { 1094 nextToken(); 1095 return false; 1096 } else { 1097 throw parseException("Expected \"true\" or \"false\". Found \"" + currentToken + "\"."); 1098 } 1099 } 1100 1101 /** 1102 * If the next token is a string, consume it and return its (unescaped) value. Otherwise, throw 1103 * a {@link ParseException}. 1104 */ consumeString()1105 public String consumeString() throws ParseException { 1106 return consumeByteString().toStringUtf8(); 1107 } 1108 1109 /** If the next token is a string, consume it and return true. Otherwise, return false. 
*/ tryConsumeString()1110 public boolean tryConsumeString() { 1111 try { 1112 consumeString(); 1113 return true; 1114 } catch (ParseException e) { 1115 return false; 1116 } 1117 } 1118 1119 /** 1120 * If the next token is a string, consume it, unescape it as a {@link ByteString}, and return 1121 * it. Otherwise, throw a {@link ParseException}. 1122 */ consumeByteString()1123 public ByteString consumeByteString() throws ParseException { 1124 List<ByteString> list = new ArrayList<ByteString>(); 1125 consumeByteString(list); 1126 while (currentToken.startsWith("'") || currentToken.startsWith("\"")) { 1127 consumeByteString(list); 1128 } 1129 return ByteString.copyFrom(list); 1130 } 1131 1132 /** 1133 * Like {@link #consumeByteString()} but adds each token of the string to the given list. String 1134 * literals (whether bytes or text) may come in multiple adjacent tokens which are automatically 1135 * concatenated, like in C or Python. 1136 */ consumeByteString(List<ByteString> list)1137 private void consumeByteString(List<ByteString> list) throws ParseException { 1138 final char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0'; 1139 if (quote != '\"' && quote != '\'') { 1140 throw parseException("Expected string."); 1141 } 1142 1143 if (currentToken.length() < 2 || currentToken.charAt(currentToken.length() - 1) != quote) { 1144 throw parseException("String missing ending quote."); 1145 } 1146 1147 try { 1148 final String escaped = currentToken.substring(1, currentToken.length() - 1); 1149 final ByteString result = unescapeBytes(escaped); 1150 nextToken(); 1151 list.add(result); 1152 } catch (InvalidEscapeSequenceException e) { 1153 throw parseException(e.getMessage()); 1154 } 1155 } 1156 1157 /** 1158 * Returns a {@link ParseException} with the current line and column numbers in the description, 1159 * suitable for throwing. 
1160 */ parseException(final String description)1161 public ParseException parseException(final String description) { 1162 // Note: People generally prefer one-based line and column numbers. 1163 return new ParseException(line + 1, column + 1, description); 1164 } 1165 1166 /** 1167 * Returns a {@link ParseException} with the line and column numbers of the previous token in 1168 * the description, suitable for throwing. 1169 */ parseExceptionPreviousToken(final String description)1170 public ParseException parseExceptionPreviousToken(final String description) { 1171 // Note: People generally prefer one-based line and column numbers. 1172 return new ParseException(previousLine + 1, previousColumn + 1, description); 1173 } 1174 1175 /** 1176 * Constructs an appropriate {@link ParseException} for the given {@code NumberFormatException} 1177 * when trying to parse an integer. 1178 */ integerParseException(final NumberFormatException e)1179 private ParseException integerParseException(final NumberFormatException e) { 1180 return parseException("Couldn't parse integer: " + e.getMessage()); 1181 } 1182 1183 /** 1184 * Constructs an appropriate {@link ParseException} for the given {@code NumberFormatException} 1185 * when trying to parse a float or double. 1186 */ floatParseException(final NumberFormatException e)1187 private ParseException floatParseException(final NumberFormatException e) { 1188 return parseException("Couldn't parse number: " + e.getMessage()); 1189 } 1190 1191 /** 1192 * Returns a {@link UnknownFieldParseException} with the line and column numbers of the previous 1193 * token in the description, and the unknown field name, suitable for throwing. 1194 */ unknownFieldParseExceptionPreviousToken( final String unknownField, final String description)1195 public UnknownFieldParseException unknownFieldParseExceptionPreviousToken( 1196 final String unknownField, final String description) { 1197 // Note: People generally prefer one-based line and column numbers. 
1198 return new UnknownFieldParseException( 1199 previousLine + 1, previousColumn + 1, unknownField, description); 1200 } 1201 } 1202 1203 /** Thrown when parsing an invalid text format message. */ 1204 public static class ParseException extends IOException { 1205 private static final long serialVersionUID = 3196188060225107702L; 1206 1207 private final int line; 1208 private final int column; 1209 1210 /** Create a new instance, with -1 as the line and column numbers. */ ParseException(final String message)1211 public ParseException(final String message) { 1212 this(-1, -1, message); 1213 } 1214 1215 /** 1216 * Create a new instance 1217 * 1218 * @param line the line number where the parse error occurred, using 1-offset. 1219 * @param column the column number where the parser error occurred, using 1-offset. 1220 */ ParseException(final int line, final int column, final String message)1221 public ParseException(final int line, final int column, final String message) { 1222 super(Integer.toString(line) + ":" + column + ": " + message); 1223 this.line = line; 1224 this.column = column; 1225 } 1226 1227 /** 1228 * Return the line where the parse exception occurred, or -1 when none is provided. The value is 1229 * specified as 1-offset, so the first line is line 1. 1230 */ getLine()1231 public int getLine() { 1232 return line; 1233 } 1234 1235 /** 1236 * Return the column where the parse exception occurred, or -1 when none is provided. The value 1237 * is specified as 1-offset, so the first line is line 1. 1238 */ getColumn()1239 public int getColumn() { 1240 return column; 1241 } 1242 } 1243 1244 /** Thrown when encountering an unknown field while parsing a text format message. */ 1245 public static class UnknownFieldParseException extends ParseException { 1246 private final String unknownField; 1247 1248 /** 1249 * Create a new instance, with -1 as the line and column numbers, and an empty unknown field 1250 * name. 
1251 */ UnknownFieldParseException(final String message)1252 public UnknownFieldParseException(final String message) { 1253 this(-1, -1, "", message); 1254 } 1255 1256 /** 1257 * Create a new instance 1258 * 1259 * @param line the line number where the parse error occurred, using 1-offset. 1260 * @param column the column number where the parser error occurred, using 1-offset. 1261 * @param unknownField the name of the unknown field found while parsing. 1262 */ UnknownFieldParseException( final int line, final int column, final String unknownField, final String message)1263 public UnknownFieldParseException( 1264 final int line, final int column, final String unknownField, final String message) { 1265 super(line, column, message); 1266 this.unknownField = unknownField; 1267 } 1268 1269 /** 1270 * Return the name of the unknown field encountered while parsing the protocol buffer string. 1271 */ getUnknownField()1272 public String getUnknownField() { 1273 return unknownField; 1274 } 1275 } 1276 1277 private static final Parser PARSER = Parser.newBuilder().build(); 1278 1279 /** 1280 * Return a {@link Parser} instance which can parse text-format messages. The returned instance is 1281 * thread-safe. 1282 */ getParser()1283 public static Parser getParser() { 1284 return PARSER; 1285 } 1286 1287 /** Parse a text-format message from {@code input} and merge the contents into {@code builder}. */ merge(final Readable input, final Message.Builder builder)1288 public static void merge(final Readable input, final Message.Builder builder) throws IOException { 1289 PARSER.merge(input, builder); 1290 } 1291 1292 /** Parse a text-format message from {@code input} and merge the contents into {@code builder}. 
*/
public static void merge(final CharSequence input, final Message.Builder builder)
    throws ParseException {
  PARSER.merge(input, builder);
}

/**
 * Parse a text-format message from {@code input}.
 *
 * <p>A new builder is obtained from the default instance of {@code protoClass}, {@code input} is
 * merged into it, and the built message is returned.
 *
 * @return the parsed message, guaranteed initialized
 */
public static <T extends Message> T parse(final CharSequence input, final Class<T> protoClass)
    throws ParseException {
  Message.Builder builder = Internal.getDefaultInstance(protoClass).newBuilderForType();
  merge(input, builder);
  // NOTE(review): the cast presumably holds because the builder was derived from protoClass's
  // default instance -- confirm against Internal.getDefaultInstance.
  @SuppressWarnings("unchecked")
  T output = (T) builder.build();
  return output;
}

/**
 * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
 * Extensions will be recognized if they are registered in {@code extensionRegistry}.
 */
public static void merge(
    final Readable input,
    final ExtensionRegistry extensionRegistry,
    final Message.Builder builder)
    throws IOException {
  PARSER.merge(input, extensionRegistry, builder);
}


/**
 * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
 * Extensions will be recognized if they are registered in {@code extensionRegistry}.
 */
public static void merge(
    final CharSequence input,
    final ExtensionRegistry extensionRegistry,
    final Message.Builder builder)
    throws ParseException {
  PARSER.merge(input, extensionRegistry, builder);
}

/**
 * Parse a text-format message from {@code input}. Extensions will be recognized if they are
 * registered in {@code extensionRegistry}.
 *
 * <p>A new builder is obtained from the default instance of {@code protoClass}, {@code input} is
 * merged into it, and the built message is returned.
 *
 * @return the parsed message, guaranteed initialized
 */
public static <T extends Message> T parse(
    final CharSequence input,
    final ExtensionRegistry extensionRegistry,
    final Class<T> protoClass)
    throws ParseException {
  Message.Builder builder = Internal.getDefaultInstance(protoClass).newBuilderForType();
  merge(input, extensionRegistry, builder);
  // NOTE(review): the cast presumably holds because the builder was derived from protoClass's
  // default instance -- confirm against Internal.getDefaultInstance.
  @SuppressWarnings("unchecked")
  T output = (T) builder.build();
  return output;
}


/**
 * Parser for text-format proto2 instances. This class is thread-safe. The implementation largely
 * follows google/protobuf/text_format.cc.
 *
 * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or {@link Builder} to
 * control the parser behavior.
 */
public static class Parser {
  /**
   * Determines if repeated values for non-repeated fields and oneofs are permitted. For example,
   * given required/optional field "foo" and a oneof containing "baz" and "qux":
   *
   * <ul>
   *   <li>"foo: 1 foo: 2"
   *   <li>"baz: 1 qux: 2"
   *   <li>merging "foo: 2" into a proto in which foo is already set, or
   *   <li>merging "qux: 2" into a proto in which baz is already set.
   * </ul>
   */
  public enum SingularOverwritePolicy {
    /**
     * Later values are merged with earlier values. For primitive fields or conflicting oneofs,
     * the last value is retained.
     */
    ALLOW_SINGULAR_OVERWRITES,
    /** An error is issued. */
    FORBID_SINGULAR_OVERWRITES
  }

  // Configuration captured from the Builder; see the corresponding Builder setters.
  private final boolean allowUnknownFields;
  private final boolean allowUnknownEnumValues;
  private final boolean allowUnknownExtensions;
  private final SingularOverwritePolicy singularOverwritePolicy;
  // Optional; may be null when the caller did not supply a parse-info tree builder.
  private TextFormatParseInfoTree.Builder parseInfoTreeBuilder;

  /** Creates a parser with the given settings; instances are obtained through {@link Builder}. */
  private Parser(
      boolean allowUnknownFields,
      boolean allowUnknownEnumValues,
      boolean allowUnknownExtensions,
      SingularOverwritePolicy singularOverwritePolicy,
      TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
    this.allowUnknownFields = allowUnknownFields;
    this.allowUnknownEnumValues = allowUnknownEnumValues;
    this.allowUnknownExtensions = allowUnknownExtensions;
    this.singularOverwritePolicy = singularOverwritePolicy;
    this.parseInfoTreeBuilder = parseInfoTreeBuilder;
  }

  /** Returns a new instance of {@link Builder}. */
  public static Builder newBuilder() {
    return new Builder();
  }

  /** Builder that can be used to obtain new instances of {@link Parser}. */
  public static class Builder {
    private boolean allowUnknownFields = false;
    // This builder exposes no setter for this flag, so it is always passed on as false.
    private boolean allowUnknownEnumValues = false;
    private boolean allowUnknownExtensions = false;
    private SingularOverwritePolicy singularOverwritePolicy =
        SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES;
    private TextFormatParseInfoTree.Builder parseInfoTreeBuilder = null;

    /**
     * Set whether this parser will allow unknown fields. By default, an exception is thrown if an
     * unknown field is encountered. If this is set, the parser will only log a warning. Allowing
     * unknown fields also allows unknown extensions.
     *
     * <p>Use of this parameter is discouraged because it may hide some errors (e.g. a spelling
     * error in a field name).
     */
    public Builder setAllowUnknownFields(boolean allowUnknownFields) {
      this.allowUnknownFields = allowUnknownFields;
      return this;
    }

    /**
     * Set whether this parser will allow unknown extensions. By default, an
     * exception is thrown if an unknown extension is encountered. If this is set to true,
     * the parser will only log a warning. Allowing unknown extensions does not
     * allow normal unknown fields.
     */
    public Builder setAllowUnknownExtensions(boolean allowUnknownExtensions) {
      this.allowUnknownExtensions = allowUnknownExtensions;
      return this;
    }

    /** Sets parser behavior when a non-repeated field appears more than once. */
    public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) {
      this.singularOverwritePolicy = p;
      return this;
    }

    /**
     * Sets the {@link TextFormatParseInfoTree.Builder} handed to the parser. It defaults to
     * {@code null}.
     */
    public Builder setParseInfoTreeBuilder(TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
      this.parseInfoTreeBuilder = parseInfoTreeBuilder;
      return this;
    }

    /** Returns a new {@link Parser} configured with the current settings of this builder. */
    public Parser build() {
      return new Parser(
          allowUnknownFields,
          allowUnknownEnumValues,
          allowUnknownExtensions,
          singularOverwritePolicy,
          parseInfoTreeBuilder);
    }
  }

  /**
   * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1465 */ merge(final Readable input, final Message.Builder builder)1466 public void merge(final Readable input, final Message.Builder builder) throws IOException { 1467 merge(input, ExtensionRegistry.getEmptyRegistry(), builder); 1468 } 1469 1470 /** 1471 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1472 */ merge(final CharSequence input, final Message.Builder builder)1473 public void merge(final CharSequence input, final Message.Builder builder) 1474 throws ParseException { 1475 merge(input, ExtensionRegistry.getEmptyRegistry(), builder); 1476 } 1477 1478 /** 1479 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1480 * Extensions will be recognized if they are registered in {@code extensionRegistry}. 1481 */ merge( final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1482 public void merge( 1483 final Readable input, 1484 final ExtensionRegistry extensionRegistry, 1485 final Message.Builder builder) 1486 throws IOException { 1487 // Read the entire input to a String then parse that. 1488 1489 // If StreamTokenizer were not quite so crippled, or if there were a kind 1490 // of Reader that could read in chunks that match some particular regex, 1491 // or if we wanted to write a custom Reader to tokenize our stream, then 1492 // we would not have to read to one big String. Alas, none of these is 1493 // the case. Oh well. 
1494 1495 merge(toStringBuilder(input), extensionRegistry, builder); 1496 } 1497 1498 1499 private static final int BUFFER_SIZE = 4096; 1500 1501 // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer) 1502 // overhead is worthwhile toStringBuilder(final Readable input)1503 private static StringBuilder toStringBuilder(final Readable input) throws IOException { 1504 final StringBuilder text = new StringBuilder(); 1505 final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE); 1506 while (true) { 1507 final int n = input.read(buffer); 1508 if (n == -1) { 1509 break; 1510 } 1511 buffer.flip(); 1512 text.append(buffer, 0, n); 1513 } 1514 return text; 1515 } 1516 1517 static final class UnknownField { 1518 static enum Type { 1519 FIELD, EXTENSION; 1520 } 1521 1522 final String message; 1523 final Type type; 1524 UnknownField(String message, Type type)1525 UnknownField(String message, Type type) { 1526 this.message = message; 1527 this.type = type; 1528 } 1529 } 1530 1531 // Check both unknown fields and unknown extensions and log warning messages 1532 // or throw exceptions according to the flag. 
checkUnknownFields(final List<UnknownField> unknownFields)1533 private void checkUnknownFields(final List<UnknownField> unknownFields) throws ParseException { 1534 if (unknownFields.isEmpty()) { 1535 return; 1536 } 1537 1538 StringBuilder msg = new StringBuilder("Input contains unknown fields and/or extensions:"); 1539 for (UnknownField field : unknownFields) { 1540 msg.append('\n').append(field.message); 1541 } 1542 1543 if (allowUnknownFields) { 1544 logger.warning(msg.toString()); 1545 return; 1546 } 1547 1548 int firstErrorIndex = 0; 1549 if (allowUnknownExtensions) { 1550 boolean allUnknownExtensions = true; 1551 for (UnknownField field : unknownFields) { 1552 if (field.type == UnknownField.Type.FIELD) { 1553 allUnknownExtensions = false; 1554 break; 1555 } 1556 ++firstErrorIndex; 1557 } 1558 if (allUnknownExtensions) { 1559 logger.warning(msg.toString()); 1560 return; 1561 } 1562 } 1563 1564 String[] lineColumn = unknownFields.get(firstErrorIndex).message.split(":"); 1565 throw new ParseException( 1566 Integer.parseInt(lineColumn[0]), Integer.parseInt(lineColumn[1]), msg.toString()); 1567 } 1568 1569 /** 1570 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1571 * Extensions will be recognized if they are registered in {@code extensionRegistry}. 
1572 */ merge( final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1573 public void merge( 1574 final CharSequence input, 1575 final ExtensionRegistry extensionRegistry, 1576 final Message.Builder builder) 1577 throws ParseException { 1578 final Tokenizer tokenizer = new Tokenizer(input); 1579 MessageReflection.BuilderAdapter target = new MessageReflection.BuilderAdapter(builder); 1580 1581 List<UnknownField> unknownFields = new ArrayList<UnknownField>(); 1582 1583 while (!tokenizer.atEnd()) { 1584 mergeField(tokenizer, extensionRegistry, target, unknownFields); 1585 } 1586 1587 checkUnknownFields(unknownFields); 1588 } 1589 1590 1591 /** Parse a single field from {@code tokenizer} and merge it into {@code builder}. */ mergeField( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, List<UnknownField> unknownFields)1592 private void mergeField( 1593 final Tokenizer tokenizer, 1594 final ExtensionRegistry extensionRegistry, 1595 final MessageReflection.MergeTarget target, 1596 List<UnknownField> unknownFields) 1597 throws ParseException { 1598 mergeField( 1599 tokenizer, 1600 extensionRegistry, 1601 target, 1602 parseInfoTreeBuilder, 1603 unknownFields); 1604 } 1605 1606 /** Parse a single field from {@code tokenizer} and merge it into {@code target}. 
*/ mergeField( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1607 private void mergeField( 1608 final Tokenizer tokenizer, 1609 final ExtensionRegistry extensionRegistry, 1610 final MessageReflection.MergeTarget target, 1611 TextFormatParseInfoTree.Builder parseTreeBuilder, 1612 List<UnknownField> unknownFields) 1613 throws ParseException { 1614 FieldDescriptor field = null; 1615 int startLine = tokenizer.getLine(); 1616 int startColumn = tokenizer.getColumn(); 1617 final Descriptor type = target.getDescriptorForType(); 1618 ExtensionRegistry.ExtensionInfo extension = null; 1619 1620 if (tokenizer.tryConsume("[")) { 1621 // An extension. 1622 final StringBuilder name = new StringBuilder(tokenizer.consumeIdentifier()); 1623 while (tokenizer.tryConsume(".")) { 1624 name.append('.'); 1625 name.append(tokenizer.consumeIdentifier()); 1626 } 1627 1628 extension = target.findExtensionByName(extensionRegistry, name.toString()); 1629 1630 if (extension == null) { 1631 String message = 1632 (tokenizer.getPreviousLine() + 1) 1633 + ":" 1634 + (tokenizer.getPreviousColumn() + 1) 1635 + ":\t" 1636 + type.getFullName() 1637 + ".[" 1638 + name 1639 + "]"; 1640 unknownFields.add(new UnknownField(message, UnknownField.Type.EXTENSION)); 1641 } else { 1642 if (extension.descriptor.getContainingType() != type) { 1643 throw tokenizer.parseExceptionPreviousToken( 1644 "Extension \"" 1645 + name 1646 + "\" does not extend message type \"" 1647 + type.getFullName() 1648 + "\"."); 1649 } 1650 field = extension.descriptor; 1651 } 1652 1653 tokenizer.consume("]"); 1654 } else { 1655 final String name = tokenizer.consumeIdentifier(); 1656 field = type.findFieldByName(name); 1657 1658 // Group names are expected to be capitalized as they appear in the 1659 // .proto file, which actually matches their type names, not their field 1660 // names. 
1661 if (field == null) { 1662 // Explicitly specify US locale so that this code does not break when 1663 // executing in Turkey. 1664 final String lowerName = name.toLowerCase(Locale.US); 1665 field = type.findFieldByName(lowerName); 1666 // If the case-insensitive match worked but the field is NOT a group, 1667 if (field != null && field.getType() != FieldDescriptor.Type.GROUP) { 1668 field = null; 1669 } 1670 } 1671 // Again, special-case group names as described above. 1672 if (field != null 1673 && field.getType() == FieldDescriptor.Type.GROUP 1674 && !field.getMessageType().getName().equals(name)) { 1675 field = null; 1676 } 1677 1678 if (field == null) { 1679 String message = (tokenizer.getPreviousLine() + 1) 1680 + ":" 1681 + (tokenizer.getPreviousColumn() + 1) 1682 + ":\t" 1683 + type.getFullName() 1684 + "." 1685 + name; 1686 unknownFields.add(new UnknownField(message, UnknownField.Type.FIELD)); 1687 } 1688 } 1689 1690 // Skips unknown fields. 1691 if (field == null) { 1692 // Try to guess the type of this field. 1693 // If this field is not a message, there should be a ":" between the 1694 // field name and the field value and also the field value should not 1695 // start with "{" or "<" which indicates the beginning of a message body. 1696 // If there is no ":" or there is a "{" or "<" after ":", this field has 1697 // to be a message or the input is ill-formed. 1698 if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) { 1699 skipFieldValue(tokenizer); 1700 } else { 1701 skipFieldMessage(tokenizer); 1702 } 1703 return; 1704 } 1705 1706 // Handle potential ':'. 
1707 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 1708 tokenizer.tryConsume(":"); // optional 1709 if (parseTreeBuilder != null) { 1710 TextFormatParseInfoTree.Builder childParseTreeBuilder = 1711 parseTreeBuilder.getBuilderForSubMessageField(field); 1712 consumeFieldValues( 1713 tokenizer, 1714 extensionRegistry, 1715 target, 1716 field, 1717 extension, 1718 childParseTreeBuilder, 1719 unknownFields); 1720 } else { 1721 consumeFieldValues( 1722 tokenizer, 1723 extensionRegistry, 1724 target, 1725 field, 1726 extension, 1727 parseTreeBuilder, 1728 unknownFields); 1729 } 1730 } else { 1731 tokenizer.consume(":"); // required 1732 consumeFieldValues( 1733 tokenizer, 1734 extensionRegistry, 1735 target, 1736 field, 1737 extension, 1738 parseTreeBuilder, 1739 unknownFields); 1740 } 1741 1742 if (parseTreeBuilder != null) { 1743 parseTreeBuilder.setLocation(field, TextFormatParseLocation.create(startLine, startColumn)); 1744 } 1745 1746 // For historical reasons, fields may optionally be separated by commas or 1747 // semicolons. 1748 if (!tokenizer.tryConsume(";")) { 1749 tokenizer.tryConsume(","); 1750 } 1751 } 1752 1753 /** 1754 * Parse a one or more field values from {@code tokenizer} and merge it into {@code builder}. 
1755 */ consumeFieldValues( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1756 private void consumeFieldValues( 1757 final Tokenizer tokenizer, 1758 final ExtensionRegistry extensionRegistry, 1759 final MessageReflection.MergeTarget target, 1760 final FieldDescriptor field, 1761 final ExtensionRegistry.ExtensionInfo extension, 1762 final TextFormatParseInfoTree.Builder parseTreeBuilder, 1763 List<UnknownField> unknownFields) 1764 throws ParseException { 1765 // Support specifying repeated field values as a comma-separated list. 1766 // Ex."foo: [1, 2, 3]" 1767 if (field.isRepeated() && tokenizer.tryConsume("[")) { 1768 if (!tokenizer.tryConsume("]")) { // Allow "foo: []" to be treated as empty. 1769 while (true) { 1770 consumeFieldValue( 1771 tokenizer, 1772 extensionRegistry, 1773 target, 1774 field, 1775 extension, 1776 parseTreeBuilder, 1777 unknownFields); 1778 if (tokenizer.tryConsume("]")) { 1779 // End of list. 1780 break; 1781 } 1782 tokenizer.consume(","); 1783 } 1784 } 1785 } else { 1786 consumeFieldValue( 1787 tokenizer, 1788 extensionRegistry, 1789 target, 1790 field, 1791 extension, 1792 parseTreeBuilder, 1793 unknownFields); 1794 } 1795 } 1796 1797 /** Parse a single field value from {@code tokenizer} and merge it into {@code builder}. 
*/ consumeFieldValue( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1798 private void consumeFieldValue( 1799 final Tokenizer tokenizer, 1800 final ExtensionRegistry extensionRegistry, 1801 final MessageReflection.MergeTarget target, 1802 final FieldDescriptor field, 1803 final ExtensionRegistry.ExtensionInfo extension, 1804 final TextFormatParseInfoTree.Builder parseTreeBuilder, 1805 List<UnknownField> unknownFields) 1806 throws ParseException { 1807 if (singularOverwritePolicy == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES 1808 && !field.isRepeated()) { 1809 if (target.hasField(field)) { 1810 throw tokenizer.parseExceptionPreviousToken( 1811 "Non-repeated field \"" + field.getFullName() + "\" cannot be overwritten."); 1812 } else if (field.getContainingOneof() != null 1813 && target.hasOneof(field.getContainingOneof())) { 1814 Descriptors.OneofDescriptor oneof = field.getContainingOneof(); 1815 throw tokenizer.parseExceptionPreviousToken( 1816 "Field \"" 1817 + field.getFullName() 1818 + "\" is specified along with field \"" 1819 + target.getOneofFieldDescriptor(oneof).getFullName() 1820 + "\", another member of oneof \"" 1821 + oneof.getName() 1822 + "\"."); 1823 } 1824 } 1825 1826 Object value = null; 1827 1828 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 1829 final String endToken; 1830 if (tokenizer.tryConsume("<")) { 1831 endToken = ">"; 1832 } else { 1833 tokenizer.consume("{"); 1834 endToken = "}"; 1835 } 1836 1837 Message defaultInstance = (extension == null) ? 
null : extension.defaultInstance; 1838 MessageReflection.MergeTarget subField = 1839 target.newMergeTargetForField(field, defaultInstance); 1840 1841 while (!tokenizer.tryConsume(endToken)) { 1842 if (tokenizer.atEnd()) { 1843 throw tokenizer.parseException("Expected \"" + endToken + "\"."); 1844 } 1845 mergeField(tokenizer, extensionRegistry, subField, parseTreeBuilder, unknownFields); 1846 } 1847 1848 value = subField.finish(); 1849 } else { 1850 switch (field.getType()) { 1851 case INT32: 1852 case SINT32: 1853 case SFIXED32: 1854 value = tokenizer.consumeInt32(); 1855 break; 1856 1857 case INT64: 1858 case SINT64: 1859 case SFIXED64: 1860 value = tokenizer.consumeInt64(); 1861 break; 1862 1863 case UINT32: 1864 case FIXED32: 1865 value = tokenizer.consumeUInt32(); 1866 break; 1867 1868 case UINT64: 1869 case FIXED64: 1870 value = tokenizer.consumeUInt64(); 1871 break; 1872 1873 case FLOAT: 1874 value = tokenizer.consumeFloat(); 1875 break; 1876 1877 case DOUBLE: 1878 value = tokenizer.consumeDouble(); 1879 break; 1880 1881 case BOOL: 1882 value = tokenizer.consumeBoolean(); 1883 break; 1884 1885 case STRING: 1886 value = tokenizer.consumeString(); 1887 break; 1888 1889 case BYTES: 1890 value = tokenizer.consumeByteString(); 1891 break; 1892 1893 case ENUM: 1894 final EnumDescriptor enumType = field.getEnumType(); 1895 1896 if (tokenizer.lookingAtInteger()) { 1897 final int number = tokenizer.consumeInt32(); 1898 value = enumType.findValueByNumber(number); 1899 if (value == null) { 1900 String unknownValueMsg = 1901 "Enum type \"" 1902 + enumType.getFullName() 1903 + "\" has no value with number " 1904 + number 1905 + '.'; 1906 if (allowUnknownEnumValues) { 1907 logger.warning(unknownValueMsg); 1908 return; 1909 } else { 1910 throw tokenizer.parseExceptionPreviousToken( 1911 "Enum type \"" 1912 + enumType.getFullName() 1913 + "\" has no value with number " 1914 + number 1915 + '.'); 1916 } 1917 } 1918 } else { 1919 final String id = 
tokenizer.consumeIdentifier(); 1920 value = enumType.findValueByName(id); 1921 if (value == null) { 1922 String unknownValueMsg = 1923 "Enum type \"" 1924 + enumType.getFullName() 1925 + "\" has no value named \"" 1926 + id 1927 + "\"."; 1928 if (allowUnknownEnumValues) { 1929 logger.warning(unknownValueMsg); 1930 return; 1931 } else { 1932 throw tokenizer.parseExceptionPreviousToken(unknownValueMsg); 1933 } 1934 } 1935 } 1936 1937 break; 1938 1939 case MESSAGE: 1940 case GROUP: 1941 throw new RuntimeException("Can't get here."); 1942 } 1943 } 1944 1945 if (field.isRepeated()) { 1946 // TODO(b/29122459): If field.isMapField() and FORBID_SINGULAR_OVERWRITES mode, 1947 // check for duplicate map keys here. 1948 target.addRepeatedField(field, value); 1949 } else { 1950 target.setField(field, value); 1951 } 1952 } 1953 1954 1955 /** Skips the next field including the field's name and value. */ skipField(Tokenizer tokenizer)1956 private void skipField(Tokenizer tokenizer) throws ParseException { 1957 if (tokenizer.tryConsume("[")) { 1958 // Extension name. 1959 do { 1960 tokenizer.consumeIdentifier(); 1961 } while (tokenizer.tryConsume(".")); 1962 tokenizer.consume("]"); 1963 } else { 1964 tokenizer.consumeIdentifier(); 1965 } 1966 1967 // Try to guess the type of this field. 1968 // If this field is not a message, there should be a ":" between the 1969 // field name and the field value and also the field value should not 1970 // start with "{" or "<" which indicates the beginning of a message body. 1971 // If there is no ":" or there is a "{" or "<" after ":", this field has 1972 // to be a message or the input is ill-formed. 1973 if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") && !tokenizer.lookingAt("{")) { 1974 skipFieldValue(tokenizer); 1975 } else { 1976 skipFieldMessage(tokenizer); 1977 } 1978 // For historical reasons, fields may optionally be separated by commas or 1979 // semicolons. 
1980 if (!tokenizer.tryConsume(";")) { 1981 tokenizer.tryConsume(","); 1982 } 1983 } 1984 1985 /** 1986 * Skips the whole body of a message including the beginning delimiter and the ending delimiter. 1987 */ skipFieldMessage(Tokenizer tokenizer)1988 private void skipFieldMessage(Tokenizer tokenizer) throws ParseException { 1989 final String delimiter; 1990 if (tokenizer.tryConsume("<")) { 1991 delimiter = ">"; 1992 } else { 1993 tokenizer.consume("{"); 1994 delimiter = "}"; 1995 } 1996 while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) { 1997 skipField(tokenizer); 1998 } 1999 tokenizer.consume(delimiter); 2000 } 2001 2002 /** Skips a field value. */ skipFieldValue(Tokenizer tokenizer)2003 private void skipFieldValue(Tokenizer tokenizer) throws ParseException { 2004 if (tokenizer.tryConsumeString()) { 2005 while (tokenizer.tryConsumeString()) {} 2006 return; 2007 } 2008 if (!tokenizer.tryConsumeIdentifier() // includes enum & boolean 2009 && !tokenizer.tryConsumeInt64() // includes int32 2010 && !tokenizer.tryConsumeUInt64() // includes uint32 2011 && !tokenizer.tryConsumeDouble() 2012 && !tokenizer.tryConsumeFloat()) { 2013 throw tokenizer.parseException("Invalid field value: " + tokenizer.currentToken); 2014 } 2015 } 2016 } 2017 2018 // ================================================================= 2019 // Utility functions 2020 // 2021 // Some of these methods are package-private because Descriptors.java uses 2022 // them. 2023 2024 /** 2025 * Escapes bytes in the format used in protocol buffer text format, which is the same as the 2026 * format used for C string literals. All bytes that are not printable 7-bit ASCII characters are 2027 * escaped, as well as backslash, single-quote, and double-quote characters. Characters for which 2028 * no defined short-hand escape sequence is defined will be escaped using 3-digit octal sequences. 
2029 */ escapeBytes(ByteString input)2030 public static String escapeBytes(ByteString input) { 2031 return TextFormatEscaper.escapeBytes(input); 2032 } 2033 2034 /** Like {@link #escapeBytes(ByteString)}, but used for byte array. */ escapeBytes(byte[] input)2035 public static String escapeBytes(byte[] input) { 2036 return TextFormatEscaper.escapeBytes(input); 2037 } 2038 2039 /** 2040 * Un-escape a byte sequence as escaped using {@link #escapeBytes(ByteString)}. Two-digit hex 2041 * escapes (starting with "\x") are also recognized. 2042 */ unescapeBytes(final CharSequence charString)2043 public static ByteString unescapeBytes(final CharSequence charString) 2044 throws InvalidEscapeSequenceException { 2045 // First convert the Java character sequence to UTF-8 bytes. 2046 ByteString input = ByteString.copyFromUtf8(charString.toString()); 2047 // Then unescape certain byte sequences introduced by ASCII '\\'. The valid 2048 // escapes can all be expressed with ASCII characters, so it is safe to 2049 // operate on bytes here. 2050 // 2051 // Unescaping the input byte array will result in a byte sequence that's no 2052 // longer than the input. That's because each escape sequence is between 2053 // two and four bytes long and stands for a single byte. 2054 final byte[] result = new byte[input.size()]; 2055 int pos = 0; 2056 for (int i = 0; i < input.size(); i++) { 2057 byte c = input.byteAt(i); 2058 if (c == '\\') { 2059 if (i + 1 < input.size()) { 2060 ++i; 2061 c = input.byteAt(i); 2062 if (isOctal(c)) { 2063 // Octal escape. 2064 int code = digitValue(c); 2065 if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) { 2066 ++i; 2067 code = code * 8 + digitValue(input.byteAt(i)); 2068 } 2069 if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) { 2070 ++i; 2071 code = code * 8 + digitValue(input.byteAt(i)); 2072 } 2073 // TODO: Check that 0 <= code && code <= 0xFF. 
2074 result[pos++] = (byte) code; 2075 } else { 2076 switch (c) { 2077 case 'a': 2078 result[pos++] = 0x07; 2079 break; 2080 case 'b': 2081 result[pos++] = '\b'; 2082 break; 2083 case 'f': 2084 result[pos++] = '\f'; 2085 break; 2086 case 'n': 2087 result[pos++] = '\n'; 2088 break; 2089 case 'r': 2090 result[pos++] = '\r'; 2091 break; 2092 case 't': 2093 result[pos++] = '\t'; 2094 break; 2095 case 'v': 2096 result[pos++] = 0x0b; 2097 break; 2098 case '\\': 2099 result[pos++] = '\\'; 2100 break; 2101 case '\'': 2102 result[pos++] = '\''; 2103 break; 2104 case '"': 2105 result[pos++] = '\"'; 2106 break; 2107 2108 case 'x': 2109 // hex escape 2110 int code = 0; 2111 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) { 2112 ++i; 2113 code = digitValue(input.byteAt(i)); 2114 } else { 2115 throw new InvalidEscapeSequenceException( 2116 "Invalid escape sequence: '\\x' with no digits"); 2117 } 2118 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) { 2119 ++i; 2120 code = code * 16 + digitValue(input.byteAt(i)); 2121 } 2122 result[pos++] = (byte) code; 2123 break; 2124 2125 default: 2126 throw new InvalidEscapeSequenceException( 2127 "Invalid escape sequence: '\\" + (char) c + '\''); 2128 } 2129 } 2130 } else { 2131 throw new InvalidEscapeSequenceException( 2132 "Invalid escape sequence: '\\' at end of string."); 2133 } 2134 } else { 2135 result[pos++] = c; 2136 } 2137 } 2138 2139 return result.length == pos 2140 ? ByteString.wrap(result) // This reference has not been out of our control. 2141 : ByteString.copyFrom(result, 0, pos); 2142 } 2143 2144 /** 2145 * Thrown by {@link TextFormat#unescapeBytes} and {@link TextFormat#unescapeText} when an invalid 2146 * escape sequence is seen. 
2147 */ 2148 public static class InvalidEscapeSequenceException extends IOException { 2149 private static final long serialVersionUID = -8164033650142593304L; 2150 InvalidEscapeSequenceException(final String description)2151 InvalidEscapeSequenceException(final String description) { 2152 super(description); 2153 } 2154 } 2155 2156 /** 2157 * Like {@link #escapeBytes(ByteString)}, but escapes a text string. Non-ASCII characters are 2158 * first encoded as UTF-8, then each byte is escaped individually as a 3-digit octal escape. Yes, 2159 * it's weird. 2160 */ escapeText(final String input)2161 static String escapeText(final String input) { 2162 return escapeBytes(ByteString.copyFromUtf8(input)); 2163 } 2164 2165 /** Escape double quotes and backslashes in a String for emittingUnicode output of a message. */ escapeDoubleQuotesAndBackslashes(final String input)2166 public static String escapeDoubleQuotesAndBackslashes(final String input) { 2167 return TextFormatEscaper.escapeDoubleQuotesAndBackslashes(input); 2168 } 2169 2170 /** 2171 * Un-escape a text string as escaped using {@link #escapeText(String)}. Two-digit hex escapes 2172 * (starting with "\x") are also recognized. 2173 */ unescapeText(final String input)2174 static String unescapeText(final String input) throws InvalidEscapeSequenceException { 2175 return unescapeBytes(input).toStringUtf8(); 2176 } 2177 2178 /** Is this an octal digit? */ isOctal(final byte c)2179 private static boolean isOctal(final byte c) { 2180 return '0' <= c && c <= '7'; 2181 } 2182 2183 /** Is this a hex digit? */ isHex(final byte c)2184 private static boolean isHex(final byte c) { 2185 return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'); 2186 } 2187 2188 /** 2189 * Interpret a character as a digit (in any base up to 36) and return the numeric value. This is 2190 * like {@code Character.digit()} but we don't accept non-ASCII digits. 
2191 */ digitValue(final byte c)2192 private static int digitValue(final byte c) { 2193 if ('0' <= c && c <= '9') { 2194 return c - '0'; 2195 } else if ('a' <= c && c <= 'z') { 2196 return c - 'a' + 10; 2197 } else { 2198 return c - 'A' + 10; 2199 } 2200 } 2201 2202 /** 2203 * Parse a 32-bit signed integer from the text. Unlike the Java standard {@code 2204 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2205 * and octal numbers, respectively. 2206 */ parseInt32(final String text)2207 static int parseInt32(final String text) throws NumberFormatException { 2208 return (int) parseInteger(text, true, false); 2209 } 2210 2211 /** 2212 * Parse a 32-bit unsigned integer from the text. Unlike the Java standard {@code 2213 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2214 * and octal numbers, respectively. The result is coerced to a (signed) {@code int} when returned 2215 * since Java has no unsigned integer type. 2216 */ parseUInt32(final String text)2217 static int parseUInt32(final String text) throws NumberFormatException { 2218 return (int) parseInteger(text, false, false); 2219 } 2220 2221 /** 2222 * Parse a 64-bit signed integer from the text. Unlike the Java standard {@code 2223 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2224 * and octal numbers, respectively. 2225 */ parseInt64(final String text)2226 static long parseInt64(final String text) throws NumberFormatException { 2227 return parseInteger(text, true, true); 2228 } 2229 2230 /** 2231 * Parse a 64-bit unsigned integer from the text. Unlike the Java standard {@code 2232 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2233 * and octal numbers, respectively. The result is coerced to a (signed) {@code long} when returned 2234 * since Java has no unsigned long type. 
2235 */ parseUInt64(final String text)2236 static long parseUInt64(final String text) throws NumberFormatException { 2237 return parseInteger(text, false, true); 2238 } 2239 parseInteger(final String text, final boolean isSigned, final boolean isLong)2240 private static long parseInteger(final String text, final boolean isSigned, final boolean isLong) 2241 throws NumberFormatException { 2242 int pos = 0; 2243 2244 boolean negative = false; 2245 if (text.startsWith("-", pos)) { 2246 if (!isSigned) { 2247 throw new NumberFormatException("Number must be positive: " + text); 2248 } 2249 ++pos; 2250 negative = true; 2251 } 2252 2253 int radix = 10; 2254 if (text.startsWith("0x", pos)) { 2255 pos += 2; 2256 radix = 16; 2257 } else if (text.startsWith("0", pos)) { 2258 radix = 8; 2259 } 2260 2261 final String numberText = text.substring(pos); 2262 2263 long result = 0; 2264 if (numberText.length() < 16) { 2265 // Can safely assume no overflow. 2266 result = Long.parseLong(numberText, radix); 2267 if (negative) { 2268 result = -result; 2269 } 2270 2271 // Check bounds. 2272 // No need to check for 64-bit numbers since they'd have to be 16 chars 2273 // or longer to overflow. 2274 if (!isLong) { 2275 if (isSigned) { 2276 if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) { 2277 throw new NumberFormatException( 2278 "Number out of range for 32-bit signed integer: " + text); 2279 } 2280 } else { 2281 if (result >= (1L << 32) || result < 0) { 2282 throw new NumberFormatException( 2283 "Number out of range for 32-bit unsigned integer: " + text); 2284 } 2285 } 2286 } 2287 } else { 2288 BigInteger bigValue = new BigInteger(numberText, radix); 2289 if (negative) { 2290 bigValue = bigValue.negate(); 2291 } 2292 2293 // Check bounds. 
2294 if (!isLong) { 2295 if (isSigned) { 2296 if (bigValue.bitLength() > 31) { 2297 throw new NumberFormatException( 2298 "Number out of range for 32-bit signed integer: " + text); 2299 } 2300 } else { 2301 if (bigValue.bitLength() > 32) { 2302 throw new NumberFormatException( 2303 "Number out of range for 32-bit unsigned integer: " + text); 2304 } 2305 } 2306 } else { 2307 if (isSigned) { 2308 if (bigValue.bitLength() > 63) { 2309 throw new NumberFormatException( 2310 "Number out of range for 64-bit signed integer: " + text); 2311 } 2312 } else { 2313 if (bigValue.bitLength() > 64) { 2314 throw new NumberFormatException( 2315 "Number out of range for 64-bit unsigned integer: " + text); 2316 } 2317 } 2318 } 2319 2320 result = bigValue.longValue(); 2321 } 2322 2323 return result; 2324 } 2325 } 2326