1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file or at 6 // https://developers.google.com/open-source/licenses/bsd 7 8 package com.google.protobuf; 9 10 import com.google.protobuf.Descriptors.Descriptor; 11 import com.google.protobuf.Descriptors.EnumDescriptor; 12 import com.google.protobuf.Descriptors.EnumValueDescriptor; 13 import com.google.protobuf.Descriptors.FieldDescriptor; 14 import com.google.protobuf.MessageReflection.MergeTarget; 15 import java.io.IOException; 16 import java.math.BigInteger; 17 import java.nio.CharBuffer; 18 import java.util.ArrayList; 19 import java.util.Collections; 20 import java.util.HashMap; 21 import java.util.HashSet; 22 import java.util.List; 23 import java.util.Locale; 24 import java.util.Map; 25 import java.util.Set; 26 import java.util.logging.Logger; 27 import java.util.regex.Matcher; 28 import java.util.regex.Pattern; 29 30 /** 31 * Provide text parsing and formatting support for proto2 instances. The implementation largely 32 * follows text_format.cc. 33 * 34 * @author wenboz@google.com Wenbo Zhu 35 * @author kenton@google.com Kenton Varda 36 */ 37 public final class TextFormat { TextFormat()38 private TextFormat() {} 39 40 private static final Logger logger = Logger.getLogger(TextFormat.class.getName()); 41 42 private static final String DEBUG_STRING_SILENT_MARKER = "\t "; 43 44 private static final String REDACTED_MARKER = "[REDACTED]"; 45 46 /** 47 * Generates a human readable form of this message, useful for debugging and other purposes, with 48 * no newline characters. This is just a trivial wrapper around {@link 49 * TextFormat.Printer#shortDebugString(MessageOrBuilder)}. 
50 * 51 * @deprecated Use {@code printer().emittingSingleLine(true).printToString(MessageOrBuilder)} 52 */ 53 @Deprecated shortDebugString(final MessageOrBuilder message)54 public static String shortDebugString(final MessageOrBuilder message) { 55 return printer() 56 .emittingSingleLine(true) 57 .printToString(message, Printer.FieldReporterLevel.SHORT_DEBUG_STRING); 58 } 59 60 /** 61 * Outputs a textual representation of the value of an unknown field. 62 * 63 * @param tag the field's tag number 64 * @param value the value of the field 65 * @param output the output to which to append the formatted value 66 * @throws ClassCastException if the value is not appropriate for the given field descriptor 67 * @throws IOException if there is an exception writing to the output 68 */ printUnknownFieldValue( final int tag, final Object value, final Appendable output)69 public static void printUnknownFieldValue( 70 final int tag, final Object value, final Appendable output) throws IOException { 71 printUnknownFieldValue(tag, value, setSingleLineOutput(output, false), false); 72 } 73 printUnknownFieldValue( final int tag, final Object value, final TextGenerator generator, boolean redact)74 private static void printUnknownFieldValue( 75 final int tag, final Object value, final TextGenerator generator, boolean redact) 76 throws IOException { 77 switch (WireFormat.getTagWireType(tag)) { 78 case WireFormat.WIRETYPE_VARINT: 79 generator.print(unsignedToString((Long) value)); 80 break; 81 case WireFormat.WIRETYPE_FIXED32: 82 generator.print(String.format((Locale) null, "0x%08x", (Integer) value)); 83 break; 84 case WireFormat.WIRETYPE_FIXED64: 85 generator.print(String.format((Locale) null, "0x%016x", (Long) value)); 86 break; 87 case WireFormat.WIRETYPE_LENGTH_DELIMITED: 88 try { 89 // Try to parse and print the field as an embedded message 90 UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value); 91 generator.print("{"); 92 generator.eol(); 93 generator.indent(); 94 
Printer.printUnknownFields(message, generator, redact); 95 generator.outdent(); 96 generator.print("}"); 97 } catch (InvalidProtocolBufferException e) { 98 // If not parseable as a message, print as a String 99 generator.print("\""); 100 generator.print(escapeBytes((ByteString) value)); 101 generator.print("\""); 102 } 103 break; 104 case WireFormat.WIRETYPE_START_GROUP: 105 Printer.printUnknownFields((UnknownFieldSet) value, generator, redact); 106 break; 107 default: 108 throw new IllegalArgumentException("Bad tag: " + tag); 109 } 110 } 111 112 /** Printer instance which escapes non-ASCII characters. */ printer()113 public static Printer printer() { 114 return Printer.DEFAULT_TEXT_FORMAT; 115 } 116 117 /** Printer instance which escapes non-ASCII characters and prints in the debug format. */ debugFormatPrinter()118 public static Printer debugFormatPrinter() { 119 return Printer.DEFAULT_DEBUG_FORMAT; 120 } 121 122 /** Helper class for converting protobufs to text. */ 123 public static final class Printer { 124 125 // Printer instance which escapes non-ASCII characters and prints in the text format. 126 private static final Printer DEFAULT_TEXT_FORMAT = 127 new Printer( 128 true, 129 TypeRegistry.getEmptyTypeRegistry(), 130 ExtensionRegistryLite.getEmptyRegistry(), 131 false, 132 false); 133 134 // Printer instance which escapes non-ASCII characters and prints in the debug format. 135 private static final Printer DEFAULT_DEBUG_FORMAT = 136 new Printer( 137 true, 138 TypeRegistry.getEmptyTypeRegistry(), 139 ExtensionRegistryLite.getEmptyRegistry(), 140 true, 141 false); 142 143 /** 144 * A list of the public APIs that output human-readable text from a message. A higher-level API 145 * must be larger than any lower-level APIs it calls under the hood, e.g 146 * DEBUG_MULTILINE.compareTo(PRINTER_PRINT_TO_STRING) > 0. The inverse is not necessarily true. 
/**
 * A list of the public APIs that output human-readable text from a message. A higher-level API
 * must be larger than any lower-level APIs it calls under the hood, e.g.
 * DEBUG_MULTILINE.compareTo(PRINTER_PRINT_TO_STRING) > 0. The inverse is not necessarily true.
 *
 * <p>Levels are compared with {@code compareTo}, i.e. by declaration order, so constants must
 * not be reordered. The explicit {@code index} values currently mirror that order.
 */
static enum FieldReporterLevel {
  NO_REPORT(0),
  PRINT(1),
  PRINTER_PRINT_TO_STRING(2),
  TEXTFORMAT_PRINT_TO_STRING(3),
  PRINT_UNICODE(4),
  SHORT_DEBUG_STRING(5),
  LEGACY_MULTILINE(6),
  LEGACY_SINGLE_LINE(7),
  DEBUG_MULTILINE(8),
  DEBUG_SINGLE_LINE(9),
  ABSTRACT_TO_STRING(10),
  ABSTRACT_MUTABLE_TO_STRING(11);
  // Explicit numeric position of the level. NOTE(review): it is only assigned here and is not
  // read anywhere in the visible part of this file -- confirm whether other code uses it.
  private final int index;

  /** Stores the explicit numeric position of this level. */
  FieldReporterLevel(int index) {
    this.index = index;
  }
}
184 private static final ThreadLocal<FieldReporterLevel> sensitiveFieldReportingLevel = 185 new ThreadLocal<FieldReporterLevel>() { 186 @Override 187 protected FieldReporterLevel initialValue() { 188 return FieldReporterLevel.ABSTRACT_MUTABLE_TO_STRING; 189 } 190 }; 191 Printer( boolean escapeNonAscii, TypeRegistry typeRegistry, ExtensionRegistryLite extensionRegistry, boolean enablingSafeDebugFormat, boolean singleLine)192 private Printer( 193 boolean escapeNonAscii, 194 TypeRegistry typeRegistry, 195 ExtensionRegistryLite extensionRegistry, 196 boolean enablingSafeDebugFormat, 197 boolean singleLine) { 198 this.escapeNonAscii = escapeNonAscii; 199 this.typeRegistry = typeRegistry; 200 this.extensionRegistry = extensionRegistry; 201 this.enablingSafeDebugFormat = enablingSafeDebugFormat; 202 this.singleLine = singleLine; 203 } 204 205 /** 206 * Return a new Printer instance with the specified escape mode. 207 * 208 * @param escapeNonAscii If true, the new Printer will escape non-ASCII characters (this is the 209 * default behavior. If false, the new Printer will print non-ASCII characters as is. In 210 * either case, the new Printer still escapes newlines and quotes in strings. 211 * @return a new Printer that clones all other configurations from the current {@link Printer}, 212 * with the escape mode set to the given parameter. 213 */ escapingNonAscii(boolean escapeNonAscii)214 public Printer escapingNonAscii(boolean escapeNonAscii) { 215 return new Printer( 216 escapeNonAscii, typeRegistry, extensionRegistry, enablingSafeDebugFormat, singleLine); 217 } 218 219 /** 220 * Creates a new {@link Printer} using the given typeRegistry. The new Printer clones all other 221 * configurations from the current {@link Printer}. 222 * 223 * @throws IllegalArgumentException if a registry is already set. 
224 */ usingTypeRegistry(TypeRegistry typeRegistry)225 public Printer usingTypeRegistry(TypeRegistry typeRegistry) { 226 if (this.typeRegistry != TypeRegistry.getEmptyTypeRegistry()) { 227 throw new IllegalArgumentException("Only one typeRegistry is allowed."); 228 } 229 return new Printer( 230 escapeNonAscii, typeRegistry, extensionRegistry, enablingSafeDebugFormat, singleLine); 231 } 232 233 /** 234 * Creates a new {@link Printer} using the given extensionRegistry. The new Printer clones all 235 * other configurations from the current {@link Printer}. 236 * 237 * @throws IllegalArgumentException if a registry is already set. 238 */ usingExtensionRegistry(ExtensionRegistryLite extensionRegistry)239 public Printer usingExtensionRegistry(ExtensionRegistryLite extensionRegistry) { 240 if (this.extensionRegistry != ExtensionRegistryLite.getEmptyRegistry()) { 241 throw new IllegalArgumentException("Only one extensionRegistry is allowed."); 242 } 243 return new Printer( 244 escapeNonAscii, typeRegistry, extensionRegistry, enablingSafeDebugFormat, singleLine); 245 } 246 247 /** 248 * Return a new Printer instance that outputs a redacted and unstable format suitable for 249 * debugging. 250 * 251 * @param enablingSafeDebugFormat If true, the new Printer will redact all proto fields that are 252 * marked by a debug_redact=true option, and apply an unstable prefix to the output. 253 * @return a new Printer that clones all other configurations from the current {@link Printer}, 254 * with the enablingSafeDebugFormat mode set to the given parameter. 255 */ enablingSafeDebugFormat(boolean enablingSafeDebugFormat)256 Printer enablingSafeDebugFormat(boolean enablingSafeDebugFormat) { 257 return new Printer( 258 escapeNonAscii, typeRegistry, extensionRegistry, enablingSafeDebugFormat, singleLine); 259 } 260 261 /** 262 * Return a new Printer instance with the specified line formatting status. 263 * 264 * @param singleLine If true, the new Printer will output no newline characters. 
265 * @return a new Printer that clones all other configurations from the current {@link Printer}, 266 * with the singleLine mode set to the given parameter. 267 */ emittingSingleLine(boolean singleLine)268 public Printer emittingSingleLine(boolean singleLine) { 269 return new Printer( 270 escapeNonAscii, typeRegistry, extensionRegistry, enablingSafeDebugFormat, singleLine); 271 } 272 setSensitiveFieldReportingLevel(FieldReporterLevel level)273 void setSensitiveFieldReportingLevel(FieldReporterLevel level) { 274 Printer.sensitiveFieldReportingLevel.set(level); 275 } 276 277 /** 278 * Outputs a textual representation of the Protocol Message supplied into the parameter output. 279 * (This representation is the new version of the classic "ProtocolPrinter" output from the 280 * original Protocol Buffer system) 281 */ print(final MessageOrBuilder message, final Appendable output)282 public void print(final MessageOrBuilder message, final Appendable output) throws IOException { 283 print(message, output, FieldReporterLevel.PRINT); 284 } 285 print(final MessageOrBuilder message, final Appendable output, FieldReporterLevel level)286 void print(final MessageOrBuilder message, final Appendable output, FieldReporterLevel level) 287 throws IOException { 288 TextGenerator generator = setSingleLineOutput(output, this.singleLine, level); 289 print(message, generator); 290 } 291 292 /** Outputs a textual representation of {@code fields} to {@code output}. 
*/ print(final UnknownFieldSet fields, final Appendable output)293 public void print(final UnknownFieldSet fields, final Appendable output) throws IOException { 294 printUnknownFields( 295 fields, setSingleLineOutput(output, this.singleLine), this.enablingSafeDebugFormat); 296 } 297 print(final MessageOrBuilder message, final TextGenerator generator)298 private void print(final MessageOrBuilder message, final TextGenerator generator) 299 throws IOException { 300 if (message.getDescriptorForType().getFullName().equals("google.protobuf.Any") 301 && printAny(message, generator)) { 302 return; 303 } 304 printMessage(message, generator); 305 } 306 applyUnstablePrefix(final Appendable output)307 private void applyUnstablePrefix(final Appendable output) { 308 try { 309 output.append(""); 310 } catch (IOException e) { 311 throw new IllegalStateException(e); 312 } 313 } 314 315 /** 316 * Attempt to print the 'google.protobuf.Any' message in a human-friendly format. Returns false 317 * if the message isn't a valid 'google.protobuf.Any' message (in which case the message should 318 * be rendered just like a regular message to help debugging). 319 */ printAny(final MessageOrBuilder message, final TextGenerator generator)320 private boolean printAny(final MessageOrBuilder message, final TextGenerator generator) 321 throws IOException { 322 Descriptor messageType = message.getDescriptorForType(); 323 FieldDescriptor typeUrlField = messageType.findFieldByNumber(1); 324 FieldDescriptor valueField = messageType.findFieldByNumber(2); 325 if (typeUrlField == null 326 || typeUrlField.getType() != FieldDescriptor.Type.STRING 327 || valueField == null 328 || valueField.getType() != FieldDescriptor.Type.BYTES) { 329 // The message may look like an Any but isn't actually an Any message (might happen if the 330 // user tries to use DynamicMessage to construct an Any from incomplete Descriptor). 
331 return false; 332 } 333 String typeUrl = (String) message.getField(typeUrlField); 334 // If type_url is not set, we will not be able to decode the content of the value, so just 335 // print out the Any like a regular message. 336 if (typeUrl.isEmpty()) { 337 return false; 338 } 339 Object value = message.getField(valueField); 340 341 Message.Builder contentBuilder = null; 342 try { 343 Descriptor contentType = typeRegistry.getDescriptorForTypeUrl(typeUrl); 344 if (contentType == null) { 345 return false; 346 } 347 contentBuilder = DynamicMessage.getDefaultInstance(contentType).newBuilderForType(); 348 contentBuilder.mergeFrom((ByteString) value, extensionRegistry); 349 } catch (InvalidProtocolBufferException e) { 350 // The value of Any is malformed. We cannot print it out nicely, so fallback to printing out 351 // the type_url and value as bytes. Note that we fail open here to be consistent with 352 // text_format.cc, and also to allow a way for users to inspect the content of the broken 353 // message. 
354 return false; 355 } 356 generator.print("["); 357 generator.print(typeUrl); 358 generator.print("] {"); 359 generator.eol(); 360 generator.indent(); 361 print(contentBuilder, generator); 362 generator.outdent(); 363 generator.print("}"); 364 generator.eol(); 365 return true; 366 } 367 printFieldToString(final FieldDescriptor field, final Object value)368 public String printFieldToString(final FieldDescriptor field, final Object value) { 369 try { 370 final StringBuilder text = new StringBuilder(); 371 if (enablingSafeDebugFormat) { 372 applyUnstablePrefix(text); 373 } 374 printField(field, value, text); 375 return text.toString(); 376 } catch (IOException e) { 377 throw new IllegalStateException(e); 378 } 379 } 380 printField(final FieldDescriptor field, final Object value, final Appendable output)381 public void printField(final FieldDescriptor field, final Object value, final Appendable output) 382 throws IOException { 383 printField(field, value, setSingleLineOutput(output, this.singleLine)); 384 } 385 printField( final FieldDescriptor field, final Object value, final TextGenerator generator)386 private void printField( 387 final FieldDescriptor field, final Object value, final TextGenerator generator) 388 throws IOException { 389 // Sort map field entries by key 390 if (field.isMapField()) { 391 List<MapEntryAdapter> adapters = new ArrayList<>(); 392 for (Object entry : (List<?>) value) { 393 adapters.add(new MapEntryAdapter(entry, field)); 394 } 395 Collections.sort(adapters); 396 for (MapEntryAdapter adapter : adapters) { 397 printSingleField(field, adapter.getEntry(), generator); 398 } 399 } else if (field.isRepeated()) { 400 // Repeated field. Print each element. 401 for (Object element : (List<?>) value) { 402 printSingleField(field, element, generator); 403 } 404 } else { 405 printSingleField(field, value, generator); 406 } 407 } 408 409 /** An adapter class that can take a {@link MapEntry} and returns its key and entry. 
*/ 410 private static class MapEntryAdapter implements Comparable<MapEntryAdapter> { 411 private Object entry; 412 413 @SuppressWarnings({"rawtypes"}) 414 private MapEntry mapEntry; 415 416 private final FieldDescriptor.JavaType fieldType; 417 MapEntryAdapter(Object entry, FieldDescriptor fieldDescriptor)418 MapEntryAdapter(Object entry, FieldDescriptor fieldDescriptor) { 419 if (entry instanceof MapEntry) { 420 this.mapEntry = (MapEntry) entry; 421 } else { 422 this.entry = entry; 423 } 424 this.fieldType = extractFieldType(fieldDescriptor); 425 } 426 extractFieldType(FieldDescriptor fieldDescriptor)427 private static FieldDescriptor.JavaType extractFieldType(FieldDescriptor fieldDescriptor) { 428 return fieldDescriptor.getMessageType().getFields().get(0).getJavaType(); 429 } 430 getKey()431 Object getKey() { 432 if (mapEntry != null) { 433 return mapEntry.getKey(); 434 } 435 return null; 436 } 437 getEntry()438 Object getEntry() { 439 if (mapEntry != null) { 440 return mapEntry; 441 } 442 return entry; 443 } 444 445 @Override compareTo(MapEntryAdapter b)446 public int compareTo(MapEntryAdapter b) { 447 if (getKey() == null || b.getKey() == null) { 448 logger.info("Invalid key for map field."); 449 return -1; 450 } 451 switch (fieldType) { 452 case BOOLEAN: 453 return ((Boolean) getKey()).compareTo((Boolean) b.getKey()); 454 case LONG: 455 return ((Long) getKey()).compareTo((Long) b.getKey()); 456 case INT: 457 return ((Integer) getKey()).compareTo((Integer) b.getKey()); 458 case STRING: 459 String aString = (String) getKey(); 460 String bString = (String) b.getKey(); 461 if (aString == null && bString == null) { 462 return 0; 463 } else if (aString == null && bString != null) { 464 return -1; 465 } else if (aString != null && bString == null) { 466 return 1; 467 } else { 468 return aString.compareTo(bString); 469 } 470 default: 471 return 0; 472 } 473 } 474 } 475 476 /** 477 * Outputs a textual representation of the value of given field value. 
478 * 479 * @param field the descriptor of the field 480 * @param value the value of the field 481 * @param output the output to which to append the formatted value 482 * @throws ClassCastException if the value is not appropriate for the given field descriptor 483 * @throws IOException if there is an exception writing to the output 484 */ printFieldValue( final FieldDescriptor field, final Object value, final Appendable output)485 public void printFieldValue( 486 final FieldDescriptor field, final Object value, final Appendable output) 487 throws IOException { 488 printFieldValue(field, value, setSingleLineOutput(output, this.singleLine)); 489 } 490 printFieldValue( final FieldDescriptor field, final Object value, final TextGenerator generator)491 private void printFieldValue( 492 final FieldDescriptor field, final Object value, final TextGenerator generator) 493 throws IOException { 494 if (shouldRedact(field, generator)) { 495 generator.print(REDACTED_MARKER); 496 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 497 generator.eol(); 498 } 499 return; 500 } 501 switch (field.getType()) { 502 case INT32: 503 case SINT32: 504 case SFIXED32: 505 generator.print(((Integer) value).toString()); 506 break; 507 508 case INT64: 509 case SINT64: 510 case SFIXED64: 511 generator.print(((Long) value).toString()); 512 break; 513 514 case BOOL: 515 generator.print(((Boolean) value).toString()); 516 break; 517 518 case FLOAT: 519 generator.print(((Float) value).toString()); 520 break; 521 522 case DOUBLE: 523 generator.print(((Double) value).toString()); 524 break; 525 526 case UINT32: 527 case FIXED32: 528 generator.print(unsignedToString((Integer) value)); 529 break; 530 531 case UINT64: 532 case FIXED64: 533 generator.print(unsignedToString((Long) value)); 534 break; 535 536 case STRING: 537 generator.print("\""); 538 generator.print( 539 escapeNonAscii 540 ? 
TextFormatEscaper.escapeText((String) value) 541 : escapeDoubleQuotesAndBackslashes((String) value).replace("\n", "\\n")); 542 generator.print("\""); 543 break; 544 545 case BYTES: 546 generator.print("\""); 547 if (value instanceof ByteString) { 548 generator.print(escapeBytes((ByteString) value)); 549 } else { 550 generator.print(escapeBytes((byte[]) value)); 551 } 552 generator.print("\""); 553 break; 554 555 case ENUM: 556 if (((EnumValueDescriptor) value).getIndex() == -1) { 557 // Unknown enum value, print the number instead of the name. 558 generator.print(Integer.toString(((EnumValueDescriptor) value).getNumber())); 559 } else { 560 generator.print(((EnumValueDescriptor) value).getName()); 561 } 562 break; 563 564 case MESSAGE: 565 case GROUP: 566 print((MessageOrBuilder) value, generator); 567 break; 568 } 569 } 570 shouldRedactOptionValue(EnumValueDescriptor optionValue)571 private boolean shouldRedactOptionValue(EnumValueDescriptor optionValue) { 572 if (optionValue.getOptions().hasDebugRedact()) { 573 return optionValue.getOptions().getDebugRedact(); 574 } 575 return false; 576 } 577 578 // The criteria for redacting a field is as follows: 1) The enablingSafeDebugFormat printer 579 // option 580 // must be on. 2) The field must be marked by a debug_redact=true option, or is marked by an 581 // option with an enum value that is marked by a debug_redact=true option. 582 @SuppressWarnings("unchecked") // List<EnumValueDescriptor> guaranteed by protobuf runtime. shouldRedact(final FieldDescriptor field, TextGenerator generator)583 private boolean shouldRedact(final FieldDescriptor field, TextGenerator generator) { 584 // Skip checking if it's sensitive and potentially reporting it if we don't care about either. 
585 if (!shouldReport(generator.fieldReporterLevel) && !enablingSafeDebugFormat) { 586 return false; 587 } 588 boolean isSensitive = false; 589 if (field.getOptions().hasDebugRedact() && field.getOptions().getDebugRedact()) { 590 isSensitive = true; 591 } else { 592 // Iterate through every option; if it's an enum, we check each enum value for debug_redact. 593 for (Map.Entry<Descriptors.FieldDescriptor, Object> entry : 594 field.getOptions().getAllFields().entrySet()) { 595 Descriptors.FieldDescriptor option = entry.getKey(); 596 if (option.getType() != Descriptors.FieldDescriptor.Type.ENUM) { 597 continue; 598 } 599 if (option.isRepeated()) { 600 for (EnumValueDescriptor value : (List<EnumValueDescriptor>) entry.getValue()) { 601 if (shouldRedactOptionValue(value)) { 602 isSensitive = true; 603 break; 604 } 605 } 606 } else { 607 EnumValueDescriptor optionValue = (EnumValueDescriptor) entry.getValue(); 608 if (shouldRedactOptionValue(optionValue)) { 609 isSensitive = true; 610 break; 611 } 612 } 613 } 614 } 615 return isSensitive && enablingSafeDebugFormat; 616 } 617 shouldReport(FieldReporterLevel level)618 private boolean shouldReport(FieldReporterLevel level) { 619 return sensitiveFieldReportingLevel.get().compareTo(level) < 0; 620 } 621 622 /** Like {@code print()}, but writes directly to a {@code String} and returns it. 
*/ printToString(final MessageOrBuilder message)623 public String printToString(final MessageOrBuilder message) { 624 return printToString(message, FieldReporterLevel.PRINTER_PRINT_TO_STRING); 625 } 626 printToString(final MessageOrBuilder message, FieldReporterLevel level)627 String printToString(final MessageOrBuilder message, FieldReporterLevel level) { 628 try { 629 final StringBuilder text = new StringBuilder(); 630 if (enablingSafeDebugFormat) { 631 applyUnstablePrefix(text); 632 } 633 print(message, text, level); 634 return text.toString(); 635 } catch (IOException e) { 636 throw new IllegalStateException(e); 637 } 638 } 639 640 /** Like {@code print()}, but writes directly to a {@code String} and returns it. */ printToString(final UnknownFieldSet fields)641 public String printToString(final UnknownFieldSet fields) { 642 try { 643 final StringBuilder text = new StringBuilder(); 644 if (enablingSafeDebugFormat) { 645 applyUnstablePrefix(text); 646 } 647 print(fields, text); 648 return text.toString(); 649 } catch (IOException e) { 650 throw new IllegalStateException(e); 651 } 652 } 653 654 /** 655 * Generates a human readable form of this message, useful for debugging and other purposes, 656 * with no newline characters. 657 * 658 * @deprecated Use {@code 659 * this.printer().emittingSingleLine(true).printToString(MessageOrBuilder)} 660 */ 661 @Deprecated shortDebugString(final MessageOrBuilder message)662 public String shortDebugString(final MessageOrBuilder message) { 663 return this.emittingSingleLine(true) 664 .printToString(message, FieldReporterLevel.SHORT_DEBUG_STRING); 665 } 666 667 /** 668 * Generates a human readable form of the field, useful for debugging and other purposes, with 669 * no newline characters. 
/**
 * Generates a human readable form of the field, useful for debugging and other purposes, with
 * no newline characters.
 *
 * <p>This is a thin wrapper: it derives a single-line copy of this printer via {@code
 * emittingSingleLine(true)} and delegates to {@code printFieldToString(field, value)} on it.
 *
 * @param field the descriptor of the field
 * @param value the value of the field
 * @return the single-line text produced by {@code printFieldToString}
 * @deprecated Use {@code this.emittingSingleLine(true).printFieldToString(FieldDescriptor,
 *     Object)}
 */
@Deprecated
@InlineMe(replacement = "this.emittingSingleLine(true).printFieldToString(field, value)")
public String shortDebugString(final FieldDescriptor field, final Object value) {
  // The body is the same expression as the @InlineMe replacement string above; keep the two in
  // sync if either is changed.
  return this.emittingSingleLine(true).printFieldToString(field, value);
}
String.format("UNKNOWN_FIXED64 %s", REDACTED_MARKER) 712 : String.format((Locale) null, "0x%016x", (Long) value)); 713 break; 714 case WireFormat.WIRETYPE_LENGTH_DELIMITED: 715 try { 716 // Try to parse and print the field as an embedded message 717 UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value); 718 generator.print("{"); 719 generator.eol(); 720 generator.indent(); 721 printUnknownFields(message, generator, redact); 722 generator.outdent(); 723 generator.print("}"); 724 } catch (InvalidProtocolBufferException e) { 725 // If not parseable as a message, print as a String 726 if (redact) { 727 generator.print(String.format("UNKNOWN_STRING %s", REDACTED_MARKER)); 728 break; 729 } 730 generator.print("\""); 731 generator.print(escapeBytes((ByteString) value)); 732 generator.print("\""); 733 } 734 break; 735 case WireFormat.WIRETYPE_START_GROUP: 736 printUnknownFields((UnknownFieldSet) value, generator, redact); 737 break; 738 default: 739 throw new IllegalArgumentException("Bad tag: " + tag); 740 } 741 } 742 printMessage(final MessageOrBuilder message, final TextGenerator generator)743 private void printMessage(final MessageOrBuilder message, final TextGenerator generator) 744 throws IOException { 745 for (Map.Entry<FieldDescriptor, Object> field : message.getAllFields().entrySet()) { 746 printField(field.getKey(), field.getValue(), generator); 747 } 748 printUnknownFields(message.getUnknownFields(), generator, this.enablingSafeDebugFormat); 749 } 750 printSingleField( final FieldDescriptor field, final Object value, final TextGenerator generator)751 private void printSingleField( 752 final FieldDescriptor field, final Object value, final TextGenerator generator) 753 throws IOException { 754 if (field.isExtension()) { 755 generator.print("["); 756 // We special-case MessageSet elements for compatibility with proto1. 
757 if (field.getContainingType().getOptions().getMessageSetWireFormat() 758 && (field.getType() == FieldDescriptor.Type.MESSAGE) 759 && (field.isOptional()) 760 // object equality 761 && (field.getExtensionScope() == field.getMessageType())) { 762 generator.print(field.getMessageType().getFullName()); 763 } else { 764 generator.print(field.getFullName()); 765 } 766 generator.print("]"); 767 } else { 768 if (field.isGroupLike()) { 769 // Groups must be serialized with their original capitalization. 770 generator.print(field.getMessageType().getName()); 771 } else { 772 generator.print(field.getName()); 773 } 774 } 775 776 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 777 generator.print(" {"); 778 generator.eol(); 779 generator.indent(); 780 } else { 781 generator.print(": "); 782 } 783 784 printFieldValue(field, value, generator); 785 786 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 787 generator.outdent(); 788 generator.print("}"); 789 } 790 generator.eol(); 791 } 792 printUnknownFields( final UnknownFieldSet unknownFields, final TextGenerator generator, boolean redact)793 private static void printUnknownFields( 794 final UnknownFieldSet unknownFields, final TextGenerator generator, boolean redact) 795 throws IOException { 796 if (unknownFields.isEmpty()) { 797 return; 798 } 799 for (Map.Entry<Integer, UnknownFieldSet.Field> entry : unknownFields.asMap().entrySet()) { 800 final int number = entry.getKey(); 801 final UnknownFieldSet.Field field = entry.getValue(); 802 printUnknownField( 803 number, WireFormat.WIRETYPE_VARINT, field.getVarintList(), generator, redact); 804 printUnknownField( 805 number, WireFormat.WIRETYPE_FIXED32, field.getFixed32List(), generator, redact); 806 printUnknownField( 807 number, WireFormat.WIRETYPE_FIXED64, field.getFixed64List(), generator, redact); 808 printUnknownField( 809 number, 810 WireFormat.WIRETYPE_LENGTH_DELIMITED, 811 field.getLengthDelimitedList(), 812 generator, 813 redact); 814 for 
(final UnknownFieldSet value : field.getGroupList()) { 815 generator.print(entry.getKey().toString()); 816 generator.print(" {"); 817 generator.eol(); 818 generator.indent(); 819 printUnknownFields(value, generator, redact); 820 generator.outdent(); 821 generator.print("}"); 822 generator.eol(); 823 } 824 } 825 } 826 printUnknownField( final int number, final int wireType, final List<?> values, final TextGenerator generator, boolean redact)827 private static void printUnknownField( 828 final int number, 829 final int wireType, 830 final List<?> values, 831 final TextGenerator generator, 832 boolean redact) 833 throws IOException { 834 for (final Object value : values) { 835 generator.print(String.valueOf(number)); 836 generator.print(": "); 837 printUnknownFieldValue(wireType, value, generator, redact); 838 generator.eol(); 839 } 840 } 841 } 842 843 /** Convert an unsigned 32-bit integer to a string. */ unsignedToString(final int value)844 public static String unsignedToString(final int value) { 845 if (value >= 0) { 846 return Integer.toString(value); 847 } else { 848 return Long.toString(value & 0x00000000FFFFFFFFL); 849 } 850 } 851 852 /** Convert an unsigned 64-bit integer to a string. */ unsignedToString(final long value)853 public static String unsignedToString(final long value) { 854 if (value >= 0) { 855 return Long.toString(value); 856 } else { 857 // Pull off the most-significant bit so that BigInteger doesn't think 858 // the number is negative, then set it again using setBit(). 
859 return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL).setBit(63).toString(); 860 } 861 } 862 setSingleLineOutput(Appendable output, boolean singleLine)863 private static TextGenerator setSingleLineOutput(Appendable output, boolean singleLine) { 864 return new TextGenerator(output, singleLine, Printer.FieldReporterLevel.NO_REPORT); 865 } 866 setSingleLineOutput( Appendable output, boolean singleLine, Printer.FieldReporterLevel fieldReporterLevel)867 private static TextGenerator setSingleLineOutput( 868 Appendable output, boolean singleLine, Printer.FieldReporterLevel fieldReporterLevel) { 869 return new TextGenerator(output, singleLine, fieldReporterLevel); 870 } 871 872 /** An inner class for writing text to the output stream. */ 873 private static final class TextGenerator { 874 private final Appendable output; 875 private final StringBuilder indent = new StringBuilder(); 876 private final boolean singleLineMode; 877 // While technically we are "at the start of a line" at the very beginning of the output, all 878 // we would do in response to this is emit the (zero length) indentation, so it has no effect. 879 // Setting it false here does however suppress an unwanted leading space in single-line mode. 880 private boolean atStartOfLine = false; 881 // Indicate which Protobuf public stringification API (e.g AbstractMessage.toString()) is 882 // called. 883 private final Printer.FieldReporterLevel fieldReporterLevel; 884 TextGenerator( final Appendable output, boolean singleLineMode, Printer.FieldReporterLevel fieldReporterLevel)885 private TextGenerator( 886 final Appendable output, 887 boolean singleLineMode, 888 Printer.FieldReporterLevel fieldReporterLevel) { 889 this.output = output; 890 this.singleLineMode = singleLineMode; 891 this.fieldReporterLevel = fieldReporterLevel; 892 } 893 894 /** 895 * Indent text by two spaces. After calling Indent(), two spaces will be inserted at the 896 * beginning of each line of text. 
Indent() may be called multiple times to produce deeper 897 * indents. 898 */ indent()899 public void indent() { 900 indent.append(" "); 901 } 902 903 /** Reduces the current indent level by two spaces, or crashes if the indent level is zero. */ outdent()904 public void outdent() { 905 final int length = indent.length(); 906 if (length == 0) { 907 throw new IllegalArgumentException(" Outdent() without matching Indent()."); 908 } 909 indent.setLength(length - 2); 910 } 911 912 /** 913 * Print text to the output stream. Bare newlines are never expected to be passed to this 914 * method; to indicate the end of a line, call "eol()". 915 */ print(final CharSequence text)916 public void print(final CharSequence text) throws IOException { 917 if (atStartOfLine) { 918 atStartOfLine = false; 919 output.append(singleLineMode ? " " : indent); 920 } 921 output.append(text); 922 } 923 924 /** 925 * Signifies reaching the "end of the current line" in the output. In single-line mode, this 926 * does not result in a newline being emitted, but ensures that a separating space is written 927 * before the next output. 928 */ eol()929 public void eol() throws IOException { 930 if (!singleLineMode) { 931 output.append("\n"); 932 } 933 atStartOfLine = true; 934 } 935 } 936 937 // ================================================================= 938 // Parsing 939 940 /** 941 * Represents a stream of tokens parsed from a {@code String}. 942 * 943 * <p>The Java standard library provides many classes that you might think would be useful for 944 * implementing this, but aren't. For example: 945 * 946 * <ul> 947 * <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or, at least, something 948 * that would get us close to what we want -- except for one fatal flaw: It automatically 949 * un-escapes strings using Java escape sequences, which do not include all the escape 950 * sequences we need to support (e.g. '\x'). 
951 * <li>{@code java.util.Scanner}: This seems like a great way at least to parse regular 952 * expressions out of a stream (so we wouldn't have to load the entire input into a single 953 * string before parsing). Sadly, {@code Scanner} requires that tokens be delimited with 954 * some delimiter. Thus, although the text "foo:" should parse to two tokens ("foo" and 955 * ":"), {@code Scanner} would recognize it only as a single token. Furthermore, {@code 956 * Scanner} provides no way to inspect the contents of delimiters, making it impossible to 957 * keep track of line and column numbers. 958 * </ul> 959 * 960 * <p>Luckily, Java's regular expression support does manage to be useful to us. (Barely: We need 961 * {@code Matcher.usePattern()}, which is new in Java 1.5.) So, we can use that, at least. 962 * Unfortunately, this implies that we need to have the entire input in one contiguous string. 963 */ 964 private static final class Tokenizer { 965 private final CharSequence text; 966 private final Matcher matcher; 967 private String currentToken; 968 969 // The character index within this.text at which the current token begins. 970 private int pos = 0; 971 972 // The line and column numbers of the current token. 973 private int line = 0; 974 private int column = 0; 975 976 // The line and column numbers of the previous token (allows throwing 977 // errors *after* consuming). 978 private int previousLine = 0; 979 private int previousColumn = 0; 980 981 // We use possessive quantifiers (*+ and ++) because otherwise the Java 982 // regex matcher has stack overflows on large inputs. 
983 private static final Pattern WHITESPACE = Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE); 984 private static final Pattern TOKEN = 985 Pattern.compile( 986 "[a-zA-Z_][0-9a-zA-Z_+-]*+|" // an identifier 987 + "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" // a number 988 + "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" // a double-quoted string 989 + "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)", // a single-quoted string 990 Pattern.MULTILINE); 991 992 private static final Pattern DOUBLE_INFINITY = 993 Pattern.compile("-?inf(inity)?", Pattern.CASE_INSENSITIVE); 994 private static final Pattern FLOAT_INFINITY = 995 Pattern.compile("-?inf(inity)?f?", Pattern.CASE_INSENSITIVE); 996 private static final Pattern FLOAT_NAN = Pattern.compile("nanf?", Pattern.CASE_INSENSITIVE); 997 998 /** 999 * {@link containsSilentMarkerAfterCurrentToken} indicates if there is a silent marker after the 1000 * current token. This value is moved to {@link containsSilentMarkerAfterPrevToken} every time 1001 * the next token is parsed. 1002 */ 1003 private boolean containsSilentMarkerAfterCurrentToken = false; 1004 1005 private boolean containsSilentMarkerAfterPrevToken = false; 1006 1007 /** Construct a tokenizer that parses tokens from the given text. 
/**
 * Advance to the next token.
 *
 * <p>The line/column of the token being left are saved as the "previous" position, then
 * {@code pos}, {@code line} and {@code column} are walked forward to the start of the matcher
 * region (where the upcoming token begins). The upcoming token becomes {@code currentToken};
 * an empty string marks end of input, which is what {@code atEnd()} tests for.
 */
void nextToken() {
  // Remember where the token we are leaving started, for errors reported after consuming it.
  previousLine = line;
  previousColumn = column;

  // Advance the line counter to the current position.
  while (pos < matcher.regionStart()) {
    if (text.charAt(pos) == '\n') {
      ++line;
      column = 0;
    } else {
      ++column;
    }
    ++pos;
  }

  // Match the next token.
  if (matcher.regionStart() == matcher.regionEnd()) {
    // EOF
    currentToken = "";
  } else {
    matcher.usePattern(TOKEN);
    if (matcher.lookingAt()) {
      currentToken = matcher.group();
      // Shrink the region so it begins right after the matched token.
      matcher.region(matcher.end(), matcher.regionEnd());
    } else {
      // Take one character.
      // (Nothing in TOKEN matched here, so the single character becomes a token of its own.)
      currentToken = String.valueOf(text.charAt(pos));
      matcher.region(pos + 1, matcher.regionEnd());
    }

    // Leave the region positioned at the start of the following token.
    skipWhitespace();
  }
}
*/ skipWhitespace()1080 private void skipWhitespace() { 1081 matcher.usePattern(WHITESPACE); 1082 if (matcher.lookingAt()) { 1083 matcher.region(matcher.end(), matcher.regionEnd()); 1084 } 1085 } 1086 1087 /** 1088 * If the next token exactly matches {@code token}, consume it and return {@code true}. 1089 * Otherwise, return {@code false} without doing anything. 1090 */ tryConsume(final String token)1091 boolean tryConsume(final String token) { 1092 if (currentToken.equals(token)) { 1093 nextToken(); 1094 return true; 1095 } else { 1096 return false; 1097 } 1098 } 1099 1100 /** 1101 * If the next token exactly matches {@code token}, consume it. Otherwise, throw a {@link 1102 * ParseException}. 1103 */ consume(final String token)1104 void consume(final String token) throws ParseException { 1105 if (!tryConsume(token)) { 1106 throw parseException("Expected \"" + token + "\"."); 1107 } 1108 } 1109 1110 /** Returns {@code true} if the next token is an integer, but does not consume it. */ lookingAtInteger()1111 boolean lookingAtInteger() { 1112 if (currentToken.length() == 0) { 1113 return false; 1114 } 1115 1116 final char c = currentToken.charAt(0); 1117 return ('0' <= c && c <= '9') || c == '-' || c == '+'; 1118 } 1119 1120 /** Returns {@code true} if the current token's text is equal to that specified. */ lookingAt(String text)1121 boolean lookingAt(String text) { 1122 return currentToken.equals(text); 1123 } 1124 1125 /** 1126 * If the next token is an identifier, consume it and return its value. Otherwise, throw a 1127 * {@link ParseException}. 1128 */ consumeIdentifier()1129 String consumeIdentifier() throws ParseException { 1130 for (int i = 0; i < currentToken.length(); i++) { 1131 final char c = currentToken.charAt(i); 1132 if (('a' <= c && c <= 'z') 1133 || ('A' <= c && c <= 'Z') 1134 || ('0' <= c && c <= '9') 1135 || (c == '_') 1136 || (c == '.')) { 1137 // OK 1138 } else { 1139 throw parseException("Expected identifier. 
Found '" + currentToken + "'"); 1140 } 1141 } 1142 1143 final String result = currentToken; 1144 nextToken(); 1145 return result; 1146 } 1147 1148 /** 1149 * If the next token is an identifier, consume it and return {@code true}. Otherwise, return 1150 * {@code false} without doing anything. 1151 */ tryConsumeIdentifier()1152 boolean tryConsumeIdentifier() { 1153 try { 1154 consumeIdentifier(); 1155 return true; 1156 } catch (ParseException e) { 1157 return false; 1158 } 1159 } 1160 1161 /** 1162 * If the next token is a 32-bit signed integer, consume it and return its value. Otherwise, 1163 * throw a {@link ParseException}. 1164 */ consumeInt32()1165 int consumeInt32() throws ParseException { 1166 try { 1167 final int result = parseInt32(currentToken); 1168 nextToken(); 1169 return result; 1170 } catch (NumberFormatException e) { 1171 throw integerParseException(e); 1172 } 1173 } 1174 1175 /** 1176 * If the next token is a 32-bit unsigned integer, consume it and return its value. Otherwise, 1177 * throw a {@link ParseException}. 1178 */ consumeUInt32()1179 int consumeUInt32() throws ParseException { 1180 try { 1181 final int result = parseUInt32(currentToken); 1182 nextToken(); 1183 return result; 1184 } catch (NumberFormatException e) { 1185 throw integerParseException(e); 1186 } 1187 } 1188 1189 /** 1190 * If the next token is a 64-bit signed integer, consume it and return its value. Otherwise, 1191 * throw a {@link ParseException}. 1192 */ consumeInt64()1193 long consumeInt64() throws ParseException { 1194 try { 1195 final long result = parseInt64(currentToken); 1196 nextToken(); 1197 return result; 1198 } catch (NumberFormatException e) { 1199 throw integerParseException(e); 1200 } 1201 } 1202 1203 /** 1204 * If the next token is a 64-bit signed integer, consume it and return {@code true}. Otherwise, 1205 * return {@code false} without doing anything. 
1206 */ tryConsumeInt64()1207 boolean tryConsumeInt64() { 1208 try { 1209 consumeInt64(); 1210 return true; 1211 } catch (ParseException e) { 1212 return false; 1213 } 1214 } 1215 1216 /** 1217 * If the next token is a 64-bit unsigned integer, consume it and return its value. Otherwise, 1218 * throw a {@link ParseException}. 1219 */ consumeUInt64()1220 long consumeUInt64() throws ParseException { 1221 try { 1222 final long result = parseUInt64(currentToken); 1223 nextToken(); 1224 return result; 1225 } catch (NumberFormatException e) { 1226 throw integerParseException(e); 1227 } 1228 } 1229 1230 /** 1231 * If the next token is a 64-bit unsigned integer, consume it and return {@code true}. 1232 * Otherwise, return {@code false} without doing anything. 1233 */ tryConsumeUInt64()1234 public boolean tryConsumeUInt64() { 1235 try { 1236 consumeUInt64(); 1237 return true; 1238 } catch (ParseException e) { 1239 return false; 1240 } 1241 } 1242 1243 /** 1244 * If the next token is a double, consume it and return its value. Otherwise, throw a {@link 1245 * ParseException}. 1246 */ consumeDouble()1247 public double consumeDouble() throws ParseException { 1248 // We need to parse infinity and nan separately because 1249 // Double.parseDouble() does not accept "inf", "infinity", or "nan". 1250 if (DOUBLE_INFINITY.matcher(currentToken).matches()) { 1251 final boolean negative = currentToken.startsWith("-"); 1252 nextToken(); 1253 return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; 1254 } 1255 if (currentToken.equalsIgnoreCase("nan")) { 1256 nextToken(); 1257 return Double.NaN; 1258 } 1259 try { 1260 final double result = Double.parseDouble(currentToken); 1261 nextToken(); 1262 return result; 1263 } catch (NumberFormatException e) { 1264 throw floatParseException(e); 1265 } 1266 } 1267 1268 /** 1269 * If the next token is a double, consume it and return {@code true}. Otherwise, return {@code 1270 * false} without doing anything. 
1271 */ tryConsumeDouble()1272 public boolean tryConsumeDouble() { 1273 try { 1274 consumeDouble(); 1275 return true; 1276 } catch (ParseException e) { 1277 return false; 1278 } 1279 } 1280 1281 /** 1282 * If the next token is a float, consume it and return its value. Otherwise, throw a {@link 1283 * ParseException}. 1284 */ consumeFloat()1285 public float consumeFloat() throws ParseException { 1286 // We need to parse infinity and nan separately because 1287 // Float.parseFloat() does not accept "inf", "infinity", or "nan". 1288 if (FLOAT_INFINITY.matcher(currentToken).matches()) { 1289 final boolean negative = currentToken.startsWith("-"); 1290 nextToken(); 1291 return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY; 1292 } 1293 if (FLOAT_NAN.matcher(currentToken).matches()) { 1294 nextToken(); 1295 return Float.NaN; 1296 } 1297 try { 1298 final float result = Float.parseFloat(currentToken); 1299 nextToken(); 1300 return result; 1301 } catch (NumberFormatException e) { 1302 throw floatParseException(e); 1303 } 1304 } 1305 1306 /** 1307 * If the next token is a float, consume it and return {@code true}. Otherwise, return {@code 1308 * false} without doing anything. 1309 */ tryConsumeFloat()1310 public boolean tryConsumeFloat() { 1311 try { 1312 consumeFloat(); 1313 return true; 1314 } catch (ParseException e) { 1315 return false; 1316 } 1317 } 1318 1319 /** 1320 * If the next token is a boolean, consume it and return its value. Otherwise, throw a {@link 1321 * ParseException}. 
1322 */ consumeBoolean()1323 public boolean consumeBoolean() throws ParseException { 1324 if (currentToken.equals("true") 1325 || currentToken.equals("True") 1326 || currentToken.equals("t") 1327 || currentToken.equals("1")) { 1328 nextToken(); 1329 return true; 1330 } else if (currentToken.equals("false") 1331 || currentToken.equals("False") 1332 || currentToken.equals("f") 1333 || currentToken.equals("0")) { 1334 nextToken(); 1335 return false; 1336 } else { 1337 throw parseException("Expected \"true\" or \"false\". Found \"" + currentToken + "\"."); 1338 } 1339 } 1340 1341 /** 1342 * If the next token is a string, consume it and return its (unescaped) value. Otherwise, throw 1343 * a {@link ParseException}. 1344 */ consumeString()1345 public String consumeString() throws ParseException { 1346 return consumeByteString().toStringUtf8(); 1347 } 1348 1349 /** 1350 * If the next token is a string, consume it, unescape it as a {@link ByteString}, and return 1351 * it. Otherwise, throw a {@link ParseException}. 1352 */ 1353 @CanIgnoreReturnValue consumeByteString()1354 ByteString consumeByteString() throws ParseException { 1355 List<ByteString> list = new ArrayList<ByteString>(); 1356 consumeByteString(list); 1357 while (currentToken.startsWith("'") || currentToken.startsWith("\"")) { 1358 consumeByteString(list); 1359 } 1360 return ByteString.copyFrom(list); 1361 } 1362 1363 /** If the next token is a string, consume it and return true. Otherwise, return false. */ tryConsumeByteString()1364 boolean tryConsumeByteString() { 1365 try { 1366 consumeByteString(); 1367 return true; 1368 } catch (ParseException e) { 1369 return false; 1370 } 1371 } 1372 1373 /** 1374 * Like {@link #consumeByteString()} but adds each token of the string to the given list. String 1375 * literals (whether bytes or text) may come in multiple adjacent tokens which are automatically 1376 * concatenated, like in C or Python. 
1377 */ consumeByteString(List<ByteString> list)1378 private void consumeByteString(List<ByteString> list) throws ParseException { 1379 final char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0'; 1380 if (quote != '\"' && quote != '\'') { 1381 throw parseException("Expected string."); 1382 } 1383 1384 if (currentToken.length() < 2 || currentToken.charAt(currentToken.length() - 1) != quote) { 1385 throw parseException("String missing ending quote."); 1386 } 1387 1388 try { 1389 final String escaped = currentToken.substring(1, currentToken.length() - 1); 1390 final ByteString result = unescapeBytes(escaped); 1391 nextToken(); 1392 list.add(result); 1393 } catch (InvalidEscapeSequenceException e) { 1394 throw parseException(e.getMessage()); 1395 } 1396 } 1397 1398 /** 1399 * Returns a {@link ParseException} with the current line and column numbers in the description, 1400 * suitable for throwing. 1401 */ parseException(final String description)1402 ParseException parseException(final String description) { 1403 // Note: People generally prefer one-based line and column numbers. 1404 return new ParseException(line + 1, column + 1, description); 1405 } 1406 1407 /** 1408 * Returns a {@link ParseException} with the line and column numbers of the previous token in 1409 * the description, suitable for throwing. 1410 */ parseExceptionPreviousToken(final String description)1411 ParseException parseExceptionPreviousToken(final String description) { 1412 // Note: People generally prefer one-based line and column numbers. 1413 return new ParseException(previousLine + 1, previousColumn + 1, description); 1414 } 1415 1416 /** 1417 * Constructs an appropriate {@link ParseException} for the given {@code NumberFormatException} 1418 * when trying to parse an integer. 
1419 */ integerParseException(final NumberFormatException e)1420 private ParseException integerParseException(final NumberFormatException e) { 1421 return parseException("Couldn't parse integer: " + e.getMessage()); 1422 } 1423 1424 /** 1425 * Constructs an appropriate {@link ParseException} for the given {@code NumberFormatException} 1426 * when trying to parse a float or double. 1427 */ floatParseException(final NumberFormatException e)1428 private ParseException floatParseException(final NumberFormatException e) { 1429 return parseException("Couldn't parse number: " + e.getMessage()); 1430 } 1431 } 1432 1433 /** Thrown when parsing an invalid text format message. */ 1434 public static class ParseException extends IOException { 1435 private static final long serialVersionUID = 3196188060225107702L; 1436 1437 private final int line; 1438 private final int column; 1439 1440 /** Create a new instance, with -1 as the line and column numbers. */ ParseException(final String message)1441 public ParseException(final String message) { 1442 this(-1, -1, message); 1443 } 1444 1445 /** 1446 * Create a new instance 1447 * 1448 * @param line the line number where the parse error occurred, using 1-offset. 1449 * @param column the column number where the parser error occurred, using 1-offset. 1450 */ ParseException(final int line, final int column, final String message)1451 public ParseException(final int line, final int column, final String message) { 1452 super(Integer.toString(line) + ":" + column + ": " + message); 1453 this.line = line; 1454 this.column = column; 1455 } 1456 1457 /** 1458 * Return the line where the parse exception occurred, or -1 when none is provided. The value is 1459 * specified as 1-offset, so the first line is line 1. 1460 */ getLine()1461 public int getLine() { 1462 return line; 1463 } 1464 1465 /** 1466 * Return the column where the parse exception occurred, or -1 when none is provided. 
The value 1467 * is specified as 1-offset, so the first line is line 1. 1468 */ getColumn()1469 public int getColumn() { 1470 return column; 1471 } 1472 } 1473 1474 /** Obsolete exception, once thrown when encountering an unknown field while parsing a text 1475 format message. 1476 * 1477 * @deprecated This exception is unused and will be removed in the next breaking release 1478 (v5.x.x). 1479 */ 1480 @Deprecated 1481 public static class UnknownFieldParseException extends ParseException { 1482 private final String unknownField; 1483 1484 /** 1485 * Create a new instance, with -1 as the line and column numbers, and an empty unknown field 1486 * name. 1487 */ UnknownFieldParseException(final String message)1488 public UnknownFieldParseException(final String message) { 1489 this(-1, -1, "", message); 1490 } 1491 1492 /** 1493 * Create a new instance 1494 * 1495 * @param line the line number where the parse error occurred, using 1-offset. 1496 * @param column the column number where the parser error occurred, using 1-offset. 1497 * @param unknownField the name of the unknown field found while parsing. 1498 */ UnknownFieldParseException( final int line, final int column, final String unknownField, final String message)1499 public UnknownFieldParseException( 1500 final int line, final int column, final String unknownField, final String message) { 1501 super(line, column, message); 1502 this.unknownField = unknownField; 1503 } 1504 1505 /** 1506 * Return the name of the unknown field encountered while parsing the protocol buffer string. 1507 */ getUnknownField()1508 public String getUnknownField() { 1509 return unknownField; 1510 } 1511 } 1512 1513 private static final Parser PARSER = Parser.newBuilder().build(); 1514 1515 /** 1516 * Return a {@link Parser} instance which can parse text-format messages. The returned instance is 1517 * thread-safe. 
1518 */ getParser()1519 public static Parser getParser() { 1520 return PARSER; 1521 } 1522 1523 /** Parse a text-format message from {@code input} and merge the contents into {@code builder}. */ merge(final Readable input, final Message.Builder builder)1524 public static void merge(final Readable input, final Message.Builder builder) throws IOException { 1525 PARSER.merge(input, builder); 1526 } 1527 1528 /** Parse a text-format message from {@code input} and merge the contents into {@code builder}. */ merge(final CharSequence input, final Message.Builder builder)1529 public static void merge(final CharSequence input, final Message.Builder builder) 1530 throws ParseException { 1531 PARSER.merge(input, builder); 1532 } 1533 1534 /** 1535 * Parse a text-format message from {@code input}. 1536 * 1537 * @return the parsed message, guaranteed initialized 1538 */ parse(final CharSequence input, final Class<T> protoClass)1539 public static <T extends Message> T parse(final CharSequence input, final Class<T> protoClass) 1540 throws ParseException { 1541 Message.Builder builder = Internal.getDefaultInstance(protoClass).newBuilderForType(); 1542 merge(input, builder); 1543 @SuppressWarnings("unchecked") 1544 T output = (T) builder.build(); 1545 return output; 1546 } 1547 1548 /** 1549 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1550 * Extensions will be recognized if they are registered in {@code extensionRegistry}. 1551 */ merge( final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1552 public static void merge( 1553 final Readable input, 1554 final ExtensionRegistry extensionRegistry, 1555 final Message.Builder builder) 1556 throws IOException { 1557 PARSER.merge(input, extensionRegistry, builder); 1558 } 1559 1560 /** 1561 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 
1562 * Extensions will be recognized if they are registered in {@code extensionRegistry}. 1563 */ merge( final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1564 public static void merge( 1565 final CharSequence input, 1566 final ExtensionRegistry extensionRegistry, 1567 final Message.Builder builder) 1568 throws ParseException { 1569 PARSER.merge(input, extensionRegistry, builder); 1570 } 1571 1572 /** 1573 * Parse a text-format message from {@code input}. Extensions will be recognized if they are 1574 * registered in {@code extensionRegistry}. 1575 * 1576 * @return the parsed message, guaranteed initialized 1577 */ parse( final CharSequence input, final ExtensionRegistry extensionRegistry, final Class<T> protoClass)1578 public static <T extends Message> T parse( 1579 final CharSequence input, 1580 final ExtensionRegistry extensionRegistry, 1581 final Class<T> protoClass) 1582 throws ParseException { 1583 Message.Builder builder = Internal.getDefaultInstance(protoClass).newBuilderForType(); 1584 merge(input, extensionRegistry, builder); 1585 @SuppressWarnings("unchecked") 1586 T output = (T) builder.build(); 1587 return output; 1588 } 1589 1590 /** 1591 * Parser for text-format proto2 instances. This class is thread-safe. The implementation largely 1592 * follows google/protobuf/text_format.cc. 1593 * 1594 * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or {@link Builder} to 1595 * control the parser behavior. 1596 */ 1597 public static class Parser { 1598 1599 /** 1600 * A valid silent marker appears between a field name and its value. If there is a ":" in 1601 * between, the silent marker will only appear after the colon. This is called after a field 1602 * name is parsed, and before the ":" if it exists. If the current token is ":", then 1603 * containsSilentMarkerAfterCurrentToken indicates if there is a valid silent marker. 
/**
 * A valid silent marker appears between a field name and its value. If there is a ":" in
 * between, the silent marker will only appear after the colon. This is called after a field
 * name is parsed, and before the ":" if it exists. If the current token is ":", then
 * containsSilentMarkerAfterCurrentToken indicates if there is a valid silent marker. Otherwise,
 * the current token is part of the field value, so the silent marker is indicated by
 * containsSilentMarkerAfterPrevToken.
 *
 * <p>NOTE(review): the body of this method is empty in this file, so calling it has no effect
 * and none of its parameters are read. Presumably it is a stub kept as a hook; confirm before
 * relying on the detection described above.
 *
 * @param tokenizer the tokenizer positioned just after a field name (unused here)
 * @param immediateMessageType the descriptor of the message being parsed (unused here)
 * @param fieldName the name of the field that was just parsed (unused here)
 */
private void detectSilentMarker(
    Tokenizer tokenizer, Descriptor immediateMessageType, String fieldName) {
  // No-op: nothing is detected or recorded.
}
/**
 * Creates a parser with the given configuration. Instances are obtained through {@link
 * Builder#build()}.
 *
 * @param typeRegistry the registry stored for this parser
 * @param allowUnknownFields the unknown-fields flag stored for this parser
 * @param allowUnknownEnumValues the unknown-enum-values flag stored for this parser
 * @param allowUnknownExtensions the unknown-extensions flag stored for this parser
 * @param singularOverwritePolicy the policy for repeated values of non-repeated fields
 * @param parseInfoTreeBuilder the parse-info tree builder stored for this parser, may be
 *     {@code null}
 * @param recursionLimit the recursion limit stored for this parser
 */
private Parser(
    TypeRegistry typeRegistry,
    boolean allowUnknownFields,
    boolean allowUnknownEnumValues,
    boolean allowUnknownExtensions,
    SingularOverwritePolicy singularOverwritePolicy,
    TextFormatParseInfoTree.Builder parseInfoTreeBuilder,
    int recursionLimit) {
  // Collaborators.
  this.typeRegistry = typeRegistry;
  this.parseInfoTreeBuilder = parseInfoTreeBuilder;

  // Leniency flags.
  this.allowUnknownFields = allowUnknownFields;
  this.allowUnknownExtensions = allowUnknownExtensions;
  this.allowUnknownEnumValues = allowUnknownEnumValues;

  // Policies and limits.
  this.singularOverwritePolicy = singularOverwritePolicy;
  this.recursionLimit = recursionLimit;
}
/**
 * Sets the TypeRegistry for resolving Any. If this is not set, the builder keeps its default
 * (the empty type registry), and TextFormat will not be able to parse Any unless Any is written
 * as bytes.
 *
 * <p>Calling this method again simply replaces the previously configured registry; no exception
 * is thrown when a registry has already been set.
 *
 * @param typeRegistry the registry to use
 * @return this builder, for chaining
 */
public Builder setTypeRegistry(TypeRegistry typeRegistry) {
  this.typeRegistry = typeRegistry;
  return this;
}
1701 */ setAllowUnknownExtensions(boolean allowUnknownExtensions)1702 public Builder setAllowUnknownExtensions(boolean allowUnknownExtensions) { 1703 this.allowUnknownExtensions = allowUnknownExtensions; 1704 return this; 1705 } 1706 1707 /** Sets parser behavior when a non-repeated field appears more than once. */ setSingularOverwritePolicy(SingularOverwritePolicy p)1708 public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) { 1709 this.singularOverwritePolicy = p; 1710 return this; 1711 } 1712 setParseInfoTreeBuilder(TextFormatParseInfoTree.Builder parseInfoTreeBuilder)1713 public Builder setParseInfoTreeBuilder(TextFormatParseInfoTree.Builder parseInfoTreeBuilder) { 1714 this.parseInfoTreeBuilder = parseInfoTreeBuilder; 1715 return this; 1716 } 1717 1718 /** 1719 * Set the maximum recursion limit that the parser will allow. If the depth of the message 1720 * exceeds this limit then the parser will stop and throw an exception. 1721 */ setRecursionLimit(int recursionLimit)1722 public Builder setRecursionLimit(int recursionLimit) { 1723 this.recursionLimit = recursionLimit; 1724 return this; 1725 } 1726 build()1727 public Parser build() { 1728 return new Parser( 1729 typeRegistry, 1730 allowUnknownFields, 1731 allowUnknownEnumValues, 1732 allowUnknownExtensions, 1733 singularOverwritePolicy, 1734 parseInfoTreeBuilder, 1735 recursionLimit); 1736 } 1737 } 1738 1739 /** 1740 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1741 */ merge(final Readable input, final Message.Builder builder)1742 public void merge(final Readable input, final Message.Builder builder) throws IOException { 1743 merge(input, ExtensionRegistry.getEmptyRegistry(), builder); 1744 } 1745 1746 /** 1747 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 
*/
    public void merge(final CharSequence input, final Message.Builder builder)
        throws ParseException {
      merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
    }

    /**
     * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
     * Extensions will be recognized if they are registered in {@code extensionRegistry}.
     */
    public void merge(
        final Readable input,
        final ExtensionRegistry extensionRegistry,
        final Message.Builder builder)
        throws IOException {
      // Read the entire input to a String then parse that.

      // If StreamTokenizer was not so limited, or if there were a kind
      // of Reader that could read in chunks that match some particular regex,
      // or if we wanted to write a custom Reader to tokenize our stream, then
      // we would not have to read to one big String. Alas, none of these is
      // the case. Oh well.

      merge(toStringBuilder(input), extensionRegistry, builder);
    }

    private static final int BUFFER_SIZE = 4096;

    /**
     * Drains {@code input} into a {@link StringBuilder}, reading in chunks of up to {@link
     * #BUFFER_SIZE} characters until the source reports end of input ({@code -1}).
     *
     * @param input the character source to read to exhaustion
     * @return everything that was read, in order
     * @throws IOException if reading from {@code input} fails
     */
    // TODO: See if working around java.io.Reader#read(CharBuffer)
    // overhead is worthwhile
    private static StringBuilder toStringBuilder(final Readable input) throws IOException {
      final StringBuilder text = new StringBuilder();
      final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
      while (true) {
        final int n = input.read(buffer);
        if (n == -1) {
          break;
        }
        Java8Compatibility.flip(buffer);
        text.append(buffer, 0, n);
        // Reset position and limit so the next read can use the full capacity again. Without this
        // the limit left behind by flip() would cap every later read at the size of the smallest
        // read so far, and a zero-length read would leave no room at all. The call goes through
        // java.nio.Buffer to avoid binding to the covariant CharBuffer.clear() added in Java 9.
        ((java.nio.Buffer) buffer).clear();
      }
      return text;
    }

    /** Describes one unrecognized field or extension encountered while parsing. */
    static final class UnknownField {
      /** Whether the unrecognized name was written as a regular field or as an extension. */
      enum Type {
        FIELD,
        EXTENSION
      }

      // Text of the form "line:column:\t<full name>"; the leading line and column are parsed back
      // out when an exception has to be raised for this entry.
      final String message;
      final Type type;

      UnknownField(String message, Type type) {
        this.message = message;
        this.type = type;
      }
    }

    // Check both unknown fields and unknown extensions and log warning messages
    // or throw exceptions according to the flag.
checkUnknownFields(final List<UnknownField> unknownFields)1809 private void checkUnknownFields(final List<UnknownField> unknownFields) throws ParseException { 1810 if (unknownFields.isEmpty()) { 1811 return; 1812 } 1813 1814 StringBuilder msg = new StringBuilder("Input contains unknown fields and/or extensions:"); 1815 for (UnknownField field : unknownFields) { 1816 msg.append('\n').append(field.message); 1817 } 1818 1819 if (allowUnknownFields) { 1820 logger.warning(msg.toString()); 1821 return; 1822 } 1823 1824 int firstErrorIndex = 0; 1825 if (allowUnknownExtensions) { 1826 boolean allUnknownExtensions = true; 1827 for (UnknownField field : unknownFields) { 1828 if (field.type == UnknownField.Type.FIELD) { 1829 allUnknownExtensions = false; 1830 break; 1831 } 1832 ++firstErrorIndex; 1833 } 1834 if (allUnknownExtensions) { 1835 logger.warning(msg.toString()); 1836 return; 1837 } 1838 } 1839 1840 String[] lineColumn = unknownFields.get(firstErrorIndex).message.split(":"); 1841 throw new ParseException( 1842 Integer.parseInt(lineColumn[0]), Integer.parseInt(lineColumn[1]), msg.toString()); 1843 } 1844 1845 /** 1846 * Parse a text-format message from {@code input} and merge the contents into {@code builder}. 1847 * Extensions will be recognized if they are registered in {@code extensionRegistry}. 
1848 */ merge( final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1849 public void merge( 1850 final CharSequence input, 1851 final ExtensionRegistry extensionRegistry, 1852 final Message.Builder builder) 1853 throws ParseException { 1854 final Tokenizer tokenizer = new Tokenizer(input); 1855 MessageReflection.BuilderAdapter target = new MessageReflection.BuilderAdapter(builder); 1856 List<UnknownField> unknownFields = new ArrayList<UnknownField>(); 1857 1858 while (!tokenizer.atEnd()) { 1859 mergeField(tokenizer, extensionRegistry, target, unknownFields, recursionLimit); 1860 } 1861 checkUnknownFields(unknownFields); 1862 } 1863 1864 /** Parse a single field from {@code tokenizer} and merge it into {@code builder}. */ mergeField( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, List<UnknownField> unknownFields, int recursionLimit)1865 private void mergeField( 1866 final Tokenizer tokenizer, 1867 final ExtensionRegistry extensionRegistry, 1868 final MessageReflection.MergeTarget target, 1869 List<UnknownField> unknownFields, 1870 int recursionLimit) 1871 throws ParseException { 1872 mergeField( 1873 tokenizer, 1874 extensionRegistry, 1875 target, 1876 parseInfoTreeBuilder, 1877 unknownFields, 1878 recursionLimit); 1879 } 1880 1881 /** Parse a single field from {@code tokenizer} and merge it into {@code target}. 
*/ mergeField( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields, int recursionLimit)1882 private void mergeField( 1883 final Tokenizer tokenizer, 1884 final ExtensionRegistry extensionRegistry, 1885 final MessageReflection.MergeTarget target, 1886 TextFormatParseInfoTree.Builder parseTreeBuilder, 1887 List<UnknownField> unknownFields, 1888 int recursionLimit) 1889 throws ParseException { 1890 FieldDescriptor field = null; 1891 String name; 1892 int startLine = tokenizer.getLine(); 1893 int startColumn = tokenizer.getColumn(); 1894 final Descriptor type = target.getDescriptorForType(); 1895 ExtensionRegistry.ExtensionInfo extension = null; 1896 1897 if ("google.protobuf.Any".equals(type.getFullName()) && tokenizer.tryConsume("[")) { 1898 if (recursionLimit < 1) { 1899 throw tokenizer.parseException("Message is nested too deep"); 1900 } 1901 mergeAnyFieldValue( 1902 tokenizer, 1903 extensionRegistry, 1904 target, 1905 parseTreeBuilder, 1906 unknownFields, 1907 type, 1908 recursionLimit - 1); 1909 return; 1910 } 1911 1912 if (tokenizer.tryConsume("[")) { 1913 // An extension. 
1914 StringBuilder nameBuilder = new StringBuilder(tokenizer.consumeIdentifier()); 1915 while (tokenizer.tryConsume(".")) { 1916 nameBuilder.append('.'); 1917 nameBuilder.append(tokenizer.consumeIdentifier()); 1918 } 1919 name = nameBuilder.toString(); 1920 1921 extension = target.findExtensionByName(extensionRegistry, name); 1922 1923 if (extension == null) { 1924 String message = 1925 (tokenizer.getPreviousLine() + 1) 1926 + ":" 1927 + (tokenizer.getPreviousColumn() + 1) 1928 + ":\t" 1929 + type.getFullName() 1930 + ".[" 1931 + name 1932 + "]"; 1933 unknownFields.add(new UnknownField(message, UnknownField.Type.EXTENSION)); 1934 } else { 1935 if (extension.descriptor.getContainingType() != type) { 1936 throw tokenizer.parseExceptionPreviousToken( 1937 "Extension \"" 1938 + name 1939 + "\" does not extend message type \"" 1940 + type.getFullName() 1941 + "\"."); 1942 } 1943 field = extension.descriptor; 1944 } 1945 1946 tokenizer.consume("]"); 1947 } else { 1948 name = tokenizer.consumeIdentifier(); 1949 field = type.findFieldByName(name); 1950 1951 // Group names are expected to be capitalized as they appear in the 1952 // .proto file, which actually matches their type names, not their field 1953 // names. 1954 if (field == null) { 1955 // Explicitly specify US locale so that this code does not break when 1956 // executing in Turkey. 1957 final String lowerName = name.toLowerCase(Locale.US); 1958 field = type.findFieldByName(lowerName); 1959 // If the case-insensitive match worked but the field is NOT a group, 1960 if (field != null && !field.isGroupLike()) { 1961 field = null; 1962 } 1963 if (field != null && !field.getMessageType().getName().equals(name)) { 1964 field = null; 1965 } 1966 } 1967 1968 if (field == null) { 1969 String message = 1970 (tokenizer.getPreviousLine() + 1) 1971 + ":" 1972 + (tokenizer.getPreviousColumn() + 1) 1973 + ":\t" 1974 + type.getFullName() 1975 + "." 
1976 + name; 1977 unknownFields.add(new UnknownField(message, UnknownField.Type.FIELD)); 1978 } 1979 } 1980 1981 // Skips unknown fields. 1982 if (field == null) { 1983 detectSilentMarker(tokenizer, type, name); 1984 guessFieldTypeAndSkip(tokenizer, type, recursionLimit); 1985 return; 1986 } 1987 1988 // Handle potential ':'. 1989 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 1990 detectSilentMarker(tokenizer, type, field.getFullName()); 1991 tokenizer.tryConsume(":"); // optional 1992 if (parseTreeBuilder != null) { 1993 TextFormatParseInfoTree.Builder childParseTreeBuilder = 1994 parseTreeBuilder.getBuilderForSubMessageField(field); 1995 consumeFieldValues( 1996 tokenizer, 1997 extensionRegistry, 1998 target, 1999 field, 2000 extension, 2001 childParseTreeBuilder, 2002 unknownFields, 2003 recursionLimit); 2004 } else { 2005 consumeFieldValues( 2006 tokenizer, 2007 extensionRegistry, 2008 target, 2009 field, 2010 extension, 2011 parseTreeBuilder, 2012 unknownFields, 2013 recursionLimit); 2014 } 2015 } else { 2016 detectSilentMarker(tokenizer, type, field.getFullName()); 2017 tokenizer.consume(":"); // required 2018 consumeFieldValues( 2019 tokenizer, 2020 extensionRegistry, 2021 target, 2022 field, 2023 extension, 2024 parseTreeBuilder, 2025 unknownFields, 2026 recursionLimit); 2027 } 2028 2029 if (parseTreeBuilder != null) { 2030 parseTreeBuilder.setLocation(field, TextFormatParseLocation.create(startLine, startColumn)); 2031 } 2032 2033 // For historical reasons, fields may optionally be separated by commas or 2034 // semicolons. 2035 if (!tokenizer.tryConsume(";")) { 2036 tokenizer.tryConsume(","); 2037 } 2038 } 2039 consumeFullTypeName(Tokenizer tokenizer)2040 private String consumeFullTypeName(Tokenizer tokenizer) throws ParseException { 2041 // If there is not a leading `[`, this is just a type name. 
2042 if (!tokenizer.tryConsume("[")) { 2043 return tokenizer.consumeIdentifier(); 2044 } 2045 2046 // Otherwise, this is an extension or google.protobuf.Any type URL: we consume proto path 2047 // elements until we've addressed the type. 2048 String name = tokenizer.consumeIdentifier(); 2049 while (tokenizer.tryConsume(".")) { 2050 name += "." + tokenizer.consumeIdentifier(); 2051 } 2052 if (tokenizer.tryConsume("/")) { 2053 name += "/" + tokenizer.consumeIdentifier(); 2054 while (tokenizer.tryConsume(".")) { 2055 name += "." + tokenizer.consumeIdentifier(); 2056 } 2057 } 2058 tokenizer.consume("]"); 2059 2060 return name; 2061 } 2062 2063 /** 2064 * Parse a one or more field values from {@code tokenizer} and merge it into {@code builder}. 2065 */ consumeFieldValues( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields, int recursionLimit)2066 private void consumeFieldValues( 2067 final Tokenizer tokenizer, 2068 final ExtensionRegistry extensionRegistry, 2069 final MessageReflection.MergeTarget target, 2070 final FieldDescriptor field, 2071 final ExtensionRegistry.ExtensionInfo extension, 2072 final TextFormatParseInfoTree.Builder parseTreeBuilder, 2073 List<UnknownField> unknownFields, 2074 int recursionLimit) 2075 throws ParseException { 2076 // Support specifying repeated field values as a comma-separated list. 2077 // Ex."foo: [1, 2, 3]" 2078 if (field.isRepeated() && tokenizer.tryConsume("[")) { 2079 if (!tokenizer.tryConsume("]")) { // Allow "foo: []" to be treated as empty. 2080 while (true) { 2081 consumeFieldValue( 2082 tokenizer, 2083 extensionRegistry, 2084 target, 2085 field, 2086 extension, 2087 parseTreeBuilder, 2088 unknownFields, 2089 recursionLimit); 2090 if (tokenizer.tryConsume("]")) { 2091 // End of list. 
2092 break; 2093 } 2094 tokenizer.consume(","); 2095 } 2096 } 2097 } else { 2098 consumeFieldValue( 2099 tokenizer, 2100 extensionRegistry, 2101 target, 2102 field, 2103 extension, 2104 parseTreeBuilder, 2105 unknownFields, 2106 recursionLimit); 2107 } 2108 } 2109 2110 /** Parse a single field value from {@code tokenizer} and merge it into {@code builder}. */ consumeFieldValue( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields, int recursionLimit)2111 private void consumeFieldValue( 2112 final Tokenizer tokenizer, 2113 final ExtensionRegistry extensionRegistry, 2114 final MessageReflection.MergeTarget target, 2115 final FieldDescriptor field, 2116 final ExtensionRegistry.ExtensionInfo extension, 2117 final TextFormatParseInfoTree.Builder parseTreeBuilder, 2118 List<UnknownField> unknownFields, 2119 int recursionLimit) 2120 throws ParseException { 2121 if (singularOverwritePolicy == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES 2122 && !field.isRepeated()) { 2123 if (target.hasField(field)) { 2124 throw tokenizer.parseExceptionPreviousToken( 2125 "Non-repeated field \"" + field.getFullName() + "\" cannot be overwritten."); 2126 } else if (field.getContainingOneof() != null 2127 && target.hasOneof(field.getContainingOneof())) { 2128 Descriptors.OneofDescriptor oneof = field.getContainingOneof(); 2129 throw tokenizer.parseExceptionPreviousToken( 2130 "Field \"" 2131 + field.getFullName() 2132 + "\" is specified along with field \"" 2133 + target.getOneofFieldDescriptor(oneof).getFullName() 2134 + "\", another member of oneof \"" 2135 + oneof.getName() 2136 + "\"."); 2137 } 2138 } 2139 2140 Object value = null; 2141 2142 if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) { 2143 if (recursionLimit < 1) { 2144 throw 
tokenizer.parseException("Message is nested too deep"); 2145 } 2146 2147 final String endToken; 2148 if (tokenizer.tryConsume("<")) { 2149 endToken = ">"; 2150 } else { 2151 tokenizer.consume("{"); 2152 endToken = "}"; 2153 } 2154 2155 Message defaultInstance = (extension == null) ? null : extension.defaultInstance; 2156 MessageReflection.MergeTarget subField = 2157 target.newMergeTargetForField(field, defaultInstance); 2158 2159 while (!tokenizer.tryConsume(endToken)) { 2160 if (tokenizer.atEnd()) { 2161 throw tokenizer.parseException("Expected \"" + endToken + "\"."); 2162 } 2163 mergeField( 2164 tokenizer, 2165 extensionRegistry, 2166 subField, 2167 parseTreeBuilder, 2168 unknownFields, 2169 recursionLimit - 1); 2170 } 2171 2172 value = subField.finish(); 2173 } else { 2174 switch (field.getType()) { 2175 case INT32: 2176 case SINT32: 2177 case SFIXED32: 2178 value = tokenizer.consumeInt32(); 2179 break; 2180 2181 case INT64: 2182 case SINT64: 2183 case SFIXED64: 2184 value = tokenizer.consumeInt64(); 2185 break; 2186 2187 case UINT32: 2188 case FIXED32: 2189 value = tokenizer.consumeUInt32(); 2190 break; 2191 2192 case UINT64: 2193 case FIXED64: 2194 value = tokenizer.consumeUInt64(); 2195 break; 2196 2197 case FLOAT: 2198 value = tokenizer.consumeFloat(); 2199 break; 2200 2201 case DOUBLE: 2202 value = tokenizer.consumeDouble(); 2203 break; 2204 2205 case BOOL: 2206 value = tokenizer.consumeBoolean(); 2207 break; 2208 2209 case STRING: 2210 value = tokenizer.consumeString(); 2211 break; 2212 2213 case BYTES: 2214 value = tokenizer.consumeByteString(); 2215 break; 2216 2217 case ENUM: 2218 final EnumDescriptor enumType = field.getEnumType(); 2219 2220 if (tokenizer.lookingAtInteger()) { 2221 final int number = tokenizer.consumeInt32(); 2222 value = 2223 enumType.isClosed() 2224 ? 
enumType.findValueByNumber(number) 2225 : enumType.findValueByNumberCreatingIfUnknown(number); 2226 if (value == null) { 2227 String unknownValueMsg = 2228 "Enum type \"" 2229 + enumType.getFullName() 2230 + "\" has no value with number " 2231 + number 2232 + '.'; 2233 if (allowUnknownEnumValues) { 2234 logger.warning(unknownValueMsg); 2235 return; 2236 } else { 2237 throw tokenizer.parseExceptionPreviousToken( 2238 "Enum type \"" 2239 + enumType.getFullName() 2240 + "\" has no value with number " 2241 + number 2242 + '.'); 2243 } 2244 } 2245 } else { 2246 final String id = tokenizer.consumeIdentifier(); 2247 value = enumType.findValueByName(id); 2248 if (value == null) { 2249 String unknownValueMsg = 2250 "Enum type \"" 2251 + enumType.getFullName() 2252 + "\" has no value named \"" 2253 + id 2254 + "\"."; 2255 if (allowUnknownEnumValues) { 2256 logger.warning(unknownValueMsg); 2257 return; 2258 } else { 2259 throw tokenizer.parseExceptionPreviousToken(unknownValueMsg); 2260 } 2261 } 2262 } 2263 2264 break; 2265 2266 case MESSAGE: 2267 case GROUP: 2268 throw new RuntimeException("Can't get here."); 2269 } 2270 } 2271 2272 if (field.isRepeated()) { 2273 // TODO: If field.isMapField() and FORBID_SINGULAR_OVERWRITES mode, 2274 // check for duplicate map keys here. 
2275 target.addRepeatedField(field, value); 2276 } else { 2277 target.setField(field, value); 2278 } 2279 } 2280 mergeAnyFieldValue( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, MergeTarget target, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields, Descriptor anyDescriptor, int recursionLimit)2281 private void mergeAnyFieldValue( 2282 final Tokenizer tokenizer, 2283 final ExtensionRegistry extensionRegistry, 2284 MergeTarget target, 2285 final TextFormatParseInfoTree.Builder parseTreeBuilder, 2286 List<UnknownField> unknownFields, 2287 Descriptor anyDescriptor, 2288 int recursionLimit) 2289 throws ParseException { 2290 // Try to parse human readable format of Any in the form: [type_url]: { ... } 2291 StringBuilder typeUrlBuilder = new StringBuilder(); 2292 // Parse the type_url inside []. 2293 while (true) { 2294 typeUrlBuilder.append(tokenizer.consumeIdentifier()); 2295 if (tokenizer.tryConsume("]")) { 2296 break; 2297 } 2298 if (tokenizer.tryConsume("/")) { 2299 typeUrlBuilder.append("/"); 2300 } else if (tokenizer.tryConsume(".")) { 2301 typeUrlBuilder.append("."); 2302 } else { 2303 throw tokenizer.parseExceptionPreviousToken("Expected a valid type URL."); 2304 } 2305 } 2306 detectSilentMarker(tokenizer, anyDescriptor, typeUrlBuilder.toString()); 2307 tokenizer.tryConsume(":"); 2308 final String anyEndToken; 2309 if (tokenizer.tryConsume("<")) { 2310 anyEndToken = ">"; 2311 } else { 2312 tokenizer.consume("{"); 2313 anyEndToken = "}"; 2314 } 2315 String typeUrl = typeUrlBuilder.toString(); 2316 Descriptor contentType = null; 2317 try { 2318 contentType = typeRegistry.getDescriptorForTypeUrl(typeUrl); 2319 } catch (InvalidProtocolBufferException e) { 2320 throw tokenizer.parseException("Invalid valid type URL. Found: " + typeUrl); 2321 } 2322 if (contentType == null) { 2323 throw tokenizer.parseException( 2324 "Unable to parse Any of type: " 2325 + typeUrl 2326 + ". 
Please make sure that the TypeRegistry contains the descriptors for the given" 2327 + " types."); 2328 } 2329 Message.Builder contentBuilder = 2330 DynamicMessage.getDefaultInstance(contentType).newBuilderForType(); 2331 MessageReflection.BuilderAdapter contentTarget = 2332 new MessageReflection.BuilderAdapter(contentBuilder); 2333 while (!tokenizer.tryConsume(anyEndToken)) { 2334 mergeField( 2335 tokenizer, 2336 extensionRegistry, 2337 contentTarget, 2338 parseTreeBuilder, 2339 unknownFields, 2340 recursionLimit); 2341 } 2342 2343 target.setField(anyDescriptor.findFieldByName("type_url"), typeUrlBuilder.toString()); 2344 target.setField( 2345 anyDescriptor.findFieldByName("value"), contentBuilder.build().toByteString()); 2346 } 2347 2348 /** Skips the next field including the field's name and value. */ skipField(Tokenizer tokenizer, Descriptor type, int recursionLimit)2349 private void skipField(Tokenizer tokenizer, Descriptor type, int recursionLimit) 2350 throws ParseException { 2351 String name = consumeFullTypeName(tokenizer); 2352 detectSilentMarker(tokenizer, type, name); 2353 guessFieldTypeAndSkip(tokenizer, type, recursionLimit); 2354 2355 // For historical reasons, fields may optionally be separated by commas or 2356 // semicolons. 2357 if (!tokenizer.tryConsume(";")) { 2358 tokenizer.tryConsume(","); 2359 } 2360 } 2361 2362 /** 2363 * Skips the whole body of a message including the beginning delimiter and the ending delimiter. 
2364 */ skipFieldMessage(Tokenizer tokenizer, Descriptor type, int recursionLimit)2365 private void skipFieldMessage(Tokenizer tokenizer, Descriptor type, int recursionLimit) 2366 throws ParseException { 2367 final String delimiter; 2368 if (tokenizer.tryConsume("<")) { 2369 delimiter = ">"; 2370 } else { 2371 tokenizer.consume("{"); 2372 delimiter = "}"; 2373 } 2374 while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) { 2375 skipField(tokenizer, type, recursionLimit); 2376 } 2377 tokenizer.consume(delimiter); 2378 } 2379 2380 /** Skips a field value. */ skipFieldValue(Tokenizer tokenizer)2381 private void skipFieldValue(Tokenizer tokenizer) throws ParseException { 2382 if (!tokenizer.tryConsumeByteString() 2383 && !tokenizer.tryConsumeIdentifier() // includes enum & boolean 2384 && !tokenizer.tryConsumeInt64() // includes int32 2385 && !tokenizer.tryConsumeUInt64() // includes uint32 2386 && !tokenizer.tryConsumeDouble() 2387 && !tokenizer.tryConsumeFloat()) { 2388 throw tokenizer.parseException("Invalid field value: " + tokenizer.currentToken); 2389 } 2390 } 2391 2392 /** 2393 * Tries to guess the type of this field and skip it. 2394 * 2395 * <p>If this field is not a message, there should be a ":" between the field name and the field 2396 * value and also the field value should not start with "{" or "<" which indicates the beginning 2397 * of a message body. If there is no ":" or there is a "{" or "<" after ":", this field has to 2398 * be a message or the input is ill-formed. For short-formed repeated fields (i.e. with "[]"), 2399 * if it is repeated scalar, there must be a ":" between the field name and the starting "[" . 
2400 */ guessFieldTypeAndSkip(Tokenizer tokenizer, Descriptor type, int recursionLimit)2401 private void guessFieldTypeAndSkip(Tokenizer tokenizer, Descriptor type, int recursionLimit) 2402 throws ParseException { 2403 boolean semicolonConsumed = tokenizer.tryConsume(":"); 2404 if (tokenizer.lookingAt("[")) { 2405 // Short repeated field form. If a semicolon was consumed, it could be repeated scalar or 2406 // repeated message. If not, it must be repeated message. 2407 skipFieldShortFormedRepeated(tokenizer, semicolonConsumed, type, recursionLimit); 2408 } else if (semicolonConsumed && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) { 2409 skipFieldValue(tokenizer); 2410 } else { 2411 if (recursionLimit < 1) { 2412 throw tokenizer.parseException("Message is nested too deep"); 2413 } 2414 skipFieldMessage(tokenizer, type, recursionLimit - 1); 2415 } 2416 } 2417 2418 /** 2419 * Skips a short-formed repeated field value. 2420 * 2421 * <p>Reports an error if scalar type is not allowed but showing up inside "[]". 2422 */ skipFieldShortFormedRepeated( Tokenizer tokenizer, boolean scalarAllowed, Descriptor type, int recursionLimit)2423 private void skipFieldShortFormedRepeated( 2424 Tokenizer tokenizer, boolean scalarAllowed, Descriptor type, int recursionLimit) 2425 throws ParseException { 2426 if (!tokenizer.tryConsume("[") || tokenizer.tryConsume("]")) { 2427 // Try skipping "[]". 2428 return; 2429 } 2430 2431 while (true) { 2432 if (tokenizer.lookingAt("{") || tokenizer.lookingAt("<")) { 2433 // Try skipping message field inside "[]" 2434 if (recursionLimit < 1) { 2435 throw tokenizer.parseException("Message is nested too deep"); 2436 } 2437 skipFieldMessage(tokenizer, type, recursionLimit - 1); 2438 } else if (scalarAllowed) { 2439 // Try skipping scalar field inside "[]". 
2440 skipFieldValue(tokenizer); 2441 } else { 2442 throw tokenizer.parseException( 2443 "Invalid repeated scalar field: missing \":\" before \"[\"."); 2444 } 2445 if (tokenizer.tryConsume("]")) { 2446 break; 2447 } 2448 tokenizer.consume(","); 2449 } 2450 } 2451 } 2452 2453 // ================================================================= 2454 // Utility functions 2455 // 2456 // Some of these methods are package-private because Descriptors.java uses 2457 // them. 2458 2459 /** 2460 * Escapes bytes in the format used in protocol buffer text format, which is the same as the 2461 * format used for C string literals. All bytes that are not printable 7-bit ASCII characters are 2462 * escaped, as well as backslash, single-quote, and double-quote characters. Characters for which 2463 * no defined short-hand escape sequence is defined will be escaped using 3-digit octal sequences. 2464 */ escapeBytes(ByteString input)2465 public static String escapeBytes(ByteString input) { 2466 return TextFormatEscaper.escapeBytes(input); 2467 } 2468 2469 /** Like {@link #escapeBytes(ByteString)}, but used for byte array. */ escapeBytes(byte[] input)2470 public static String escapeBytes(byte[] input) { 2471 return TextFormatEscaper.escapeBytes(input); 2472 } 2473 2474 /** 2475 * Un-escape a byte sequence as escaped using {@link #escapeBytes(ByteString)}. Two-digit hex 2476 * escapes (starting with "\x") are also recognized. 2477 */ unescapeBytes(CharSequence charString)2478 public static ByteString unescapeBytes(CharSequence charString) 2479 throws InvalidEscapeSequenceException { 2480 // First convert the Java character sequence to UTF-8 bytes. 2481 ByteString input = ByteString.copyFromUtf8(charString.toString()); 2482 // Then unescape certain byte sequences introduced by ASCII '\\'. The valid 2483 // escapes can all be expressed with ASCII characters, so it is safe to 2484 // operate on bytes here. 
2485 // 2486 // Unescaping the input byte array will result in a byte sequence that's no 2487 // longer than the input. That's because each escape sequence is between 2488 // two and four bytes long and stands for a single byte. 2489 final byte[] result = new byte[input.size()]; 2490 int pos = 0; 2491 for (int i = 0; i < input.size(); i++) { 2492 byte c = input.byteAt(i); 2493 if (c == '\\') { 2494 if (i + 1 < input.size()) { 2495 ++i; 2496 c = input.byteAt(i); 2497 if (isOctal(c)) { 2498 // Octal escape. 2499 int code = digitValue(c); 2500 if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) { 2501 ++i; 2502 code = code * 8 + digitValue(input.byteAt(i)); 2503 } 2504 if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) { 2505 ++i; 2506 code = code * 8 + digitValue(input.byteAt(i)); 2507 } 2508 // TODO: Check that 0 <= code && code <= 0xFF. 2509 result[pos++] = (byte) code; 2510 } else { 2511 switch (c) { 2512 case 'a': 2513 result[pos++] = 0x07; 2514 break; 2515 case 'b': 2516 result[pos++] = '\b'; 2517 break; 2518 case 'f': 2519 result[pos++] = '\f'; 2520 break; 2521 case 'n': 2522 result[pos++] = '\n'; 2523 break; 2524 case 'r': 2525 result[pos++] = '\r'; 2526 break; 2527 case 't': 2528 result[pos++] = '\t'; 2529 break; 2530 case 'v': 2531 result[pos++] = 0x0b; 2532 break; 2533 case '\\': 2534 result[pos++] = '\\'; 2535 break; 2536 case '\'': 2537 result[pos++] = '\''; 2538 break; 2539 case '"': 2540 result[pos++] = '\"'; 2541 break; 2542 case '?': 2543 result[pos++] = '?'; 2544 break; 2545 2546 case 'x': 2547 // hex escape 2548 int code = 0; 2549 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) { 2550 ++i; 2551 code = digitValue(input.byteAt(i)); 2552 } else { 2553 throw new InvalidEscapeSequenceException( 2554 "Invalid escape sequence: '\\x' with no digits"); 2555 } 2556 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) { 2557 ++i; 2558 code = code * 16 + digitValue(input.byteAt(i)); 2559 } 2560 result[pos++] = (byte) code; 2561 break; 
2562 2563 case 'u': 2564 // Unicode escape 2565 ++i; 2566 if (i + 3 < input.size() 2567 && isHex(input.byteAt(i)) 2568 && isHex(input.byteAt(i + 1)) 2569 && isHex(input.byteAt(i + 2)) 2570 && isHex(input.byteAt(i + 3))) { 2571 char ch = 2572 (char) 2573 (digitValue(input.byteAt(i)) << 12 2574 | digitValue(input.byteAt(i + 1)) << 8 2575 | digitValue(input.byteAt(i + 2)) << 4 2576 | digitValue(input.byteAt(i + 3))); 2577 2578 if (ch >= Character.MIN_SURROGATE && ch <= Character.MAX_SURROGATE) { 2579 throw new InvalidEscapeSequenceException( 2580 "Invalid escape sequence: '\\u' refers to a surrogate"); 2581 } 2582 byte[] chUtf8 = Character.toString(ch).getBytes(Internal.UTF_8); 2583 System.arraycopy(chUtf8, 0, result, pos, chUtf8.length); 2584 pos += chUtf8.length; 2585 i += 3; 2586 } else { 2587 throw new InvalidEscapeSequenceException( 2588 "Invalid escape sequence: '\\u' with too few hex chars"); 2589 } 2590 break; 2591 2592 case 'U': 2593 // Unicode escape 2594 ++i; 2595 if (i + 7 >= input.size()) { 2596 throw new InvalidEscapeSequenceException( 2597 "Invalid escape sequence: '\\U' with too few hex chars"); 2598 } 2599 int codepoint = 0; 2600 for (int offset = i; offset < i + 8; offset++) { 2601 byte b = input.byteAt(offset); 2602 if (!isHex(b)) { 2603 throw new InvalidEscapeSequenceException( 2604 "Invalid escape sequence: '\\U' with too few hex chars"); 2605 } 2606 codepoint = (codepoint << 4) | digitValue(b); 2607 } 2608 if (!Character.isValidCodePoint(codepoint)) { 2609 throw new InvalidEscapeSequenceException( 2610 "Invalid escape sequence: '\\U" 2611 + input.substring(i, i + 8).toStringUtf8() 2612 + "' is not a valid code point value"); 2613 } 2614 Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(codepoint); 2615 if (unicodeBlock != null 2616 && (unicodeBlock.equals(Character.UnicodeBlock.LOW_SURROGATES) 2617 || unicodeBlock.equals(Character.UnicodeBlock.HIGH_SURROGATES) 2618 || unicodeBlock.equals( 2619 
Character.UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES))) { 2620 throw new InvalidEscapeSequenceException( 2621 "Invalid escape sequence: '\\U" 2622 + input.substring(i, i + 8).toStringUtf8() 2623 + "' refers to a surrogate code unit"); 2624 } 2625 int[] codepoints = new int[1]; 2626 codepoints[0] = codepoint; 2627 byte[] chUtf8 = new String(codepoints, 0, 1).getBytes(Internal.UTF_8); 2628 System.arraycopy(chUtf8, 0, result, pos, chUtf8.length); 2629 pos += chUtf8.length; 2630 i += 7; 2631 break; 2632 2633 default: 2634 throw new InvalidEscapeSequenceException( 2635 "Invalid escape sequence: '\\" + (char) c + '\''); 2636 } 2637 } 2638 } else { 2639 throw new InvalidEscapeSequenceException( 2640 "Invalid escape sequence: '\\' at end of string."); 2641 } 2642 } else { 2643 result[pos++] = c; 2644 } 2645 } 2646 2647 return result.length == pos 2648 ? ByteString.wrap(result) // This reference has not been out of our control. 2649 : ByteString.copyFrom(result, 0, pos); 2650 } 2651 2652 /** 2653 * Thrown by {@link TextFormat#unescapeBytes} and {@link TextFormat#unescapeText} when an invalid 2654 * escape sequence is seen. 2655 */ 2656 public static class InvalidEscapeSequenceException extends IOException { 2657 private static final long serialVersionUID = -8164033650142593304L; 2658 InvalidEscapeSequenceException(final String description)2659 InvalidEscapeSequenceException(final String description) { 2660 super(description); 2661 } 2662 } 2663 2664 /** 2665 * Like {@link #escapeBytes(ByteString)}, but escapes a text string. Non-ASCII characters are 2666 * first encoded as UTF-8, then each byte is escaped individually as a 3-digit octal escape. Yes, 2667 * it's weird. 2668 */ escapeText(final String input)2669 static String escapeText(final String input) { 2670 return escapeBytes(ByteString.copyFromUtf8(input)); 2671 } 2672 2673 /** Escape double quotes and backslashes in a String for emittingUnicode output of a message. 
*/ escapeDoubleQuotesAndBackslashes(final String input)2674 public static String escapeDoubleQuotesAndBackslashes(final String input) { 2675 return TextFormatEscaper.escapeDoubleQuotesAndBackslashes(input); 2676 } 2677 2678 /** 2679 * Un-escape a text string as escaped using {@link #escapeText(String)}. Two-digit hex escapes 2680 * (starting with "\x") are also recognized. 2681 */ unescapeText(final String input)2682 static String unescapeText(final String input) throws InvalidEscapeSequenceException { 2683 return unescapeBytes(input).toStringUtf8(); 2684 } 2685 2686 /** Is this an octal digit? */ isOctal(final byte c)2687 private static boolean isOctal(final byte c) { 2688 return '0' <= c && c <= '7'; 2689 } 2690 2691 /** Is this a hex digit? */ isHex(final byte c)2692 private static boolean isHex(final byte c) { 2693 return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'); 2694 } 2695 2696 /** 2697 * Interpret a character as a digit (in any base up to 36) and return the numeric value. This is 2698 * like {@code Character.digit()} but we don't accept non-ASCII digits. 2699 */ digitValue(final byte c)2700 private static int digitValue(final byte c) { 2701 if ('0' <= c && c <= '9') { 2702 return c - '0'; 2703 } else if ('a' <= c && c <= 'z') { 2704 return c - 'a' + 10; 2705 } else { 2706 return c - 'A' + 10; 2707 } 2708 } 2709 2710 /** 2711 * Parse a 32-bit signed integer from the text. Unlike the Java standard {@code 2712 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2713 * and octal numbers, respectively. 2714 */ parseInt32(final String text)2715 static int parseInt32(final String text) throws NumberFormatException { 2716 return (int) parseInteger(text, true, false); 2717 } 2718 2719 /** 2720 * Parse a 32-bit unsigned integer from the text. 
Unlike the Java standard {@code 2721 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2722 * and octal numbers, respectively. The result is coerced to a (signed) {@code int} when returned 2723 * since Java has no unsigned integer type. 2724 */ parseUInt32(final String text)2725 static int parseUInt32(final String text) throws NumberFormatException { 2726 return (int) parseInteger(text, false, false); 2727 } 2728 2729 /** 2730 * Parse a 64-bit signed integer from the text. Unlike the Java standard {@code 2731 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2732 * and octal numbers, respectively. 2733 */ parseInt64(final String text)2734 static long parseInt64(final String text) throws NumberFormatException { 2735 return parseInteger(text, true, true); 2736 } 2737 2738 /** 2739 * Parse a 64-bit unsigned integer from the text. Unlike the Java standard {@code 2740 * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal 2741 * and octal numbers, respectively. The result is coerced to a (signed) {@code long} when returned 2742 * since Java has no unsigned long type. 
2743 */ parseUInt64(final String text)2744 static long parseUInt64(final String text) throws NumberFormatException { 2745 return parseInteger(text, false, true); 2746 } 2747 parseInteger(final String text, final boolean isSigned, final boolean isLong)2748 private static long parseInteger(final String text, final boolean isSigned, final boolean isLong) 2749 throws NumberFormatException { 2750 int pos = 0; 2751 2752 boolean negative = false; 2753 if (text.startsWith("-", pos)) { 2754 if (!isSigned) { 2755 throw new NumberFormatException("Number must be positive: " + text); 2756 } 2757 ++pos; 2758 negative = true; 2759 } 2760 2761 int radix = 10; 2762 if (text.startsWith("0x", pos)) { 2763 pos += 2; 2764 radix = 16; 2765 } else if (text.startsWith("0", pos)) { 2766 radix = 8; 2767 } 2768 2769 final String numberText = text.substring(pos); 2770 2771 long result = 0; 2772 if (numberText.length() < 16) { 2773 // Can safely assume no overflow. 2774 result = Long.parseLong(numberText, radix); 2775 if (negative) { 2776 result = -result; 2777 } 2778 2779 // Check bounds. 2780 // No need to check for 64-bit numbers since they'd have to be 16 chars 2781 // or longer to overflow. 2782 if (!isLong) { 2783 if (isSigned) { 2784 if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) { 2785 throw new NumberFormatException( 2786 "Number out of range for 32-bit signed integer: " + text); 2787 } 2788 } else { 2789 if (result >= (1L << 32) || result < 0) { 2790 throw new NumberFormatException( 2791 "Number out of range for 32-bit unsigned integer: " + text); 2792 } 2793 } 2794 } 2795 } else { 2796 BigInteger bigValue = new BigInteger(numberText, radix); 2797 if (negative) { 2798 bigValue = bigValue.negate(); 2799 } 2800 2801 // Check bounds. 
2802 if (!isLong) { 2803 if (isSigned) { 2804 if (bigValue.bitLength() > 31) { 2805 throw new NumberFormatException( 2806 "Number out of range for 32-bit signed integer: " + text); 2807 } 2808 } else { 2809 if (bigValue.bitLength() > 32) { 2810 throw new NumberFormatException( 2811 "Number out of range for 32-bit unsigned integer: " + text); 2812 } 2813 } 2814 } else { 2815 if (isSigned) { 2816 if (bigValue.bitLength() > 63) { 2817 throw new NumberFormatException( 2818 "Number out of range for 64-bit signed integer: " + text); 2819 } 2820 } else { 2821 if (bigValue.bitLength() > 64) { 2822 throw new NumberFormatException( 2823 "Number out of range for 64-bit unsigned integer: " + text); 2824 } 2825 } 2826 } 2827 2828 result = bigValue.longValue(); 2829 } 2830 2831 return result; 2832 } 2833 } 2834