• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 package com.google.protobuf;
9 
10 import com.google.protobuf.Descriptors.Descriptor;
11 import com.google.protobuf.Descriptors.EnumDescriptor;
12 import com.google.protobuf.Descriptors.EnumValueDescriptor;
13 import com.google.protobuf.Descriptors.FieldDescriptor;
14 import com.google.protobuf.MessageReflection.MergeTarget;
15 import java.io.IOException;
16 import java.math.BigInteger;
17 import java.nio.CharBuffer;
18 import java.util.ArrayList;
19 import java.util.Collections;
20 import java.util.HashMap;
21 import java.util.HashSet;
22 import java.util.List;
23 import java.util.Locale;
24 import java.util.Map;
25 import java.util.Set;
26 import java.util.logging.Logger;
27 import java.util.regex.Matcher;
28 import java.util.regex.Pattern;
29 
30 /**
31  * Provide text parsing and formatting support for proto2 instances. The implementation largely
32  * follows text_format.cc.
33  *
34  * @author wenboz@google.com Wenbo Zhu
35  * @author kenton@google.com Kenton Varda
36  */
37 public final class TextFormat {
  /** Private no-argument constructor; nothing in the visible code instantiates this class. */
  private TextFormat() {}

  // Logger named after this class.
  private static final Logger logger = Logger.getLogger(TextFormat.class.getName());

  // NOTE(review): not referenced in the visible part of the file; presumably a marker emitted on
  // the debug-string path. Confirm against its users.
  private static final String DEBUG_STRING_SILENT_MARKER = "\t ";

  // Text printed in place of a value that is being redacted.
  private static final String REDACTED_MARKER = "[REDACTED]";
45 
46   /**
47    * Generates a human readable form of this message, useful for debugging and other purposes, with
48    * no newline characters. This is just a trivial wrapper around {@link
49    * TextFormat.Printer#shortDebugString(MessageOrBuilder)}.
50    *
51    * @deprecated Use {@code printer().emittingSingleLine(true).printToString(MessageOrBuilder)}
52    */
53   @Deprecated
shortDebugString(final MessageOrBuilder message)54   public static String shortDebugString(final MessageOrBuilder message) {
55     return printer()
56         .emittingSingleLine(true)
57         .printToString(message, Printer.FieldReporterLevel.SHORT_DEBUG_STRING);
58   }
59 
60   /**
61    * Outputs a textual representation of the value of an unknown field.
62    *
63    * @param tag the field's tag number
64    * @param value the value of the field
65    * @param output the output to which to append the formatted value
66    * @throws ClassCastException if the value is not appropriate for the given field descriptor
67    * @throws IOException if there is an exception writing to the output
68    */
printUnknownFieldValue( final int tag, final Object value, final Appendable output)69   public static void printUnknownFieldValue(
70       final int tag, final Object value, final Appendable output) throws IOException {
71     printUnknownFieldValue(tag, value, setSingleLineOutput(output, false), false);
72   }
73 
printUnknownFieldValue( final int tag, final Object value, final TextGenerator generator, boolean redact)74   private static void printUnknownFieldValue(
75       final int tag, final Object value, final TextGenerator generator, boolean redact)
76       throws IOException {
77     switch (WireFormat.getTagWireType(tag)) {
78       case WireFormat.WIRETYPE_VARINT:
79         generator.print(unsignedToString((Long) value));
80         break;
81       case WireFormat.WIRETYPE_FIXED32:
82         generator.print(String.format((Locale) null, "0x%08x", (Integer) value));
83         break;
84       case WireFormat.WIRETYPE_FIXED64:
85         generator.print(String.format((Locale) null, "0x%016x", (Long) value));
86         break;
87       case WireFormat.WIRETYPE_LENGTH_DELIMITED:
88         try {
89           // Try to parse and print the field as an embedded message
90           UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value);
91           generator.print("{");
92           generator.eol();
93           generator.indent();
94           Printer.printUnknownFields(message, generator, redact);
95           generator.outdent();
96           generator.print("}");
97         } catch (InvalidProtocolBufferException e) {
98           // If not parseable as a message, print as a String
99           generator.print("\"");
100           generator.print(escapeBytes((ByteString) value));
101           generator.print("\"");
102         }
103         break;
104       case WireFormat.WIRETYPE_START_GROUP:
105         Printer.printUnknownFields((UnknownFieldSet) value, generator, redact);
106         break;
107       default:
108         throw new IllegalArgumentException("Bad tag: " + tag);
109     }
110   }
111 
  /**
   * Returns the shared default text-format {@link Printer}: it escapes non-ASCII characters, uses
   * the empty type and extension registries, and has both the safe debug format and single-line
   * mode switched off.
   */
  public static Printer printer() {
    return Printer.DEFAULT_TEXT_FORMAT;
  }
116 
  /**
   * Returns the shared default debug-format {@link Printer}: it escapes non-ASCII characters, uses
   * the empty type and extension registries, and has the safe debug format switched on (single-line
   * mode off).
   */
  public static Printer debugFormatPrinter() {
    return Printer.DEFAULT_DEBUG_FORMAT;
  }
121 
122   /** Helper class for converting protobufs to text. */
123   public static final class Printer {
124 
    // Printer instance which escapes non-ASCII characters and prints in the text format.
    // Constructor arguments, in order: escapeNonAscii, typeRegistry, extensionRegistry,
    // enablingSafeDebugFormat, singleLine.
    private static final Printer DEFAULT_TEXT_FORMAT =
        new Printer(
            true,
            TypeRegistry.getEmptyTypeRegistry(),
            ExtensionRegistryLite.getEmptyRegistry(),
            false,
            false);

    // Printer instance which escapes non-ASCII characters and prints in the debug format.
    // Identical to DEFAULT_TEXT_FORMAT except that enablingSafeDebugFormat is true.
    private static final Printer DEFAULT_DEBUG_FORMAT =
        new Printer(
            true,
            TypeRegistry.getEmptyTypeRegistry(),
            ExtensionRegistryLite.getEmptyRegistry(),
            true,
            false);
142 
    /**
     * A list of the public APIs that output human-readable text from a message. A higher-level API
     * must be larger than any lower-level APIs it calls under the hood, e.g.
     * DEBUG_MULTILINE.compareTo(PRINTER_PRINT_TO_STRING) > 0. The inverse is not necessarily true.
     *
     * <p>Levels are compared with {@code compareTo}, i.e. by declaration order, so the order of the
     * constants below is significant.
     */
    static enum FieldReporterLevel {
      NO_REPORT(0),
      PRINT(1),
      PRINTER_PRINT_TO_STRING(2),
      TEXTFORMAT_PRINT_TO_STRING(3),
      PRINT_UNICODE(4),
      SHORT_DEBUG_STRING(5),
      LEGACY_MULTILINE(6),
      LEGACY_SINGLE_LINE(7),
      DEBUG_MULTILINE(8),
      DEBUG_SINGLE_LINE(9),
      ABSTRACT_TO_STRING(10),
      ABSTRACT_MUTABLE_TO_STRING(11);
      // Numeric value given to each constant; it mirrors the declaration order.
      // NOTE(review): no read of this field is visible in this part of the file.
      private final int index;

      FieldReporterLevel(int index) {
        this.index = index;
      }
    }
167 
    /** Whether to escape non ASCII characters with backslash and octal. */
    private final boolean escapeNonAscii;

    // Registry used to resolve the type URL of a google.protobuf.Any when printing it.
    private final TypeRegistry typeRegistry;
    // Extension registry passed along when the payload of an Any is parsed for printing.
    private final ExtensionRegistryLite extensionRegistry;

    /**
     * Whether to enable redaction of sensitive fields and introduce randomization. Note that when
     * this is enabled, the output will no longer be deserializable.
     */
    private final boolean enablingSafeDebugFormat;

    // Single-line flag handed to setSingleLineOutput when a generator is created.
    private final boolean singleLine;

    // Any API level higher than this level will be reported. This is set to
    // ABSTRACT_MUTABLE_TO_STRING by default to prevent reporting for now.
    // The value is held per thread; each thread starts from the default returned below.
    private static final ThreadLocal<FieldReporterLevel> sensitiveFieldReportingLevel =
        new ThreadLocal<FieldReporterLevel>() {
          @Override
          protected FieldReporterLevel initialValue() {
            return FieldReporterLevel.ABSTRACT_MUTABLE_TO_STRING;
          }
        };
191 
    /**
     * Creates a printer with every option given explicitly. Private: the visible callers are the
     * static default instances and the configuration methods of this class, each of which returns
     * a new instance.
     *
     * @param escapeNonAscii whether non-ASCII characters are escaped
     * @param typeRegistry registry used to resolve Any type URLs
     * @param extensionRegistry extension registry used when parsing Any payloads
     * @param enablingSafeDebugFormat whether the redacting debug format is enabled
     * @param singleLine whether output is produced in single-line mode
     */
    private Printer(
        boolean escapeNonAscii,
        TypeRegistry typeRegistry,
        ExtensionRegistryLite extensionRegistry,
        boolean enablingSafeDebugFormat,
        boolean singleLine) {
      this.escapeNonAscii = escapeNonAscii;
      this.typeRegistry = typeRegistry;
      this.extensionRegistry = extensionRegistry;
      this.enablingSafeDebugFormat = enablingSafeDebugFormat;
      this.singleLine = singleLine;
    }
204 
    /**
     * Return a new Printer instance with the specified escape mode.
     *
     * @param escapeNonAscii If true, the new Printer will escape non-ASCII characters (this is the
     *     default behavior). If false, the new Printer will print non-ASCII characters as is. In
     *     either case, the new Printer still escapes newlines and quotes in strings.
     * @return a new Printer that clones all other configurations from the current {@link Printer},
     *     with the escape mode set to the given parameter.
     */
    public Printer escapingNonAscii(boolean escapeNonAscii) {
      return new Printer(
          escapeNonAscii, typeRegistry, extensionRegistry, enablingSafeDebugFormat, singleLine);
    }
218 
219     /**
220      * Creates a new {@link Printer} using the given typeRegistry. The new Printer clones all other
221      * configurations from the current {@link Printer}.
222      *
223      * @throws IllegalArgumentException if a registry is already set.
224      */
usingTypeRegistry(TypeRegistry typeRegistry)225     public Printer usingTypeRegistry(TypeRegistry typeRegistry) {
226       if (this.typeRegistry != TypeRegistry.getEmptyTypeRegistry()) {
227         throw new IllegalArgumentException("Only one typeRegistry is allowed.");
228       }
229       return new Printer(
230           escapeNonAscii, typeRegistry, extensionRegistry, enablingSafeDebugFormat, singleLine);
231     }
232 
233     /**
234      * Creates a new {@link Printer} using the given extensionRegistry. The new Printer clones all
235      * other configurations from the current {@link Printer}.
236      *
237      * @throws IllegalArgumentException if a registry is already set.
238      */
usingExtensionRegistry(ExtensionRegistryLite extensionRegistry)239     public Printer usingExtensionRegistry(ExtensionRegistryLite extensionRegistry) {
240       if (this.extensionRegistry != ExtensionRegistryLite.getEmptyRegistry()) {
241         throw new IllegalArgumentException("Only one extensionRegistry is allowed.");
242       }
243       return new Printer(
244           escapeNonAscii, typeRegistry, extensionRegistry, enablingSafeDebugFormat, singleLine);
245     }
246 
    /**
     * Return a new Printer instance that outputs a redacted and unstable format suitable for
     * debugging. This method is package-private.
     *
     * @param enablingSafeDebugFormat If true, the new Printer will redact all proto fields that are
     *     marked by a debug_redact=true option, and apply an unstable prefix to the output.
     * @return a new Printer that clones all other configurations from the current {@link Printer},
     *     with the enablingSafeDebugFormat mode set to the given parameter.
     */
    Printer enablingSafeDebugFormat(boolean enablingSafeDebugFormat) {
      return new Printer(
          escapeNonAscii, typeRegistry, extensionRegistry, enablingSafeDebugFormat, singleLine);
    }
260 
    /**
     * Return a new Printer instance with the specified line formatting status. This printer itself
     * is left unchanged.
     *
     * @param singleLine If true, the new Printer will output no newline characters.
     * @return a new Printer that clones all other configurations from the current {@link Printer},
     *     with the singleLine mode set to the given parameter.
     */
    public Printer emittingSingleLine(boolean singleLine) {
      return new Printer(
          escapeNonAscii, typeRegistry, extensionRegistry, enablingSafeDebugFormat, singleLine);
    }
272 
    /**
     * Sets the sensitive-field reporting threshold for the calling thread. The threshold lives in a
     * static {@link ThreadLocal}, so the new value is shared by every {@link Printer} used on this
     * thread rather than being tied to this instance.
     *
     * @param level the new threshold; API levels above it count as reportable
     */
    void setSensitiveFieldReportingLevel(FieldReporterLevel level) {
      Printer.sensitiveFieldReportingLevel.set(level);
    }
276 
    /**
     * Outputs a textual representation of the Protocol Message supplied into the parameter output.
     * (This representation is the new version of the classic "ProtocolPrinter" output from the
     * original Protocol Buffer system)
     *
     * <p>Printing is attributed to the {@code PRINT} reporter level.
     *
     * @param message the message or builder to print
     * @param output the destination the text is appended to
     * @throws IOException if there is an exception writing to the output
     */
    public void print(final MessageOrBuilder message, final Appendable output) throws IOException {
      print(message, output, FieldReporterLevel.PRINT);
    }
285 
print(final MessageOrBuilder message, final Appendable output, FieldReporterLevel level)286     void print(final MessageOrBuilder message, final Appendable output, FieldReporterLevel level)
287         throws IOException {
288       TextGenerator generator = setSingleLineOutput(output, this.singleLine, level);
289       print(message, generator);
290     }
291 
292     /** Outputs a textual representation of {@code fields} to {@code output}. */
print(final UnknownFieldSet fields, final Appendable output)293     public void print(final UnknownFieldSet fields, final Appendable output) throws IOException {
294       printUnknownFields(
295           fields, setSingleLineOutput(output, this.singleLine), this.enablingSafeDebugFormat);
296     }
297 
print(final MessageOrBuilder message, final TextGenerator generator)298     private void print(final MessageOrBuilder message, final TextGenerator generator)
299         throws IOException {
300       if (message.getDescriptorForType().getFullName().equals("google.protobuf.Any")
301           && printAny(message, generator)) {
302         return;
303       }
304       printMessage(message, generator);
305     }
306 
    /**
     * Called before printing when the safe debug format is enabled. As written it appends the empty
     * string, so no characters are added to {@code output}.
     *
     * @param output the destination that would receive the prefix
     * @throws IllegalStateException if appending to {@code output} fails with an {@link
     *     IOException}
     */
    private void applyUnstablePrefix(final Appendable output) {
      try {
        output.append("");
      } catch (IOException e) {
        throw new IllegalStateException(e);
      }
    }
314 
315     /**
316      * Attempt to print the 'google.protobuf.Any' message in a human-friendly format. Returns false
317      * if the message isn't a valid 'google.protobuf.Any' message (in which case the message should
318      * be rendered just like a regular message to help debugging).
319      */
printAny(final MessageOrBuilder message, final TextGenerator generator)320     private boolean printAny(final MessageOrBuilder message, final TextGenerator generator)
321         throws IOException {
322       Descriptor messageType = message.getDescriptorForType();
323       FieldDescriptor typeUrlField = messageType.findFieldByNumber(1);
324       FieldDescriptor valueField = messageType.findFieldByNumber(2);
325       if (typeUrlField == null
326           || typeUrlField.getType() != FieldDescriptor.Type.STRING
327           || valueField == null
328           || valueField.getType() != FieldDescriptor.Type.BYTES) {
329         // The message may look like an Any but isn't actually an Any message (might happen if the
330         // user tries to use DynamicMessage to construct an Any from incomplete Descriptor).
331         return false;
332       }
333       String typeUrl = (String) message.getField(typeUrlField);
334       // If type_url is not set, we will not be able to decode the content of the value, so just
335       // print out the Any like a regular message.
336       if (typeUrl.isEmpty()) {
337         return false;
338       }
339       Object value = message.getField(valueField);
340 
341       Message.Builder contentBuilder = null;
342       try {
343         Descriptor contentType = typeRegistry.getDescriptorForTypeUrl(typeUrl);
344         if (contentType == null) {
345           return false;
346         }
347         contentBuilder = DynamicMessage.getDefaultInstance(contentType).newBuilderForType();
348         contentBuilder.mergeFrom((ByteString) value, extensionRegistry);
349       } catch (InvalidProtocolBufferException e) {
350         // The value of Any is malformed. We cannot print it out nicely, so fallback to printing out
351         // the type_url and value as bytes. Note that we fail open here to be consistent with
352         // text_format.cc, and also to allow a way for users to inspect the content of the broken
353         // message.
354         return false;
355       }
356       generator.print("[");
357       generator.print(typeUrl);
358       generator.print("] {");
359       generator.eol();
360       generator.indent();
361       print(contentBuilder, generator);
362       generator.outdent();
363       generator.print("}");
364       generator.eol();
365       return true;
366     }
367 
printFieldToString(final FieldDescriptor field, final Object value)368     public String printFieldToString(final FieldDescriptor field, final Object value) {
369       try {
370         final StringBuilder text = new StringBuilder();
371         if (enablingSafeDebugFormat) {
372           applyUnstablePrefix(text);
373         }
374         printField(field, value, text);
375         return text.toString();
376       } catch (IOException e) {
377         throw new IllegalStateException(e);
378       }
379     }
380 
    /**
     * Outputs a textual representation of the given field and its value(s) to {@code output},
     * using this printer's single-line setting.
     *
     * @param field the descriptor of the field
     * @param value the value of the field; a list for repeated and map fields
     * @param output the output to which to append the formatted field
     * @throws IOException if there is an exception writing to the output
     */
    public void printField(final FieldDescriptor field, final Object value, final Appendable output)
        throws IOException {
      printField(field, value, setSingleLineOutput(output, this.singleLine));
    }
385 
printField( final FieldDescriptor field, final Object value, final TextGenerator generator)386     private void printField(
387         final FieldDescriptor field, final Object value, final TextGenerator generator)
388         throws IOException {
389       // Sort map field entries by key
390       if (field.isMapField()) {
391         List<MapEntryAdapter> adapters = new ArrayList<>();
392         for (Object entry : (List<?>) value) {
393           adapters.add(new MapEntryAdapter(entry, field));
394         }
395         Collections.sort(adapters);
396         for (MapEntryAdapter adapter : adapters) {
397           printSingleField(field, adapter.getEntry(), generator);
398         }
399       } else if (field.isRepeated()) {
400         // Repeated field.  Print each element.
401         for (Object element : (List<?>) value) {
402           printSingleField(field, element, generator);
403         }
404       } else {
405         printSingleField(field, value, generator);
406       }
407     }
408 
409     /** An adapter class that can take a {@link MapEntry} and returns its key and entry. */
410     private static class MapEntryAdapter implements Comparable<MapEntryAdapter> {
411       private Object entry;
412 
413       @SuppressWarnings({"rawtypes"})
414       private MapEntry mapEntry;
415 
416       private final FieldDescriptor.JavaType fieldType;
417 
MapEntryAdapter(Object entry, FieldDescriptor fieldDescriptor)418       MapEntryAdapter(Object entry, FieldDescriptor fieldDescriptor) {
419         if (entry instanceof MapEntry) {
420           this.mapEntry = (MapEntry) entry;
421         } else {
422           this.entry = entry;
423         }
424         this.fieldType = extractFieldType(fieldDescriptor);
425       }
426 
extractFieldType(FieldDescriptor fieldDescriptor)427       private static FieldDescriptor.JavaType extractFieldType(FieldDescriptor fieldDescriptor) {
428         return fieldDescriptor.getMessageType().getFields().get(0).getJavaType();
429       }
430 
getKey()431       Object getKey() {
432         if (mapEntry != null) {
433           return mapEntry.getKey();
434         }
435         return null;
436       }
437 
getEntry()438       Object getEntry() {
439         if (mapEntry != null) {
440           return mapEntry;
441         }
442         return entry;
443       }
444 
445       @Override
compareTo(MapEntryAdapter b)446       public int compareTo(MapEntryAdapter b) {
447         if (getKey() == null || b.getKey() == null) {
448           logger.info("Invalid key for map field.");
449           return -1;
450         }
451         switch (fieldType) {
452           case BOOLEAN:
453             return ((Boolean) getKey()).compareTo((Boolean) b.getKey());
454           case LONG:
455             return ((Long) getKey()).compareTo((Long) b.getKey());
456           case INT:
457             return ((Integer) getKey()).compareTo((Integer) b.getKey());
458           case STRING:
459             String aString = (String) getKey();
460             String bString = (String) b.getKey();
461             if (aString == null && bString == null) {
462               return 0;
463             } else if (aString == null && bString != null) {
464               return -1;
465             } else if (aString != null && bString == null) {
466               return 1;
467             } else {
468               return aString.compareTo(bString);
469             }
470           default:
471             return 0;
472         }
473       }
474     }
475 
    /**
     * Outputs a textual representation of the value of given field value, using this printer's
     * single-line setting.
     *
     * @param field the descriptor of the field
     * @param value the value of the field
     * @param output the output to which to append the formatted value
     * @throws ClassCastException if the value is not appropriate for the given field descriptor
     * @throws IOException if there is an exception writing to the output
     */
    public void printFieldValue(
        final FieldDescriptor field, final Object value, final Appendable output)
        throws IOException {
      printFieldValue(field, value, setSingleLineOutput(output, this.singleLine));
    }
490 
printFieldValue( final FieldDescriptor field, final Object value, final TextGenerator generator)491     private void printFieldValue(
492         final FieldDescriptor field, final Object value, final TextGenerator generator)
493         throws IOException {
494       if (shouldRedact(field, generator)) {
495         generator.print(REDACTED_MARKER);
496         if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
497           generator.eol();
498         }
499         return;
500       }
501       switch (field.getType()) {
502         case INT32:
503         case SINT32:
504         case SFIXED32:
505           generator.print(((Integer) value).toString());
506           break;
507 
508         case INT64:
509         case SINT64:
510         case SFIXED64:
511           generator.print(((Long) value).toString());
512           break;
513 
514         case BOOL:
515           generator.print(((Boolean) value).toString());
516           break;
517 
518         case FLOAT:
519           generator.print(((Float) value).toString());
520           break;
521 
522         case DOUBLE:
523           generator.print(((Double) value).toString());
524           break;
525 
526         case UINT32:
527         case FIXED32:
528           generator.print(unsignedToString((Integer) value));
529           break;
530 
531         case UINT64:
532         case FIXED64:
533           generator.print(unsignedToString((Long) value));
534           break;
535 
536         case STRING:
537           generator.print("\"");
538           generator.print(
539               escapeNonAscii
540                   ? TextFormatEscaper.escapeText((String) value)
541                   : escapeDoubleQuotesAndBackslashes((String) value).replace("\n", "\\n"));
542           generator.print("\"");
543           break;
544 
545         case BYTES:
546           generator.print("\"");
547           if (value instanceof ByteString) {
548             generator.print(escapeBytes((ByteString) value));
549           } else {
550             generator.print(escapeBytes((byte[]) value));
551           }
552           generator.print("\"");
553           break;
554 
555         case ENUM:
556           if (((EnumValueDescriptor) value).getIndex() == -1) {
557             // Unknown enum value, print the number instead of the name.
558             generator.print(Integer.toString(((EnumValueDescriptor) value).getNumber()));
559           } else {
560             generator.print(((EnumValueDescriptor) value).getName());
561           }
562           break;
563 
564         case MESSAGE:
565         case GROUP:
566           print((MessageOrBuilder) value, generator);
567           break;
568       }
569     }
570 
shouldRedactOptionValue(EnumValueDescriptor optionValue)571     private boolean shouldRedactOptionValue(EnumValueDescriptor optionValue) {
572       if (optionValue.getOptions().hasDebugRedact()) {
573         return optionValue.getOptions().getDebugRedact();
574       }
575       return false;
576     }
577 
578     // The criteria for redacting a field is as follows: 1) The enablingSafeDebugFormat printer
579     // option
580     // must be on. 2) The field must be marked by a debug_redact=true option, or is marked by an
581     // option with an enum value that is marked by a debug_redact=true option.
582     @SuppressWarnings("unchecked") // List<EnumValueDescriptor> guaranteed by protobuf runtime.
shouldRedact(final FieldDescriptor field, TextGenerator generator)583     private boolean shouldRedact(final FieldDescriptor field, TextGenerator generator) {
584       // Skip checking if it's sensitive and potentially reporting it if we don't care about either.
585       if (!shouldReport(generator.fieldReporterLevel) && !enablingSafeDebugFormat) {
586         return false;
587       }
588       boolean isSensitive = false;
589       if (field.getOptions().hasDebugRedact() && field.getOptions().getDebugRedact()) {
590         isSensitive = true;
591       } else {
592         // Iterate through every option; if it's an enum, we check each enum value for debug_redact.
593         for (Map.Entry<Descriptors.FieldDescriptor, Object> entry :
594             field.getOptions().getAllFields().entrySet()) {
595           Descriptors.FieldDescriptor option = entry.getKey();
596           if (option.getType() != Descriptors.FieldDescriptor.Type.ENUM) {
597             continue;
598           }
599           if (option.isRepeated()) {
600             for (EnumValueDescriptor value : (List<EnumValueDescriptor>) entry.getValue()) {
601               if (shouldRedactOptionValue(value)) {
602                 isSensitive = true;
603                 break;
604               }
605             }
606           } else {
607             EnumValueDescriptor optionValue = (EnumValueDescriptor) entry.getValue();
608             if (shouldRedactOptionValue(optionValue)) {
609               isSensitive = true;
610               break;
611             }
612           }
613         }
614       }
615       return isSensitive && enablingSafeDebugFormat;
616     }
617 
    /**
     * Returns true if {@code level} is declared after the calling thread's current reporting
     * threshold in {@link FieldReporterLevel}. A level equal to the threshold is not reported.
     */
    private boolean shouldReport(FieldReporterLevel level) {
      return sensitiveFieldReportingLevel.get().compareTo(level) < 0;
    }
621 
    /**
     * Like {@code print()}, but writes directly to a {@code String} and returns it. Printing is
     * attributed to the {@code PRINTER_PRINT_TO_STRING} reporter level.
     *
     * @param message the message or builder to print
     * @return the text form of {@code message}
     */
    public String printToString(final MessageOrBuilder message) {
      return printToString(message, FieldReporterLevel.PRINTER_PRINT_TO_STRING);
    }
626 
printToString(final MessageOrBuilder message, FieldReporterLevel level)627     String printToString(final MessageOrBuilder message, FieldReporterLevel level) {
628       try {
629         final StringBuilder text = new StringBuilder();
630         if (enablingSafeDebugFormat) {
631           applyUnstablePrefix(text);
632         }
633         print(message, text, level);
634         return text.toString();
635       } catch (IOException e) {
636         throw new IllegalStateException(e);
637       }
638     }
639 
640     /** Like {@code print()}, but writes directly to a {@code String} and returns it. */
printToString(final UnknownFieldSet fields)641     public String printToString(final UnknownFieldSet fields) {
642       try {
643         final StringBuilder text = new StringBuilder();
644         if (enablingSafeDebugFormat) {
645           applyUnstablePrefix(text);
646         }
647         print(fields, text);
648         return text.toString();
649       } catch (IOException e) {
650         throw new IllegalStateException(e);
651       }
652     }
653 
    /**
     * Generates a human readable form of this message, useful for debugging and other purposes,
     * with no newline characters. Printing is attributed to the {@code SHORT_DEBUG_STRING} reporter
     * level.
     *
     * @param message the message or builder to print
     * @return the single-line text form of {@code message}
     * @deprecated Use {@code this.emittingSingleLine(true).printToString(MessageOrBuilder)}
     */
    @Deprecated
    public String shortDebugString(final MessageOrBuilder message) {
      return this.emittingSingleLine(true)
          .printToString(message, FieldReporterLevel.SHORT_DEBUG_STRING);
    }
666 
    /**
     * Generates a human readable form of the field, useful for debugging and other purposes, with
     * no newline characters.
     *
     * @param field the descriptor of the field
     * @param value the value of the field
     * @return the single-line text form of the field
     * @deprecated Use {@code this.emittingSingleLine(true).printFieldToString(FieldDescriptor,
     *     Object)}
     */
    @Deprecated
    @InlineMe(replacement = "this.emittingSingleLine(true).printFieldToString(field, value)")
    public String shortDebugString(final FieldDescriptor field, final Object value) {
      return this.emittingSingleLine(true).printFieldToString(field, value);
    }
679 
    /**
     * Generates a human readable form of the unknown fields, useful for debugging and other
     * purposes, with no newline characters.
     *
     * @param fields the unknown fields to print
     * @return the single-line text form of {@code fields}
     * @deprecated Use {@code this.emittingSingleLine(true).printToString(UnknownFieldSet)}
     */
    @Deprecated
    @InlineMe(replacement = "this.emittingSingleLine(true).printToString(fields)")
    public String shortDebugString(final UnknownFieldSet fields) {
      return this.emittingSingleLine(true).printToString(fields);
    }
691 
printUnknownFieldValue( final int tag, final Object value, final TextGenerator generator, boolean redact)692     private static void printUnknownFieldValue(
693         final int tag, final Object value, final TextGenerator generator, boolean redact)
694         throws IOException {
695       switch (WireFormat.getTagWireType(tag)) {
696         case WireFormat.WIRETYPE_VARINT:
697           generator.print(
698               redact
699                   ? String.format("UNKNOWN_VARINT %s", REDACTED_MARKER)
700                   : unsignedToString((Long) value));
701           break;
702         case WireFormat.WIRETYPE_FIXED32:
703           generator.print(
704               redact
705                   ? String.format("UNKNOWN_FIXED32 %s", REDACTED_MARKER)
706                   : String.format((Locale) null, "0x%08x", (Integer) value));
707           break;
708         case WireFormat.WIRETYPE_FIXED64:
709           generator.print(
710               redact
711                   ? String.format("UNKNOWN_FIXED64 %s", REDACTED_MARKER)
712                   : String.format((Locale) null, "0x%016x", (Long) value));
713           break;
714         case WireFormat.WIRETYPE_LENGTH_DELIMITED:
715           try {
716             // Try to parse and print the field as an embedded message
717             UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value);
718             generator.print("{");
719             generator.eol();
720             generator.indent();
721             printUnknownFields(message, generator, redact);
722             generator.outdent();
723             generator.print("}");
724           } catch (InvalidProtocolBufferException e) {
725             // If not parseable as a message, print as a String
726             if (redact) {
727               generator.print(String.format("UNKNOWN_STRING %s", REDACTED_MARKER));
728               break;
729             }
730             generator.print("\"");
731             generator.print(escapeBytes((ByteString) value));
732             generator.print("\"");
733           }
734           break;
735         case WireFormat.WIRETYPE_START_GROUP:
736           printUnknownFields((UnknownFieldSet) value, generator, redact);
737           break;
738         default:
739           throw new IllegalArgumentException("Bad tag: " + tag);
740       }
741     }
742 
printMessage(final MessageOrBuilder message, final TextGenerator generator)743     private void printMessage(final MessageOrBuilder message, final TextGenerator generator)
744         throws IOException {
745       for (Map.Entry<FieldDescriptor, Object> field : message.getAllFields().entrySet()) {
746         printField(field.getKey(), field.getValue(), generator);
747       }
748       printUnknownFields(message.getUnknownFields(), generator, this.enablingSafeDebugFormat);
749     }
750 
printSingleField( final FieldDescriptor field, final Object value, final TextGenerator generator)751     private void printSingleField(
752         final FieldDescriptor field, final Object value, final TextGenerator generator)
753         throws IOException {
754       if (field.isExtension()) {
755         generator.print("[");
756         // We special-case MessageSet elements for compatibility with proto1.
757         if (field.getContainingType().getOptions().getMessageSetWireFormat()
758             && (field.getType() == FieldDescriptor.Type.MESSAGE)
759             && (field.isOptional())
760             // object equality
761             && (field.getExtensionScope() == field.getMessageType())) {
762           generator.print(field.getMessageType().getFullName());
763         } else {
764           generator.print(field.getFullName());
765         }
766         generator.print("]");
767       } else {
768         if (field.isGroupLike()) {
769           // Groups must be serialized with their original capitalization.
770           generator.print(field.getMessageType().getName());
771         } else {
772           generator.print(field.getName());
773         }
774       }
775 
776       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
777         generator.print(" {");
778         generator.eol();
779         generator.indent();
780       } else {
781         generator.print(": ");
782       }
783 
784       printFieldValue(field, value, generator);
785 
786       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
787         generator.outdent();
788         generator.print("}");
789       }
790       generator.eol();
791     }
792 
printUnknownFields( final UnknownFieldSet unknownFields, final TextGenerator generator, boolean redact)793     private static void printUnknownFields(
794         final UnknownFieldSet unknownFields, final TextGenerator generator, boolean redact)
795         throws IOException {
796       if (unknownFields.isEmpty()) {
797         return;
798       }
799       for (Map.Entry<Integer, UnknownFieldSet.Field> entry : unknownFields.asMap().entrySet()) {
800         final int number = entry.getKey();
801         final UnknownFieldSet.Field field = entry.getValue();
802         printUnknownField(
803             number, WireFormat.WIRETYPE_VARINT, field.getVarintList(), generator, redact);
804         printUnknownField(
805             number, WireFormat.WIRETYPE_FIXED32, field.getFixed32List(), generator, redact);
806         printUnknownField(
807             number, WireFormat.WIRETYPE_FIXED64, field.getFixed64List(), generator, redact);
808         printUnknownField(
809             number,
810             WireFormat.WIRETYPE_LENGTH_DELIMITED,
811             field.getLengthDelimitedList(),
812             generator,
813             redact);
814         for (final UnknownFieldSet value : field.getGroupList()) {
815           generator.print(entry.getKey().toString());
816           generator.print(" {");
817           generator.eol();
818           generator.indent();
819           printUnknownFields(value, generator, redact);
820           generator.outdent();
821           generator.print("}");
822           generator.eol();
823         }
824       }
825     }
826 
printUnknownField( final int number, final int wireType, final List<?> values, final TextGenerator generator, boolean redact)827     private static void printUnknownField(
828         final int number,
829         final int wireType,
830         final List<?> values,
831         final TextGenerator generator,
832         boolean redact)
833         throws IOException {
834       for (final Object value : values) {
835         generator.print(String.valueOf(number));
836         generator.print(": ");
837         printUnknownFieldValue(wireType, value, generator, redact);
838         generator.eol();
839       }
840     }
841   }
842 
843   /** Convert an unsigned 32-bit integer to a string. */
unsignedToString(final int value)844   public static String unsignedToString(final int value) {
845     if (value >= 0) {
846       return Integer.toString(value);
847     } else {
848       return Long.toString(value & 0x00000000FFFFFFFFL);
849     }
850   }
851 
852   /** Convert an unsigned 64-bit integer to a string. */
unsignedToString(final long value)853   public static String unsignedToString(final long value) {
854     if (value >= 0) {
855       return Long.toString(value);
856     } else {
857       // Pull off the most-significant bit so that BigInteger doesn't think
858       // the number is negative, then set it again using setBit().
859       return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL).setBit(63).toString();
860     }
861   }
862 
setSingleLineOutput(Appendable output, boolean singleLine)863   private static TextGenerator setSingleLineOutput(Appendable output, boolean singleLine) {
864     return new TextGenerator(output, singleLine, Printer.FieldReporterLevel.NO_REPORT);
865   }
866 
setSingleLineOutput( Appendable output, boolean singleLine, Printer.FieldReporterLevel fieldReporterLevel)867   private static TextGenerator setSingleLineOutput(
868       Appendable output, boolean singleLine, Printer.FieldReporterLevel fieldReporterLevel) {
869     return new TextGenerator(output, singleLine, fieldReporterLevel);
870   }
871 
872   /** An inner class for writing text to the output stream. */
873   private static final class TextGenerator {
874     private final Appendable output;
875     private final StringBuilder indent = new StringBuilder();
876     private final boolean singleLineMode;
877     // While technically we are "at the start of a line" at the very beginning of the output, all
878     // we would do in response to this is emit the (zero length) indentation, so it has no effect.
879     // Setting it false here does however suppress an unwanted leading space in single-line mode.
880     private boolean atStartOfLine = false;
881     // Indicate which Protobuf public stringification API (e.g AbstractMessage.toString()) is
882     // called.
883     private final Printer.FieldReporterLevel fieldReporterLevel;
884 
TextGenerator( final Appendable output, boolean singleLineMode, Printer.FieldReporterLevel fieldReporterLevel)885     private TextGenerator(
886         final Appendable output,
887         boolean singleLineMode,
888         Printer.FieldReporterLevel fieldReporterLevel) {
889       this.output = output;
890       this.singleLineMode = singleLineMode;
891       this.fieldReporterLevel = fieldReporterLevel;
892     }
893 
894     /**
895      * Indent text by two spaces. After calling Indent(), two spaces will be inserted at the
896      * beginning of each line of text. Indent() may be called multiple times to produce deeper
897      * indents.
898      */
indent()899     public void indent() {
900       indent.append("  ");
901     }
902 
903     /** Reduces the current indent level by two spaces, or crashes if the indent level is zero. */
outdent()904     public void outdent() {
905       final int length = indent.length();
906       if (length == 0) {
907         throw new IllegalArgumentException(" Outdent() without matching Indent().");
908       }
909       indent.setLength(length - 2);
910     }
911 
912     /**
913      * Print text to the output stream. Bare newlines are never expected to be passed to this
914      * method; to indicate the end of a line, call "eol()".
915      */
print(final CharSequence text)916     public void print(final CharSequence text) throws IOException {
917       if (atStartOfLine) {
918         atStartOfLine = false;
919         output.append(singleLineMode ? " " : indent);
920       }
921       output.append(text);
922     }
923 
924     /**
925      * Signifies reaching the "end of the current line" in the output. In single-line mode, this
926      * does not result in a newline being emitted, but ensures that a separating space is written
927      * before the next output.
928      */
eol()929     public void eol() throws IOException {
930       if (!singleLineMode) {
931         output.append("\n");
932       }
933       atStartOfLine = true;
934     }
935   }
936 
937   // =================================================================
938   // Parsing
939 
940   /**
941    * Represents a stream of tokens parsed from a {@code String}.
942    *
943    * <p>The Java standard library provides many classes that you might think would be useful for
944    * implementing this, but aren't. For example:
945    *
946    * <ul>
947    *   <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or, at least, something
948    *       that would get us close to what we want -- except for one fatal flaw: It automatically
949    *       un-escapes strings using Java escape sequences, which do not include all the escape
950    *       sequences we need to support (e.g. '\x').
951    *   <li>{@code java.util.Scanner}: This seems like a great way at least to parse regular
952    *       expressions out of a stream (so we wouldn't have to load the entire input into a single
953    *       string before parsing). Sadly, {@code Scanner} requires that tokens be delimited with
954    *       some delimiter. Thus, although the text "foo:" should parse to two tokens ("foo" and
955    *       ":"), {@code Scanner} would recognize it only as a single token. Furthermore, {@code
956    *       Scanner} provides no way to inspect the contents of delimiters, making it impossible to
957    *       keep track of line and column numbers.
958    * </ul>
959    *
960    * <p>Luckily, Java's regular expression support does manage to be useful to us. (Barely: We need
961    * {@code Matcher.usePattern()}, which is new in Java 1.5.) So, we can use that, at least.
962    * Unfortunately, this implies that we need to have the entire input in one contiguous string.
963    */
964   private static final class Tokenizer {
965     private final CharSequence text;
966     private final Matcher matcher;
967     private String currentToken;
968 
969     // The character index within this.text at which the current token begins.
970     private int pos = 0;
971 
972     // The line and column numbers of the current token.
973     private int line = 0;
974     private int column = 0;
975 
976     // The line and column numbers of the previous token (allows throwing
977     // errors *after* consuming).
978     private int previousLine = 0;
979     private int previousColumn = 0;
980 
981     // We use possessive quantifiers (*+ and ++) because otherwise the Java
982     // regex matcher has stack overflows on large inputs.
983     private static final Pattern WHITESPACE = Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
984     private static final Pattern TOKEN =
985         Pattern.compile(
986             "[a-zA-Z_][0-9a-zA-Z_+-]*+|" // an identifier
987                 + "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" // a number
988                 + "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" // a double-quoted string
989                 + "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)", // a single-quoted string
990             Pattern.MULTILINE);
991 
992     private static final Pattern DOUBLE_INFINITY =
993         Pattern.compile("-?inf(inity)?", Pattern.CASE_INSENSITIVE);
994     private static final Pattern FLOAT_INFINITY =
995         Pattern.compile("-?inf(inity)?f?", Pattern.CASE_INSENSITIVE);
996     private static final Pattern FLOAT_NAN = Pattern.compile("nanf?", Pattern.CASE_INSENSITIVE);
997 
998     /**
999      * {@link containsSilentMarkerAfterCurrentToken} indicates if there is a silent marker after the
1000      * current token. This value is moved to {@link containsSilentMarkerAfterPrevToken} every time
1001      * the next token is parsed.
1002      */
1003     private boolean containsSilentMarkerAfterCurrentToken = false;
1004 
1005     private boolean containsSilentMarkerAfterPrevToken = false;
1006 
1007     /** Construct a tokenizer that parses tokens from the given text. */
Tokenizer(final CharSequence text)1008     private Tokenizer(final CharSequence text) {
1009       this.text = text;
1010       this.matcher = WHITESPACE.matcher(text);
1011       skipWhitespace();
1012       nextToken();
1013     }
1014 
getPreviousLine()1015     int getPreviousLine() {
1016       return previousLine;
1017     }
1018 
getPreviousColumn()1019     int getPreviousColumn() {
1020       return previousColumn;
1021     }
1022 
getLine()1023     int getLine() {
1024       return line;
1025     }
1026 
getColumn()1027     int getColumn() {
1028       return column;
1029     }
1030 
getContainsSilentMarkerAfterCurrentToken()1031     boolean getContainsSilentMarkerAfterCurrentToken() {
1032       return containsSilentMarkerAfterCurrentToken;
1033     }
1034 
getContainsSilentMarkerAfterPrevToken()1035     boolean getContainsSilentMarkerAfterPrevToken() {
1036       return containsSilentMarkerAfterPrevToken;
1037     }
1038 
1039     /** Are we at the end of the input? */
atEnd()1040     boolean atEnd() {
1041       return currentToken.length() == 0;
1042     }
1043 
1044     /** Advance to the next token. */
nextToken()1045     void nextToken() {
1046       previousLine = line;
1047       previousColumn = column;
1048 
1049       // Advance the line counter to the current position.
1050       while (pos < matcher.regionStart()) {
1051         if (text.charAt(pos) == '\n') {
1052           ++line;
1053           column = 0;
1054         } else {
1055           ++column;
1056         }
1057         ++pos;
1058       }
1059 
1060       // Match the next token.
1061       if (matcher.regionStart() == matcher.regionEnd()) {
1062         // EOF
1063         currentToken = "";
1064       } else {
1065         matcher.usePattern(TOKEN);
1066         if (matcher.lookingAt()) {
1067           currentToken = matcher.group();
1068           matcher.region(matcher.end(), matcher.regionEnd());
1069         } else {
1070           // Take one character.
1071           currentToken = String.valueOf(text.charAt(pos));
1072           matcher.region(pos + 1, matcher.regionEnd());
1073         }
1074 
1075         skipWhitespace();
1076       }
1077     }
1078 
1079     /** Skip over any whitespace so that the matcher region starts at the next token. */
skipWhitespace()1080     private void skipWhitespace() {
1081       matcher.usePattern(WHITESPACE);
1082       if (matcher.lookingAt()) {
1083         matcher.region(matcher.end(), matcher.regionEnd());
1084       }
1085     }
1086 
1087     /**
1088      * If the next token exactly matches {@code token}, consume it and return {@code true}.
1089      * Otherwise, return {@code false} without doing anything.
1090      */
tryConsume(final String token)1091     boolean tryConsume(final String token) {
1092       if (currentToken.equals(token)) {
1093         nextToken();
1094         return true;
1095       } else {
1096         return false;
1097       }
1098     }
1099 
1100     /**
1101      * If the next token exactly matches {@code token}, consume it. Otherwise, throw a {@link
1102      * ParseException}.
1103      */
consume(final String token)1104     void consume(final String token) throws ParseException {
1105       if (!tryConsume(token)) {
1106         throw parseException("Expected \"" + token + "\".");
1107       }
1108     }
1109 
1110     /** Returns {@code true} if the next token is an integer, but does not consume it. */
lookingAtInteger()1111     boolean lookingAtInteger() {
1112       if (currentToken.length() == 0) {
1113         return false;
1114       }
1115 
1116       final char c = currentToken.charAt(0);
1117       return ('0' <= c && c <= '9') || c == '-' || c == '+';
1118     }
1119 
1120     /** Returns {@code true} if the current token's text is equal to that specified. */
lookingAt(String text)1121     boolean lookingAt(String text) {
1122       return currentToken.equals(text);
1123     }
1124 
1125     /**
1126      * If the next token is an identifier, consume it and return its value. Otherwise, throw a
1127      * {@link ParseException}.
1128      */
consumeIdentifier()1129     String consumeIdentifier() throws ParseException {
1130       for (int i = 0; i < currentToken.length(); i++) {
1131         final char c = currentToken.charAt(i);
1132         if (('a' <= c && c <= 'z')
1133             || ('A' <= c && c <= 'Z')
1134             || ('0' <= c && c <= '9')
1135             || (c == '_')
1136             || (c == '.')) {
1137           // OK
1138         } else {
1139           throw parseException("Expected identifier. Found '" + currentToken + "'");
1140         }
1141       }
1142 
1143       final String result = currentToken;
1144       nextToken();
1145       return result;
1146     }
1147 
1148     /**
1149      * If the next token is an identifier, consume it and return {@code true}. Otherwise, return
1150      * {@code false} without doing anything.
1151      */
tryConsumeIdentifier()1152     boolean tryConsumeIdentifier() {
1153       try {
1154         consumeIdentifier();
1155         return true;
1156       } catch (ParseException e) {
1157         return false;
1158       }
1159     }
1160 
1161     /**
1162      * If the next token is a 32-bit signed integer, consume it and return its value. Otherwise,
1163      * throw a {@link ParseException}.
1164      */
consumeInt32()1165     int consumeInt32() throws ParseException {
1166       try {
1167         final int result = parseInt32(currentToken);
1168         nextToken();
1169         return result;
1170       } catch (NumberFormatException e) {
1171         throw integerParseException(e);
1172       }
1173     }
1174 
1175     /**
1176      * If the next token is a 32-bit unsigned integer, consume it and return its value. Otherwise,
1177      * throw a {@link ParseException}.
1178      */
consumeUInt32()1179     int consumeUInt32() throws ParseException {
1180       try {
1181         final int result = parseUInt32(currentToken);
1182         nextToken();
1183         return result;
1184       } catch (NumberFormatException e) {
1185         throw integerParseException(e);
1186       }
1187     }
1188 
1189     /**
1190      * If the next token is a 64-bit signed integer, consume it and return its value. Otherwise,
1191      * throw a {@link ParseException}.
1192      */
consumeInt64()1193     long consumeInt64() throws ParseException {
1194       try {
1195         final long result = parseInt64(currentToken);
1196         nextToken();
1197         return result;
1198       } catch (NumberFormatException e) {
1199         throw integerParseException(e);
1200       }
1201     }
1202 
1203     /**
1204      * If the next token is a 64-bit signed integer, consume it and return {@code true}. Otherwise,
1205      * return {@code false} without doing anything.
1206      */
tryConsumeInt64()1207     boolean tryConsumeInt64() {
1208       try {
1209         consumeInt64();
1210         return true;
1211       } catch (ParseException e) {
1212         return false;
1213       }
1214     }
1215 
1216     /**
1217      * If the next token is a 64-bit unsigned integer, consume it and return its value. Otherwise,
1218      * throw a {@link ParseException}.
1219      */
consumeUInt64()1220     long consumeUInt64() throws ParseException {
1221       try {
1222         final long result = parseUInt64(currentToken);
1223         nextToken();
1224         return result;
1225       } catch (NumberFormatException e) {
1226         throw integerParseException(e);
1227       }
1228     }
1229 
1230     /**
1231      * If the next token is a 64-bit unsigned integer, consume it and return {@code true}.
1232      * Otherwise, return {@code false} without doing anything.
1233      */
tryConsumeUInt64()1234     public boolean tryConsumeUInt64() {
1235       try {
1236         consumeUInt64();
1237         return true;
1238       } catch (ParseException e) {
1239         return false;
1240       }
1241     }
1242 
1243     /**
1244      * If the next token is a double, consume it and return its value. Otherwise, throw a {@link
1245      * ParseException}.
1246      */
consumeDouble()1247     public double consumeDouble() throws ParseException {
1248       // We need to parse infinity and nan separately because
1249       // Double.parseDouble() does not accept "inf", "infinity", or "nan".
1250       if (DOUBLE_INFINITY.matcher(currentToken).matches()) {
1251         final boolean negative = currentToken.startsWith("-");
1252         nextToken();
1253         return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
1254       }
1255       if (currentToken.equalsIgnoreCase("nan")) {
1256         nextToken();
1257         return Double.NaN;
1258       }
1259       try {
1260         final double result = Double.parseDouble(currentToken);
1261         nextToken();
1262         return result;
1263       } catch (NumberFormatException e) {
1264         throw floatParseException(e);
1265       }
1266     }
1267 
1268     /**
1269      * If the next token is a double, consume it and return {@code true}. Otherwise, return {@code
1270      * false} without doing anything.
1271      */
tryConsumeDouble()1272     public boolean tryConsumeDouble() {
1273       try {
1274         consumeDouble();
1275         return true;
1276       } catch (ParseException e) {
1277         return false;
1278       }
1279     }
1280 
1281     /**
1282      * If the next token is a float, consume it and return its value. Otherwise, throw a {@link
1283      * ParseException}.
1284      */
consumeFloat()1285     public float consumeFloat() throws ParseException {
1286       // We need to parse infinity and nan separately because
1287       // Float.parseFloat() does not accept "inf", "infinity", or "nan".
1288       if (FLOAT_INFINITY.matcher(currentToken).matches()) {
1289         final boolean negative = currentToken.startsWith("-");
1290         nextToken();
1291         return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
1292       }
1293       if (FLOAT_NAN.matcher(currentToken).matches()) {
1294         nextToken();
1295         return Float.NaN;
1296       }
1297       try {
1298         final float result = Float.parseFloat(currentToken);
1299         nextToken();
1300         return result;
1301       } catch (NumberFormatException e) {
1302         throw floatParseException(e);
1303       }
1304     }
1305 
1306     /**
1307      * If the next token is a float, consume it and return {@code true}. Otherwise, return {@code
1308      * false} without doing anything.
1309      */
tryConsumeFloat()1310     public boolean tryConsumeFloat() {
1311       try {
1312         consumeFloat();
1313         return true;
1314       } catch (ParseException e) {
1315         return false;
1316       }
1317     }
1318 
1319     /**
1320      * If the next token is a boolean, consume it and return its value. Otherwise, throw a {@link
1321      * ParseException}.
1322      */
consumeBoolean()1323     public boolean consumeBoolean() throws ParseException {
1324       if (currentToken.equals("true")
1325           || currentToken.equals("True")
1326           || currentToken.equals("t")
1327           || currentToken.equals("1")) {
1328         nextToken();
1329         return true;
1330       } else if (currentToken.equals("false")
1331           || currentToken.equals("False")
1332           || currentToken.equals("f")
1333           || currentToken.equals("0")) {
1334         nextToken();
1335         return false;
1336       } else {
1337         throw parseException("Expected \"true\" or \"false\". Found \"" + currentToken + "\".");
1338       }
1339     }
1340 
1341     /**
1342      * If the next token is a string, consume it and return its (unescaped) value. Otherwise, throw
1343      * a {@link ParseException}.
1344      */
consumeString()1345     public String consumeString() throws ParseException {
1346       return consumeByteString().toStringUtf8();
1347     }
1348 
1349     /**
1350      * If the next token is a string, consume it, unescape it as a {@link ByteString}, and return
1351      * it. Otherwise, throw a {@link ParseException}.
1352      */
1353     @CanIgnoreReturnValue
consumeByteString()1354     ByteString consumeByteString() throws ParseException {
1355       List<ByteString> list = new ArrayList<ByteString>();
1356       consumeByteString(list);
1357       while (currentToken.startsWith("'") || currentToken.startsWith("\"")) {
1358         consumeByteString(list);
1359       }
1360       return ByteString.copyFrom(list);
1361     }
1362 
1363     /** If the next token is a string, consume it and return true. Otherwise, return false. */
tryConsumeByteString()1364     boolean tryConsumeByteString() {
1365       try {
1366         consumeByteString();
1367         return true;
1368       } catch (ParseException e) {
1369         return false;
1370       }
1371     }
1372 
1373     /**
1374      * Like {@link #consumeByteString()} but adds each token of the string to the given list. String
1375      * literals (whether bytes or text) may come in multiple adjacent tokens which are automatically
1376      * concatenated, like in C or Python.
1377      */
consumeByteString(List<ByteString> list)1378     private void consumeByteString(List<ByteString> list) throws ParseException {
1379       final char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0';
1380       if (quote != '\"' && quote != '\'') {
1381         throw parseException("Expected string.");
1382       }
1383 
1384       if (currentToken.length() < 2 || currentToken.charAt(currentToken.length() - 1) != quote) {
1385         throw parseException("String missing ending quote.");
1386       }
1387 
1388       try {
1389         final String escaped = currentToken.substring(1, currentToken.length() - 1);
1390         final ByteString result = unescapeBytes(escaped);
1391         nextToken();
1392         list.add(result);
1393       } catch (InvalidEscapeSequenceException e) {
1394         throw parseException(e.getMessage());
1395       }
1396     }
1397 
1398     /**
1399      * Returns a {@link ParseException} with the current line and column numbers in the description,
1400      * suitable for throwing.
1401      */
parseException(final String description)1402     ParseException parseException(final String description) {
1403       // Note:  People generally prefer one-based line and column numbers.
1404       return new ParseException(line + 1, column + 1, description);
1405     }
1406 
1407     /**
1408      * Returns a {@link ParseException} with the line and column numbers of the previous token in
1409      * the description, suitable for throwing.
1410      */
parseExceptionPreviousToken(final String description)1411     ParseException parseExceptionPreviousToken(final String description) {
1412       // Note:  People generally prefer one-based line and column numbers.
1413       return new ParseException(previousLine + 1, previousColumn + 1, description);
1414     }
1415 
1416     /**
1417      * Constructs an appropriate {@link ParseException} for the given {@code NumberFormatException}
1418      * when trying to parse an integer.
1419      */
integerParseException(final NumberFormatException e)1420     private ParseException integerParseException(final NumberFormatException e) {
1421       return parseException("Couldn't parse integer: " + e.getMessage());
1422     }
1423 
1424     /**
1425      * Constructs an appropriate {@link ParseException} for the given {@code NumberFormatException}
1426      * when trying to parse a float or double.
1427      */
floatParseException(final NumberFormatException e)1428     private ParseException floatParseException(final NumberFormatException e) {
1429       return parseException("Couldn't parse number: " + e.getMessage());
1430     }
1431   }
1432 
1433   /** Thrown when parsing an invalid text format message. */
1434   public static class ParseException extends IOException {
1435     private static final long serialVersionUID = 3196188060225107702L;
1436 
1437     private final int line;
1438     private final int column;
1439 
1440     /** Create a new instance, with -1 as the line and column numbers. */
ParseException(final String message)1441     public ParseException(final String message) {
1442       this(-1, -1, message);
1443     }
1444 
1445     /**
1446      * Create a new instance
1447      *
1448      * @param line the line number where the parse error occurred, using 1-offset.
1449      * @param column the column number where the parser error occurred, using 1-offset.
1450      */
ParseException(final int line, final int column, final String message)1451     public ParseException(final int line, final int column, final String message) {
1452       super(Integer.toString(line) + ":" + column + ": " + message);
1453       this.line = line;
1454       this.column = column;
1455     }
1456 
1457     /**
1458      * Return the line where the parse exception occurred, or -1 when none is provided. The value is
1459      * specified as 1-offset, so the first line is line 1.
1460      */
getLine()1461     public int getLine() {
1462       return line;
1463     }
1464 
1465     /**
1466      * Return the column where the parse exception occurred, or -1 when none is provided. The value
1467      * is specified as 1-offset, so the first line is line 1.
1468      */
getColumn()1469     public int getColumn() {
1470       return column;
1471     }
1472   }
1473 
  /**
   * Obsolete exception, once thrown when encountering an unknown field while parsing a text
   * format message.
   *
   * @deprecated This exception is unused and will be removed in the next breaking release
   *     (v5.x.x).
   */
  @Deprecated
  public static class UnknownFieldParseException extends ParseException {
    // Name of the unknown field as passed to the constructor ("" when none was given).
    private final String unknownField;

    /**
     * Create a new instance, with -1 as the line and column numbers, and an empty unknown field
     * name.
     */
    public UnknownFieldParseException(final String message) {
      this(-1, -1, "", message);
    }

    /**
     * Create a new instance
     *
     * @param line the line number where the parse error occurred, using 1-offset.
     * @param column the column number where the parser error occurred, using 1-offset.
     * @param unknownField the name of the unknown field found while parsing.
     * @param message description of the problem, passed on to {@link ParseException}.
     */
    public UnknownFieldParseException(
        final int line, final int column, final String unknownField, final String message) {
      super(line, column, message);
      this.unknownField = unknownField;
    }

    /**
     * Return the name of the unknown field encountered while parsing the protocol buffer string.
     */
    public String getUnknownField() {
      return unknownField;
    }
  }
1512 
1513   private static final Parser PARSER = Parser.newBuilder().build();
1514 
1515   /**
1516    * Return a {@link Parser} instance which can parse text-format messages. The returned instance is
1517    * thread-safe.
1518    */
getParser()1519   public static Parser getParser() {
1520     return PARSER;
1521   }
1522 
1523   /** Parse a text-format message from {@code input} and merge the contents into {@code builder}. */
merge(final Readable input, final Message.Builder builder)1524   public static void merge(final Readable input, final Message.Builder builder) throws IOException {
1525     PARSER.merge(input, builder);
1526   }
1527 
1528   /** Parse a text-format message from {@code input} and merge the contents into {@code builder}. */
merge(final CharSequence input, final Message.Builder builder)1529   public static void merge(final CharSequence input, final Message.Builder builder)
1530       throws ParseException {
1531     PARSER.merge(input, builder);
1532   }
1533 
1534   /**
1535    * Parse a text-format message from {@code input}.
1536    *
1537    * @return the parsed message, guaranteed initialized
1538    */
parse(final CharSequence input, final Class<T> protoClass)1539   public static <T extends Message> T parse(final CharSequence input, final Class<T> protoClass)
1540       throws ParseException {
1541     Message.Builder builder = Internal.getDefaultInstance(protoClass).newBuilderForType();
1542     merge(input, builder);
1543     @SuppressWarnings("unchecked")
1544     T output = (T) builder.build();
1545     return output;
1546   }
1547 
1548   /**
1549    * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1550    * Extensions will be recognized if they are registered in {@code extensionRegistry}.
1551    */
merge( final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1552   public static void merge(
1553       final Readable input,
1554       final ExtensionRegistry extensionRegistry,
1555       final Message.Builder builder)
1556       throws IOException {
1557     PARSER.merge(input, extensionRegistry, builder);
1558   }
1559 
1560   /**
1561    * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1562    * Extensions will be recognized if they are registered in {@code extensionRegistry}.
1563    */
merge( final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1564   public static void merge(
1565       final CharSequence input,
1566       final ExtensionRegistry extensionRegistry,
1567       final Message.Builder builder)
1568       throws ParseException {
1569     PARSER.merge(input, extensionRegistry, builder);
1570   }
1571 
1572   /**
1573    * Parse a text-format message from {@code input}. Extensions will be recognized if they are
1574    * registered in {@code extensionRegistry}.
1575    *
1576    * @return the parsed message, guaranteed initialized
1577    */
parse( final CharSequence input, final ExtensionRegistry extensionRegistry, final Class<T> protoClass)1578   public static <T extends Message> T parse(
1579       final CharSequence input,
1580       final ExtensionRegistry extensionRegistry,
1581       final Class<T> protoClass)
1582       throws ParseException {
1583     Message.Builder builder = Internal.getDefaultInstance(protoClass).newBuilderForType();
1584     merge(input, extensionRegistry, builder);
1585     @SuppressWarnings("unchecked")
1586     T output = (T) builder.build();
1587     return output;
1588   }
1589 
1590   /**
1591    * Parser for text-format proto2 instances. This class is thread-safe. The implementation largely
1592    * follows google/protobuf/text_format.cc.
1593    *
1594    * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or {@link Builder} to
1595    * control the parser behavior.
1596    */
1597   public static class Parser {
1598 
1599     /**
1600      * A valid silent marker appears between a field name and its value. If there is a ":" in
1601      * between, the silent marker will only appear after the colon. This is called after a field
1602      * name is parsed, and before the ":" if it exists. If the current token is ":", then
1603      * containsSilentMarkerAfterCurrentToken indicates if there is a valid silent marker. Otherwise,
1604      * the current token is part of the field value, so the silent marker is indicated by
1605      * containsSilentMarkerAfterPrevToken.
1606      */
detectSilentMarker( Tokenizer tokenizer, Descriptor immediateMessageType, String fieldName)1607     private void detectSilentMarker(
1608         Tokenizer tokenizer, Descriptor immediateMessageType, String fieldName) {
1609     }
1610 
1611     /**
1612      * Determines if repeated values for non-repeated fields and oneofs are permitted. For example,
1613      * given required/optional field "foo" and a oneof containing "baz" and "moo":
1614      *
1615      * <ul>
1616      *   <li>"foo: 1 foo: 2"
1617      *   <li>"baz: 1 moo: 2"
1618      *   <li>merging "foo: 2" into a proto in which foo is already set, or
1619      *   <li>merging "moo: 2" into a proto in which baz is already set.
1620      * </ul>
1621      */
1622     public enum SingularOverwritePolicy {
1623       /**
1624        * Later values are merged with earlier values. For primitive fields or conflicting oneofs,
1625        * the last value is retained.
1626        */
1627       ALLOW_SINGULAR_OVERWRITES,
1628       /** An error is issued. */
1629       FORBID_SINGULAR_OVERWRITES
1630     }
1631 
1632     private final TypeRegistry typeRegistry;
1633     private final boolean allowUnknownFields;
1634     private final boolean allowUnknownEnumValues;
1635     private final boolean allowUnknownExtensions;
1636     private final SingularOverwritePolicy singularOverwritePolicy;
1637     private TextFormatParseInfoTree.Builder parseInfoTreeBuilder;
1638     private final int recursionLimit;
1639 
Parser( TypeRegistry typeRegistry, boolean allowUnknownFields, boolean allowUnknownEnumValues, boolean allowUnknownExtensions, SingularOverwritePolicy singularOverwritePolicy, TextFormatParseInfoTree.Builder parseInfoTreeBuilder, int recursionLimit)1640     private Parser(
1641         TypeRegistry typeRegistry,
1642         boolean allowUnknownFields,
1643         boolean allowUnknownEnumValues,
1644         boolean allowUnknownExtensions,
1645         SingularOverwritePolicy singularOverwritePolicy,
1646         TextFormatParseInfoTree.Builder parseInfoTreeBuilder,
1647         int recursionLimit) {
1648       this.typeRegistry = typeRegistry;
1649       this.allowUnknownFields = allowUnknownFields;
1650       this.allowUnknownEnumValues = allowUnknownEnumValues;
1651       this.allowUnknownExtensions = allowUnknownExtensions;
1652       this.singularOverwritePolicy = singularOverwritePolicy;
1653       this.parseInfoTreeBuilder = parseInfoTreeBuilder;
1654       this.recursionLimit = recursionLimit;
1655     }
1656 
1657     /** Returns a new instance of {@link Builder}. */
newBuilder()1658     public static Builder newBuilder() {
1659       return new Builder();
1660     }
1661 
1662     /** Builder that can be used to obtain new instances of {@link Parser}. */
1663     public static class Builder {
1664       private boolean allowUnknownFields = false;
1665       private boolean allowUnknownEnumValues = false;
1666       private boolean allowUnknownExtensions = false;
1667       private SingularOverwritePolicy singularOverwritePolicy =
1668           SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES;
1669       private TextFormatParseInfoTree.Builder parseInfoTreeBuilder = null;
1670       private TypeRegistry typeRegistry = TypeRegistry.getEmptyTypeRegistry();
1671       private int recursionLimit = 100;
1672 
1673       /**
1674        * Sets the TypeRegistry for resolving Any. If this is not set, TextFormat will not be able to
1675        * parse Any unless Any is write as bytes.
1676        *
1677        * @throws IllegalArgumentException if a registry is already set.
1678        */
setTypeRegistry(TypeRegistry typeRegistry)1679       public Builder setTypeRegistry(TypeRegistry typeRegistry) {
1680         this.typeRegistry = typeRegistry;
1681         return this;
1682       }
1683 
1684       /**
1685        * Set whether this parser will allow unknown fields. By default, an exception is thrown if an
1686        * unknown field is encountered. If this is set, the parser will only log a warning. Allow
1687        * unknown fields will also allow unknown extensions.
1688        *
1689        * <p>Use of this parameter is discouraged which may hide some errors (e.g. spelling error on
1690        * field name).
1691        */
setAllowUnknownFields(boolean allowUnknownFields)1692       public Builder setAllowUnknownFields(boolean allowUnknownFields) {
1693         this.allowUnknownFields = allowUnknownFields;
1694         return this;
1695       }
1696 
1697       /**
1698        * Set whether this parser will allow unknown extensions. By default, an exception is thrown
1699        * if unknown extension is encountered. If this is set true, the parser will only log a
1700        * warning. Allow unknown extensions does not mean allow normal unknown fields.
1701        */
setAllowUnknownExtensions(boolean allowUnknownExtensions)1702       public Builder setAllowUnknownExtensions(boolean allowUnknownExtensions) {
1703         this.allowUnknownExtensions = allowUnknownExtensions;
1704         return this;
1705       }
1706 
1707       /** Sets parser behavior when a non-repeated field appears more than once. */
setSingularOverwritePolicy(SingularOverwritePolicy p)1708       public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) {
1709         this.singularOverwritePolicy = p;
1710         return this;
1711       }
1712 
setParseInfoTreeBuilder(TextFormatParseInfoTree.Builder parseInfoTreeBuilder)1713       public Builder setParseInfoTreeBuilder(TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
1714         this.parseInfoTreeBuilder = parseInfoTreeBuilder;
1715         return this;
1716       }
1717 
1718       /**
1719        * Set the maximum recursion limit that the parser will allow. If the depth of the message
1720        * exceeds this limit then the parser will stop and throw an exception.
1721        */
setRecursionLimit(int recursionLimit)1722       public Builder setRecursionLimit(int recursionLimit) {
1723         this.recursionLimit = recursionLimit;
1724         return this;
1725       }
1726 
build()1727       public Parser build() {
1728         return new Parser(
1729             typeRegistry,
1730             allowUnknownFields,
1731             allowUnknownEnumValues,
1732             allowUnknownExtensions,
1733             singularOverwritePolicy,
1734             parseInfoTreeBuilder,
1735             recursionLimit);
1736       }
1737     }
1738 
1739     /**
1740      * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1741      */
merge(final Readable input, final Message.Builder builder)1742     public void merge(final Readable input, final Message.Builder builder) throws IOException {
1743       merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
1744     }
1745 
1746     /**
1747      * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1748      */
merge(final CharSequence input, final Message.Builder builder)1749     public void merge(final CharSequence input, final Message.Builder builder)
1750         throws ParseException {
1751       merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
1752     }
1753 
1754     /**
1755      * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1756      * Extensions will be recognized if they are registered in {@code extensionRegistry}.
1757      */
merge( final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1758     public void merge(
1759         final Readable input,
1760         final ExtensionRegistry extensionRegistry,
1761         final Message.Builder builder)
1762         throws IOException {
1763       // Read the entire input to a String then parse that.
1764 
1765       // If StreamTokenizer was not so limited, or if there were a kind
1766       // of Reader that could read in chunks that match some particular regex,
1767       // or if we wanted to write a custom Reader to tokenize our stream, then
1768       // we would not have to read to one big String.  Alas, none of these is
1769       // the case.  Oh well.
1770 
1771       merge(toStringBuilder(input), extensionRegistry, builder);
1772     }
1773 
1774     private static final int BUFFER_SIZE = 4096;
1775 
1776     // TODO: See if working around java.io.Reader#read(CharBuffer)
1777     // overhead is worthwhile
toStringBuilder(final Readable input)1778     private static StringBuilder toStringBuilder(final Readable input) throws IOException {
1779       final StringBuilder text = new StringBuilder();
1780       final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
1781       while (true) {
1782         final int n = input.read(buffer);
1783         if (n == -1) {
1784           break;
1785         }
1786         Java8Compatibility.flip(buffer);
1787         text.append(buffer, 0, n);
1788       }
1789       return text;
1790     }
1791 
1792     static final class UnknownField {
1793       static enum Type {
1794         FIELD,
1795         EXTENSION;
1796       }
1797 
1798       final String message;
1799       final Type type;
1800 
UnknownField(String message, Type type)1801       UnknownField(String message, Type type) {
1802         this.message = message;
1803         this.type = type;
1804       }
1805     }
1806 
1807     // Check both unknown fields and unknown extensions and log warning messages
1808     // or throw exceptions according to the flag.
checkUnknownFields(final List<UnknownField> unknownFields)1809     private void checkUnknownFields(final List<UnknownField> unknownFields) throws ParseException {
1810       if (unknownFields.isEmpty()) {
1811         return;
1812       }
1813 
1814       StringBuilder msg = new StringBuilder("Input contains unknown fields and/or extensions:");
1815       for (UnknownField field : unknownFields) {
1816         msg.append('\n').append(field.message);
1817       }
1818 
1819       if (allowUnknownFields) {
1820         logger.warning(msg.toString());
1821         return;
1822       }
1823 
1824       int firstErrorIndex = 0;
1825       if (allowUnknownExtensions) {
1826         boolean allUnknownExtensions = true;
1827         for (UnknownField field : unknownFields) {
1828           if (field.type == UnknownField.Type.FIELD) {
1829             allUnknownExtensions = false;
1830             break;
1831           }
1832           ++firstErrorIndex;
1833         }
1834         if (allUnknownExtensions) {
1835           logger.warning(msg.toString());
1836           return;
1837         }
1838       }
1839 
1840       String[] lineColumn = unknownFields.get(firstErrorIndex).message.split(":");
1841       throw new ParseException(
1842           Integer.parseInt(lineColumn[0]), Integer.parseInt(lineColumn[1]), msg.toString());
1843     }
1844 
1845     /**
1846      * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1847      * Extensions will be recognized if they are registered in {@code extensionRegistry}.
1848      */
merge( final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1849     public void merge(
1850         final CharSequence input,
1851         final ExtensionRegistry extensionRegistry,
1852         final Message.Builder builder)
1853         throws ParseException {
1854       final Tokenizer tokenizer = new Tokenizer(input);
1855       MessageReflection.BuilderAdapter target = new MessageReflection.BuilderAdapter(builder);
1856       List<UnknownField> unknownFields = new ArrayList<UnknownField>();
1857 
1858       while (!tokenizer.atEnd()) {
1859         mergeField(tokenizer, extensionRegistry, target, unknownFields, recursionLimit);
1860       }
1861       checkUnknownFields(unknownFields);
1862     }
1863 
1864     /** Parse a single field from {@code tokenizer} and merge it into {@code builder}. */
mergeField( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, List<UnknownField> unknownFields, int recursionLimit)1865     private void mergeField(
1866         final Tokenizer tokenizer,
1867         final ExtensionRegistry extensionRegistry,
1868         final MessageReflection.MergeTarget target,
1869         List<UnknownField> unknownFields,
1870         int recursionLimit)
1871         throws ParseException {
1872       mergeField(
1873           tokenizer,
1874           extensionRegistry,
1875           target,
1876           parseInfoTreeBuilder,
1877           unknownFields,
1878           recursionLimit);
1879     }
1880 
1881     /** Parse a single field from {@code tokenizer} and merge it into {@code target}. */
mergeField( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields, int recursionLimit)1882     private void mergeField(
1883         final Tokenizer tokenizer,
1884         final ExtensionRegistry extensionRegistry,
1885         final MessageReflection.MergeTarget target,
1886         TextFormatParseInfoTree.Builder parseTreeBuilder,
1887         List<UnknownField> unknownFields,
1888         int recursionLimit)
1889         throws ParseException {
1890       FieldDescriptor field = null;
1891       String name;
1892       int startLine = tokenizer.getLine();
1893       int startColumn = tokenizer.getColumn();
1894       final Descriptor type = target.getDescriptorForType();
1895       ExtensionRegistry.ExtensionInfo extension = null;
1896 
1897       if ("google.protobuf.Any".equals(type.getFullName()) && tokenizer.tryConsume("[")) {
1898         if (recursionLimit < 1) {
1899           throw tokenizer.parseException("Message is nested too deep");
1900         }
1901         mergeAnyFieldValue(
1902             tokenizer,
1903             extensionRegistry,
1904             target,
1905             parseTreeBuilder,
1906             unknownFields,
1907             type,
1908             recursionLimit - 1);
1909         return;
1910       }
1911 
1912       if (tokenizer.tryConsume("[")) {
1913         // An extension.
1914         StringBuilder nameBuilder = new StringBuilder(tokenizer.consumeIdentifier());
1915         while (tokenizer.tryConsume(".")) {
1916           nameBuilder.append('.');
1917           nameBuilder.append(tokenizer.consumeIdentifier());
1918         }
1919         name = nameBuilder.toString();
1920 
1921         extension = target.findExtensionByName(extensionRegistry, name);
1922 
1923         if (extension == null) {
1924           String message =
1925               (tokenizer.getPreviousLine() + 1)
1926                   + ":"
1927                   + (tokenizer.getPreviousColumn() + 1)
1928                   + ":\t"
1929                   + type.getFullName()
1930                   + ".["
1931                   + name
1932                   + "]";
1933           unknownFields.add(new UnknownField(message, UnknownField.Type.EXTENSION));
1934         } else {
1935           if (extension.descriptor.getContainingType() != type) {
1936             throw tokenizer.parseExceptionPreviousToken(
1937                 "Extension \""
1938                     + name
1939                     + "\" does not extend message type \""
1940                     + type.getFullName()
1941                     + "\".");
1942           }
1943           field = extension.descriptor;
1944         }
1945 
1946         tokenizer.consume("]");
1947       } else {
1948         name = tokenizer.consumeIdentifier();
1949         field = type.findFieldByName(name);
1950 
1951         // Group names are expected to be capitalized as they appear in the
1952         // .proto file, which actually matches their type names, not their field
1953         // names.
1954         if (field == null) {
1955           // Explicitly specify US locale so that this code does not break when
1956           // executing in Turkey.
1957           final String lowerName = name.toLowerCase(Locale.US);
1958           field = type.findFieldByName(lowerName);
1959           // If the case-insensitive match worked but the field is NOT a group,
1960           if (field != null && !field.isGroupLike()) {
1961             field = null;
1962           }
1963           if (field != null && !field.getMessageType().getName().equals(name)) {
1964             field = null;
1965           }
1966         }
1967 
1968         if (field == null) {
1969           String message =
1970               (tokenizer.getPreviousLine() + 1)
1971                   + ":"
1972                   + (tokenizer.getPreviousColumn() + 1)
1973                   + ":\t"
1974                   + type.getFullName()
1975                   + "."
1976                   + name;
1977           unknownFields.add(new UnknownField(message, UnknownField.Type.FIELD));
1978         }
1979       }
1980 
1981       // Skips unknown fields.
1982       if (field == null) {
1983         detectSilentMarker(tokenizer, type, name);
1984         guessFieldTypeAndSkip(tokenizer, type, recursionLimit);
1985         return;
1986       }
1987 
1988       // Handle potential ':'.
1989       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
1990         detectSilentMarker(tokenizer, type, field.getFullName());
1991         tokenizer.tryConsume(":"); // optional
1992         if (parseTreeBuilder != null) {
1993           TextFormatParseInfoTree.Builder childParseTreeBuilder =
1994               parseTreeBuilder.getBuilderForSubMessageField(field);
1995           consumeFieldValues(
1996               tokenizer,
1997               extensionRegistry,
1998               target,
1999               field,
2000               extension,
2001               childParseTreeBuilder,
2002               unknownFields,
2003               recursionLimit);
2004         } else {
2005           consumeFieldValues(
2006               tokenizer,
2007               extensionRegistry,
2008               target,
2009               field,
2010               extension,
2011               parseTreeBuilder,
2012               unknownFields,
2013               recursionLimit);
2014         }
2015       } else {
2016         detectSilentMarker(tokenizer, type, field.getFullName());
2017         tokenizer.consume(":"); // required
2018         consumeFieldValues(
2019             tokenizer,
2020             extensionRegistry,
2021             target,
2022             field,
2023             extension,
2024             parseTreeBuilder,
2025             unknownFields,
2026             recursionLimit);
2027       }
2028 
2029       if (parseTreeBuilder != null) {
2030         parseTreeBuilder.setLocation(field, TextFormatParseLocation.create(startLine, startColumn));
2031       }
2032 
2033       // For historical reasons, fields may optionally be separated by commas or
2034       // semicolons.
2035       if (!tokenizer.tryConsume(";")) {
2036         tokenizer.tryConsume(",");
2037       }
2038     }
2039 
consumeFullTypeName(Tokenizer tokenizer)2040     private String consumeFullTypeName(Tokenizer tokenizer) throws ParseException {
2041       // If there is not a leading `[`, this is just a type name.
2042       if (!tokenizer.tryConsume("[")) {
2043         return tokenizer.consumeIdentifier();
2044       }
2045 
2046       // Otherwise, this is an extension or google.protobuf.Any type URL: we consume proto path
2047       // elements until we've addressed the type.
2048       String name = tokenizer.consumeIdentifier();
2049       while (tokenizer.tryConsume(".")) {
2050         name += "." + tokenizer.consumeIdentifier();
2051       }
2052       if (tokenizer.tryConsume("/")) {
2053         name += "/" + tokenizer.consumeIdentifier();
2054         while (tokenizer.tryConsume(".")) {
2055           name += "." + tokenizer.consumeIdentifier();
2056         }
2057       }
2058       tokenizer.consume("]");
2059 
2060       return name;
2061     }
2062 
2063     /**
2064      * Parse a one or more field values from {@code tokenizer} and merge it into {@code builder}.
2065      */
consumeFieldValues( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields, int recursionLimit)2066     private void consumeFieldValues(
2067         final Tokenizer tokenizer,
2068         final ExtensionRegistry extensionRegistry,
2069         final MessageReflection.MergeTarget target,
2070         final FieldDescriptor field,
2071         final ExtensionRegistry.ExtensionInfo extension,
2072         final TextFormatParseInfoTree.Builder parseTreeBuilder,
2073         List<UnknownField> unknownFields,
2074         int recursionLimit)
2075         throws ParseException {
2076       // Support specifying repeated field values as a comma-separated list.
2077       // Ex."foo: [1, 2, 3]"
2078       if (field.isRepeated() && tokenizer.tryConsume("[")) {
2079         if (!tokenizer.tryConsume("]")) { // Allow "foo: []" to be treated as empty.
2080           while (true) {
2081             consumeFieldValue(
2082                 tokenizer,
2083                 extensionRegistry,
2084                 target,
2085                 field,
2086                 extension,
2087                 parseTreeBuilder,
2088                 unknownFields,
2089                 recursionLimit);
2090             if (tokenizer.tryConsume("]")) {
2091               // End of list.
2092               break;
2093             }
2094             tokenizer.consume(",");
2095           }
2096         }
2097       } else {
2098         consumeFieldValue(
2099             tokenizer,
2100             extensionRegistry,
2101             target,
2102             field,
2103             extension,
2104             parseTreeBuilder,
2105             unknownFields,
2106             recursionLimit);
2107       }
2108     }
2109 
2110     /** Parse a single field value from {@code tokenizer} and merge it into {@code builder}. */
consumeFieldValue( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields, int recursionLimit)2111     private void consumeFieldValue(
2112         final Tokenizer tokenizer,
2113         final ExtensionRegistry extensionRegistry,
2114         final MessageReflection.MergeTarget target,
2115         final FieldDescriptor field,
2116         final ExtensionRegistry.ExtensionInfo extension,
2117         final TextFormatParseInfoTree.Builder parseTreeBuilder,
2118         List<UnknownField> unknownFields,
2119         int recursionLimit)
2120         throws ParseException {
2121       if (singularOverwritePolicy == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES
2122           && !field.isRepeated()) {
2123         if (target.hasField(field)) {
2124           throw tokenizer.parseExceptionPreviousToken(
2125               "Non-repeated field \"" + field.getFullName() + "\" cannot be overwritten.");
2126         } else if (field.getContainingOneof() != null
2127             && target.hasOneof(field.getContainingOneof())) {
2128           Descriptors.OneofDescriptor oneof = field.getContainingOneof();
2129           throw tokenizer.parseExceptionPreviousToken(
2130               "Field \""
2131                   + field.getFullName()
2132                   + "\" is specified along with field \""
2133                   + target.getOneofFieldDescriptor(oneof).getFullName()
2134                   + "\", another member of oneof \""
2135                   + oneof.getName()
2136                   + "\".");
2137         }
2138       }
2139 
2140       Object value = null;
2141 
2142       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
2143         if (recursionLimit < 1) {
2144           throw tokenizer.parseException("Message is nested too deep");
2145         }
2146 
2147         final String endToken;
2148         if (tokenizer.tryConsume("<")) {
2149           endToken = ">";
2150         } else {
2151           tokenizer.consume("{");
2152           endToken = "}";
2153         }
2154 
2155         Message defaultInstance = (extension == null) ? null : extension.defaultInstance;
2156         MessageReflection.MergeTarget subField =
2157             target.newMergeTargetForField(field, defaultInstance);
2158 
2159         while (!tokenizer.tryConsume(endToken)) {
2160           if (tokenizer.atEnd()) {
2161             throw tokenizer.parseException("Expected \"" + endToken + "\".");
2162           }
2163           mergeField(
2164               tokenizer,
2165               extensionRegistry,
2166               subField,
2167               parseTreeBuilder,
2168               unknownFields,
2169               recursionLimit - 1);
2170         }
2171 
2172         value = subField.finish();
2173       } else {
2174         switch (field.getType()) {
2175           case INT32:
2176           case SINT32:
2177           case SFIXED32:
2178             value = tokenizer.consumeInt32();
2179             break;
2180 
2181           case INT64:
2182           case SINT64:
2183           case SFIXED64:
2184             value = tokenizer.consumeInt64();
2185             break;
2186 
2187           case UINT32:
2188           case FIXED32:
2189             value = tokenizer.consumeUInt32();
2190             break;
2191 
2192           case UINT64:
2193           case FIXED64:
2194             value = tokenizer.consumeUInt64();
2195             break;
2196 
2197           case FLOAT:
2198             value = tokenizer.consumeFloat();
2199             break;
2200 
2201           case DOUBLE:
2202             value = tokenizer.consumeDouble();
2203             break;
2204 
2205           case BOOL:
2206             value = tokenizer.consumeBoolean();
2207             break;
2208 
2209           case STRING:
2210             value = tokenizer.consumeString();
2211             break;
2212 
2213           case BYTES:
2214             value = tokenizer.consumeByteString();
2215             break;
2216 
2217           case ENUM:
2218             final EnumDescriptor enumType = field.getEnumType();
2219 
2220             if (tokenizer.lookingAtInteger()) {
2221               final int number = tokenizer.consumeInt32();
2222               value =
2223                   enumType.isClosed()
2224                       ? enumType.findValueByNumber(number)
2225                       : enumType.findValueByNumberCreatingIfUnknown(number);
2226               if (value == null) {
2227                 String unknownValueMsg =
2228                     "Enum type \""
2229                         + enumType.getFullName()
2230                         + "\" has no value with number "
2231                         + number
2232                         + '.';
2233                 if (allowUnknownEnumValues) {
2234                   logger.warning(unknownValueMsg);
2235                   return;
2236                 } else {
2237                   throw tokenizer.parseExceptionPreviousToken(
2238                       "Enum type \""
2239                           + enumType.getFullName()
2240                           + "\" has no value with number "
2241                           + number
2242                           + '.');
2243                 }
2244               }
2245             } else {
2246               final String id = tokenizer.consumeIdentifier();
2247               value = enumType.findValueByName(id);
2248               if (value == null) {
2249                 String unknownValueMsg =
2250                     "Enum type \""
2251                         + enumType.getFullName()
2252                         + "\" has no value named \""
2253                         + id
2254                         + "\".";
2255                 if (allowUnknownEnumValues) {
2256                   logger.warning(unknownValueMsg);
2257                   return;
2258                 } else {
2259                   throw tokenizer.parseExceptionPreviousToken(unknownValueMsg);
2260                 }
2261               }
2262             }
2263 
2264             break;
2265 
2266           case MESSAGE:
2267           case GROUP:
2268             throw new RuntimeException("Can't get here.");
2269         }
2270       }
2271 
2272       if (field.isRepeated()) {
2273         // TODO: If field.isMapField() and FORBID_SINGULAR_OVERWRITES mode,
2274         //     check for duplicate map keys here.
2275         target.addRepeatedField(field, value);
2276       } else {
2277         target.setField(field, value);
2278       }
2279     }
2280 
mergeAnyFieldValue( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, MergeTarget target, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields, Descriptor anyDescriptor, int recursionLimit)2281     private void mergeAnyFieldValue(
2282         final Tokenizer tokenizer,
2283         final ExtensionRegistry extensionRegistry,
2284         MergeTarget target,
2285         final TextFormatParseInfoTree.Builder parseTreeBuilder,
2286         List<UnknownField> unknownFields,
2287         Descriptor anyDescriptor,
2288         int recursionLimit)
2289         throws ParseException {
2290       // Try to parse human readable format of Any in the form: [type_url]: { ... }
2291       StringBuilder typeUrlBuilder = new StringBuilder();
2292       // Parse the type_url inside [].
2293       while (true) {
2294         typeUrlBuilder.append(tokenizer.consumeIdentifier());
2295         if (tokenizer.tryConsume("]")) {
2296           break;
2297         }
2298         if (tokenizer.tryConsume("/")) {
2299           typeUrlBuilder.append("/");
2300         } else if (tokenizer.tryConsume(".")) {
2301           typeUrlBuilder.append(".");
2302         } else {
2303           throw tokenizer.parseExceptionPreviousToken("Expected a valid type URL.");
2304         }
2305       }
2306       detectSilentMarker(tokenizer, anyDescriptor, typeUrlBuilder.toString());
2307       tokenizer.tryConsume(":");
2308       final String anyEndToken;
2309       if (tokenizer.tryConsume("<")) {
2310         anyEndToken = ">";
2311       } else {
2312         tokenizer.consume("{");
2313         anyEndToken = "}";
2314       }
2315       String typeUrl = typeUrlBuilder.toString();
2316       Descriptor contentType = null;
2317       try {
2318         contentType = typeRegistry.getDescriptorForTypeUrl(typeUrl);
2319       } catch (InvalidProtocolBufferException e) {
2320         throw tokenizer.parseException("Invalid valid type URL. Found: " + typeUrl);
2321       }
2322       if (contentType == null) {
2323         throw tokenizer.parseException(
2324             "Unable to parse Any of type: "
2325                 + typeUrl
2326                 + ". Please make sure that the TypeRegistry contains the descriptors for the given"
2327                 + " types.");
2328       }
2329       Message.Builder contentBuilder =
2330           DynamicMessage.getDefaultInstance(contentType).newBuilderForType();
2331       MessageReflection.BuilderAdapter contentTarget =
2332           new MessageReflection.BuilderAdapter(contentBuilder);
2333       while (!tokenizer.tryConsume(anyEndToken)) {
2334         mergeField(
2335             tokenizer,
2336             extensionRegistry,
2337             contentTarget,
2338             parseTreeBuilder,
2339             unknownFields,
2340             recursionLimit);
2341       }
2342 
2343       target.setField(anyDescriptor.findFieldByName("type_url"), typeUrlBuilder.toString());
2344       target.setField(
2345           anyDescriptor.findFieldByName("value"), contentBuilder.build().toByteString());
2346     }
2347 
2348     /** Skips the next field including the field's name and value. */
skipField(Tokenizer tokenizer, Descriptor type, int recursionLimit)2349     private void skipField(Tokenizer tokenizer, Descriptor type, int recursionLimit)
2350         throws ParseException {
2351       String name = consumeFullTypeName(tokenizer);
2352       detectSilentMarker(tokenizer, type, name);
2353       guessFieldTypeAndSkip(tokenizer, type, recursionLimit);
2354 
2355       // For historical reasons, fields may optionally be separated by commas or
2356       // semicolons.
2357       if (!tokenizer.tryConsume(";")) {
2358         tokenizer.tryConsume(",");
2359       }
2360     }
2361 
2362     /**
2363      * Skips the whole body of a message including the beginning delimiter and the ending delimiter.
2364      */
skipFieldMessage(Tokenizer tokenizer, Descriptor type, int recursionLimit)2365     private void skipFieldMessage(Tokenizer tokenizer, Descriptor type, int recursionLimit)
2366         throws ParseException {
2367       final String delimiter;
2368       if (tokenizer.tryConsume("<")) {
2369         delimiter = ">";
2370       } else {
2371         tokenizer.consume("{");
2372         delimiter = "}";
2373       }
2374       while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) {
2375         skipField(tokenizer, type, recursionLimit);
2376       }
2377       tokenizer.consume(delimiter);
2378     }
2379 
2380     /** Skips a field value. */
skipFieldValue(Tokenizer tokenizer)2381     private void skipFieldValue(Tokenizer tokenizer) throws ParseException {
2382       if (!tokenizer.tryConsumeByteString()
2383           && !tokenizer.tryConsumeIdentifier() // includes enum & boolean
2384           && !tokenizer.tryConsumeInt64() // includes int32
2385           && !tokenizer.tryConsumeUInt64() // includes uint32
2386           && !tokenizer.tryConsumeDouble()
2387           && !tokenizer.tryConsumeFloat()) {
2388         throw tokenizer.parseException("Invalid field value: " + tokenizer.currentToken);
2389       }
2390     }
2391 
2392     /**
2393      * Tries to guess the type of this field and skip it.
2394      *
2395      * <p>If this field is not a message, there should be a ":" between the field name and the field
2396      * value and also the field value should not start with "{" or "<" which indicates the beginning
2397      * of a message body. If there is no ":" or there is a "{" or "<" after ":", this field has to
2398      * be a message or the input is ill-formed. For short-formed repeated fields (i.e. with "[]"),
2399      * if it is repeated scalar, there must be a ":" between the field name and the starting "[" .
2400      */
guessFieldTypeAndSkip(Tokenizer tokenizer, Descriptor type, int recursionLimit)2401     private void guessFieldTypeAndSkip(Tokenizer tokenizer, Descriptor type, int recursionLimit)
2402         throws ParseException {
2403       boolean semicolonConsumed = tokenizer.tryConsume(":");
2404       if (tokenizer.lookingAt("[")) {
2405         // Short repeated field form. If a semicolon was consumed, it could be repeated scalar or
2406         // repeated message. If not, it must be repeated message.
2407         skipFieldShortFormedRepeated(tokenizer, semicolonConsumed, type, recursionLimit);
2408       } else if (semicolonConsumed && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) {
2409         skipFieldValue(tokenizer);
2410       } else {
2411         if (recursionLimit < 1) {
2412           throw tokenizer.parseException("Message is nested too deep");
2413         }
2414         skipFieldMessage(tokenizer, type, recursionLimit - 1);
2415       }
2416     }
2417 
2418     /**
2419      * Skips a short-formed repeated field value.
2420      *
2421      * <p>Reports an error if scalar type is not allowed but showing up inside "[]".
2422      */
skipFieldShortFormedRepeated( Tokenizer tokenizer, boolean scalarAllowed, Descriptor type, int recursionLimit)2423     private void skipFieldShortFormedRepeated(
2424         Tokenizer tokenizer, boolean scalarAllowed, Descriptor type, int recursionLimit)
2425         throws ParseException {
2426       if (!tokenizer.tryConsume("[") || tokenizer.tryConsume("]")) {
2427         // Try skipping "[]".
2428         return;
2429       }
2430 
2431       while (true) {
2432         if (tokenizer.lookingAt("{") || tokenizer.lookingAt("<")) {
2433           // Try skipping message field inside "[]"
2434           if (recursionLimit < 1) {
2435             throw tokenizer.parseException("Message is nested too deep");
2436           }
2437           skipFieldMessage(tokenizer, type, recursionLimit - 1);
2438         } else if (scalarAllowed) {
2439           // Try skipping scalar field inside "[]".
2440           skipFieldValue(tokenizer);
2441         } else {
2442           throw tokenizer.parseException(
2443               "Invalid repeated scalar field: missing \":\" before \"[\".");
2444         }
2445         if (tokenizer.tryConsume("]")) {
2446           break;
2447         }
2448         tokenizer.consume(",");
2449       }
2450     }
2451   }
2452 
2453   // =================================================================
2454   // Utility functions
2455   //
2456   // Some of these methods are package-private because Descriptors.java uses
2457   // them.
2458 
2459   /**
2460    * Escapes bytes in the format used in protocol buffer text format, which is the same as the
2461    * format used for C string literals. All bytes that are not printable 7-bit ASCII characters are
2462    * escaped, as well as backslash, single-quote, and double-quote characters. Characters for which
2463    * no defined short-hand escape sequence is defined will be escaped using 3-digit octal sequences.
2464    */
escapeBytes(ByteString input)2465   public static String escapeBytes(ByteString input) {
2466     return TextFormatEscaper.escapeBytes(input);
2467   }
2468 
2469   /** Like {@link #escapeBytes(ByteString)}, but used for byte array. */
escapeBytes(byte[] input)2470   public static String escapeBytes(byte[] input) {
2471     return TextFormatEscaper.escapeBytes(input);
2472   }
2473 
2474   /**
2475    * Un-escape a byte sequence as escaped using {@link #escapeBytes(ByteString)}. Two-digit hex
2476    * escapes (starting with "\x") are also recognized.
2477    */
unescapeBytes(CharSequence charString)2478   public static ByteString unescapeBytes(CharSequence charString)
2479       throws InvalidEscapeSequenceException {
2480     // First convert the Java character sequence to UTF-8 bytes.
2481     ByteString input = ByteString.copyFromUtf8(charString.toString());
2482     // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
2483     // escapes can all be expressed with ASCII characters, so it is safe to
2484     // operate on bytes here.
2485     //
2486     // Unescaping the input byte array will result in a byte sequence that's no
2487     // longer than the input.  That's because each escape sequence is between
2488     // two and four bytes long and stands for a single byte.
2489     final byte[] result = new byte[input.size()];
2490     int pos = 0;
2491     for (int i = 0; i < input.size(); i++) {
2492       byte c = input.byteAt(i);
2493       if (c == '\\') {
2494         if (i + 1 < input.size()) {
2495           ++i;
2496           c = input.byteAt(i);
2497           if (isOctal(c)) {
2498             // Octal escape.
2499             int code = digitValue(c);
2500             if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
2501               ++i;
2502               code = code * 8 + digitValue(input.byteAt(i));
2503             }
2504             if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
2505               ++i;
2506               code = code * 8 + digitValue(input.byteAt(i));
2507             }
2508             // TODO: Check that 0 <= code && code <= 0xFF.
2509             result[pos++] = (byte) code;
2510           } else {
2511             switch (c) {
2512               case 'a':
2513                 result[pos++] = 0x07;
2514                 break;
2515               case 'b':
2516                 result[pos++] = '\b';
2517                 break;
2518               case 'f':
2519                 result[pos++] = '\f';
2520                 break;
2521               case 'n':
2522                 result[pos++] = '\n';
2523                 break;
2524               case 'r':
2525                 result[pos++] = '\r';
2526                 break;
2527               case 't':
2528                 result[pos++] = '\t';
2529                 break;
2530               case 'v':
2531                 result[pos++] = 0x0b;
2532                 break;
2533               case '\\':
2534                 result[pos++] = '\\';
2535                 break;
2536               case '\'':
2537                 result[pos++] = '\'';
2538                 break;
2539               case '"':
2540                 result[pos++] = '\"';
2541                 break;
2542               case '?':
2543                 result[pos++] = '?';
2544                 break;
2545 
2546               case 'x':
2547                 // hex escape
2548                 int code = 0;
2549                 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
2550                   ++i;
2551                   code = digitValue(input.byteAt(i));
2552                 } else {
2553                   throw new InvalidEscapeSequenceException(
2554                       "Invalid escape sequence: '\\x' with no digits");
2555                 }
2556                 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
2557                   ++i;
2558                   code = code * 16 + digitValue(input.byteAt(i));
2559                 }
2560                 result[pos++] = (byte) code;
2561                 break;
2562 
2563               case 'u':
2564                 // Unicode escape
2565                 ++i;
2566                 if (i + 3 < input.size()
2567                     && isHex(input.byteAt(i))
2568                     && isHex(input.byteAt(i + 1))
2569                     && isHex(input.byteAt(i + 2))
2570                     && isHex(input.byteAt(i + 3))) {
2571                   char ch =
2572                       (char)
2573                           (digitValue(input.byteAt(i)) << 12
2574                               | digitValue(input.byteAt(i + 1)) << 8
2575                               | digitValue(input.byteAt(i + 2)) << 4
2576                               | digitValue(input.byteAt(i + 3)));
2577 
2578                   if (ch >= Character.MIN_SURROGATE && ch <= Character.MAX_SURROGATE) {
2579                     throw new InvalidEscapeSequenceException(
2580                         "Invalid escape sequence: '\\u' refers to a surrogate");
2581                   }
2582                   byte[] chUtf8 = Character.toString(ch).getBytes(Internal.UTF_8);
2583                   System.arraycopy(chUtf8, 0, result, pos, chUtf8.length);
2584                   pos += chUtf8.length;
2585                   i += 3;
2586                 } else {
2587                   throw new InvalidEscapeSequenceException(
2588                       "Invalid escape sequence: '\\u' with too few hex chars");
2589                 }
2590                 break;
2591 
2592               case 'U':
2593                 // Unicode escape
2594                 ++i;
2595                 if (i + 7 >= input.size()) {
2596                   throw new InvalidEscapeSequenceException(
2597                       "Invalid escape sequence: '\\U' with too few hex chars");
2598                 }
2599                 int codepoint = 0;
2600                 for (int offset = i; offset < i + 8; offset++) {
2601                   byte b = input.byteAt(offset);
2602                   if (!isHex(b)) {
2603                     throw new InvalidEscapeSequenceException(
2604                         "Invalid escape sequence: '\\U' with too few hex chars");
2605                   }
2606                   codepoint = (codepoint << 4) | digitValue(b);
2607                 }
2608                 if (!Character.isValidCodePoint(codepoint)) {
2609                   throw new InvalidEscapeSequenceException(
2610                       "Invalid escape sequence: '\\U"
2611                           + input.substring(i, i + 8).toStringUtf8()
2612                           + "' is not a valid code point value");
2613                 }
2614                 Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(codepoint);
2615                 if (unicodeBlock != null
2616                     && (unicodeBlock.equals(Character.UnicodeBlock.LOW_SURROGATES)
2617                         || unicodeBlock.equals(Character.UnicodeBlock.HIGH_SURROGATES)
2618                         || unicodeBlock.equals(
2619                             Character.UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES))) {
2620                   throw new InvalidEscapeSequenceException(
2621                       "Invalid escape sequence: '\\U"
2622                           + input.substring(i, i + 8).toStringUtf8()
2623                           + "' refers to a surrogate code unit");
2624                 }
2625                 int[] codepoints = new int[1];
2626                 codepoints[0] = codepoint;
2627                 byte[] chUtf8 = new String(codepoints, 0, 1).getBytes(Internal.UTF_8);
2628                 System.arraycopy(chUtf8, 0, result, pos, chUtf8.length);
2629                 pos += chUtf8.length;
2630                 i += 7;
2631                 break;
2632 
2633               default:
2634                 throw new InvalidEscapeSequenceException(
2635                     "Invalid escape sequence: '\\" + (char) c + '\'');
2636             }
2637           }
2638         } else {
2639           throw new InvalidEscapeSequenceException(
2640               "Invalid escape sequence: '\\' at end of string.");
2641         }
2642       } else {
2643         result[pos++] = c;
2644       }
2645     }
2646 
2647     return result.length == pos
2648         ? ByteString.wrap(result) // This reference has not been out of our control.
2649         : ByteString.copyFrom(result, 0, pos);
2650   }
2651 
2652   /**
2653    * Thrown by {@link TextFormat#unescapeBytes} and {@link TextFormat#unescapeText} when an invalid
2654    * escape sequence is seen.
2655    */
2656   public static class InvalidEscapeSequenceException extends IOException {
2657     private static final long serialVersionUID = -8164033650142593304L;
2658 
InvalidEscapeSequenceException(final String description)2659     InvalidEscapeSequenceException(final String description) {
2660       super(description);
2661     }
2662   }
2663 
2664   /**
2665    * Like {@link #escapeBytes(ByteString)}, but escapes a text string. Non-ASCII characters are
2666    * first encoded as UTF-8, then each byte is escaped individually as a 3-digit octal escape. Yes,
2667    * it's weird.
2668    */
escapeText(final String input)2669   static String escapeText(final String input) {
2670     return escapeBytes(ByteString.copyFromUtf8(input));
2671   }
2672 
2673   /** Escape double quotes and backslashes in a String for emittingUnicode output of a message. */
escapeDoubleQuotesAndBackslashes(final String input)2674   public static String escapeDoubleQuotesAndBackslashes(final String input) {
2675     return TextFormatEscaper.escapeDoubleQuotesAndBackslashes(input);
2676   }
2677 
2678   /**
2679    * Un-escape a text string as escaped using {@link #escapeText(String)}. Two-digit hex escapes
2680    * (starting with "\x") are also recognized.
2681    */
unescapeText(final String input)2682   static String unescapeText(final String input) throws InvalidEscapeSequenceException {
2683     return unescapeBytes(input).toStringUtf8();
2684   }
2685 
2686   /** Is this an octal digit? */
isOctal(final byte c)2687   private static boolean isOctal(final byte c) {
2688     return '0' <= c && c <= '7';
2689   }
2690 
2691   /** Is this a hex digit? */
isHex(final byte c)2692   private static boolean isHex(final byte c) {
2693     return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
2694   }
2695 
2696   /**
2697    * Interpret a character as a digit (in any base up to 36) and return the numeric value. This is
2698    * like {@code Character.digit()} but we don't accept non-ASCII digits.
2699    */
digitValue(final byte c)2700   private static int digitValue(final byte c) {
2701     if ('0' <= c && c <= '9') {
2702       return c - '0';
2703     } else if ('a' <= c && c <= 'z') {
2704       return c - 'a' + 10;
2705     } else {
2706       return c - 'A' + 10;
2707     }
2708   }
2709 
2710   /**
2711    * Parse a 32-bit signed integer from the text. Unlike the Java standard {@code
2712    * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal
2713    * and octal numbers, respectively.
2714    */
parseInt32(final String text)2715   static int parseInt32(final String text) throws NumberFormatException {
2716     return (int) parseInteger(text, true, false);
2717   }
2718 
2719   /**
2720    * Parse a 32-bit unsigned integer from the text. Unlike the Java standard {@code
2721    * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal
2722    * and octal numbers, respectively. The result is coerced to a (signed) {@code int} when returned
2723    * since Java has no unsigned integer type.
2724    */
parseUInt32(final String text)2725   static int parseUInt32(final String text) throws NumberFormatException {
2726     return (int) parseInteger(text, false, false);
2727   }
2728 
2729   /**
2730    * Parse a 64-bit signed integer from the text. Unlike the Java standard {@code
2731    * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal
2732    * and octal numbers, respectively.
2733    */
parseInt64(final String text)2734   static long parseInt64(final String text) throws NumberFormatException {
2735     return parseInteger(text, true, true);
2736   }
2737 
2738   /**
2739    * Parse a 64-bit unsigned integer from the text. Unlike the Java standard {@code
2740    * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal
2741    * and octal numbers, respectively. The result is coerced to a (signed) {@code long} when returned
2742    * since Java has no unsigned long type.
2743    */
parseUInt64(final String text)2744   static long parseUInt64(final String text) throws NumberFormatException {
2745     return parseInteger(text, false, true);
2746   }
2747 
parseInteger(final String text, final boolean isSigned, final boolean isLong)2748   private static long parseInteger(final String text, final boolean isSigned, final boolean isLong)
2749       throws NumberFormatException {
2750     int pos = 0;
2751 
2752     boolean negative = false;
2753     if (text.startsWith("-", pos)) {
2754       if (!isSigned) {
2755         throw new NumberFormatException("Number must be positive: " + text);
2756       }
2757       ++pos;
2758       negative = true;
2759     }
2760 
2761     int radix = 10;
2762     if (text.startsWith("0x", pos)) {
2763       pos += 2;
2764       radix = 16;
2765     } else if (text.startsWith("0", pos)) {
2766       radix = 8;
2767     }
2768 
2769     final String numberText = text.substring(pos);
2770 
2771     long result = 0;
2772     if (numberText.length() < 16) {
2773       // Can safely assume no overflow.
2774       result = Long.parseLong(numberText, radix);
2775       if (negative) {
2776         result = -result;
2777       }
2778 
2779       // Check bounds.
2780       // No need to check for 64-bit numbers since they'd have to be 16 chars
2781       // or longer to overflow.
2782       if (!isLong) {
2783         if (isSigned) {
2784           if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) {
2785             throw new NumberFormatException(
2786                 "Number out of range for 32-bit signed integer: " + text);
2787           }
2788         } else {
2789           if (result >= (1L << 32) || result < 0) {
2790             throw new NumberFormatException(
2791                 "Number out of range for 32-bit unsigned integer: " + text);
2792           }
2793         }
2794       }
2795     } else {
2796       BigInteger bigValue = new BigInteger(numberText, radix);
2797       if (negative) {
2798         bigValue = bigValue.negate();
2799       }
2800 
2801       // Check bounds.
2802       if (!isLong) {
2803         if (isSigned) {
2804           if (bigValue.bitLength() > 31) {
2805             throw new NumberFormatException(
2806                 "Number out of range for 32-bit signed integer: " + text);
2807           }
2808         } else {
2809           if (bigValue.bitLength() > 32) {
2810             throw new NumberFormatException(
2811                 "Number out of range for 32-bit unsigned integer: " + text);
2812           }
2813         }
2814       } else {
2815         if (isSigned) {
2816           if (bigValue.bitLength() > 63) {
2817             throw new NumberFormatException(
2818                 "Number out of range for 64-bit signed integer: " + text);
2819           }
2820         } else {
2821           if (bigValue.bitLength() > 64) {
2822             throw new NumberFormatException(
2823                 "Number out of range for 64-bit unsigned integer: " + text);
2824           }
2825         }
2826       }
2827 
2828       result = bigValue.longValue();
2829     }
2830 
2831     return result;
2832   }
2833 }
2834