• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 import static java.nio.charset.StandardCharsets.UTF_8;
34 
35 import com.google.protobuf.Descriptors.Descriptor;
36 import com.google.protobuf.Descriptors.EnumDescriptor;
37 import com.google.protobuf.Descriptors.EnumValueDescriptor;
38 import com.google.protobuf.Descriptors.FieldDescriptor;
39 import com.google.protobuf.MessageReflection.MergeTarget;
40 import java.io.IOException;
41 import java.math.BigInteger;
42 import java.nio.CharBuffer;
43 import java.util.ArrayList;
44 import java.util.Collections;
45 import java.util.List;
46 import java.util.Locale;
47 import java.util.Map;
48 import java.util.logging.Logger;
49 import java.util.regex.Matcher;
50 import java.util.regex.Pattern;
51 
52 /**
53  * Provide text parsing and formatting support for proto2 instances. The implementation largely
54  * follows google/protobuf/text_format.cc.
55  *
56  * @author wenboz@google.com Wenbo Zhu
57  * @author kenton@google.com Kenton Varda
58  */
59 public final class TextFormat {
TextFormat()60   private TextFormat() {}
61 
62   private static final Logger logger = Logger.getLogger(TextFormat.class.getName());
63 
64   /**
65    * Outputs a textual representation of the Protocol Message supplied into the parameter output.
66    * (This representation is the new version of the classic "ProtocolPrinter" output from the
67    * original Protocol Buffer system)
68    *
69    * @deprecated Use {@code printer().print(MessageOrBuilder, Appendable)}
70    */
71   @Deprecated
print(final MessageOrBuilder message, final Appendable output)72   public static void print(final MessageOrBuilder message, final Appendable output)
73       throws IOException {
74     printer().print(message, output);
75   }
76 
77   /**
78    * Outputs a textual representation of {@code fields} to {@code output}.
79    *
80    * @deprecated Use {@code printer().print(UnknownFieldSet, Appendable)}
81    */
82   @Deprecated
print(final UnknownFieldSet fields, final Appendable output)83   public static void print(final UnknownFieldSet fields, final Appendable output)
84       throws IOException {
85     printer().print(fields, output);
86   }
87 
88   /**
89    * Same as {@code print()}, except that non-ASCII characters are not escaped.
90    *
91    * @deprecated Use {@code printer().escapingNonAscii(false).print(MessageOrBuilder, Appendable)}
92    */
93   @Deprecated
printUnicode(final MessageOrBuilder message, final Appendable output)94   public static void printUnicode(final MessageOrBuilder message, final Appendable output)
95       throws IOException {
96     printer().escapingNonAscii(false).print(message, output);
97   }
98 
99   /**
100    * Same as {@code print()}, except that non-ASCII characters are not escaped.
101    *
102    * @deprecated Use {@code printer().escapingNonAscii(false).print(UnknownFieldSet, Appendable)}
103    */
104   @Deprecated
printUnicode(final UnknownFieldSet fields, final Appendable output)105   public static void printUnicode(final UnknownFieldSet fields, final Appendable output)
106       throws IOException {
107     printer().escapingNonAscii(false).print(fields, output);
108   }
109 
110   /**
111    * Generates a human readable form of this message, useful for debugging and other purposes, with
112    * no newline characters. This is just a trivial wrapper around
113    * {@link TextFormat.Printer#shortDebugString(MessageOrBuilder)}.
114    */
shortDebugString(final MessageOrBuilder message)115   public static String shortDebugString(final MessageOrBuilder message) {
116     return printer().shortDebugString(message);
117   }
118 
119   /**
120    * Generates a human readable form of the field, useful for debugging and other purposes, with
121    * no newline characters.
122    *
123    * @deprecated Use {@code printer().shortDebugString(FieldDescriptor, Object)}
124    */
125   @Deprecated
shortDebugString(final FieldDescriptor field, final Object value)126   public static String shortDebugString(final FieldDescriptor field, final Object value) {
127     return printer().shortDebugString(field, value);
128   }
129   //
130   /**
131    * Generates a human readable form of the unknown fields, useful for debugging and other
132    * purposes, with no newline characters.
133    *
134    * @deprecated Use {@code printer().shortDebugString(UnknownFieldSet)}
135    */
136   @Deprecated
shortDebugString(final UnknownFieldSet fields)137   public static String shortDebugString(final UnknownFieldSet fields) {
138     return printer().shortDebugString(fields);
139   }
140 
141   /**
142    * Like {@code print()}, but writes directly to a {@code String} and returns it.
143    *
144    * @deprecated Use {@code message.toString()}
145    */
146   @Deprecated
printToString(final MessageOrBuilder message)147   public static String printToString(final MessageOrBuilder message) {
148     return printer().printToString(message);
149   }
150 
151   /**
152    * Like {@code print()}, but writes directly to a {@code String} and returns it.
153    *
154    * @deprecated Use {@link UnknownFieldSet#toString()}
155    */
156   @Deprecated
printToString(final UnknownFieldSet fields)157   public static String printToString(final UnknownFieldSet fields) {
158     return printer().printToString(fields);
159   }
160 
161   /**
162    * Same as {@code printToString()}, except that non-ASCII characters in string type fields are not
163    * escaped in backslash+octals.
164    *
165    * @deprecated Use {@code printer().escapingNonAscii(false).printToString(MessageOrBuilder)}
166    */
167   @Deprecated
printToUnicodeString(final MessageOrBuilder message)168   public static String printToUnicodeString(final MessageOrBuilder message) {
169     return printer().escapingNonAscii(false).printToString(message);
170   }
171 
172   /**
173    * Same as {@code printToString()}, except that non-ASCII characters in string type fields are
174    * not escaped in backslash+octals.
175    *
176    * @deprecated Use {@code printer().escapingNonAscii(false).printToString(UnknownFieldSet)}
177    */
178   @Deprecated
printToUnicodeString(final UnknownFieldSet fields)179   public static String printToUnicodeString(final UnknownFieldSet fields) {
180     return printer().escapingNonAscii(false).printToString(fields);
181   }
182   //
183   /** @deprecated Use {@code printer().printField(FieldDescriptor, Object, Appendable)} */
184   @Deprecated
printField( final FieldDescriptor field, final Object value, final Appendable output)185   public static void printField(
186       final FieldDescriptor field, final Object value, final Appendable output)
187       throws IOException {
188     printer().printField(field, value, output);
189   }
190   //
191   /** @deprecated Use {@code printer().printFieldToString(FieldDescriptor, Object)} */
192   @Deprecated
printFieldToString(final FieldDescriptor field, final Object value)193   public static String printFieldToString(final FieldDescriptor field, final Object value) {
194     return printer().printFieldToString(field, value);
195   }
196   //
197   /**
198    * Outputs a unicode textual representation of the value of given field value.
199    *
200    * <p>Same as {@code printFieldValue()}, except that non-ASCII characters in string type fields
201    * are not escaped in backslash+octals.
202    *
203    * @deprecated Use {@code printer().escapingNonAscii(false).printFieldValue(FieldDescriptor,
204    *     Object, Appendable)}
205    * @param field the descriptor of the field
206    * @param value the value of the field
207    * @param output the output to which to append the formatted value
208    * @throws ClassCastException if the value is not appropriate for the given field descriptor
209    * @throws IOException if there is an exception writing to the output
210    */
211   @Deprecated
printUnicodeFieldValue( final FieldDescriptor field, final Object value, final Appendable output)212   public static void printUnicodeFieldValue(
213       final FieldDescriptor field, final Object value, final Appendable output)
214       throws IOException {
215     printer().escapingNonAscii(false).printFieldValue(field, value, output);
216   }
217 
218   /**
219    * Outputs a textual representation of the value of given field value.
220    *
221    * @deprecated Use {@code printer().printFieldValue(FieldDescriptor, Object, Appendable)}
222    * @param field the descriptor of the field
223    * @param value the value of the field
224    * @param output the output to which to append the formatted value
225    * @throws ClassCastException if the value is not appropriate for the given field descriptor
226    * @throws IOException if there is an exception writing to the output
227    */
228   @Deprecated
printFieldValue( final FieldDescriptor field, final Object value, final Appendable output)229   public static void printFieldValue(
230       final FieldDescriptor field, final Object value, final Appendable output) throws IOException {
231     printer().printFieldValue(field, value, output);
232   }
233 
234   /**
235    * Outputs a textual representation of the value of an unknown field.
236    *
237    * @param tag the field's tag number
238    * @param value the value of the field
239    * @param output the output to which to append the formatted value
240    * @throws ClassCastException if the value is not appropriate for the given field descriptor
241    * @throws IOException if there is an exception writing to the output
242    */
printUnknownFieldValue( final int tag, final Object value, final Appendable output)243   public static void printUnknownFieldValue(
244       final int tag, final Object value, final Appendable output) throws IOException {
245     printUnknownFieldValue(tag, value, multiLineOutput(output));
246   }
247 
printUnknownFieldValue( final int tag, final Object value, final TextGenerator generator)248   private static void printUnknownFieldValue(
249       final int tag, final Object value, final TextGenerator generator) throws IOException {
250     switch (WireFormat.getTagWireType(tag)) {
251       case WireFormat.WIRETYPE_VARINT:
252         generator.print(unsignedToString((Long) value));
253         break;
254       case WireFormat.WIRETYPE_FIXED32:
255         generator.print(String.format((Locale) null, "0x%08x", (Integer) value));
256         break;
257       case WireFormat.WIRETYPE_FIXED64:
258         generator.print(String.format((Locale) null, "0x%016x", (Long) value));
259         break;
260       case WireFormat.WIRETYPE_LENGTH_DELIMITED:
261         try {
262           // Try to parse and print the field as an embedded message
263           UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value);
264           generator.print("{");
265           generator.eol();
266           generator.indent();
267           Printer.printUnknownFields(message, generator);
268           generator.outdent();
269           generator.print("}");
270         } catch (InvalidProtocolBufferException e) {
271           // If not parseable as a message, print as a String
272           generator.print("\"");
273           generator.print(escapeBytes((ByteString) value));
274           generator.print("\"");
275         }
276         break;
277       case WireFormat.WIRETYPE_START_GROUP:
278         Printer.printUnknownFields((UnknownFieldSet) value, generator);
279         break;
280       default:
281         throw new IllegalArgumentException("Bad tag: " + tag);
282     }
283   }
284 
285   /** Printer instance which escapes non-ASCII characters. */
printer()286   public static Printer printer() {
287     return Printer.DEFAULT;
288   }
289 
290   /** Helper class for converting protobufs to text. */
291   public static final class Printer {
292 
293     // Printer instance which escapes non-ASCII characters.
294     private static final Printer DEFAULT = new Printer(true, TypeRegistry.getEmptyTypeRegistry());
295 
296     /** Whether to escape non ASCII characters with backslash and octal. */
297     private final boolean escapeNonAscii;
298 
299     private final TypeRegistry typeRegistry;
300 
Printer(boolean escapeNonAscii, TypeRegistry typeRegistry)301     private Printer(boolean escapeNonAscii, TypeRegistry typeRegistry) {
302       this.escapeNonAscii = escapeNonAscii;
303       this.typeRegistry = typeRegistry;
304     }
305 
306     /**
307      * Return a new Printer instance with the specified escape mode.
308      *
309      * @param escapeNonAscii If true, the new Printer will escape non-ASCII characters (this is the
310      *     default behavior. If false, the new Printer will print non-ASCII characters as is. In
311      *     either case, the new Printer still escapes newlines and quotes in strings.
312      * @return a new Printer that clones all other configurations from the current {@link Printer},
313      *     with the escape mode set to the given parameter.
314      */
escapingNonAscii(boolean escapeNonAscii)315     public Printer escapingNonAscii(boolean escapeNonAscii) {
316       return new Printer(escapeNonAscii, typeRegistry);
317     }
318 
319     /**
320      * Creates a new {@link Printer} using the given typeRegistry. The new Printer clones all other
321      * configurations from the current {@link Printer}.
322      *
323      * @throws IllegalArgumentException if a registry is already set.
324      */
usingTypeRegistry(TypeRegistry typeRegistry)325     public Printer usingTypeRegistry(TypeRegistry typeRegistry) {
326       if (this.typeRegistry != TypeRegistry.getEmptyTypeRegistry()) {
327         throw new IllegalArgumentException("Only one typeRegistry is allowed.");
328       }
329       return new Printer(escapeNonAscii, typeRegistry);
330     }
331 
332     /**
333      * Outputs a textual representation of the Protocol Message supplied into the parameter output.
334      * (This representation is the new version of the classic "ProtocolPrinter" output from the
335      * original Protocol Buffer system)
336      */
print(final MessageOrBuilder message, final Appendable output)337     public void print(final MessageOrBuilder message, final Appendable output) throws IOException {
338       print(message, multiLineOutput(output));
339     }
340 
341     /** Outputs a textual representation of {@code fields} to {@code output}. */
print(final UnknownFieldSet fields, final Appendable output)342     public void print(final UnknownFieldSet fields, final Appendable output) throws IOException {
343       printUnknownFields(fields, multiLineOutput(output));
344     }
345 
print(final MessageOrBuilder message, final TextGenerator generator)346     private void print(final MessageOrBuilder message, final TextGenerator generator)
347         throws IOException {
348       if (message.getDescriptorForType().getFullName().equals("google.protobuf.Any")
349           && printAny(message, generator)) {
350         return;
351       }
352       printMessage(message, generator);
353     }
354 
355     /**
356      * Attempt to print the 'google.protobuf.Any' message in a human-friendly format. Returns false
357      * if the message isn't a valid 'google.protobuf.Any' message (in which case the message should
358      * be rendered just like a regular message to help debugging).
359      */
printAny(final MessageOrBuilder message, final TextGenerator generator)360     private boolean printAny(final MessageOrBuilder message, final TextGenerator generator)
361         throws IOException {
362       Descriptor messageType = message.getDescriptorForType();
363       FieldDescriptor typeUrlField = messageType.findFieldByNumber(1);
364       FieldDescriptor valueField = messageType.findFieldByNumber(2);
365       if (typeUrlField == null
366           || typeUrlField.getType() != FieldDescriptor.Type.STRING
367           || valueField == null
368           || valueField.getType() != FieldDescriptor.Type.BYTES) {
369         // The message may look like an Any but isn't actually an Any message (might happen if the
370         // user tries to use DynamicMessage to construct an Any from incomplete Descriptor).
371         return false;
372       }
373       String typeUrl = (String) message.getField(typeUrlField);
374       // If type_url is not set, we will not be able to decode the content of the value, so just
375       // print out the Any like a regular message.
376       if (typeUrl.isEmpty()) {
377         return false;
378       }
379       Object value = message.getField(valueField);
380 
381       Message.Builder contentBuilder = null;
382       try {
383         Descriptor contentType = typeRegistry.getDescriptorForTypeUrl(typeUrl);
384         if (contentType == null) {
385           return false;
386         }
387         contentBuilder = DynamicMessage.getDefaultInstance(contentType).newBuilderForType();
388         contentBuilder.mergeFrom((ByteString) value);
389       } catch (InvalidProtocolBufferException e) {
390         // The value of Any is malformed. We cannot print it out nicely, so fallback to printing out
391         // the type_url and value as bytes. Note that we fail open here to be consistent with
392         // text_format.cc, and also to allow a way for users to inspect the content of the broken
393         // message.
394         return false;
395       }
396       generator.print("[");
397       generator.print(typeUrl);
398       generator.print("] {");
399       generator.eol();
400       generator.indent();
401       print(contentBuilder, generator);
402       generator.outdent();
403       generator.print("}");
404       generator.eol();
405       return true;
406     }
407 
printFieldToString(final FieldDescriptor field, final Object value)408     public String printFieldToString(final FieldDescriptor field, final Object value) {
409       try {
410         final StringBuilder text = new StringBuilder();
411         printField(field, value, text);
412         return text.toString();
413       } catch (IOException e) {
414         throw new IllegalStateException(e);
415       }
416     }
417 
printField(final FieldDescriptor field, final Object value, final Appendable output)418     public void printField(final FieldDescriptor field, final Object value, final Appendable output)
419         throws IOException {
420       printField(field, value, multiLineOutput(output));
421     }
422 
printField( final FieldDescriptor field, final Object value, final TextGenerator generator)423     private void printField(
424         final FieldDescriptor field, final Object value, final TextGenerator generator)
425         throws IOException {
426       // Sort map field entries by key
427       if (field.isMapField()) {
428         List<MapEntryAdapter> adapters = new ArrayList<>();
429         for (Object entry : (List<?>) value) {
430           adapters.add(new MapEntryAdapter(entry, field));
431         }
432         Collections.sort(adapters);
433         for (MapEntryAdapter adapter : adapters) {
434           printSingleField(field, adapter.getEntry(), generator);
435         }
436       } else if (field.isRepeated()) {
437         // Repeated field.  Print each element.
438         for (Object element : (List<?>) value) {
439           printSingleField(field, element, generator);
440         }
441       } else {
442         printSingleField(field, value, generator);
443       }
444     }
445 
446     /**
447      * An adapter class that can take a MapEntry or a MutableMapEntry and returns its key and entry.
448      * This class is created solely for the purpose of sorting map entries by its key and prevent
449      * duplicated logic by having a separate comparator for MapEntry and MutableMapEntry.
450      */
451     private static class MapEntryAdapter implements Comparable<MapEntryAdapter> {
452       private Object entry;
453 
454       @SuppressWarnings({"rawtypes"})
455       private MapEntry mapEntry;
456 
457 
458       private final FieldDescriptor.JavaType fieldType;
459 
MapEntryAdapter(Object entry, FieldDescriptor fieldDescriptor)460       public MapEntryAdapter(Object entry, FieldDescriptor fieldDescriptor) {
461         if (entry instanceof MapEntry) {
462           this.mapEntry = (MapEntry) entry;
463         } else {
464           this.entry = entry;
465         }
466         this.fieldType = extractFieldType(fieldDescriptor);
467       }
468 
extractFieldType(FieldDescriptor fieldDescriptor)469       private static FieldDescriptor.JavaType extractFieldType(FieldDescriptor fieldDescriptor) {
470         return fieldDescriptor.getMessageType().getFields().get(0).getJavaType();
471       }
472 
getKey()473       public Object getKey() {
474         if (mapEntry != null) {
475           return mapEntry.getKey();
476         }
477         return null;
478       }
479 
getEntry()480       public Object getEntry() {
481         if (mapEntry != null) {
482           return mapEntry;
483         }
484         return entry;
485       }
486 
487       @Override
compareTo(MapEntryAdapter b)488       public int compareTo(MapEntryAdapter b) {
489         if (getKey() == null || b.getKey() == null) {
490           logger.info("Invalid key for map field.");
491           return -1;
492         }
493         switch (fieldType) {
494           case BOOLEAN:
495             return Boolean.compare((boolean) getKey(), (boolean) b.getKey());
496           case LONG:
497             return Long.compare((long) getKey(), (long) b.getKey());
498           case INT:
499             return Integer.compare((int) getKey(), (int) b.getKey());
500           case STRING:
501             String aString = (String) getKey();
502             String bString = (String) b.getKey();
503             if (aString == null && bString == null) {
504               return 0;
505             } else if (aString == null && bString != null) {
506               return -1;
507             } else if (aString != null && bString == null) {
508               return 1;
509             } else {
510               return aString.compareTo(bString);
511             }
512           default:
513             return 0;
514         }
515       }
516     }
517 
518     /**
519      * Outputs a textual representation of the value of given field value.
520      *
521      * @param field the descriptor of the field
522      * @param value the value of the field
523      * @param output the output to which to append the formatted value
524      * @throws ClassCastException if the value is not appropriate for the given field descriptor
525      * @throws IOException if there is an exception writing to the output
526      */
printFieldValue( final FieldDescriptor field, final Object value, final Appendable output)527     public void printFieldValue(
528         final FieldDescriptor field, final Object value, final Appendable output)
529         throws IOException {
530       printFieldValue(field, value, multiLineOutput(output));
531     }
532 
printFieldValue( final FieldDescriptor field, final Object value, final TextGenerator generator)533     private void printFieldValue(
534         final FieldDescriptor field, final Object value, final TextGenerator generator)
535         throws IOException {
536       switch (field.getType()) {
537         case INT32:
538         case SINT32:
539         case SFIXED32:
540           generator.print(((Integer) value).toString());
541           break;
542 
543         case INT64:
544         case SINT64:
545         case SFIXED64:
546           generator.print(((Long) value).toString());
547           break;
548 
549         case BOOL:
550           generator.print(((Boolean) value).toString());
551           break;
552 
553         case FLOAT:
554           generator.print(((Float) value).toString());
555           break;
556 
557         case DOUBLE:
558           generator.print(((Double) value).toString());
559           break;
560 
561         case UINT32:
562         case FIXED32:
563           generator.print(unsignedToString((Integer) value));
564           break;
565 
566         case UINT64:
567         case FIXED64:
568           generator.print(unsignedToString((Long) value));
569           break;
570 
571         case STRING:
572           generator.print("\"");
573           generator.print(
574               escapeNonAscii
575                   ? TextFormatEscaper.escapeText((String) value)
576                   : escapeDoubleQuotesAndBackslashes((String) value).replace("\n", "\\n"));
577           generator.print("\"");
578           break;
579 
580         case BYTES:
581           generator.print("\"");
582           if (value instanceof ByteString) {
583             generator.print(escapeBytes((ByteString) value));
584           } else {
585             generator.print(escapeBytes((byte[]) value));
586           }
587           generator.print("\"");
588           break;
589 
590         case ENUM:
591           generator.print(((EnumValueDescriptor) value).getName());
592           break;
593 
594         case MESSAGE:
595         case GROUP:
596           print((MessageOrBuilder) value, generator);
597           break;
598       }
599     }
600 
601     /** Like {@code print()}, but writes directly to a {@code String} and returns it. */
printToString(final MessageOrBuilder message)602     public String printToString(final MessageOrBuilder message) {
603       try {
604         final StringBuilder text = new StringBuilder();
605         print(message, text);
606         return text.toString();
607       } catch (IOException e) {
608         throw new IllegalStateException(e);
609       }
610     }
611     /** Like {@code print()}, but writes directly to a {@code String} and returns it. */
printToString(final UnknownFieldSet fields)612     public String printToString(final UnknownFieldSet fields) {
613       try {
614         final StringBuilder text = new StringBuilder();
615         print(fields, text);
616         return text.toString();
617       } catch (IOException e) {
618         throw new IllegalStateException(e);
619       }
620     }
621 
622     /**
623      * Generates a human readable form of this message, useful for debugging and other purposes,
624      * with no newline characters.
625      */
shortDebugString(final MessageOrBuilder message)626     public String shortDebugString(final MessageOrBuilder message) {
627       try {
628         final StringBuilder text = new StringBuilder();
629         print(message, singleLineOutput(text));
630         return text.toString();
631       } catch (IOException e) {
632         throw new IllegalStateException(e);
633       }
634     }
635 
636     /**
637      * Generates a human readable form of the field, useful for debugging and other purposes, with
638      * no newline characters.
639      */
shortDebugString(final FieldDescriptor field, final Object value)640     public String shortDebugString(final FieldDescriptor field, final Object value) {
641       try {
642         final StringBuilder text = new StringBuilder();
643         printField(field, value, singleLineOutput(text));
644         return text.toString();
645       } catch (IOException e) {
646         throw new IllegalStateException(e);
647       }
648     }
649 
650     /**
651      * Generates a human readable form of the unknown fields, useful for debugging and other
652      * purposes, with no newline characters.
653      */
shortDebugString(final UnknownFieldSet fields)654     public String shortDebugString(final UnknownFieldSet fields) {
655       try {
656         final StringBuilder text = new StringBuilder();
657         printUnknownFields(fields, singleLineOutput(text));
658         return text.toString();
659       } catch (IOException e) {
660         throw new IllegalStateException(e);
661       }
662     }
663 
printUnknownFieldValue( final int tag, final Object value, final TextGenerator generator)664     private static void printUnknownFieldValue(
665         final int tag, final Object value, final TextGenerator generator) throws IOException {
666       switch (WireFormat.getTagWireType(tag)) {
667         case WireFormat.WIRETYPE_VARINT:
668           generator.print(unsignedToString((Long) value));
669           break;
670         case WireFormat.WIRETYPE_FIXED32:
671           generator.print(String.format((Locale) null, "0x%08x", (Integer) value));
672           break;
673         case WireFormat.WIRETYPE_FIXED64:
674           generator.print(String.format((Locale) null, "0x%016x", (Long) value));
675           break;
676         case WireFormat.WIRETYPE_LENGTH_DELIMITED:
677           try {
678             // Try to parse and print the field as an embedded message
679             UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value);
680             generator.print("{");
681             generator.eol();
682             generator.indent();
683             printUnknownFields(message, generator);
684             generator.outdent();
685             generator.print("}");
686           } catch (InvalidProtocolBufferException e) {
687             // If not parseable as a message, print as a String
688             generator.print("\"");
689             generator.print(escapeBytes((ByteString) value));
690             generator.print("\"");
691           }
692           break;
693         case WireFormat.WIRETYPE_START_GROUP:
694           printUnknownFields((UnknownFieldSet) value, generator);
695           break;
696         default:
697           throw new IllegalArgumentException("Bad tag: " + tag);
698       }
699     }
700 
printMessage(final MessageOrBuilder message, final TextGenerator generator)701     private void printMessage(final MessageOrBuilder message, final TextGenerator generator)
702         throws IOException {
703       for (Map.Entry<FieldDescriptor, Object> field : message.getAllFields().entrySet()) {
704         printField(field.getKey(), field.getValue(), generator);
705       }
706       printUnknownFields(message.getUnknownFields(), generator);
707     }
708 
printSingleField( final FieldDescriptor field, final Object value, final TextGenerator generator)709     private void printSingleField(
710         final FieldDescriptor field, final Object value, final TextGenerator generator)
711         throws IOException {
712       if (field.isExtension()) {
713         generator.print("[");
714         // We special-case MessageSet elements for compatibility with proto1.
715         if (field.getContainingType().getOptions().getMessageSetWireFormat()
716             && (field.getType() == FieldDescriptor.Type.MESSAGE)
717             && (field.isOptional())
718             // object equality
719             && (field.getExtensionScope() == field.getMessageType())) {
720           generator.print(field.getMessageType().getFullName());
721         } else {
722           generator.print(field.getFullName());
723         }
724         generator.print("]");
725       } else {
726         if (field.getType() == FieldDescriptor.Type.GROUP) {
727           // Groups must be serialized with their original capitalization.
728           generator.print(field.getMessageType().getName());
729         } else {
730           generator.print(field.getName());
731         }
732       }
733 
734       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
735         generator.print(" {");
736         generator.eol();
737         generator.indent();
738       } else {
739         generator.print(": ");
740       }
741 
742       printFieldValue(field, value, generator);
743 
744       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
745         generator.outdent();
746         generator.print("}");
747       }
748       generator.eol();
749     }
750 
printUnknownFields( final UnknownFieldSet unknownFields, final TextGenerator generator)751     private static void printUnknownFields(
752         final UnknownFieldSet unknownFields, final TextGenerator generator) throws IOException {
753       for (Map.Entry<Integer, UnknownFieldSet.Field> entry : unknownFields.asMap().entrySet()) {
754         final int number = entry.getKey();
755         final UnknownFieldSet.Field field = entry.getValue();
756         printUnknownField(number, WireFormat.WIRETYPE_VARINT, field.getVarintList(), generator);
757         printUnknownField(number, WireFormat.WIRETYPE_FIXED32, field.getFixed32List(), generator);
758         printUnknownField(number, WireFormat.WIRETYPE_FIXED64, field.getFixed64List(), generator);
759         printUnknownField(
760             number,
761             WireFormat.WIRETYPE_LENGTH_DELIMITED,
762             field.getLengthDelimitedList(),
763             generator);
764         for (final UnknownFieldSet value : field.getGroupList()) {
765           generator.print(entry.getKey().toString());
766           generator.print(" {");
767           generator.eol();
768           generator.indent();
769           printUnknownFields(value, generator);
770           generator.outdent();
771           generator.print("}");
772           generator.eol();
773         }
774       }
775     }
776 
printUnknownField( final int number, final int wireType, final List<?> values, final TextGenerator generator)777     private static void printUnknownField(
778         final int number, final int wireType, final List<?> values, final TextGenerator generator)
779         throws IOException {
780       for (final Object value : values) {
781         generator.print(String.valueOf(number));
782         generator.print(": ");
783         printUnknownFieldValue(wireType, value, generator);
784         generator.eol();
785       }
786     }
787   }
788 
789   /** Convert an unsigned 32-bit integer to a string. */
unsignedToString(final int value)790   public static String unsignedToString(final int value) {
791     if (value >= 0) {
792       return Integer.toString(value);
793     } else {
794       return Long.toString(value & 0x00000000FFFFFFFFL);
795     }
796   }
797 
798   /** Convert an unsigned 64-bit integer to a string. */
unsignedToString(final long value)799   public static String unsignedToString(final long value) {
800     if (value >= 0) {
801       return Long.toString(value);
802     } else {
803       // Pull off the most-significant bit so that BigInteger doesn't think
804       // the number is negative, then set it again using setBit().
805       return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL).setBit(63).toString();
806     }
807   }
808 
multiLineOutput(Appendable output)809   private static TextGenerator multiLineOutput(Appendable output) {
810     return new TextGenerator(output, false);
811   }
812 
singleLineOutput(Appendable output)813   private static TextGenerator singleLineOutput(Appendable output) {
814     return new TextGenerator(output, true);
815   }
816 
817   /** An inner class for writing text to the output stream. */
818   private static final class TextGenerator {
819     private final Appendable output;
820     private final StringBuilder indent = new StringBuilder();
821     private final boolean singleLineMode;
822     // While technically we are "at the start of a line" at the very beginning of the output, all
823     // we would do in response to this is emit the (zero length) indentation, so it has no effect.
824     // Setting it false here does however suppress an unwanted leading space in single-line mode.
825     private boolean atStartOfLine = false;
826 
TextGenerator(final Appendable output, boolean singleLineMode)827     private TextGenerator(final Appendable output, boolean singleLineMode) {
828       this.output = output;
829       this.singleLineMode = singleLineMode;
830     }
831 
832     /**
833      * Indent text by two spaces. After calling Indent(), two spaces will be inserted at the
834      * beginning of each line of text. Indent() may be called multiple times to produce deeper
835      * indents.
836      */
indent()837     public void indent() {
838       indent.append("  ");
839     }
840 
841     /** Reduces the current indent level by two spaces, or crashes if the indent level is zero. */
outdent()842     public void outdent() {
843       final int length = indent.length();
844       if (length == 0) {
845         throw new IllegalArgumentException(" Outdent() without matching Indent().");
846       }
847       indent.setLength(length - 2);
848     }
849 
850     /**
851      * Print text to the output stream. Bare newlines are never expected to be passed to this
852      * method; to indicate the end of a line, call "eol()".
853      */
print(final CharSequence text)854     public void print(final CharSequence text) throws IOException {
855       if (atStartOfLine) {
856         atStartOfLine = false;
857         output.append(singleLineMode ? " " : indent);
858       }
859       output.append(text);
860     }
861 
862     /**
863      * Signifies reaching the "end of the current line" in the output. In single-line mode, this
864      * does not result in a newline being emitted, but ensures that a separating space is written
865      * before the next output.
866      */
eol()867     public void eol() throws IOException {
868       if (!singleLineMode) {
869         output.append("\n");
870       }
871       atStartOfLine = true;
872     }
873   }
874 
875   // =================================================================
876   // Parsing
877 
878   /**
879    * Represents a stream of tokens parsed from a {@code String}.
880    *
881    * <p>The Java standard library provides many classes that you might think would be useful for
882    * implementing this, but aren't. For example:
883    *
884    * <ul>
885    *   <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or, at least, something
886    *       that would get us close to what we want -- except for one fatal flaw: It automatically
887    *       un-escapes strings using Java escape sequences, which do not include all the escape
888    *       sequences we need to support (e.g. '\x').
889    *   <li>{@code java.util.Scanner}: This seems like a great way at least to parse regular
890    *       expressions out of a stream (so we wouldn't have to load the entire input into a single
891    *       string before parsing). Sadly, {@code Scanner} requires that tokens be delimited with
892    *       some delimiter. Thus, although the text "foo:" should parse to two tokens ("foo" and
893    *       ":"), {@code Scanner} would recognize it only as a single token. Furthermore, {@code
894    *       Scanner} provides no way to inspect the contents of delimiters, making it impossible to
895    *       keep track of line and column numbers.
896    * </ul>
897    *
898    * <p>Luckily, Java's regular expression support does manage to be useful to us. (Barely: We need
899    * {@code Matcher.usePattern()}, which is new in Java 1.5.) So, we can use that, at least.
900    * Unfortunately, this implies that we need to have the entire input in one contiguous string.
901    */
902   private static final class Tokenizer {
903     private final CharSequence text;
904     private final Matcher matcher;
905     private String currentToken;
906 
907     // The character index within this.text at which the current token begins.
908     private int pos = 0;
909 
910     // The line and column numbers of the current token.
911     private int line = 0;
912     private int column = 0;
913 
914     // The line and column numbers of the previous token (allows throwing
915     // errors *after* consuming).
916     private int previousLine = 0;
917     private int previousColumn = 0;
918 
919     // We use possessive quantifiers (*+ and ++) because otherwise the Java
920     // regex matcher has stack overflows on large inputs.
921     private static final Pattern WHITESPACE = Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
922     private static final Pattern TOKEN =
923         Pattern.compile(
924             "[a-zA-Z_][0-9a-zA-Z_+-]*+|" // an identifier
925                 + "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" // a number
926                 + "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" // a double-quoted string
927                 + "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)", // a single-quoted string
928             Pattern.MULTILINE);
929 
930     private static final Pattern DOUBLE_INFINITY =
931         Pattern.compile("-?inf(inity)?", Pattern.CASE_INSENSITIVE);
932     private static final Pattern FLOAT_INFINITY =
933         Pattern.compile("-?inf(inity)?f?", Pattern.CASE_INSENSITIVE);
934     private static final Pattern FLOAT_NAN = Pattern.compile("nanf?", Pattern.CASE_INSENSITIVE);
935 
936     /** Construct a tokenizer that parses tokens from the given text. */
Tokenizer(final CharSequence text)937     private Tokenizer(final CharSequence text) {
938       this.text = text;
939       this.matcher = WHITESPACE.matcher(text);
940       skipWhitespace();
941       nextToken();
942     }
943 
getPreviousLine()944     int getPreviousLine() {
945       return previousLine;
946     }
947 
getPreviousColumn()948     int getPreviousColumn() {
949       return previousColumn;
950     }
951 
getLine()952     int getLine() {
953       return line;
954     }
955 
getColumn()956     int getColumn() {
957       return column;
958     }
959 
960     /** Are we at the end of the input? */
atEnd()961     public boolean atEnd() {
962       return currentToken.length() == 0;
963     }
964 
965     /** Advance to the next token. */
nextToken()966     public void nextToken() {
967       previousLine = line;
968       previousColumn = column;
969 
970       // Advance the line counter to the current position.
971       while (pos < matcher.regionStart()) {
972         if (text.charAt(pos) == '\n') {
973           ++line;
974           column = 0;
975         } else {
976           ++column;
977         }
978         ++pos;
979       }
980 
981       // Match the next token.
982       if (matcher.regionStart() == matcher.regionEnd()) {
983         // EOF
984         currentToken = "";
985       } else {
986         matcher.usePattern(TOKEN);
987         if (matcher.lookingAt()) {
988           currentToken = matcher.group();
989           matcher.region(matcher.end(), matcher.regionEnd());
990         } else {
991           // Take one character.
992           currentToken = String.valueOf(text.charAt(pos));
993           matcher.region(pos + 1, matcher.regionEnd());
994         }
995 
996         skipWhitespace();
997       }
998     }
999 
1000     /** Skip over any whitespace so that the matcher region starts at the next token. */
skipWhitespace()1001     private void skipWhitespace() {
1002       matcher.usePattern(WHITESPACE);
1003       if (matcher.lookingAt()) {
1004         matcher.region(matcher.end(), matcher.regionEnd());
1005       }
1006     }
1007 
1008     /**
1009      * If the next token exactly matches {@code token}, consume it and return {@code true}.
1010      * Otherwise, return {@code false} without doing anything.
1011      */
tryConsume(final String token)1012     public boolean tryConsume(final String token) {
1013       if (currentToken.equals(token)) {
1014         nextToken();
1015         return true;
1016       } else {
1017         return false;
1018       }
1019     }
1020 
1021     /**
1022      * If the next token exactly matches {@code token}, consume it. Otherwise, throw a {@link
1023      * ParseException}.
1024      */
consume(final String token)1025     public void consume(final String token) throws ParseException {
1026       if (!tryConsume(token)) {
1027         throw parseException("Expected \"" + token + "\".");
1028       }
1029     }
1030 
1031     /** Returns {@code true} if the next token is an integer, but does not consume it. */
lookingAtInteger()1032     public boolean lookingAtInteger() {
1033       if (currentToken.length() == 0) {
1034         return false;
1035       }
1036 
1037       final char c = currentToken.charAt(0);
1038       return ('0' <= c && c <= '9') || c == '-' || c == '+';
1039     }
1040 
1041     /** Returns {@code true} if the current token's text is equal to that specified. */
lookingAt(String text)1042     public boolean lookingAt(String text) {
1043       return currentToken.equals(text);
1044     }
1045 
1046     /**
1047      * If the next token is an identifier, consume it and return its value. Otherwise, throw a
1048      * {@link ParseException}.
1049      */
consumeIdentifier()1050     public String consumeIdentifier() throws ParseException {
1051       for (int i = 0; i < currentToken.length(); i++) {
1052         final char c = currentToken.charAt(i);
1053         if (('a' <= c && c <= 'z')
1054             || ('A' <= c && c <= 'Z')
1055             || ('0' <= c && c <= '9')
1056             || (c == '_')
1057             || (c == '.')) {
1058           // OK
1059         } else {
1060           throw parseException("Expected identifier. Found '" + currentToken + "'");
1061         }
1062       }
1063 
1064       final String result = currentToken;
1065       nextToken();
1066       return result;
1067     }
1068 
1069     /**
1070      * If the next token is an identifier, consume it and return {@code true}. Otherwise, return
1071      * {@code false} without doing anything.
1072      */
tryConsumeIdentifier()1073     public boolean tryConsumeIdentifier() {
1074       try {
1075         consumeIdentifier();
1076         return true;
1077       } catch (ParseException e) {
1078         return false;
1079       }
1080     }
1081 
1082     /**
1083      * If the next token is a 32-bit signed integer, consume it and return its value. Otherwise,
1084      * throw a {@link ParseException}.
1085      */
consumeInt32()1086     public int consumeInt32() throws ParseException {
1087       try {
1088         final int result = parseInt32(currentToken);
1089         nextToken();
1090         return result;
1091       } catch (NumberFormatException e) {
1092         throw integerParseException(e);
1093       }
1094     }
1095 
1096     /**
1097      * If the next token is a 32-bit unsigned integer, consume it and return its value. Otherwise,
1098      * throw a {@link ParseException}.
1099      */
consumeUInt32()1100     public int consumeUInt32() throws ParseException {
1101       try {
1102         final int result = parseUInt32(currentToken);
1103         nextToken();
1104         return result;
1105       } catch (NumberFormatException e) {
1106         throw integerParseException(e);
1107       }
1108     }
1109 
1110     /**
1111      * If the next token is a 64-bit signed integer, consume it and return its value. Otherwise,
1112      * throw a {@link ParseException}.
1113      */
consumeInt64()1114     public long consumeInt64() throws ParseException {
1115       try {
1116         final long result = parseInt64(currentToken);
1117         nextToken();
1118         return result;
1119       } catch (NumberFormatException e) {
1120         throw integerParseException(e);
1121       }
1122     }
1123 
1124     /**
1125      * If the next token is a 64-bit signed integer, consume it and return {@code true}. Otherwise,
1126      * return {@code false} without doing anything.
1127      */
tryConsumeInt64()1128     public boolean tryConsumeInt64() {
1129       try {
1130         consumeInt64();
1131         return true;
1132       } catch (ParseException e) {
1133         return false;
1134       }
1135     }
1136 
1137     /**
1138      * If the next token is a 64-bit unsigned integer, consume it and return its value. Otherwise,
1139      * throw a {@link ParseException}.
1140      */
consumeUInt64()1141     public long consumeUInt64() throws ParseException {
1142       try {
1143         final long result = parseUInt64(currentToken);
1144         nextToken();
1145         return result;
1146       } catch (NumberFormatException e) {
1147         throw integerParseException(e);
1148       }
1149     }
1150 
1151     /**
1152      * If the next token is a 64-bit unsigned integer, consume it and return {@code true}.
1153      * Otherwise, return {@code false} without doing anything.
1154      */
tryConsumeUInt64()1155     public boolean tryConsumeUInt64() {
1156       try {
1157         consumeUInt64();
1158         return true;
1159       } catch (ParseException e) {
1160         return false;
1161       }
1162     }
1163 
1164     /**
1165      * If the next token is a double, consume it and return its value. Otherwise, throw a {@link
1166      * ParseException}.
1167      */
consumeDouble()1168     public double consumeDouble() throws ParseException {
1169       // We need to parse infinity and nan separately because
1170       // Double.parseDouble() does not accept "inf", "infinity", or "nan".
1171       if (DOUBLE_INFINITY.matcher(currentToken).matches()) {
1172         final boolean negative = currentToken.startsWith("-");
1173         nextToken();
1174         return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
1175       }
1176       if (currentToken.equalsIgnoreCase("nan")) {
1177         nextToken();
1178         return Double.NaN;
1179       }
1180       try {
1181         final double result = Double.parseDouble(currentToken);
1182         nextToken();
1183         return result;
1184       } catch (NumberFormatException e) {
1185         throw floatParseException(e);
1186       }
1187     }
1188 
1189     /**
1190      * If the next token is a double, consume it and return {@code true}. Otherwise, return {@code
1191      * false} without doing anything.
1192      */
tryConsumeDouble()1193     public boolean tryConsumeDouble() {
1194       try {
1195         consumeDouble();
1196         return true;
1197       } catch (ParseException e) {
1198         return false;
1199       }
1200     }
1201 
1202     /**
1203      * If the next token is a float, consume it and return its value. Otherwise, throw a {@link
1204      * ParseException}.
1205      */
consumeFloat()1206     public float consumeFloat() throws ParseException {
1207       // We need to parse infinity and nan separately because
1208       // Float.parseFloat() does not accept "inf", "infinity", or "nan".
1209       if (FLOAT_INFINITY.matcher(currentToken).matches()) {
1210         final boolean negative = currentToken.startsWith("-");
1211         nextToken();
1212         return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
1213       }
1214       if (FLOAT_NAN.matcher(currentToken).matches()) {
1215         nextToken();
1216         return Float.NaN;
1217       }
1218       try {
1219         final float result = Float.parseFloat(currentToken);
1220         nextToken();
1221         return result;
1222       } catch (NumberFormatException e) {
1223         throw floatParseException(e);
1224       }
1225     }
1226 
1227     /**
1228      * If the next token is a float, consume it and return {@code true}. Otherwise, return {@code
1229      * false} without doing anything.
1230      */
tryConsumeFloat()1231     public boolean tryConsumeFloat() {
1232       try {
1233         consumeFloat();
1234         return true;
1235       } catch (ParseException e) {
1236         return false;
1237       }
1238     }
1239 
1240     /**
1241      * If the next token is a boolean, consume it and return its value. Otherwise, throw a {@link
1242      * ParseException}.
1243      */
consumeBoolean()1244     public boolean consumeBoolean() throws ParseException {
1245       if (currentToken.equals("true")
1246           || currentToken.equals("True")
1247           || currentToken.equals("t")
1248           || currentToken.equals("1")) {
1249         nextToken();
1250         return true;
1251       } else if (currentToken.equals("false")
1252           || currentToken.equals("False")
1253           || currentToken.equals("f")
1254           || currentToken.equals("0")) {
1255         nextToken();
1256         return false;
1257       } else {
1258         throw parseException("Expected \"true\" or \"false\". Found \"" + currentToken + "\".");
1259       }
1260     }
1261 
1262     /**
1263      * If the next token is a string, consume it and return its (unescaped) value. Otherwise, throw
1264      * a {@link ParseException}.
1265      */
consumeString()1266     public String consumeString() throws ParseException {
1267       return consumeByteString().toStringUtf8();
1268     }
1269 
1270     /** If the next token is a string, consume it and return true. Otherwise, return false. */
tryConsumeString()1271     public boolean tryConsumeString() {
1272       try {
1273         consumeString();
1274         return true;
1275       } catch (ParseException e) {
1276         return false;
1277       }
1278     }
1279 
1280     /**
1281      * If the next token is a string, consume it, unescape it as a {@link ByteString}, and return
1282      * it. Otherwise, throw a {@link ParseException}.
1283      */
consumeByteString()1284     public ByteString consumeByteString() throws ParseException {
1285       List<ByteString> list = new ArrayList<ByteString>();
1286       consumeByteString(list);
1287       while (currentToken.startsWith("'") || currentToken.startsWith("\"")) {
1288         consumeByteString(list);
1289       }
1290       return ByteString.copyFrom(list);
1291     }
1292 
1293     /**
1294      * Like {@link #consumeByteString()} but adds each token of the string to the given list. String
1295      * literals (whether bytes or text) may come in multiple adjacent tokens which are automatically
1296      * concatenated, like in C or Python.
1297      */
consumeByteString(List<ByteString> list)1298     private void consumeByteString(List<ByteString> list) throws ParseException {
1299       final char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0';
1300       if (quote != '\"' && quote != '\'') {
1301         throw parseException("Expected string.");
1302       }
1303 
1304       if (currentToken.length() < 2 || currentToken.charAt(currentToken.length() - 1) != quote) {
1305         throw parseException("String missing ending quote.");
1306       }
1307 
1308       try {
1309         final String escaped = currentToken.substring(1, currentToken.length() - 1);
1310         final ByteString result = unescapeBytes(escaped);
1311         nextToken();
1312         list.add(result);
1313       } catch (InvalidEscapeSequenceException e) {
1314         throw parseException(e.getMessage());
1315       }
1316     }
1317 
1318     /**
1319      * Returns a {@link ParseException} with the current line and column numbers in the description,
1320      * suitable for throwing.
1321      */
parseException(final String description)1322     public ParseException parseException(final String description) {
1323       // Note:  People generally prefer one-based line and column numbers.
1324       return new ParseException(line + 1, column + 1, description);
1325     }
1326 
1327     /**
1328      * Returns a {@link ParseException} with the line and column numbers of the previous token in
1329      * the description, suitable for throwing.
1330      */
parseExceptionPreviousToken(final String description)1331     public ParseException parseExceptionPreviousToken(final String description) {
1332       // Note:  People generally prefer one-based line and column numbers.
1333       return new ParseException(previousLine + 1, previousColumn + 1, description);
1334     }
1335 
1336     /**
1337      * Constructs an appropriate {@link ParseException} for the given {@code NumberFormatException}
1338      * when trying to parse an integer.
1339      */
integerParseException(final NumberFormatException e)1340     private ParseException integerParseException(final NumberFormatException e) {
1341       return parseException("Couldn't parse integer: " + e.getMessage());
1342     }
1343 
1344     /**
1345      * Constructs an appropriate {@link ParseException} for the given {@code NumberFormatException}
1346      * when trying to parse a float or double.
1347      */
floatParseException(final NumberFormatException e)1348     private ParseException floatParseException(final NumberFormatException e) {
1349       return parseException("Couldn't parse number: " + e.getMessage());
1350     }
1351 
1352     /**
1353      * Returns a {@link UnknownFieldParseException} with the line and column numbers of the previous
1354      * token in the description, and the unknown field name, suitable for throwing.
1355      */
unknownFieldParseExceptionPreviousToken( final String unknownField, final String description)1356     public UnknownFieldParseException unknownFieldParseExceptionPreviousToken(
1357         final String unknownField, final String description) {
1358       // Note:  People generally prefer one-based line and column numbers.
1359       return new UnknownFieldParseException(
1360           previousLine + 1, previousColumn + 1, unknownField, description);
1361     }
1362   }
1363 
1364   /** Thrown when parsing an invalid text format message. */
1365   public static class ParseException extends IOException {
1366     private static final long serialVersionUID = 3196188060225107702L;
1367 
1368     private final int line;
1369     private final int column;
1370 
1371     /** Create a new instance, with -1 as the line and column numbers. */
ParseException(final String message)1372     public ParseException(final String message) {
1373       this(-1, -1, message);
1374     }
1375 
1376     /**
1377      * Create a new instance
1378      *
1379      * @param line the line number where the parse error occurred, using 1-offset.
1380      * @param column the column number where the parser error occurred, using 1-offset.
1381      */
ParseException(final int line, final int column, final String message)1382     public ParseException(final int line, final int column, final String message) {
1383       super(Integer.toString(line) + ":" + column + ": " + message);
1384       this.line = line;
1385       this.column = column;
1386     }
1387 
1388     /**
1389      * Return the line where the parse exception occurred, or -1 when none is provided. The value is
1390      * specified as 1-offset, so the first line is line 1.
1391      */
getLine()1392     public int getLine() {
1393       return line;
1394     }
1395 
1396     /**
1397      * Return the column where the parse exception occurred, or -1 when none is provided. The value
1398      * is specified as 1-offset, so the first line is line 1.
1399      */
getColumn()1400     public int getColumn() {
1401       return column;
1402     }
1403   }
1404 
1405   /** Thrown when encountering an unknown field while parsing a text format message. */
1406   public static class UnknownFieldParseException extends ParseException {
1407     private final String unknownField;
1408 
1409     /**
1410      * Create a new instance, with -1 as the line and column numbers, and an empty unknown field
1411      * name.
1412      */
UnknownFieldParseException(final String message)1413     public UnknownFieldParseException(final String message) {
1414       this(-1, -1, "", message);
1415     }
1416 
1417     /**
1418      * Create a new instance
1419      *
1420      * @param line the line number where the parse error occurred, using 1-offset.
1421      * @param column the column number where the parser error occurred, using 1-offset.
1422      * @param unknownField the name of the unknown field found while parsing.
1423      */
UnknownFieldParseException( final int line, final int column, final String unknownField, final String message)1424     public UnknownFieldParseException(
1425         final int line, final int column, final String unknownField, final String message) {
1426       super(line, column, message);
1427       this.unknownField = unknownField;
1428     }
1429 
1430     /**
1431      * Return the name of the unknown field encountered while parsing the protocol buffer string.
1432      */
getUnknownField()1433     public String getUnknownField() {
1434       return unknownField;
1435     }
1436   }
1437 
1438   private static final Parser PARSER = Parser.newBuilder().build();
1439 
1440   /**
1441    * Return a {@link Parser} instance which can parse text-format messages. The returned instance is
1442    * thread-safe.
1443    */
getParser()1444   public static Parser getParser() {
1445     return PARSER;
1446   }
1447 
1448   /** Parse a text-format message from {@code input} and merge the contents into {@code builder}. */
merge(final Readable input, final Message.Builder builder)1449   public static void merge(final Readable input, final Message.Builder builder) throws IOException {
1450     PARSER.merge(input, builder);
1451   }
1452 
1453   /** Parse a text-format message from {@code input} and merge the contents into {@code builder}. */
merge(final CharSequence input, final Message.Builder builder)1454   public static void merge(final CharSequence input, final Message.Builder builder)
1455       throws ParseException {
1456     PARSER.merge(input, builder);
1457   }
1458 
1459   /**
1460    * Parse a text-format message from {@code input}.
1461    *
1462    * @return the parsed message, guaranteed initialized
1463    */
parse(final CharSequence input, final Class<T> protoClass)1464   public static <T extends Message> T parse(final CharSequence input, final Class<T> protoClass)
1465       throws ParseException {
1466     Message.Builder builder = Internal.getDefaultInstance(protoClass).newBuilderForType();
1467     merge(input, builder);
1468     @SuppressWarnings("unchecked")
1469     T output = (T) builder.build();
1470     return output;
1471   }
1472 
1473   /**
1474    * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1475    * Extensions will be recognized if they are registered in {@code extensionRegistry}.
1476    */
merge( final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1477   public static void merge(
1478       final Readable input,
1479       final ExtensionRegistry extensionRegistry,
1480       final Message.Builder builder)
1481       throws IOException {
1482     PARSER.merge(input, extensionRegistry, builder);
1483   }
1484 
1485 
1486   /**
1487    * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1488    * Extensions will be recognized if they are registered in {@code extensionRegistry}.
1489    */
merge( final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1490   public static void merge(
1491       final CharSequence input,
1492       final ExtensionRegistry extensionRegistry,
1493       final Message.Builder builder)
1494       throws ParseException {
1495     PARSER.merge(input, extensionRegistry, builder);
1496   }
1497 
1498   /**
1499    * Parse a text-format message from {@code input}. Extensions will be recognized if they are
1500    * registered in {@code extensionRegistry}.
1501    *
1502    * @return the parsed message, guaranteed initialized
1503    */
parse( final CharSequence input, final ExtensionRegistry extensionRegistry, final Class<T> protoClass)1504   public static <T extends Message> T parse(
1505       final CharSequence input,
1506       final ExtensionRegistry extensionRegistry,
1507       final Class<T> protoClass)
1508       throws ParseException {
1509     Message.Builder builder = Internal.getDefaultInstance(protoClass).newBuilderForType();
1510     merge(input, extensionRegistry, builder);
1511     @SuppressWarnings("unchecked")
1512     T output = (T) builder.build();
1513     return output;
1514   }
1515 
1516 
1517   /**
1518    * Parser for text-format proto2 instances. This class is thread-safe. The implementation largely
1519    * follows google/protobuf/text_format.cc.
1520    *
1521    * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or {@link Builder} to
1522    * control the parser behavior.
1523    */
1524   public static class Parser {
/**
 * Controls what happens when a non-repeated field, or a second member of the same oneof, is
 * given a value although one is already present. With a required/optional field "foo" and a
 * oneof containing "baz" and "qux", the policy covers all of:
 *
 * <ul>
 *   <li>the input "foo: 1 foo: 2",
 *   <li>the input "baz: 1 qux: 2",
 *   <li>merging "foo: 2" into a proto in which foo is already set, and
 *   <li>merging "qux: 2" into a proto in which baz is already set.
 * </ul>
 */
public enum SingularOverwritePolicy {
  /**
   * Later values are merged with earlier ones. For primitive fields and for conflicting oneof
   * members, the last value wins.
   */
  ALLOW_SINGULAR_OVERWRITES,
  /** The repeated assignment is reported as an error. */
  FORBID_SINGULAR_OVERWRITES
}
1545 
1546     private final TypeRegistry typeRegistry;
1547     private final boolean allowUnknownFields;
1548     private final boolean allowUnknownEnumValues;
1549     private final boolean allowUnknownExtensions;
1550     private final SingularOverwritePolicy singularOverwritePolicy;
1551     private TextFormatParseInfoTree.Builder parseInfoTreeBuilder;
1552 
/**
 * Creates a parser with exactly the given settings. The constructor is private; instances are
 * created through {@link Builder#build()}.
 */
private Parser(
    TypeRegistry typeRegistry,
    boolean allowUnknownFields,
    boolean allowUnknownEnumValues,
    boolean allowUnknownExtensions,
    SingularOverwritePolicy singularOverwritePolicy,
    TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
  // Leniency switches.
  this.allowUnknownFields = allowUnknownFields;
  this.allowUnknownExtensions = allowUnknownExtensions;
  this.allowUnknownEnumValues = allowUnknownEnumValues;
  this.singularOverwritePolicy = singularOverwritePolicy;
  // Collaborators.
  this.typeRegistry = typeRegistry;
  this.parseInfoTreeBuilder = parseInfoTreeBuilder;
}
1567 
1568     /** Returns a new instance of {@link Builder}. */
newBuilder()1569     public static Builder newBuilder() {
1570       return new Builder();
1571     }
1572 
1573     /** Builder that can be used to obtain new instances of {@link Parser}. */
1574     public static class Builder {
1575       private boolean allowUnknownFields = false;
1576       private boolean allowUnknownEnumValues = false;
1577       private boolean allowUnknownExtensions = false;
1578       private SingularOverwritePolicy singularOverwritePolicy =
1579           SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES;
1580       private TextFormatParseInfoTree.Builder parseInfoTreeBuilder = null;
1581       private TypeRegistry typeRegistry = TypeRegistry.getEmptyTypeRegistry();
1582 
1583       /**
1584        * Sets the TypeRegistry for resolving Any. If this is not set, TextFormat will not be able to
1585        * parse Any unless Any is write as bytes.
1586        *
1587        * @throws IllegalArgumentException if a registry is already set.
1588        */
setTypeRegistry(TypeRegistry typeRegistry)1589       public Builder setTypeRegistry(TypeRegistry typeRegistry) {
1590         this.typeRegistry = typeRegistry;
1591         return this;
1592       }
1593 
1594       /**
1595        * Set whether this parser will allow unknown fields. By default, an exception is thrown if an
1596        * unknown field is encountered. If this is set, the parser will only log a warning. Allow
1597        * unknown fields will also allow unknown extensions.
1598        *
1599        * <p>Use of this parameter is discouraged which may hide some errors (e.g.
1600        * spelling error on field name).
1601        */
setAllowUnknownFields(boolean allowUnknownFields)1602       public Builder setAllowUnknownFields(boolean allowUnknownFields) {
1603         this.allowUnknownFields = allowUnknownFields;
1604         return this;
1605       }
1606 
1607       /**
1608        * Set whether this parser will allow unknown extensions. By default, an
1609        * exception is thrown if unknown extension is encountered. If this is set true,
1610        * the parser will only log a warning. Allow unknown extensions does not mean
1611        * allow normal unknown fields.
1612        */
setAllowUnknownExtensions(boolean allowUnknownExtensions)1613       public Builder setAllowUnknownExtensions(boolean allowUnknownExtensions) {
1614         this.allowUnknownExtensions = allowUnknownExtensions;
1615         return this;
1616       }
1617 
1618       /** Sets parser behavior when a non-repeated field appears more than once. */
setSingularOverwritePolicy(SingularOverwritePolicy p)1619       public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) {
1620         this.singularOverwritePolicy = p;
1621         return this;
1622       }
1623 
setParseInfoTreeBuilder(TextFormatParseInfoTree.Builder parseInfoTreeBuilder)1624       public Builder setParseInfoTreeBuilder(TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
1625         this.parseInfoTreeBuilder = parseInfoTreeBuilder;
1626         return this;
1627       }
1628 
build()1629       public Parser build() {
1630         return new Parser(
1631             typeRegistry,
1632             allowUnknownFields,
1633             allowUnknownEnumValues,
1634             allowUnknownExtensions,
1635             singularOverwritePolicy,
1636             parseInfoTreeBuilder);
1637       }
1638     }
1639 
1640     /**
1641      * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1642      */
merge(final Readable input, final Message.Builder builder)1643     public void merge(final Readable input, final Message.Builder builder) throws IOException {
1644       merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
1645     }
1646 
1647     /**
1648      * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1649      */
merge(final CharSequence input, final Message.Builder builder)1650     public void merge(final CharSequence input, final Message.Builder builder)
1651         throws ParseException {
1652       merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
1653     }
1654 
1655     /**
1656      * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1657      * Extensions will be recognized if they are registered in {@code extensionRegistry}.
1658      */
merge( final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1659     public void merge(
1660         final Readable input,
1661         final ExtensionRegistry extensionRegistry,
1662         final Message.Builder builder)
1663         throws IOException {
1664       // Read the entire input to a String then parse that.
1665 
1666       // If StreamTokenizer was not so limited, or if there were a kind
1667       // of Reader that could read in chunks that match some particular regex,
1668       // or if we wanted to write a custom Reader to tokenize our stream, then
1669       // we would not have to read to one big String.  Alas, none of these is
1670       // the case.  Oh well.
1671 
1672       merge(toStringBuilder(input), extensionRegistry, builder);
1673     }
1674 
1675 
1676     private static final int BUFFER_SIZE = 4096;
1677 
1678     // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
1679     // overhead is worthwhile
toStringBuilder(final Readable input)1680     private static StringBuilder toStringBuilder(final Readable input) throws IOException {
1681       final StringBuilder text = new StringBuilder();
1682       final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
1683       while (true) {
1684         final int n = input.read(buffer);
1685         if (n == -1) {
1686           break;
1687         }
1688         buffer.flip();
1689         text.append(buffer, 0, n);
1690       }
1691       return text;
1692     }
1693 
1694     static final class UnknownField {
1695       static enum Type {
1696         FIELD, EXTENSION;
1697       }
1698 
1699       final String message;
1700       final Type type;
1701 
UnknownField(String message, Type type)1702       UnknownField(String message, Type type) {
1703         this.message = message;
1704         this.type = type;
1705       }
1706     }
1707 
1708     // Check both unknown fields and unknown extensions and log warning messages
1709     // or throw exceptions according to the flag.
checkUnknownFields(final List<UnknownField> unknownFields)1710     private void checkUnknownFields(final List<UnknownField> unknownFields) throws ParseException {
1711       if (unknownFields.isEmpty()) {
1712         return;
1713       }
1714 
1715       StringBuilder msg = new StringBuilder("Input contains unknown fields and/or extensions:");
1716       for (UnknownField field : unknownFields) {
1717         msg.append('\n').append(field.message);
1718       }
1719 
1720       if (allowUnknownFields) {
1721         logger.warning(msg.toString());
1722         return;
1723       }
1724 
1725       int firstErrorIndex = 0;
1726       if (allowUnknownExtensions) {
1727         boolean allUnknownExtensions = true;
1728         for (UnknownField field : unknownFields) {
1729           if (field.type == UnknownField.Type.FIELD) {
1730             allUnknownExtensions = false;
1731             break;
1732           }
1733           ++firstErrorIndex;
1734         }
1735         if (allUnknownExtensions) {
1736           logger.warning(msg.toString());
1737           return;
1738         }
1739       }
1740 
1741       String[] lineColumn = unknownFields.get(firstErrorIndex).message.split(":");
1742       throw new ParseException(
1743           Integer.parseInt(lineColumn[0]), Integer.parseInt(lineColumn[1]), msg.toString());
1744     }
1745 
1746     /**
1747      * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1748      * Extensions will be recognized if they are registered in {@code extensionRegistry}.
1749      */
merge( final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1750     public void merge(
1751         final CharSequence input,
1752         final ExtensionRegistry extensionRegistry,
1753         final Message.Builder builder)
1754         throws ParseException {
1755       final Tokenizer tokenizer = new Tokenizer(input);
1756       MessageReflection.BuilderAdapter target = new MessageReflection.BuilderAdapter(builder);
1757 
1758       List<UnknownField> unknownFields = new ArrayList<UnknownField>();
1759 
1760       while (!tokenizer.atEnd()) {
1761         mergeField(tokenizer, extensionRegistry, target, unknownFields);
1762       }
1763 
1764       checkUnknownFields(unknownFields);
1765     }
1766 
1767 
1768     /** Parse a single field from {@code tokenizer} and merge it into {@code builder}. */
mergeField( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, List<UnknownField> unknownFields)1769     private void mergeField(
1770         final Tokenizer tokenizer,
1771         final ExtensionRegistry extensionRegistry,
1772         final MessageReflection.MergeTarget target,
1773         List<UnknownField> unknownFields)
1774         throws ParseException {
1775       mergeField(
1776           tokenizer,
1777           extensionRegistry,
1778           target,
1779           parseInfoTreeBuilder,
1780           unknownFields);
1781     }
1782 
1783     /** Parse a single field from {@code tokenizer} and merge it into {@code target}. */
mergeField( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1784     private void mergeField(
1785         final Tokenizer tokenizer,
1786         final ExtensionRegistry extensionRegistry,
1787         final MessageReflection.MergeTarget target,
1788         TextFormatParseInfoTree.Builder parseTreeBuilder,
1789         List<UnknownField> unknownFields)
1790         throws ParseException {
1791       FieldDescriptor field = null;
1792       int startLine = tokenizer.getLine();
1793       int startColumn = tokenizer.getColumn();
1794       final Descriptor type = target.getDescriptorForType();
1795       ExtensionRegistry.ExtensionInfo extension = null;
1796 
1797       if ("google.protobuf.Any".equals(type.getFullName()) && tokenizer.tryConsume("[")) {
1798         mergeAnyFieldValue(tokenizer, extensionRegistry, target, parseTreeBuilder, unknownFields,
1799             type);
1800         return;
1801       }
1802 
1803       if (tokenizer.tryConsume("[")) {
1804         // An extension.
1805         final StringBuilder name = new StringBuilder(tokenizer.consumeIdentifier());
1806         while (tokenizer.tryConsume(".")) {
1807           name.append('.');
1808           name.append(tokenizer.consumeIdentifier());
1809         }
1810 
1811         extension = target.findExtensionByName(extensionRegistry, name.toString());
1812 
1813         if (extension == null) {
1814           String message =
1815               (tokenizer.getPreviousLine() + 1)
1816                   + ":"
1817                   + (tokenizer.getPreviousColumn() + 1)
1818                   + ":\t"
1819                   + type.getFullName()
1820                   + ".["
1821                   + name
1822                   + "]";
1823           unknownFields.add(new UnknownField(message, UnknownField.Type.EXTENSION));
1824         } else {
1825           if (extension.descriptor.getContainingType() != type) {
1826             throw tokenizer.parseExceptionPreviousToken(
1827                 "Extension \""
1828                     + name
1829                     + "\" does not extend message type \""
1830                     + type.getFullName()
1831                     + "\".");
1832           }
1833           field = extension.descriptor;
1834         }
1835 
1836         tokenizer.consume("]");
1837       } else {
1838         final String name = tokenizer.consumeIdentifier();
1839         field = type.findFieldByName(name);
1840 
1841         // Group names are expected to be capitalized as they appear in the
1842         // .proto file, which actually matches their type names, not their field
1843         // names.
1844         if (field == null) {
1845           // Explicitly specify US locale so that this code does not break when
1846           // executing in Turkey.
1847           final String lowerName = name.toLowerCase(Locale.US);
1848           field = type.findFieldByName(lowerName);
1849           // If the case-insensitive match worked but the field is NOT a group,
1850           if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
1851             field = null;
1852           }
1853         }
1854         // Again, special-case group names as described above.
1855         if (field != null
1856             && field.getType() == FieldDescriptor.Type.GROUP
1857             && !field.getMessageType().getName().equals(name)) {
1858           field = null;
1859         }
1860 
1861         if (field == null) {
1862           String message = (tokenizer.getPreviousLine() + 1)
1863                            + ":"
1864                            + (tokenizer.getPreviousColumn() + 1)
1865                            + ":\t"
1866                            + type.getFullName()
1867                            + "."
1868                            + name;
1869           unknownFields.add(new UnknownField(message, UnknownField.Type.FIELD));
1870         }
1871       }
1872 
1873       // Skips unknown fields.
1874       if (field == null) {
1875         // Try to guess the type of this field.
1876         // If this field is not a message, there should be a ":" between the
1877         // field name and the field value and also the field value should not
1878         // start with "{" or "<" which indicates the beginning of a message body.
1879         // If there is no ":" or there is a "{" or "<" after ":", this field has
1880         // to be a message or the input is ill-formed.
1881         if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) {
1882           skipFieldValue(tokenizer);
1883         } else {
1884           skipFieldMessage(tokenizer);
1885         }
1886         return;
1887       }
1888 
1889       // Handle potential ':'.
1890       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
1891         tokenizer.tryConsume(":"); // optional
1892         if (parseTreeBuilder != null) {
1893           TextFormatParseInfoTree.Builder childParseTreeBuilder =
1894               parseTreeBuilder.getBuilderForSubMessageField(field);
1895           consumeFieldValues(
1896               tokenizer,
1897               extensionRegistry,
1898               target,
1899               field,
1900               extension,
1901               childParseTreeBuilder,
1902               unknownFields);
1903         } else {
1904           consumeFieldValues(
1905               tokenizer,
1906               extensionRegistry,
1907               target,
1908               field,
1909               extension,
1910               parseTreeBuilder,
1911               unknownFields);
1912         }
1913       } else {
1914         tokenizer.consume(":"); // required
1915         consumeFieldValues(
1916             tokenizer,
1917             extensionRegistry,
1918             target,
1919             field,
1920             extension,
1921             parseTreeBuilder,
1922             unknownFields);
1923       }
1924 
1925       if (parseTreeBuilder != null) {
1926         parseTreeBuilder.setLocation(field, TextFormatParseLocation.create(startLine, startColumn));
1927       }
1928 
1929       // For historical reasons, fields may optionally be separated by commas or
1930       // semicolons.
1931       if (!tokenizer.tryConsume(";")) {
1932         tokenizer.tryConsume(",");
1933       }
1934     }
1935 
1936     /**
1937      * Parse a one or more field values from {@code tokenizer} and merge it into {@code builder}.
1938      */
consumeFieldValues( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1939     private void consumeFieldValues(
1940         final Tokenizer tokenizer,
1941         final ExtensionRegistry extensionRegistry,
1942         final MessageReflection.MergeTarget target,
1943         final FieldDescriptor field,
1944         final ExtensionRegistry.ExtensionInfo extension,
1945         final TextFormatParseInfoTree.Builder parseTreeBuilder,
1946         List<UnknownField> unknownFields)
1947         throws ParseException {
1948       // Support specifying repeated field values as a comma-separated list.
1949       // Ex."foo: [1, 2, 3]"
1950       if (field.isRepeated() && tokenizer.tryConsume("[")) {
1951         if (!tokenizer.tryConsume("]")) { // Allow "foo: []" to be treated as empty.
1952           while (true) {
1953             consumeFieldValue(
1954                 tokenizer,
1955                 extensionRegistry,
1956                 target,
1957                 field,
1958                 extension,
1959                 parseTreeBuilder,
1960                 unknownFields);
1961             if (tokenizer.tryConsume("]")) {
1962               // End of list.
1963               break;
1964             }
1965             tokenizer.consume(",");
1966           }
1967         }
1968       } else {
1969         consumeFieldValue(
1970             tokenizer,
1971             extensionRegistry,
1972             target,
1973             field,
1974             extension,
1975             parseTreeBuilder,
1976             unknownFields);
1977       }
1978     }
1979 
1980     /** Parse a single field value from {@code tokenizer} and merge it into {@code builder}. */
consumeFieldValue( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1981     private void consumeFieldValue(
1982         final Tokenizer tokenizer,
1983         final ExtensionRegistry extensionRegistry,
1984         final MessageReflection.MergeTarget target,
1985         final FieldDescriptor field,
1986         final ExtensionRegistry.ExtensionInfo extension,
1987         final TextFormatParseInfoTree.Builder parseTreeBuilder,
1988         List<UnknownField> unknownFields)
1989         throws ParseException {
1990       if (singularOverwritePolicy == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES
1991           && !field.isRepeated()) {
1992         if (target.hasField(field)) {
1993           throw tokenizer.parseExceptionPreviousToken(
1994               "Non-repeated field \"" + field.getFullName() + "\" cannot be overwritten.");
1995         } else if (field.getContainingOneof() != null
1996             && target.hasOneof(field.getContainingOneof())) {
1997           Descriptors.OneofDescriptor oneof = field.getContainingOneof();
1998           throw tokenizer.parseExceptionPreviousToken(
1999               "Field \""
2000                   + field.getFullName()
2001                   + "\" is specified along with field \""
2002                   + target.getOneofFieldDescriptor(oneof).getFullName()
2003                   + "\", another member of oneof \""
2004                   + oneof.getName()
2005                   + "\".");
2006         }
2007       }
2008 
2009       Object value = null;
2010 
2011       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
2012         final String endToken;
2013         if (tokenizer.tryConsume("<")) {
2014           endToken = ">";
2015         } else {
2016           tokenizer.consume("{");
2017           endToken = "}";
2018         }
2019 
2020         // Try to parse human readable format of Any in the form: [type_url]: { ... }
2021         if (field.getMessageType().getFullName().equals("google.protobuf.Any")
2022             && tokenizer.tryConsume("[")) {
2023           // Use Proto reflection here since depending on Any would intoduce a cyclic dependency
2024           // (java_proto_library for any_java_proto depends on the protobuf_impl).
2025           Message anyBuilder = DynamicMessage.getDefaultInstance(field.getMessageType());
2026           MessageReflection.MergeTarget anyField = target.newMergeTargetForField(field, anyBuilder);
2027           mergeAnyFieldValue(tokenizer, extensionRegistry, anyField, parseTreeBuilder,
2028               unknownFields, field.getMessageType());
2029           value = anyField.finish();
2030           tokenizer.consume(endToken);
2031         } else {
2032           Message defaultInstance = (extension == null) ? null : extension.defaultInstance;
2033           MessageReflection.MergeTarget subField =
2034               target.newMergeTargetForField(field, defaultInstance);
2035 
2036           while (!tokenizer.tryConsume(endToken)) {
2037             if (tokenizer.atEnd()) {
2038               throw tokenizer.parseException("Expected \"" + endToken + "\".");
2039             }
2040             mergeField(tokenizer, extensionRegistry, subField, parseTreeBuilder, unknownFields);
2041           }
2042 
2043           value = subField.finish();
2044         }
2045 
2046       } else {
2047         switch (field.getType()) {
2048           case INT32:
2049           case SINT32:
2050           case SFIXED32:
2051             value = tokenizer.consumeInt32();
2052             break;
2053 
2054           case INT64:
2055           case SINT64:
2056           case SFIXED64:
2057             value = tokenizer.consumeInt64();
2058             break;
2059 
2060           case UINT32:
2061           case FIXED32:
2062             value = tokenizer.consumeUInt32();
2063             break;
2064 
2065           case UINT64:
2066           case FIXED64:
2067             value = tokenizer.consumeUInt64();
2068             break;
2069 
2070           case FLOAT:
2071             value = tokenizer.consumeFloat();
2072             break;
2073 
2074           case DOUBLE:
2075             value = tokenizer.consumeDouble();
2076             break;
2077 
2078           case BOOL:
2079             value = tokenizer.consumeBoolean();
2080             break;
2081 
2082           case STRING:
2083             value = tokenizer.consumeString();
2084             break;
2085 
2086           case BYTES:
2087             value = tokenizer.consumeByteString();
2088             break;
2089 
2090           case ENUM:
2091             final EnumDescriptor enumType = field.getEnumType();
2092 
2093             if (tokenizer.lookingAtInteger()) {
2094               final int number = tokenizer.consumeInt32();
2095               value = enumType.findValueByNumber(number);
2096               if (value == null) {
2097                 String unknownValueMsg =
2098                     "Enum type \""
2099                         + enumType.getFullName()
2100                         + "\" has no value with number "
2101                         + number
2102                         + '.';
2103                 if (allowUnknownEnumValues) {
2104                   logger.warning(unknownValueMsg);
2105                   return;
2106                 } else {
2107                   throw tokenizer.parseExceptionPreviousToken(
2108                       "Enum type \""
2109                           + enumType.getFullName()
2110                           + "\" has no value with number "
2111                           + number
2112                           + '.');
2113                 }
2114               }
2115             } else {
2116               final String id = tokenizer.consumeIdentifier();
2117               value = enumType.findValueByName(id);
2118               if (value == null) {
2119                 String unknownValueMsg =
2120                     "Enum type \""
2121                         + enumType.getFullName()
2122                         + "\" has no value named \""
2123                         + id
2124                         + "\".";
2125                 if (allowUnknownEnumValues) {
2126                   logger.warning(unknownValueMsg);
2127                   return;
2128                 } else {
2129                   throw tokenizer.parseExceptionPreviousToken(unknownValueMsg);
2130                 }
2131               }
2132             }
2133 
2134             break;
2135 
2136           case MESSAGE:
2137           case GROUP:
2138             throw new RuntimeException("Can't get here.");
2139         }
2140       }
2141 
2142       if (field.isRepeated()) {
2143         // TODO(b/29122459): If field.isMapField() and FORBID_SINGULAR_OVERWRITES mode,
2144         //     check for duplicate map keys here.
2145         target.addRepeatedField(field, value);
2146       } else {
2147         target.setField(field, value);
2148       }
2149     }
2150 
mergeAnyFieldValue( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, MergeTarget target, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields, Descriptor anyDescriptor)2151     private void mergeAnyFieldValue(
2152         final Tokenizer tokenizer,
2153         final ExtensionRegistry extensionRegistry,
2154         MergeTarget target,
2155         final TextFormatParseInfoTree.Builder parseTreeBuilder,
2156         List<UnknownField> unknownFields,
2157         Descriptor anyDescriptor)
2158         throws ParseException {
2159       // Try to parse human readable format of Any in the form: [type_url]: { ... }
2160       StringBuilder typeUrlBuilder = new StringBuilder();
2161       // Parse the type_url inside [].
2162       while (true) {
2163         typeUrlBuilder.append(tokenizer.consumeIdentifier());
2164         if (tokenizer.tryConsume("]")) {
2165           break;
2166         }
2167         if (tokenizer.tryConsume("/")) {
2168           typeUrlBuilder.append("/");
2169         } else if (tokenizer.tryConsume(".")) {
2170           typeUrlBuilder.append(".");
2171         } else {
2172           throw tokenizer.parseExceptionPreviousToken("Expected a valid type URL.");
2173         }
2174       }
2175       tokenizer.tryConsume(":");
2176       final String anyEndToken;
2177       if (tokenizer.tryConsume("<")) {
2178         anyEndToken = ">";
2179       } else {
2180         tokenizer.consume("{");
2181         anyEndToken = "}";
2182       }
2183       String typeUrl = typeUrlBuilder.toString();
2184       Descriptor contentType = null;
2185       try {
2186         contentType = typeRegistry.getDescriptorForTypeUrl(typeUrl);
2187       } catch (InvalidProtocolBufferException e) {
2188         throw tokenizer.parseException("Invalid valid type URL. Found: " + typeUrl);
2189       }
2190       if (contentType == null) {
2191         throw tokenizer.parseException(
2192             "Unable to parse Any of type: "
2193                 + typeUrl
2194                 + ". Please make sure that the TypeRegistry contains the descriptors for the given"
2195                 + " types.");
2196       }
2197       Message.Builder contentBuilder =
2198           DynamicMessage.getDefaultInstance(contentType).newBuilderForType();
2199       MessageReflection.BuilderAdapter contentTarget =
2200           new MessageReflection.BuilderAdapter(contentBuilder);
2201       while (!tokenizer.tryConsume(anyEndToken)) {
2202         mergeField(tokenizer, extensionRegistry, contentTarget, parseTreeBuilder, unknownFields);
2203       }
2204 
2205       target.setField(anyDescriptor.findFieldByName("type_url"), typeUrlBuilder.toString());
2206       target.setField(
2207           anyDescriptor.findFieldByName("value"), contentBuilder.build().toByteString());
2208     }
2209 
2210     /** Skips the next field including the field's name and value. */
skipField(Tokenizer tokenizer)2211     private static void skipField(Tokenizer tokenizer) throws ParseException {
2212       if (tokenizer.tryConsume("[")) {
2213         // Extension name.
2214         do {
2215           tokenizer.consumeIdentifier();
2216         } while (tokenizer.tryConsume("."));
2217         tokenizer.consume("]");
2218       } else {
2219         tokenizer.consumeIdentifier();
2220       }
2221 
2222       // Try to guess the type of this field.
2223       // If this field is not a message, there should be a ":" between the
2224       // field name and the field value and also the field value should not
2225       // start with "{" or "<" which indicates the beginning of a message body.
2226       // If there is no ":" or there is a "{" or "<" after ":", this field has
2227       // to be a message or the input is ill-formed.
2228       if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") && !tokenizer.lookingAt("{")) {
2229         skipFieldValue(tokenizer);
2230       } else {
2231         skipFieldMessage(tokenizer);
2232       }
2233       // For historical reasons, fields may optionally be separated by commas or
2234       // semicolons.
2235       if (!tokenizer.tryConsume(";")) {
2236         tokenizer.tryConsume(",");
2237       }
2238     }
2239 
2240     /**
2241      * Skips the whole body of a message including the beginning delimiter and the ending delimiter.
2242      */
skipFieldMessage(Tokenizer tokenizer)2243     private static void skipFieldMessage(Tokenizer tokenizer) throws ParseException {
2244       final String delimiter;
2245       if (tokenizer.tryConsume("<")) {
2246         delimiter = ">";
2247       } else {
2248         tokenizer.consume("{");
2249         delimiter = "}";
2250       }
2251       while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) {
2252         skipField(tokenizer);
2253       }
2254       tokenizer.consume(delimiter);
2255     }
2256 
2257     /** Skips a field value. */
skipFieldValue(Tokenizer tokenizer)2258     private static void skipFieldValue(Tokenizer tokenizer) throws ParseException {
2259       if (tokenizer.tryConsumeString()) {
2260         while (tokenizer.tryConsumeString()) {}
2261         return;
2262       }
2263       if (!tokenizer.tryConsumeIdentifier() // includes enum & boolean
2264           && !tokenizer.tryConsumeInt64() // includes int32
2265           && !tokenizer.tryConsumeUInt64() // includes uint32
2266           && !tokenizer.tryConsumeDouble()
2267           && !tokenizer.tryConsumeFloat()) {
2268         throw tokenizer.parseException("Invalid field value: " + tokenizer.currentToken);
2269       }
2270     }
2271   }
2272 
2273   // =================================================================
2274   // Utility functions
2275   //
2276   // Some of these methods are package-private because Descriptors.java uses
2277   // them.
2278 
  /**
   * Escapes bytes in the format used in protocol buffer text format, which is the same as the
   * format used for C string literals. All bytes that are not printable 7-bit ASCII characters are
   * escaped, as well as backslash, single-quote, and double-quote characters. Characters for which
   * no short-hand escape sequence is defined will be escaped using 3-digit octal sequences.
   *
   * <p>This method delegates to {@code TextFormatEscaper.escapeBytes}.
   *
   * @param input the bytes to escape
   * @return the escaped text
   */
  public static String escapeBytes(ByteString input) {
    return TextFormatEscaper.escapeBytes(input);
  }
2288 
  /**
   * Like {@link #escapeBytes(ByteString)}, but takes the input as a byte array.
   *
   * <p>This method delegates to {@code TextFormatEscaper.escapeBytes}.
   *
   * @param input the bytes to escape
   * @return the escaped text
   */
  public static String escapeBytes(byte[] input) {
    return TextFormatEscaper.escapeBytes(input);
  }
2293 
2294   /**
2295    * Un-escape a byte sequence as escaped using {@link #escapeBytes(ByteString)}. Two-digit hex
2296    * escapes (starting with "\x") are also recognized.
2297    */
unescapeBytes(final CharSequence charString)2298   public static ByteString unescapeBytes(final CharSequence charString)
2299       throws InvalidEscapeSequenceException {
2300     // First convert the Java character sequence to UTF-8 bytes.
2301     ByteString input = ByteString.copyFromUtf8(charString.toString());
2302     // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
2303     // escapes can all be expressed with ASCII characters, so it is safe to
2304     // operate on bytes here.
2305     //
2306     // Unescaping the input byte array will result in a byte sequence that's no
2307     // longer than the input.  That's because each escape sequence is between
2308     // two and four bytes long and stands for a single byte.
2309     final byte[] result = new byte[input.size()];
2310     int pos = 0;
2311     for (int i = 0; i < input.size(); i++) {
2312       byte c = input.byteAt(i);
2313       if (c == '\\') {
2314         if (i + 1 < input.size()) {
2315           ++i;
2316           c = input.byteAt(i);
2317           if (isOctal(c)) {
2318             // Octal escape.
2319             int code = digitValue(c);
2320             if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
2321               ++i;
2322               code = code * 8 + digitValue(input.byteAt(i));
2323             }
2324             if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
2325               ++i;
2326               code = code * 8 + digitValue(input.byteAt(i));
2327             }
2328             // TODO: Check that 0 <= code && code <= 0xFF.
2329             result[pos++] = (byte) code;
2330           } else {
2331             switch (c) {
2332               case 'a':
2333                 result[pos++] = 0x07;
2334                 break;
2335               case 'b':
2336                 result[pos++] = '\b';
2337                 break;
2338               case 'f':
2339                 result[pos++] = '\f';
2340                 break;
2341               case 'n':
2342                 result[pos++] = '\n';
2343                 break;
2344               case 'r':
2345                 result[pos++] = '\r';
2346                 break;
2347               case 't':
2348                 result[pos++] = '\t';
2349                 break;
2350               case 'v':
2351                 result[pos++] = 0x0b;
2352                 break;
2353               case '\\':
2354                 result[pos++] = '\\';
2355                 break;
2356               case '\'':
2357                 result[pos++] = '\'';
2358                 break;
2359               case '"':
2360                 result[pos++] = '\"';
2361                 break;
2362 
2363               case 'x':
2364                 // hex escape
2365                 int code = 0;
2366                 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
2367                   ++i;
2368                   code = digitValue(input.byteAt(i));
2369                 } else {
2370                   throw new InvalidEscapeSequenceException(
2371                       "Invalid escape sequence: '\\x' with no digits");
2372                 }
2373                 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
2374                   ++i;
2375                   code = code * 16 + digitValue(input.byteAt(i));
2376                 }
2377                 result[pos++] = (byte) code;
2378                 break;
2379 
2380               case 'u':
2381                 // Unicode escape
2382                 ++i;
2383                 if (i + 3 < input.size()
2384                     && isHex(input.byteAt(i))
2385                     && isHex(input.byteAt(i + 1))
2386                     && isHex(input.byteAt(i + 2))
2387                     && isHex(input.byteAt(i + 3))) {
2388                   char ch =
2389                       (char)
2390                           (digitValue(input.byteAt(i)) << 12
2391                               | digitValue(input.byteAt(i + 1)) << 8
2392                               | digitValue(input.byteAt(i + 2)) << 4
2393                               | digitValue(input.byteAt(i + 3)));
2394                   if (Character.isSurrogate(ch)) {
2395                     throw new InvalidEscapeSequenceException(
2396                         "Invalid escape sequence: '\\u' refers to a surrogate");
2397                   }
2398                   byte[] chUtf8 = Character.toString(ch).getBytes(UTF_8);
2399                   System.arraycopy(chUtf8, 0, result, pos, chUtf8.length);
2400                   pos += chUtf8.length;
2401                   i += 3;
2402                 } else {
2403                   throw new InvalidEscapeSequenceException(
2404                       "Invalid escape sequence: '\\u' with too few hex chars");
2405                 }
2406                 break;
2407 
2408               case 'U':
2409                 // Unicode escape
2410                 ++i;
2411                 if (i + 7 >= input.size()) {
2412                   throw new InvalidEscapeSequenceException(
2413                       "Invalid escape sequence: '\\U' with too few hex chars");
2414                 }
2415                 int codepoint = 0;
2416                 for (int offset = i; offset < i + 8; offset++) {
2417                   byte b = input.byteAt(offset);
2418                   if (!isHex(b)) {
2419                     throw new InvalidEscapeSequenceException(
2420                         "Invalid escape sequence: '\\U' with too few hex chars");
2421                   }
2422                   codepoint = (codepoint << 4) | digitValue(b);
2423                 }
2424                 if (!Character.isValidCodePoint(codepoint)) {
2425                   throw new InvalidEscapeSequenceException(
2426                       "Invalid escape sequence: '\\U"
2427                           + input.substring(i, i + 8).toStringUtf8()
2428                           + "' is not a valid code point value");
2429                 }
2430                 Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(codepoint);
2431                 if (unicodeBlock.equals(Character.UnicodeBlock.LOW_SURROGATES)
2432                     || unicodeBlock.equals(Character.UnicodeBlock.HIGH_SURROGATES)
2433                     || unicodeBlock.equals(Character.UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES)) {
2434                   throw new InvalidEscapeSequenceException(
2435                       "Invalid escape sequence: '\\U"
2436                           + input.substring(i, i + 8).toStringUtf8()
2437                           + "' refers to a surrogate code unit");
2438                 }
2439                 int[] codepoints = new int[1];
2440                 codepoints[0] = codepoint;
2441                 byte[] chUtf8 = new String(codepoints, 0, 1).getBytes(UTF_8);
2442                 System.arraycopy(chUtf8, 0, result, pos, chUtf8.length);
2443                 pos += chUtf8.length;
2444                 i += 7;
2445                 break;
2446 
2447               default:
2448                 throw new InvalidEscapeSequenceException(
2449                     "Invalid escape sequence: '\\" + (char) c + '\'');
2450             }
2451           }
2452         } else {
2453           throw new InvalidEscapeSequenceException(
2454               "Invalid escape sequence: '\\' at end of string.");
2455         }
2456       } else {
2457         result[pos++] = c;
2458       }
2459     }
2460 
2461     return result.length == pos
2462         ? ByteString.wrap(result) // This reference has not been out of our control.
2463         : ByteString.copyFrom(result, 0, pos);
2464   }
2465 
  /**
   * Thrown by {@link TextFormat#unescapeBytes} and {@link TextFormat#unescapeText} when an invalid
   * escape sequence is seen.
   */
  public static class InvalidEscapeSequenceException extends IOException {
    private static final long serialVersionUID = -8164033650142593304L;

    /**
     * Creates the exception; the constructor is package-private.
     *
     * @param description text describing the invalid escape sequence, used as the exception
     *     message
     */
    InvalidEscapeSequenceException(final String description) {
      super(description);
    }
  }
2477 
2478   /**
2479    * Like {@link #escapeBytes(ByteString)}, but escapes a text string. Non-ASCII characters are
2480    * first encoded as UTF-8, then each byte is escaped individually as a 3-digit octal escape. Yes,
2481    * it's weird.
2482    */
escapeText(final String input)2483   static String escapeText(final String input) {
2484     return escapeBytes(ByteString.copyFromUtf8(input));
2485   }
2486 
  /**
   * Escape double quotes and backslashes in a String for emitting Unicode output of a message.
   *
   * <p>This method delegates to {@code TextFormatEscaper.escapeDoubleQuotesAndBackslashes}.
   *
   * @param input the text to escape
   * @return the escaped text
   */
  public static String escapeDoubleQuotesAndBackslashes(final String input) {
    return TextFormatEscaper.escapeDoubleQuotesAndBackslashes(input);
  }
2491 
2492   /**
2493    * Un-escape a text string as escaped using {@link #escapeText(String)}. Two-digit hex escapes
2494    * (starting with "\x") are also recognized.
2495    */
unescapeText(final String input)2496   static String unescapeText(final String input) throws InvalidEscapeSequenceException {
2497     return unescapeBytes(input).toStringUtf8();
2498   }
2499 
2500   /** Is this an octal digit? */
isOctal(final byte c)2501   private static boolean isOctal(final byte c) {
2502     return '0' <= c && c <= '7';
2503   }
2504 
2505   /** Is this a hex digit? */
isHex(final byte c)2506   private static boolean isHex(final byte c) {
2507     return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
2508   }
2509 
2510   /**
2511    * Interpret a character as a digit (in any base up to 36) and return the numeric value. This is
2512    * like {@code Character.digit()} but we don't accept non-ASCII digits.
2513    */
digitValue(final byte c)2514   private static int digitValue(final byte c) {
2515     if ('0' <= c && c <= '9') {
2516       return c - '0';
2517     } else if ('a' <= c && c <= 'z') {
2518       return c - 'a' + 10;
2519     } else {
2520       return c - 'A' + 10;
2521     }
2522   }
2523 
2524   /**
2525    * Parse a 32-bit signed integer from the text. Unlike the Java standard {@code
2526    * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal
2527    * and octal numbers, respectively.
2528    */
parseInt32(final String text)2529   static int parseInt32(final String text) throws NumberFormatException {
2530     return (int) parseInteger(text, true, false);
2531   }
2532 
2533   /**
2534    * Parse a 32-bit unsigned integer from the text. Unlike the Java standard {@code
2535    * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal
2536    * and octal numbers, respectively. The result is coerced to a (signed) {@code int} when returned
2537    * since Java has no unsigned integer type.
2538    */
parseUInt32(final String text)2539   static int parseUInt32(final String text) throws NumberFormatException {
2540     return (int) parseInteger(text, false, false);
2541   }
2542 
2543   /**
2544    * Parse a 64-bit signed integer from the text. Unlike the Java standard {@code
2545    * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal
2546    * and octal numbers, respectively.
2547    */
parseInt64(final String text)2548   static long parseInt64(final String text) throws NumberFormatException {
2549     return parseInteger(text, true, true);
2550   }
2551 
2552   /**
2553    * Parse a 64-bit unsigned integer from the text. Unlike the Java standard {@code
2554    * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal
2555    * and octal numbers, respectively. The result is coerced to a (signed) {@code long} when returned
2556    * since Java has no unsigned long type.
2557    */
parseUInt64(final String text)2558   static long parseUInt64(final String text) throws NumberFormatException {
2559     return parseInteger(text, false, true);
2560   }
2561 
parseInteger(final String text, final boolean isSigned, final boolean isLong)2562   private static long parseInteger(final String text, final boolean isSigned, final boolean isLong)
2563       throws NumberFormatException {
2564     int pos = 0;
2565 
2566     boolean negative = false;
2567     if (text.startsWith("-", pos)) {
2568       if (!isSigned) {
2569         throw new NumberFormatException("Number must be positive: " + text);
2570       }
2571       ++pos;
2572       negative = true;
2573     }
2574 
2575     int radix = 10;
2576     if (text.startsWith("0x", pos)) {
2577       pos += 2;
2578       radix = 16;
2579     } else if (text.startsWith("0", pos)) {
2580       radix = 8;
2581     }
2582 
2583     final String numberText = text.substring(pos);
2584 
2585     long result = 0;
2586     if (numberText.length() < 16) {
2587       // Can safely assume no overflow.
2588       result = Long.parseLong(numberText, radix);
2589       if (negative) {
2590         result = -result;
2591       }
2592 
2593       // Check bounds.
2594       // No need to check for 64-bit numbers since they'd have to be 16 chars
2595       // or longer to overflow.
2596       if (!isLong) {
2597         if (isSigned) {
2598           if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) {
2599             throw new NumberFormatException(
2600                 "Number out of range for 32-bit signed integer: " + text);
2601           }
2602         } else {
2603           if (result >= (1L << 32) || result < 0) {
2604             throw new NumberFormatException(
2605                 "Number out of range for 32-bit unsigned integer: " + text);
2606           }
2607         }
2608       }
2609     } else {
2610       BigInteger bigValue = new BigInteger(numberText, radix);
2611       if (negative) {
2612         bigValue = bigValue.negate();
2613       }
2614 
2615       // Check bounds.
2616       if (!isLong) {
2617         if (isSigned) {
2618           if (bigValue.bitLength() > 31) {
2619             throw new NumberFormatException(
2620                 "Number out of range for 32-bit signed integer: " + text);
2621           }
2622         } else {
2623           if (bigValue.bitLength() > 32) {
2624             throw new NumberFormatException(
2625                 "Number out of range for 32-bit unsigned integer: " + text);
2626           }
2627         }
2628       } else {
2629         if (isSigned) {
2630           if (bigValue.bitLength() > 63) {
2631             throw new NumberFormatException(
2632                 "Number out of range for 64-bit signed integer: " + text);
2633           }
2634         } else {
2635           if (bigValue.bitLength() > 64) {
2636             throw new NumberFormatException(
2637                 "Number out of range for 64-bit unsigned integer: " + text);
2638           }
2639         }
2640       }
2641 
2642       result = bigValue.longValue();
2643     }
2644 
2645     return result;
2646   }
2647 }
2648