• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 package com.google.protobuf;
32 
33 import com.google.protobuf.Descriptors.Descriptor;
34 import com.google.protobuf.Descriptors.EnumDescriptor;
35 import com.google.protobuf.Descriptors.EnumValueDescriptor;
36 import com.google.protobuf.Descriptors.FieldDescriptor;
37 import java.io.IOException;
38 import java.math.BigInteger;
39 import java.nio.CharBuffer;
40 import java.util.ArrayList;
41 import java.util.List;
42 import java.util.Locale;
43 import java.util.Map;
44 import java.util.logging.Logger;
45 import java.util.regex.Matcher;
46 import java.util.regex.Pattern;
47 
48 /**
49  * Provide text parsing and formatting support for proto2 instances. The implementation largely
50  * follows google/protobuf/text_format.cc.
51  *
52  * @author wenboz@google.com Wenbo Zhu
53  * @author kenton@google.com Kenton Varda
54  */
55 public final class TextFormat {
TextFormat()56   private TextFormat() {}
57 
58   private static final Logger logger = Logger.getLogger(TextFormat.class.getName());
59 
60   /**
61    * Outputs a textual representation of the Protocol Message supplied into the parameter output.
62    * (This representation is the new version of the classic "ProtocolPrinter" output from the
63    * original Protocol Buffer system)
64    *
65    * @deprecated Use {@code printer().print(MessageOrBuilder, Appendable)}
66    */
67   @Deprecated
print(final MessageOrBuilder message, final Appendable output)68   public static void print(final MessageOrBuilder message, final Appendable output)
69       throws IOException {
70     printer().print(message, output);
71   }
72 
73   /**
74    * Outputs a textual representation of {@code fields} to {@code output}.
75    *
76    * @deprecated Use {@code printer().print(UnknownFieldSet, Appendable)}
77    */
78   @Deprecated
print(final UnknownFieldSet fields, final Appendable output)79   public static void print(final UnknownFieldSet fields, final Appendable output)
80       throws IOException {
81     printer().print(fields, output);
82   }
83 
84   /**
85    * Same as {@code print()}, except that non-ASCII characters are not escaped.
86    *
87    * @deprecated Use {@code printer().escapingNonAscii(false).print(MessageOrBuilder, Appendable)}
88    */
89   @Deprecated
printUnicode(final MessageOrBuilder message, final Appendable output)90   public static void printUnicode(final MessageOrBuilder message, final Appendable output)
91       throws IOException {
92     printer().escapingNonAscii(false).print(message, output);
93   }
94 
95   /**
96    * Same as {@code print()}, except that non-ASCII characters are not escaped.
97    *
98    * @deprecated Use {@code printer().escapingNonAscii(false).print(UnknownFieldSet, Appendable)}
99    */
100   @Deprecated
printUnicode(final UnknownFieldSet fields, final Appendable output)101   public static void printUnicode(final UnknownFieldSet fields, final Appendable output)
102       throws IOException {
103     printer().escapingNonAscii(false).print(fields, output);
104   }
105 
106   /**
107    * Generates a human readable form of this message, useful for debugging and other purposes, with
108    * no newline characters. This is just a trivial wrapper around
109    * {@link TextFormat.Printer#shortDebugString(MessageOrBuilder)}.
110    */
shortDebugString(final MessageOrBuilder message)111   public static String shortDebugString(final MessageOrBuilder message) {
112     return printer().shortDebugString(message);
113   }
114 
115   /**
116    * Generates a human readable form of the field, useful for debugging and other purposes, with no
117    * newline characters.
118    *
119    * @deprecated Use {@code printer().shortDebugString(FieldDescriptor, Object)}
120    */
121   @Deprecated
shortDebugString(final FieldDescriptor field, final Object value)122   public static String shortDebugString(final FieldDescriptor field, final Object value) {
123     return printer().shortDebugString(field, value);
124   }
125 
126   /**
127    * Generates a human readable form of the unknown fields, useful for debugging and other purposes,
128    * with no newline characters.
129    *
130    * @deprecated Use {@code printer().shortDebugString(UnknownFieldSet)}
131    */
132   @Deprecated
shortDebugString(final UnknownFieldSet fields)133   public static String shortDebugString(final UnknownFieldSet fields) {
134     return printer().shortDebugString(fields);
135   }
136 
137   /**
138    * Like {@code print()}, but writes directly to a {@code String} and returns it.
139    *
140    * @deprecated Use {@link MessageOrBuilder#toString()}
141    */
142   @Deprecated
printToString(final MessageOrBuilder message)143   public static String printToString(final MessageOrBuilder message) {
144     return printer().printToString(message);
145   }
146 
147   /**
148    * Like {@code print()}, but writes directly to a {@code String} and returns it.
149    *
150    * @deprecated Use {@link UnknownFieldSet#toString()}
151    */
152   @Deprecated
printToString(final UnknownFieldSet fields)153   public static String printToString(final UnknownFieldSet fields) {
154     return printer().printToString(fields);
155   }
156 
157   /**
158    * Same as {@code printToString()}, except that non-ASCII characters in string type fields are not
159    * escaped in backslash+octals.
160    *
161    * @deprecated Use {@code printer().escapingNonAscii(false).printToString(MessageOrBuilder)}
162    */
163   @Deprecated
printToUnicodeString(final MessageOrBuilder message)164   public static String printToUnicodeString(final MessageOrBuilder message) {
165     return printer().escapingNonAscii(false).printToString(message);
166   }
167 
168   /**
169    * Same as {@code printToString()}, except that non-ASCII characters in string type fields are not
170    * escaped in backslash+octals.
171    *
172    * @deprecated Use {@code printer().escapingNonAscii(false).printToString(UnknownFieldSet)}
173    */
174   @Deprecated
printToUnicodeString(final UnknownFieldSet fields)175   public static String printToUnicodeString(final UnknownFieldSet fields) {
176     return printer().escapingNonAscii(false).printToString(fields);
177   }
178 
179   /** @deprecated Use {@code printer().printField(FieldDescriptor, Object, Appendable)} */
180   @Deprecated
printField( final FieldDescriptor field, final Object value, final Appendable output)181   public static void printField(
182       final FieldDescriptor field, final Object value, final Appendable output) throws IOException {
183     printer().printField(field, value, output);
184   }
185 
186   /** @deprecated Use {@code printer().printFieldToString(FieldDescriptor, Object)} */
187   @Deprecated
printFieldToString(final FieldDescriptor field, final Object value)188   public static String printFieldToString(final FieldDescriptor field, final Object value) {
189     return printer().printFieldToString(field, value);
190   }
191 
192   /**
193    * Outputs a unicode textual representation of the value of given field value.
194    *
195    * <p>Same as {@code printFieldValue()}, except that non-ASCII characters in string type fields
196    * are not escaped in backslash+octals.
197    *
198    * @deprecated Use {@code printer().escapingNonAscii(false).printFieldValue(FieldDescriptor,
199    *     Object, Appendable)}
200    * @param field the descriptor of the field
201    * @param value the value of the field
202    * @param output the output to which to append the formatted value
203    * @throws ClassCastException if the value is not appropriate for the given field descriptor
204    * @throws IOException if there is an exception writing to the output
205    */
206   @Deprecated
printUnicodeFieldValue( final FieldDescriptor field, final Object value, final Appendable output)207   public static void printUnicodeFieldValue(
208       final FieldDescriptor field, final Object value, final Appendable output) throws IOException {
209     printer().escapingNonAscii(false).printFieldValue(field, value, output);
210   }
211 
212   /**
213    * Outputs a textual representation of the value of given field value.
214    *
215    * @deprecated Use {@code printer().printFieldValue(FieldDescriptor, Object, Appendable)}
216    * @param field the descriptor of the field
217    * @param value the value of the field
218    * @param output the output to which to append the formatted value
219    * @throws ClassCastException if the value is not appropriate for the given field descriptor
220    * @throws IOException if there is an exception writing to the output
221    */
222   @Deprecated
printFieldValue( final FieldDescriptor field, final Object value, final Appendable output)223   public static void printFieldValue(
224       final FieldDescriptor field, final Object value, final Appendable output) throws IOException {
225     printer().printFieldValue(field, value, output);
226   }
227 
228   /**
229    * Outputs a textual representation of the value of an unknown field.
230    *
231    * @param tag the field's tag number
232    * @param value the value of the field
233    * @param output the output to which to append the formatted value
234    * @throws ClassCastException if the value is not appropriate for the given field descriptor
235    * @throws IOException if there is an exception writing to the output
236    */
printUnknownFieldValue( final int tag, final Object value, final Appendable output)237   public static void printUnknownFieldValue(
238       final int tag, final Object value, final Appendable output) throws IOException {
239     printUnknownFieldValue(tag, value, multiLineOutput(output));
240   }
241 
printUnknownFieldValue( final int tag, final Object value, final TextGenerator generator)242   private static void printUnknownFieldValue(
243       final int tag, final Object value, final TextGenerator generator) throws IOException {
244     switch (WireFormat.getTagWireType(tag)) {
245       case WireFormat.WIRETYPE_VARINT:
246         generator.print(unsignedToString((Long) value));
247         break;
248       case WireFormat.WIRETYPE_FIXED32:
249         generator.print(String.format((Locale) null, "0x%08x", (Integer) value));
250         break;
251       case WireFormat.WIRETYPE_FIXED64:
252         generator.print(String.format((Locale) null, "0x%016x", (Long) value));
253         break;
254       case WireFormat.WIRETYPE_LENGTH_DELIMITED:
255         try {
256           // Try to parse and print the field as an embedded message
257           UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value);
258           generator.print("{");
259           generator.eol();
260           generator.indent();
261           Printer.printUnknownFields(message, generator);
262           generator.outdent();
263           generator.print("}");
264         } catch (InvalidProtocolBufferException e) {
265           // If not parseable as a message, print as a String
266           generator.print("\"");
267           generator.print(escapeBytes((ByteString) value));
268           generator.print("\"");
269         }
270         break;
271       case WireFormat.WIRETYPE_START_GROUP:
272         Printer.printUnknownFields((UnknownFieldSet) value, generator);
273         break;
274       default:
275         throw new IllegalArgumentException("Bad tag: " + tag);
276     }
277   }
278 
279   /** Printer instance which escapes non-ASCII characters. */
printer()280   public static Printer printer() {
281     return Printer.DEFAULT;
282   }
283 
284   /** Helper class for converting protobufs to text. */
285   public static final class Printer {
286 
287     // Printer instance which escapes non-ASCII characters.
288     private static final Printer DEFAULT = new Printer(true);
289 
290     /** Whether to escape non ASCII characters with backslash and octal. */
291     private final boolean escapeNonAscii;
292 
Printer(boolean escapeNonAscii)293     private Printer(boolean escapeNonAscii) {
294       this.escapeNonAscii = escapeNonAscii;
295     }
296 
297     /**
298      * Return a new Printer instance with the specified escape mode.
299      *
300      * @param escapeNonAscii If true, the new Printer will escape non-ASCII characters (this is the
301      *     default behavior. If false, the new Printer will print non-ASCII characters as is. In
302      *     either case, the new Printer still escapes newlines and quotes in strings.
303      * @return a new Printer that clones all other configurations from the current {@link Printer},
304      *     with the escape mode set to the given parameter.
305      */
escapingNonAscii(boolean escapeNonAscii)306     public Printer escapingNonAscii(boolean escapeNonAscii) {
307       return new Printer(escapeNonAscii);
308     }
309 
310     /**
311      * Outputs a textual representation of the Protocol Message supplied into the parameter output.
312      * (This representation is the new version of the classic "ProtocolPrinter" output from the
313      * original Protocol Buffer system)
314      */
print(final MessageOrBuilder message, final Appendable output)315     public void print(final MessageOrBuilder message, final Appendable output) throws IOException {
316       print(message, multiLineOutput(output));
317     }
318 
319     /** Outputs a textual representation of {@code fields} to {@code output}. */
print(final UnknownFieldSet fields, final Appendable output)320     public void print(final UnknownFieldSet fields, final Appendable output) throws IOException {
321       printUnknownFields(fields, multiLineOutput(output));
322     }
323 
print(final MessageOrBuilder message, final TextGenerator generator)324     private void print(final MessageOrBuilder message, final TextGenerator generator)
325         throws IOException {
326       printMessage(message, generator);
327     }
328 
printFieldToString(final FieldDescriptor field, final Object value)329     public String printFieldToString(final FieldDescriptor field, final Object value) {
330       try {
331         final StringBuilder text = new StringBuilder();
332         printField(field, value, text);
333         return text.toString();
334       } catch (IOException e) {
335         throw new IllegalStateException(e);
336       }
337     }
338 
printField(final FieldDescriptor field, final Object value, final Appendable output)339     public void printField(final FieldDescriptor field, final Object value, final Appendable output)
340         throws IOException {
341       printField(field, value, multiLineOutput(output));
342     }
343 
printField( final FieldDescriptor field, final Object value, final TextGenerator generator)344     private void printField(
345         final FieldDescriptor field, final Object value, final TextGenerator generator)
346         throws IOException {
347       if (field.isRepeated()) {
348         // Repeated field.  Print each element.
349         for (Object element : (List<?>) value) {
350           printSingleField(field, element, generator);
351         }
352       } else {
353         printSingleField(field, value, generator);
354       }
355     }
356 
357     /**
358      * Outputs a textual representation of the value of given field value.
359      *
360      * @param field the descriptor of the field
361      * @param value the value of the field
362      * @param output the output to which to append the formatted value
363      * @throws ClassCastException if the value is not appropriate for the given field descriptor
364      * @throws IOException if there is an exception writing to the output
365      */
printFieldValue( final FieldDescriptor field, final Object value, final Appendable output)366     public void printFieldValue(
367         final FieldDescriptor field, final Object value, final Appendable output)
368         throws IOException {
369       printFieldValue(field, value, multiLineOutput(output));
370     }
371 
printFieldValue( final FieldDescriptor field, final Object value, final TextGenerator generator)372     private void printFieldValue(
373         final FieldDescriptor field, final Object value, final TextGenerator generator)
374         throws IOException {
375       switch (field.getType()) {
376         case INT32:
377         case SINT32:
378         case SFIXED32:
379           generator.print(((Integer) value).toString());
380           break;
381 
382         case INT64:
383         case SINT64:
384         case SFIXED64:
385           generator.print(((Long) value).toString());
386           break;
387 
388         case BOOL:
389           generator.print(((Boolean) value).toString());
390           break;
391 
392         case FLOAT:
393           generator.print(((Float) value).toString());
394           break;
395 
396         case DOUBLE:
397           generator.print(((Double) value).toString());
398           break;
399 
400         case UINT32:
401         case FIXED32:
402           generator.print(unsignedToString((Integer) value));
403           break;
404 
405         case UINT64:
406         case FIXED64:
407           generator.print(unsignedToString((Long) value));
408           break;
409 
410         case STRING:
411           generator.print("\"");
412           generator.print(
413               escapeNonAscii
414                   ? TextFormatEscaper.escapeText((String) value)
415                   : escapeDoubleQuotesAndBackslashes((String) value).replace("\n", "\\n"));
416           generator.print("\"");
417           break;
418 
419         case BYTES:
420           generator.print("\"");
421           if (value instanceof ByteString) {
422             generator.print(escapeBytes((ByteString) value));
423           } else {
424             generator.print(escapeBytes((byte[]) value));
425           }
426           generator.print("\"");
427           break;
428 
429         case ENUM:
430           generator.print(((EnumValueDescriptor) value).getName());
431           break;
432 
433         case MESSAGE:
434         case GROUP:
435           print((Message) value, generator);
436           break;
437       }
438     }
439 
440     /** Like {@code print()}, but writes directly to a {@code String} and returns it. */
printToString(final MessageOrBuilder message)441     public String printToString(final MessageOrBuilder message) {
442       try {
443         final StringBuilder text = new StringBuilder();
444         print(message, text);
445         return text.toString();
446       } catch (IOException e) {
447         throw new IllegalStateException(e);
448       }
449     }
450     /** Like {@code print()}, but writes directly to a {@code String} and returns it. */
printToString(final UnknownFieldSet fields)451     public String printToString(final UnknownFieldSet fields) {
452       try {
453         final StringBuilder text = new StringBuilder();
454         print(fields, text);
455         return text.toString();
456       } catch (IOException e) {
457         throw new IllegalStateException(e);
458       }
459     }
460 
461     /**
462      * Generates a human readable form of this message, useful for debugging and other purposes,
463      * with no newline characters.
464      */
shortDebugString(final MessageOrBuilder message)465     public String shortDebugString(final MessageOrBuilder message) {
466       try {
467         final StringBuilder text = new StringBuilder();
468         print(message, singleLineOutput(text));
469         return text.toString();
470       } catch (IOException e) {
471         throw new IllegalStateException(e);
472       }
473     }
474 
475     /**
476      * Generates a human readable form of the field, useful for debugging and other purposes, with
477      * no newline characters.
478      */
shortDebugString(final FieldDescriptor field, final Object value)479     public String shortDebugString(final FieldDescriptor field, final Object value) {
480       try {
481         final StringBuilder text = new StringBuilder();
482         printField(field, value, singleLineOutput(text));
483         return text.toString();
484       } catch (IOException e) {
485         throw new IllegalStateException(e);
486       }
487     }
488 
489     /**
490      * Generates a human readable form of the unknown fields, useful for debugging and other
491      * purposes, with no newline characters.
492      */
shortDebugString(final UnknownFieldSet fields)493     public String shortDebugString(final UnknownFieldSet fields) {
494       try {
495         final StringBuilder text = new StringBuilder();
496         printUnknownFields(fields, singleLineOutput(text));
497         return text.toString();
498       } catch (IOException e) {
499         throw new IllegalStateException(e);
500       }
501     }
502 
printUnknownFieldValue( final int tag, final Object value, final TextGenerator generator)503     private static void printUnknownFieldValue(
504         final int tag, final Object value, final TextGenerator generator) throws IOException {
505       switch (WireFormat.getTagWireType(tag)) {
506         case WireFormat.WIRETYPE_VARINT:
507           generator.print(unsignedToString((Long) value));
508           break;
509         case WireFormat.WIRETYPE_FIXED32:
510           generator.print(String.format((Locale) null, "0x%08x", (Integer) value));
511           break;
512         case WireFormat.WIRETYPE_FIXED64:
513           generator.print(String.format((Locale) null, "0x%016x", (Long) value));
514           break;
515         case WireFormat.WIRETYPE_LENGTH_DELIMITED:
516           try {
517             // Try to parse and print the field as an embedded message
518             UnknownFieldSet message = UnknownFieldSet.parseFrom((ByteString) value);
519             generator.print("{");
520             generator.eol();
521             generator.indent();
522             printUnknownFields(message, generator);
523             generator.outdent();
524             generator.print("}");
525           } catch (InvalidProtocolBufferException e) {
526             // If not parseable as a message, print as a String
527             generator.print("\"");
528             generator.print(escapeBytes((ByteString) value));
529             generator.print("\"");
530           }
531           break;
532         case WireFormat.WIRETYPE_START_GROUP:
533           printUnknownFields((UnknownFieldSet) value, generator);
534           break;
535         default:
536           throw new IllegalArgumentException("Bad tag: " + tag);
537       }
538     }
539 
printMessage(final MessageOrBuilder message, final TextGenerator generator)540     private void printMessage(final MessageOrBuilder message, final TextGenerator generator)
541         throws IOException {
542       for (Map.Entry<FieldDescriptor, Object> field : message.getAllFields().entrySet()) {
543         printField(field.getKey(), field.getValue(), generator);
544       }
545       printUnknownFields(message.getUnknownFields(), generator);
546     }
547 
printSingleField( final FieldDescriptor field, final Object value, final TextGenerator generator)548     private void printSingleField(
549         final FieldDescriptor field, final Object value, final TextGenerator generator)
550         throws IOException {
551       if (field.isExtension()) {
552         generator.print("[");
553         // We special-case MessageSet elements for compatibility with proto1.
554         if (field.getContainingType().getOptions().getMessageSetWireFormat()
555             && (field.getType() == FieldDescriptor.Type.MESSAGE)
556             && (field.isOptional())
557             // object equality
558             && (field.getExtensionScope() == field.getMessageType())) {
559           generator.print(field.getMessageType().getFullName());
560         } else {
561           generator.print(field.getFullName());
562         }
563         generator.print("]");
564       } else {
565         if (field.getType() == FieldDescriptor.Type.GROUP) {
566           // Groups must be serialized with their original capitalization.
567           generator.print(field.getMessageType().getName());
568         } else {
569           generator.print(field.getName());
570         }
571       }
572 
573       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
574         generator.print(" {");
575         generator.eol();
576         generator.indent();
577       } else {
578         generator.print(": ");
579       }
580 
581       printFieldValue(field, value, generator);
582 
583       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
584         generator.outdent();
585         generator.print("}");
586       }
587       generator.eol();
588     }
589 
printUnknownFields( final UnknownFieldSet unknownFields, final TextGenerator generator)590     private static void printUnknownFields(
591         final UnknownFieldSet unknownFields, final TextGenerator generator) throws IOException {
592       for (Map.Entry<Integer, UnknownFieldSet.Field> entry : unknownFields.asMap().entrySet()) {
593         final int number = entry.getKey();
594         final UnknownFieldSet.Field field = entry.getValue();
595         printUnknownField(number, WireFormat.WIRETYPE_VARINT, field.getVarintList(), generator);
596         printUnknownField(number, WireFormat.WIRETYPE_FIXED32, field.getFixed32List(), generator);
597         printUnknownField(number, WireFormat.WIRETYPE_FIXED64, field.getFixed64List(), generator);
598         printUnknownField(
599             number,
600             WireFormat.WIRETYPE_LENGTH_DELIMITED,
601             field.getLengthDelimitedList(),
602             generator);
603         for (final UnknownFieldSet value : field.getGroupList()) {
604           generator.print(entry.getKey().toString());
605           generator.print(" {");
606           generator.eol();
607           generator.indent();
608           printUnknownFields(value, generator);
609           generator.outdent();
610           generator.print("}");
611           generator.eol();
612         }
613       }
614     }
615 
printUnknownField( final int number, final int wireType, final List<?> values, final TextGenerator generator)616     private static void printUnknownField(
617         final int number, final int wireType, final List<?> values, final TextGenerator generator)
618         throws IOException {
619       for (final Object value : values) {
620         generator.print(String.valueOf(number));
621         generator.print(": ");
622         printUnknownFieldValue(wireType, value, generator);
623         generator.eol();
624       }
625     }
626   }
627 
628   /** Convert an unsigned 32-bit integer to a string. */
unsignedToString(final int value)629   public static String unsignedToString(final int value) {
630     if (value >= 0) {
631       return Integer.toString(value);
632     } else {
633       return Long.toString(value & 0x00000000FFFFFFFFL);
634     }
635   }
636 
637   /** Convert an unsigned 64-bit integer to a string. */
unsignedToString(final long value)638   public static String unsignedToString(final long value) {
639     if (value >= 0) {
640       return Long.toString(value);
641     } else {
642       // Pull off the most-significant bit so that BigInteger doesn't think
643       // the number is negative, then set it again using setBit().
644       return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL).setBit(63).toString();
645     }
646   }
647 
multiLineOutput(Appendable output)648   private static TextGenerator multiLineOutput(Appendable output) {
649     return new TextGenerator(output, false);
650   }
651 
singleLineOutput(Appendable output)652   private static TextGenerator singleLineOutput(Appendable output) {
653     return new TextGenerator(output, true);
654   }
655 
656   /** An inner class for writing text to the output stream. */
657   private static final class TextGenerator {
658     private final Appendable output;
659     private final StringBuilder indent = new StringBuilder();
660     private final boolean singleLineMode;
661     // While technically we are "at the start of a line" at the very beginning of the output, all
662     // we would do in response to this is emit the (zero length) indentation, so it has no effect.
663     // Setting it false here does however suppress an unwanted leading space in single-line mode.
664     private boolean atStartOfLine = false;
665 
TextGenerator(final Appendable output, boolean singleLineMode)666     private TextGenerator(final Appendable output, boolean singleLineMode) {
667       this.output = output;
668       this.singleLineMode = singleLineMode;
669     }
670 
671     /**
672      * Indent text by two spaces. After calling Indent(), two spaces will be inserted at the
673      * beginning of each line of text. Indent() may be called multiple times to produce deeper
674      * indents.
675      */
indent()676     public void indent() {
677       indent.append("  ");
678     }
679 
680     /** Reduces the current indent level by two spaces, or crashes if the indent level is zero. */
outdent()681     public void outdent() {
682       final int length = indent.length();
683       if (length == 0) {
684         throw new IllegalArgumentException(" Outdent() without matching Indent().");
685       }
686       indent.setLength(length - 2);
687     }
688 
689     /**
690      * Print text to the output stream. Bare newlines are never expected to be passed to this
691      * method; to indicate the end of a line, call "eol()".
692      */
print(final CharSequence text)693     public void print(final CharSequence text) throws IOException {
694       if (atStartOfLine) {
695         atStartOfLine = false;
696         output.append(singleLineMode ? " " : indent);
697       }
698       output.append(text);
699     }
700 
701     /**
702      * Signifies reaching the "end of the current line" in the output. In single-line mode, this
703      * does not result in a newline being emitted, but ensures that a separating space is written
704      * before the next output.
705      */
eol()706     public void eol() throws IOException {
707       if (!singleLineMode) {
708         output.append("\n");
709       }
710       atStartOfLine = true;
711     }
712   }
713 
714   // =================================================================
715   // Parsing
716 
717   /**
718    * Represents a stream of tokens parsed from a {@code String}.
719    *
720    * <p>The Java standard library provides many classes that you might think would be useful for
721    * implementing this, but aren't. For example:
722    *
723    * <ul>
724    *   <li>{@code java.io.StreamTokenizer}: This almost does what we want -- or, at least, something
725    *       that would get us close to what we want -- except for one fatal flaw: It automatically
726    *       un-escapes strings using Java escape sequences, which do not include all the escape
727    *       sequences we need to support (e.g. '\x').
728    *   <li>{@code java.util.Scanner}: This seems like a great way at least to parse regular
729    *       expressions out of a stream (so we wouldn't have to load the entire input into a single
730    *       string before parsing). Sadly, {@code Scanner} requires that tokens be delimited with
731    *       some delimiter. Thus, although the text "foo:" should parse to two tokens ("foo" and
732    *       ":"), {@code Scanner} would recognize it only as a single token. Furthermore, {@code
733    *       Scanner} provides no way to inspect the contents of delimiters, making it impossible to
734    *       keep track of line and column numbers.
735    * </ul>
736    *
737    * <p>Luckily, Java's regular expression support does manage to be useful to us. (Barely: We need
738    * {@code Matcher.usePattern()}, which is new in Java 1.5.) So, we can use that, at least.
739    * Unfortunately, this implies that we need to have the entire input in one contiguous string.
740    */
741   private static final class Tokenizer {
742     private final CharSequence text;
743     private final Matcher matcher;
744     private String currentToken;
745 
746     // The character index within this.text at which the current token begins.
747     private int pos = 0;
748 
749     // The line and column numbers of the current token.
750     private int line = 0;
751     private int column = 0;
752 
753     // The line and column numbers of the previous token (allows throwing
754     // errors *after* consuming).
755     private int previousLine = 0;
756     private int previousColumn = 0;
757 
758     // We use possessive quantifiers (*+ and ++) because otherwise the Java
759     // regex matcher has stack overflows on large inputs.
760     private static final Pattern WHITESPACE = Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
761     private static final Pattern TOKEN =
762         Pattern.compile(
763             "[a-zA-Z_][0-9a-zA-Z_+-]*+|" // an identifier
764                 + "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" // a number
765                 + "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" // a double-quoted string
766                 + "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)", // a single-quoted string
767             Pattern.MULTILINE);
768 
769     private static final Pattern DOUBLE_INFINITY =
770         Pattern.compile("-?inf(inity)?", Pattern.CASE_INSENSITIVE);
771     private static final Pattern FLOAT_INFINITY =
772         Pattern.compile("-?inf(inity)?f?", Pattern.CASE_INSENSITIVE);
773     private static final Pattern FLOAT_NAN = Pattern.compile("nanf?", Pattern.CASE_INSENSITIVE);
774 
775     /** Construct a tokenizer that parses tokens from the given text. */
Tokenizer(final CharSequence text)776     private Tokenizer(final CharSequence text) {
777       this.text = text;
778       this.matcher = WHITESPACE.matcher(text);
779       skipWhitespace();
780       nextToken();
781     }
782 
getPreviousLine()783     int getPreviousLine() {
784       return previousLine;
785     }
786 
getPreviousColumn()787     int getPreviousColumn() {
788       return previousColumn;
789     }
790 
getLine()791     int getLine() {
792       return line;
793     }
794 
getColumn()795     int getColumn() {
796       return column;
797     }
798 
799     /** Are we at the end of the input? */
atEnd()800     public boolean atEnd() {
801       return currentToken.length() == 0;
802     }
803 
804     /** Advance to the next token. */
nextToken()805     public void nextToken() {
806       previousLine = line;
807       previousColumn = column;
808 
809       // Advance the line counter to the current position.
810       while (pos < matcher.regionStart()) {
811         if (text.charAt(pos) == '\n') {
812           ++line;
813           column = 0;
814         } else {
815           ++column;
816         }
817         ++pos;
818       }
819 
820       // Match the next token.
821       if (matcher.regionStart() == matcher.regionEnd()) {
822         // EOF
823         currentToken = "";
824       } else {
825         matcher.usePattern(TOKEN);
826         if (matcher.lookingAt()) {
827           currentToken = matcher.group();
828           matcher.region(matcher.end(), matcher.regionEnd());
829         } else {
830           // Take one character.
831           currentToken = String.valueOf(text.charAt(pos));
832           matcher.region(pos + 1, matcher.regionEnd());
833         }
834 
835         skipWhitespace();
836       }
837     }
838 
839     /** Skip over any whitespace so that the matcher region starts at the next token. */
skipWhitespace()840     private void skipWhitespace() {
841       matcher.usePattern(WHITESPACE);
842       if (matcher.lookingAt()) {
843         matcher.region(matcher.end(), matcher.regionEnd());
844       }
845     }
846 
847     /**
848      * If the next token exactly matches {@code token}, consume it and return {@code true}.
849      * Otherwise, return {@code false} without doing anything.
850      */
tryConsume(final String token)851     public boolean tryConsume(final String token) {
852       if (currentToken.equals(token)) {
853         nextToken();
854         return true;
855       } else {
856         return false;
857       }
858     }
859 
860     /**
861      * If the next token exactly matches {@code token}, consume it. Otherwise, throw a {@link
862      * ParseException}.
863      */
consume(final String token)864     public void consume(final String token) throws ParseException {
865       if (!tryConsume(token)) {
866         throw parseException("Expected \"" + token + "\".");
867       }
868     }
869 
870     /** Returns {@code true} if the next token is an integer, but does not consume it. */
lookingAtInteger()871     public boolean lookingAtInteger() {
872       if (currentToken.length() == 0) {
873         return false;
874       }
875 
876       final char c = currentToken.charAt(0);
877       return ('0' <= c && c <= '9') || c == '-' || c == '+';
878     }
879 
880     /** Returns {@code true} if the current token's text is equal to that specified. */
lookingAt(String text)881     public boolean lookingAt(String text) {
882       return currentToken.equals(text);
883     }
884 
885     /**
886      * If the next token is an identifier, consume it and return its value. Otherwise, throw a
887      * {@link ParseException}.
888      */
consumeIdentifier()889     public String consumeIdentifier() throws ParseException {
890       for (int i = 0; i < currentToken.length(); i++) {
891         final char c = currentToken.charAt(i);
892         if (('a' <= c && c <= 'z')
893             || ('A' <= c && c <= 'Z')
894             || ('0' <= c && c <= '9')
895             || (c == '_')
896             || (c == '.')) {
897           // OK
898         } else {
899           throw parseException("Expected identifier. Found '" + currentToken + "'");
900         }
901       }
902 
903       final String result = currentToken;
904       nextToken();
905       return result;
906     }
907 
908     /**
909      * If the next token is an identifier, consume it and return {@code true}. Otherwise, return
910      * {@code false} without doing anything.
911      */
tryConsumeIdentifier()912     public boolean tryConsumeIdentifier() {
913       try {
914         consumeIdentifier();
915         return true;
916       } catch (ParseException e) {
917         return false;
918       }
919     }
920 
921     /**
922      * If the next token is a 32-bit signed integer, consume it and return its value. Otherwise,
923      * throw a {@link ParseException}.
924      */
consumeInt32()925     public int consumeInt32() throws ParseException {
926       try {
927         final int result = parseInt32(currentToken);
928         nextToken();
929         return result;
930       } catch (NumberFormatException e) {
931         throw integerParseException(e);
932       }
933     }
934 
935     /**
936      * If the next token is a 32-bit unsigned integer, consume it and return its value. Otherwise,
937      * throw a {@link ParseException}.
938      */
consumeUInt32()939     public int consumeUInt32() throws ParseException {
940       try {
941         final int result = parseUInt32(currentToken);
942         nextToken();
943         return result;
944       } catch (NumberFormatException e) {
945         throw integerParseException(e);
946       }
947     }
948 
949     /**
950      * If the next token is a 64-bit signed integer, consume it and return its value. Otherwise,
951      * throw a {@link ParseException}.
952      */
consumeInt64()953     public long consumeInt64() throws ParseException {
954       try {
955         final long result = parseInt64(currentToken);
956         nextToken();
957         return result;
958       } catch (NumberFormatException e) {
959         throw integerParseException(e);
960       }
961     }
962 
963     /**
964      * If the next token is a 64-bit signed integer, consume it and return {@code true}. Otherwise,
965      * return {@code false} without doing anything.
966      */
tryConsumeInt64()967     public boolean tryConsumeInt64() {
968       try {
969         consumeInt64();
970         return true;
971       } catch (ParseException e) {
972         return false;
973       }
974     }
975 
976     /**
977      * If the next token is a 64-bit unsigned integer, consume it and return its value. Otherwise,
978      * throw a {@link ParseException}.
979      */
consumeUInt64()980     public long consumeUInt64() throws ParseException {
981       try {
982         final long result = parseUInt64(currentToken);
983         nextToken();
984         return result;
985       } catch (NumberFormatException e) {
986         throw integerParseException(e);
987       }
988     }
989 
990     /**
991      * If the next token is a 64-bit unsigned integer, consume it and return {@code true}.
992      * Otherwise, return {@code false} without doing anything.
993      */
tryConsumeUInt64()994     public boolean tryConsumeUInt64() {
995       try {
996         consumeUInt64();
997         return true;
998       } catch (ParseException e) {
999         return false;
1000       }
1001     }
1002 
1003     /**
1004      * If the next token is a double, consume it and return its value. Otherwise, throw a {@link
1005      * ParseException}.
1006      */
consumeDouble()1007     public double consumeDouble() throws ParseException {
1008       // We need to parse infinity and nan separately because
1009       // Double.parseDouble() does not accept "inf", "infinity", or "nan".
1010       if (DOUBLE_INFINITY.matcher(currentToken).matches()) {
1011         final boolean negative = currentToken.startsWith("-");
1012         nextToken();
1013         return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
1014       }
1015       if (currentToken.equalsIgnoreCase("nan")) {
1016         nextToken();
1017         return Double.NaN;
1018       }
1019       try {
1020         final double result = Double.parseDouble(currentToken);
1021         nextToken();
1022         return result;
1023       } catch (NumberFormatException e) {
1024         throw floatParseException(e);
1025       }
1026     }
1027 
1028     /**
1029      * If the next token is a double, consume it and return {@code true}. Otherwise, return {@code
1030      * false} without doing anything.
1031      */
tryConsumeDouble()1032     public boolean tryConsumeDouble() {
1033       try {
1034         consumeDouble();
1035         return true;
1036       } catch (ParseException e) {
1037         return false;
1038       }
1039     }
1040 
1041     /**
1042      * If the next token is a float, consume it and return its value. Otherwise, throw a {@link
1043      * ParseException}.
1044      */
consumeFloat()1045     public float consumeFloat() throws ParseException {
1046       // We need to parse infinity and nan separately because
1047       // Float.parseFloat() does not accept "inf", "infinity", or "nan".
1048       if (FLOAT_INFINITY.matcher(currentToken).matches()) {
1049         final boolean negative = currentToken.startsWith("-");
1050         nextToken();
1051         return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
1052       }
1053       if (FLOAT_NAN.matcher(currentToken).matches()) {
1054         nextToken();
1055         return Float.NaN;
1056       }
1057       try {
1058         final float result = Float.parseFloat(currentToken);
1059         nextToken();
1060         return result;
1061       } catch (NumberFormatException e) {
1062         throw floatParseException(e);
1063       }
1064     }
1065 
1066     /**
1067      * If the next token is a float, consume it and return {@code true}. Otherwise, return {@code
1068      * false} without doing anything.
1069      */
tryConsumeFloat()1070     public boolean tryConsumeFloat() {
1071       try {
1072         consumeFloat();
1073         return true;
1074       } catch (ParseException e) {
1075         return false;
1076       }
1077     }
1078 
1079     /**
1080      * If the next token is a boolean, consume it and return its value. Otherwise, throw a {@link
1081      * ParseException}.
1082      */
consumeBoolean()1083     public boolean consumeBoolean() throws ParseException {
1084       if (currentToken.equals("true")
1085           || currentToken.equals("True")
1086           || currentToken.equals("t")
1087           || currentToken.equals("1")) {
1088         nextToken();
1089         return true;
1090       } else if (currentToken.equals("false")
1091           || currentToken.equals("False")
1092           || currentToken.equals("f")
1093           || currentToken.equals("0")) {
1094         nextToken();
1095         return false;
1096       } else {
1097         throw parseException("Expected \"true\" or \"false\". Found \"" + currentToken + "\".");
1098       }
1099     }
1100 
1101     /**
1102      * If the next token is a string, consume it and return its (unescaped) value. Otherwise, throw
1103      * a {@link ParseException}.
1104      */
consumeString()1105     public String consumeString() throws ParseException {
1106       return consumeByteString().toStringUtf8();
1107     }
1108 
1109     /** If the next token is a string, consume it and return true. Otherwise, return false. */
tryConsumeString()1110     public boolean tryConsumeString() {
1111       try {
1112         consumeString();
1113         return true;
1114       } catch (ParseException e) {
1115         return false;
1116       }
1117     }
1118 
1119     /**
1120      * If the next token is a string, consume it, unescape it as a {@link ByteString}, and return
1121      * it. Otherwise, throw a {@link ParseException}.
1122      */
consumeByteString()1123     public ByteString consumeByteString() throws ParseException {
1124       List<ByteString> list = new ArrayList<ByteString>();
1125       consumeByteString(list);
1126       while (currentToken.startsWith("'") || currentToken.startsWith("\"")) {
1127         consumeByteString(list);
1128       }
1129       return ByteString.copyFrom(list);
1130     }
1131 
1132     /**
1133      * Like {@link #consumeByteString()} but adds each token of the string to the given list. String
1134      * literals (whether bytes or text) may come in multiple adjacent tokens which are automatically
1135      * concatenated, like in C or Python.
1136      */
consumeByteString(List<ByteString> list)1137     private void consumeByteString(List<ByteString> list) throws ParseException {
1138       final char quote = currentToken.length() > 0 ? currentToken.charAt(0) : '\0';
1139       if (quote != '\"' && quote != '\'') {
1140         throw parseException("Expected string.");
1141       }
1142 
1143       if (currentToken.length() < 2 || currentToken.charAt(currentToken.length() - 1) != quote) {
1144         throw parseException("String missing ending quote.");
1145       }
1146 
1147       try {
1148         final String escaped = currentToken.substring(1, currentToken.length() - 1);
1149         final ByteString result = unescapeBytes(escaped);
1150         nextToken();
1151         list.add(result);
1152       } catch (InvalidEscapeSequenceException e) {
1153         throw parseException(e.getMessage());
1154       }
1155     }
1156 
1157     /**
1158      * Returns a {@link ParseException} with the current line and column numbers in the description,
1159      * suitable for throwing.
1160      */
parseException(final String description)1161     public ParseException parseException(final String description) {
1162       // Note:  People generally prefer one-based line and column numbers.
1163       return new ParseException(line + 1, column + 1, description);
1164     }
1165 
1166     /**
1167      * Returns a {@link ParseException} with the line and column numbers of the previous token in
1168      * the description, suitable for throwing.
1169      */
parseExceptionPreviousToken(final String description)1170     public ParseException parseExceptionPreviousToken(final String description) {
1171       // Note:  People generally prefer one-based line and column numbers.
1172       return new ParseException(previousLine + 1, previousColumn + 1, description);
1173     }
1174 
1175     /**
1176      * Constructs an appropriate {@link ParseException} for the given {@code NumberFormatException}
1177      * when trying to parse an integer.
1178      */
integerParseException(final NumberFormatException e)1179     private ParseException integerParseException(final NumberFormatException e) {
1180       return parseException("Couldn't parse integer: " + e.getMessage());
1181     }
1182 
1183     /**
1184      * Constructs an appropriate {@link ParseException} for the given {@code NumberFormatException}
1185      * when trying to parse a float or double.
1186      */
floatParseException(final NumberFormatException e)1187     private ParseException floatParseException(final NumberFormatException e) {
1188       return parseException("Couldn't parse number: " + e.getMessage());
1189     }
1190 
1191     /**
1192      * Returns a {@link UnknownFieldParseException} with the line and column numbers of the previous
1193      * token in the description, and the unknown field name, suitable for throwing.
1194      */
unknownFieldParseExceptionPreviousToken( final String unknownField, final String description)1195     public UnknownFieldParseException unknownFieldParseExceptionPreviousToken(
1196         final String unknownField, final String description) {
1197       // Note:  People generally prefer one-based line and column numbers.
1198       return new UnknownFieldParseException(
1199           previousLine + 1, previousColumn + 1, unknownField, description);
1200     }
1201   }
1202 
1203   /** Thrown when parsing an invalid text format message. */
1204   public static class ParseException extends IOException {
1205     private static final long serialVersionUID = 3196188060225107702L;
1206 
1207     private final int line;
1208     private final int column;
1209 
1210     /** Create a new instance, with -1 as the line and column numbers. */
ParseException(final String message)1211     public ParseException(final String message) {
1212       this(-1, -1, message);
1213     }
1214 
1215     /**
1216      * Create a new instance
1217      *
1218      * @param line the line number where the parse error occurred, using 1-offset.
1219      * @param column the column number where the parser error occurred, using 1-offset.
1220      */
ParseException(final int line, final int column, final String message)1221     public ParseException(final int line, final int column, final String message) {
1222       super(Integer.toString(line) + ":" + column + ": " + message);
1223       this.line = line;
1224       this.column = column;
1225     }
1226 
1227     /**
1228      * Return the line where the parse exception occurred, or -1 when none is provided. The value is
1229      * specified as 1-offset, so the first line is line 1.
1230      */
getLine()1231     public int getLine() {
1232       return line;
1233     }
1234 
1235     /**
1236      * Return the column where the parse exception occurred, or -1 when none is provided. The value
1237      * is specified as 1-offset, so the first line is line 1.
1238      */
getColumn()1239     public int getColumn() {
1240       return column;
1241     }
1242   }
1243 
1244   /** Thrown when encountering an unknown field while parsing a text format message. */
1245   public static class UnknownFieldParseException extends ParseException {
1246     private final String unknownField;
1247 
1248     /**
1249      * Create a new instance, with -1 as the line and column numbers, and an empty unknown field
1250      * name.
1251      */
UnknownFieldParseException(final String message)1252     public UnknownFieldParseException(final String message) {
1253       this(-1, -1, "", message);
1254     }
1255 
1256     /**
1257      * Create a new instance
1258      *
1259      * @param line the line number where the parse error occurred, using 1-offset.
1260      * @param column the column number where the parser error occurred, using 1-offset.
1261      * @param unknownField the name of the unknown field found while parsing.
1262      */
UnknownFieldParseException( final int line, final int column, final String unknownField, final String message)1263     public UnknownFieldParseException(
1264         final int line, final int column, final String unknownField, final String message) {
1265       super(line, column, message);
1266       this.unknownField = unknownField;
1267     }
1268 
1269     /**
1270      * Return the name of the unknown field encountered while parsing the protocol buffer string.
1271      */
getUnknownField()1272     public String getUnknownField() {
1273       return unknownField;
1274     }
1275   }
1276 
1277   private static final Parser PARSER = Parser.newBuilder().build();
1278 
1279   /**
1280    * Return a {@link Parser} instance which can parse text-format messages. The returned instance is
1281    * thread-safe.
1282    */
getParser()1283   public static Parser getParser() {
1284     return PARSER;
1285   }
1286 
1287   /** Parse a text-format message from {@code input} and merge the contents into {@code builder}. */
merge(final Readable input, final Message.Builder builder)1288   public static void merge(final Readable input, final Message.Builder builder) throws IOException {
1289     PARSER.merge(input, builder);
1290   }
1291 
1292   /** Parse a text-format message from {@code input} and merge the contents into {@code builder}. */
merge(final CharSequence input, final Message.Builder builder)1293   public static void merge(final CharSequence input, final Message.Builder builder)
1294       throws ParseException {
1295     PARSER.merge(input, builder);
1296   }
1297 
1298   /**
1299    * Parse a text-format message from {@code input}.
1300    *
1301    * @return the parsed message, guaranteed initialized
1302    */
parse(final CharSequence input, final Class<T> protoClass)1303   public static <T extends Message> T parse(final CharSequence input, final Class<T> protoClass)
1304       throws ParseException {
1305     Message.Builder builder = Internal.getDefaultInstance(protoClass).newBuilderForType();
1306     merge(input, builder);
1307     @SuppressWarnings("unchecked")
1308     T output = (T) builder.build();
1309     return output;
1310   }
1311 
1312   /**
1313    * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1314    * Extensions will be recognized if they are registered in {@code extensionRegistry}.
1315    */
merge( final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1316   public static void merge(
1317       final Readable input,
1318       final ExtensionRegistry extensionRegistry,
1319       final Message.Builder builder)
1320       throws IOException {
1321     PARSER.merge(input, extensionRegistry, builder);
1322   }
1323 
1324 
1325   /**
1326    * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1327    * Extensions will be recognized if they are registered in {@code extensionRegistry}.
1328    */
merge( final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1329   public static void merge(
1330       final CharSequence input,
1331       final ExtensionRegistry extensionRegistry,
1332       final Message.Builder builder)
1333       throws ParseException {
1334     PARSER.merge(input, extensionRegistry, builder);
1335   }
1336 
1337   /**
1338    * Parse a text-format message from {@code input}. Extensions will be recognized if they are
1339    * registered in {@code extensionRegistry}.
1340    *
1341    * @return the parsed message, guaranteed initialized
1342    */
parse( final CharSequence input, final ExtensionRegistry extensionRegistry, final Class<T> protoClass)1343   public static <T extends Message> T parse(
1344       final CharSequence input,
1345       final ExtensionRegistry extensionRegistry,
1346       final Class<T> protoClass)
1347       throws ParseException {
1348     Message.Builder builder = Internal.getDefaultInstance(protoClass).newBuilderForType();
1349     merge(input, extensionRegistry, builder);
1350     @SuppressWarnings("unchecked")
1351     T output = (T) builder.build();
1352     return output;
1353   }
1354 
1355 
1356   /**
1357    * Parser for text-format proto2 instances. This class is thread-safe. The implementation largely
1358    * follows google/protobuf/text_format.cc.
1359    *
1360    * <p>Use {@link TextFormat#getParser()} to obtain the default parser, or {@link Builder} to
1361    * control the parser behavior.
1362    */
1363   public static class Parser {
1364     /**
1365      * Determines if repeated values for non-repeated fields and oneofs are permitted. For example,
1366      * given required/optional field "foo" and a oneof containing "baz" and "qux":
1367      *
1368      * <ul>
1369      *   <li>"foo: 1 foo: 2"
1370      *   <li>"baz: 1 qux: 2"
1371      *   <li>merging "foo: 2" into a proto in which foo is already set, or
1372      *   <li>merging "qux: 2" into a proto in which baz is already set.
1373      * </ul>
1374      */
1375     public enum SingularOverwritePolicy {
1376       /**
1377        * Later values are merged with earlier values. For primitive fields or conflicting oneofs,
1378        * the last value is retained.
1379        */
1380       ALLOW_SINGULAR_OVERWRITES,
1381       /** An error is issued. */
1382       FORBID_SINGULAR_OVERWRITES
1383     }
1384 
1385     private final boolean allowUnknownFields;
1386     private final boolean allowUnknownEnumValues;
1387     private final boolean allowUnknownExtensions;
1388     private final SingularOverwritePolicy singularOverwritePolicy;
1389     private TextFormatParseInfoTree.Builder parseInfoTreeBuilder;
1390 
Parser( boolean allowUnknownFields, boolean allowUnknownEnumValues, boolean allowUnknownExtensions, SingularOverwritePolicy singularOverwritePolicy, TextFormatParseInfoTree.Builder parseInfoTreeBuilder)1391     private Parser(
1392         boolean allowUnknownFields,
1393         boolean allowUnknownEnumValues,
1394         boolean allowUnknownExtensions,
1395         SingularOverwritePolicy singularOverwritePolicy,
1396         TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
1397       this.allowUnknownFields = allowUnknownFields;
1398       this.allowUnknownEnumValues = allowUnknownEnumValues;
1399       this.allowUnknownExtensions = allowUnknownExtensions;
1400       this.singularOverwritePolicy = singularOverwritePolicy;
1401       this.parseInfoTreeBuilder = parseInfoTreeBuilder;
1402     }
1403 
1404     /** Returns a new instance of {@link Builder}. */
newBuilder()1405     public static Builder newBuilder() {
1406       return new Builder();
1407     }
1408 
1409     /** Builder that can be used to obtain new instances of {@link Parser}. */
1410     public static class Builder {
1411       private boolean allowUnknownFields = false;
1412       private boolean allowUnknownEnumValues = false;
1413       private boolean allowUnknownExtensions = false;
1414       private SingularOverwritePolicy singularOverwritePolicy =
1415           SingularOverwritePolicy.ALLOW_SINGULAR_OVERWRITES;
1416       private TextFormatParseInfoTree.Builder parseInfoTreeBuilder = null;
1417 
1418       /**
1419        * Set whether this parser will allow unknown fields. By default, an exception is thrown if an
1420        * unknown field is encountered. If this is set, the parser will only log a warning. Allow
1421        * unknown fields will also allow unknown extensions.
1422        *
1423        * <p>Use of this parameter is discouraged which may hide some errors (e.g.
1424        * spelling error on field name).
1425        */
setAllowUnknownFields(boolean allowUnknownFields)1426       public Builder setAllowUnknownFields(boolean allowUnknownFields) {
1427         this.allowUnknownFields = allowUnknownFields;
1428         return this;
1429       }
1430 
1431       /**
1432        * Set whether this parser will allow unknown extensions. By default, an
1433        * exception is thrown if unknown extension is encountered. If this is set true,
1434        * the parser will only log a warning. Allow unknown extensions does not mean
1435        * allow normal unknown fields.
1436        */
setAllowUnknownExtensions(boolean allowUnknownExtensions)1437       public Builder setAllowUnknownExtensions(boolean allowUnknownExtensions) {
1438         this.allowUnknownExtensions = allowUnknownExtensions;
1439         return this;
1440       }
1441 
1442       /** Sets parser behavior when a non-repeated field appears more than once. */
setSingularOverwritePolicy(SingularOverwritePolicy p)1443       public Builder setSingularOverwritePolicy(SingularOverwritePolicy p) {
1444         this.singularOverwritePolicy = p;
1445         return this;
1446       }
1447 
setParseInfoTreeBuilder(TextFormatParseInfoTree.Builder parseInfoTreeBuilder)1448       public Builder setParseInfoTreeBuilder(TextFormatParseInfoTree.Builder parseInfoTreeBuilder) {
1449         this.parseInfoTreeBuilder = parseInfoTreeBuilder;
1450         return this;
1451       }
1452 
build()1453       public Parser build() {
1454         return new Parser(
1455             allowUnknownFields,
1456             allowUnknownEnumValues,
1457             allowUnknownExtensions,
1458             singularOverwritePolicy,
1459             parseInfoTreeBuilder);
1460       }
1461     }
1462 
1463     /**
1464      * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1465      */
merge(final Readable input, final Message.Builder builder)1466     public void merge(final Readable input, final Message.Builder builder) throws IOException {
1467       merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
1468     }
1469 
1470     /**
1471      * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1472      */
merge(final CharSequence input, final Message.Builder builder)1473     public void merge(final CharSequence input, final Message.Builder builder)
1474         throws ParseException {
1475       merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
1476     }
1477 
1478     /**
1479      * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1480      * Extensions will be recognized if they are registered in {@code extensionRegistry}.
1481      */
merge( final Readable input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1482     public void merge(
1483         final Readable input,
1484         final ExtensionRegistry extensionRegistry,
1485         final Message.Builder builder)
1486         throws IOException {
1487       // Read the entire input to a String then parse that.
1488 
1489       // If StreamTokenizer were not quite so crippled, or if there were a kind
1490       // of Reader that could read in chunks that match some particular regex,
1491       // or if we wanted to write a custom Reader to tokenize our stream, then
1492       // we would not have to read to one big String.  Alas, none of these is
1493       // the case.  Oh well.
1494 
1495       merge(toStringBuilder(input), extensionRegistry, builder);
1496     }
1497 
1498 
1499     private static final int BUFFER_SIZE = 4096;
1500 
1501     // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
1502     // overhead is worthwhile
toStringBuilder(final Readable input)1503     private static StringBuilder toStringBuilder(final Readable input) throws IOException {
1504       final StringBuilder text = new StringBuilder();
1505       final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
1506       while (true) {
1507         final int n = input.read(buffer);
1508         if (n == -1) {
1509           break;
1510         }
1511         buffer.flip();
1512         text.append(buffer, 0, n);
1513       }
1514       return text;
1515     }
1516 
1517     static final class UnknownField {
1518       static enum Type {
1519         FIELD, EXTENSION;
1520       }
1521 
1522       final String message;
1523       final Type type;
1524 
UnknownField(String message, Type type)1525       UnknownField(String message, Type type) {
1526         this.message = message;
1527         this.type = type;
1528       }
1529     }
1530 
1531     // Check both unknown fields and unknown extensions and log warning messages
1532     // or throw exceptions according to the flag.
checkUnknownFields(final List<UnknownField> unknownFields)1533     private void checkUnknownFields(final List<UnknownField> unknownFields) throws ParseException {
1534       if (unknownFields.isEmpty()) {
1535         return;
1536       }
1537 
1538       StringBuilder msg = new StringBuilder("Input contains unknown fields and/or extensions:");
1539       for (UnknownField field : unknownFields) {
1540         msg.append('\n').append(field.message);
1541       }
1542 
1543       if (allowUnknownFields) {
1544         logger.warning(msg.toString());
1545         return;
1546       }
1547 
1548       int firstErrorIndex = 0;
1549       if (allowUnknownExtensions) {
1550         boolean allUnknownExtensions = true;
1551         for (UnknownField field : unknownFields) {
1552           if (field.type == UnknownField.Type.FIELD) {
1553             allUnknownExtensions = false;
1554             break;
1555           }
1556           ++firstErrorIndex;
1557         }
1558         if (allUnknownExtensions) {
1559           logger.warning(msg.toString());
1560           return;
1561         }
1562       }
1563 
1564       String[] lineColumn = unknownFields.get(firstErrorIndex).message.split(":");
1565       throw new ParseException(
1566           Integer.parseInt(lineColumn[0]), Integer.parseInt(lineColumn[1]), msg.toString());
1567     }
1568 
1569     /**
1570      * Parse a text-format message from {@code input} and merge the contents into {@code builder}.
1571      * Extensions will be recognized if they are registered in {@code extensionRegistry}.
1572      */
merge( final CharSequence input, final ExtensionRegistry extensionRegistry, final Message.Builder builder)1573     public void merge(
1574         final CharSequence input,
1575         final ExtensionRegistry extensionRegistry,
1576         final Message.Builder builder)
1577         throws ParseException {
1578       final Tokenizer tokenizer = new Tokenizer(input);
1579       MessageReflection.BuilderAdapter target = new MessageReflection.BuilderAdapter(builder);
1580 
1581       List<UnknownField> unknownFields = new ArrayList<UnknownField>();
1582 
1583       while (!tokenizer.atEnd()) {
1584         mergeField(tokenizer, extensionRegistry, target, unknownFields);
1585       }
1586 
1587       checkUnknownFields(unknownFields);
1588     }
1589 
1590 
1591     /** Parse a single field from {@code tokenizer} and merge it into {@code builder}. */
mergeField( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, List<UnknownField> unknownFields)1592     private void mergeField(
1593         final Tokenizer tokenizer,
1594         final ExtensionRegistry extensionRegistry,
1595         final MessageReflection.MergeTarget target,
1596         List<UnknownField> unknownFields)
1597         throws ParseException {
1598       mergeField(
1599           tokenizer,
1600           extensionRegistry,
1601           target,
1602           parseInfoTreeBuilder,
1603           unknownFields);
1604     }
1605 
1606     /** Parse a single field from {@code tokenizer} and merge it into {@code target}. */
mergeField( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1607     private void mergeField(
1608         final Tokenizer tokenizer,
1609         final ExtensionRegistry extensionRegistry,
1610         final MessageReflection.MergeTarget target,
1611         TextFormatParseInfoTree.Builder parseTreeBuilder,
1612         List<UnknownField> unknownFields)
1613         throws ParseException {
1614       FieldDescriptor field = null;
1615       int startLine = tokenizer.getLine();
1616       int startColumn = tokenizer.getColumn();
1617       final Descriptor type = target.getDescriptorForType();
1618       ExtensionRegistry.ExtensionInfo extension = null;
1619 
1620       if (tokenizer.tryConsume("[")) {
1621         // An extension.
1622         final StringBuilder name = new StringBuilder(tokenizer.consumeIdentifier());
1623         while (tokenizer.tryConsume(".")) {
1624           name.append('.');
1625           name.append(tokenizer.consumeIdentifier());
1626         }
1627 
1628         extension = target.findExtensionByName(extensionRegistry, name.toString());
1629 
1630         if (extension == null) {
1631           String message =
1632               (tokenizer.getPreviousLine() + 1)
1633                   + ":"
1634                   + (tokenizer.getPreviousColumn() + 1)
1635                   + ":\t"
1636                   + type.getFullName()
1637                   + ".["
1638                   + name
1639                   + "]";
1640           unknownFields.add(new UnknownField(message, UnknownField.Type.EXTENSION));
1641         } else {
1642           if (extension.descriptor.getContainingType() != type) {
1643             throw tokenizer.parseExceptionPreviousToken(
1644                 "Extension \""
1645                     + name
1646                     + "\" does not extend message type \""
1647                     + type.getFullName()
1648                     + "\".");
1649           }
1650           field = extension.descriptor;
1651         }
1652 
1653         tokenizer.consume("]");
1654       } else {
1655         final String name = tokenizer.consumeIdentifier();
1656         field = type.findFieldByName(name);
1657 
1658         // Group names are expected to be capitalized as they appear in the
1659         // .proto file, which actually matches their type names, not their field
1660         // names.
1661         if (field == null) {
1662           // Explicitly specify US locale so that this code does not break when
1663           // executing in Turkey.
1664           final String lowerName = name.toLowerCase(Locale.US);
1665           field = type.findFieldByName(lowerName);
1666           // If the case-insensitive match worked but the field is NOT a group,
1667           if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
1668             field = null;
1669           }
1670         }
1671         // Again, special-case group names as described above.
1672         if (field != null
1673             && field.getType() == FieldDescriptor.Type.GROUP
1674             && !field.getMessageType().getName().equals(name)) {
1675           field = null;
1676         }
1677 
1678         if (field == null) {
1679           String message = (tokenizer.getPreviousLine() + 1)
1680                            + ":"
1681                            + (tokenizer.getPreviousColumn() + 1)
1682                            + ":\t"
1683                            + type.getFullName()
1684                            + "."
1685                            + name;
1686           unknownFields.add(new UnknownField(message, UnknownField.Type.FIELD));
1687         }
1688       }
1689 
1690       // Skips unknown fields.
1691       if (field == null) {
1692         // Try to guess the type of this field.
1693         // If this field is not a message, there should be a ":" between the
1694         // field name and the field value and also the field value should not
1695         // start with "{" or "<" which indicates the beginning of a message body.
1696         // If there is no ":" or there is a "{" or "<" after ":", this field has
1697         // to be a message or the input is ill-formed.
1698         if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) {
1699           skipFieldValue(tokenizer);
1700         } else {
1701           skipFieldMessage(tokenizer);
1702         }
1703         return;
1704       }
1705 
1706       // Handle potential ':'.
1707       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
1708         tokenizer.tryConsume(":"); // optional
1709         if (parseTreeBuilder != null) {
1710           TextFormatParseInfoTree.Builder childParseTreeBuilder =
1711               parseTreeBuilder.getBuilderForSubMessageField(field);
1712           consumeFieldValues(
1713               tokenizer,
1714               extensionRegistry,
1715               target,
1716               field,
1717               extension,
1718               childParseTreeBuilder,
1719               unknownFields);
1720         } else {
1721           consumeFieldValues(
1722               tokenizer,
1723               extensionRegistry,
1724               target,
1725               field,
1726               extension,
1727               parseTreeBuilder,
1728               unknownFields);
1729         }
1730       } else {
1731         tokenizer.consume(":"); // required
1732         consumeFieldValues(
1733             tokenizer,
1734             extensionRegistry,
1735             target,
1736             field,
1737             extension,
1738             parseTreeBuilder,
1739             unknownFields);
1740       }
1741 
1742       if (parseTreeBuilder != null) {
1743         parseTreeBuilder.setLocation(field, TextFormatParseLocation.create(startLine, startColumn));
1744       }
1745 
1746       // For historical reasons, fields may optionally be separated by commas or
1747       // semicolons.
1748       if (!tokenizer.tryConsume(";")) {
1749         tokenizer.tryConsume(",");
1750       }
1751     }
1752 
1753     /**
1754      * Parse a one or more field values from {@code tokenizer} and merge it into {@code builder}.
1755      */
consumeFieldValues( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1756     private void consumeFieldValues(
1757         final Tokenizer tokenizer,
1758         final ExtensionRegistry extensionRegistry,
1759         final MessageReflection.MergeTarget target,
1760         final FieldDescriptor field,
1761         final ExtensionRegistry.ExtensionInfo extension,
1762         final TextFormatParseInfoTree.Builder parseTreeBuilder,
1763         List<UnknownField> unknownFields)
1764         throws ParseException {
1765       // Support specifying repeated field values as a comma-separated list.
1766       // Ex."foo: [1, 2, 3]"
1767       if (field.isRepeated() && tokenizer.tryConsume("[")) {
1768         if (!tokenizer.tryConsume("]")) { // Allow "foo: []" to be treated as empty.
1769           while (true) {
1770             consumeFieldValue(
1771                 tokenizer,
1772                 extensionRegistry,
1773                 target,
1774                 field,
1775                 extension,
1776                 parseTreeBuilder,
1777                 unknownFields);
1778             if (tokenizer.tryConsume("]")) {
1779               // End of list.
1780               break;
1781             }
1782             tokenizer.consume(",");
1783           }
1784         }
1785       } else {
1786         consumeFieldValue(
1787             tokenizer,
1788             extensionRegistry,
1789             target,
1790             field,
1791             extension,
1792             parseTreeBuilder,
1793             unknownFields);
1794       }
1795     }
1796 
1797     /** Parse a single field value from {@code tokenizer} and merge it into {@code builder}. */
consumeFieldValue( final Tokenizer tokenizer, final ExtensionRegistry extensionRegistry, final MessageReflection.MergeTarget target, final FieldDescriptor field, final ExtensionRegistry.ExtensionInfo extension, final TextFormatParseInfoTree.Builder parseTreeBuilder, List<UnknownField> unknownFields)1798     private void consumeFieldValue(
1799         final Tokenizer tokenizer,
1800         final ExtensionRegistry extensionRegistry,
1801         final MessageReflection.MergeTarget target,
1802         final FieldDescriptor field,
1803         final ExtensionRegistry.ExtensionInfo extension,
1804         final TextFormatParseInfoTree.Builder parseTreeBuilder,
1805         List<UnknownField> unknownFields)
1806         throws ParseException {
1807       if (singularOverwritePolicy == SingularOverwritePolicy.FORBID_SINGULAR_OVERWRITES
1808           && !field.isRepeated()) {
1809         if (target.hasField(field)) {
1810           throw tokenizer.parseExceptionPreviousToken(
1811               "Non-repeated field \"" + field.getFullName() + "\" cannot be overwritten.");
1812         } else if (field.getContainingOneof() != null
1813             && target.hasOneof(field.getContainingOneof())) {
1814           Descriptors.OneofDescriptor oneof = field.getContainingOneof();
1815           throw tokenizer.parseExceptionPreviousToken(
1816               "Field \""
1817                   + field.getFullName()
1818                   + "\" is specified along with field \""
1819                   + target.getOneofFieldDescriptor(oneof).getFullName()
1820                   + "\", another member of oneof \""
1821                   + oneof.getName()
1822                   + "\".");
1823         }
1824       }
1825 
1826       Object value = null;
1827 
1828       if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
1829         final String endToken;
1830         if (tokenizer.tryConsume("<")) {
1831           endToken = ">";
1832         } else {
1833           tokenizer.consume("{");
1834           endToken = "}";
1835         }
1836 
1837           Message defaultInstance = (extension == null) ? null : extension.defaultInstance;
1838           MessageReflection.MergeTarget subField =
1839               target.newMergeTargetForField(field, defaultInstance);
1840 
1841           while (!tokenizer.tryConsume(endToken)) {
1842             if (tokenizer.atEnd()) {
1843               throw tokenizer.parseException("Expected \"" + endToken + "\".");
1844             }
1845             mergeField(tokenizer, extensionRegistry, subField, parseTreeBuilder, unknownFields);
1846           }
1847 
1848           value = subField.finish();
1849       } else {
1850         switch (field.getType()) {
1851           case INT32:
1852           case SINT32:
1853           case SFIXED32:
1854             value = tokenizer.consumeInt32();
1855             break;
1856 
1857           case INT64:
1858           case SINT64:
1859           case SFIXED64:
1860             value = tokenizer.consumeInt64();
1861             break;
1862 
1863           case UINT32:
1864           case FIXED32:
1865             value = tokenizer.consumeUInt32();
1866             break;
1867 
1868           case UINT64:
1869           case FIXED64:
1870             value = tokenizer.consumeUInt64();
1871             break;
1872 
1873           case FLOAT:
1874             value = tokenizer.consumeFloat();
1875             break;
1876 
1877           case DOUBLE:
1878             value = tokenizer.consumeDouble();
1879             break;
1880 
1881           case BOOL:
1882             value = tokenizer.consumeBoolean();
1883             break;
1884 
1885           case STRING:
1886             value = tokenizer.consumeString();
1887             break;
1888 
1889           case BYTES:
1890             value = tokenizer.consumeByteString();
1891             break;
1892 
1893           case ENUM:
1894             final EnumDescriptor enumType = field.getEnumType();
1895 
1896             if (tokenizer.lookingAtInteger()) {
1897               final int number = tokenizer.consumeInt32();
1898               value = enumType.findValueByNumber(number);
1899               if (value == null) {
1900                 String unknownValueMsg =
1901                     "Enum type \""
1902                         + enumType.getFullName()
1903                         + "\" has no value with number "
1904                         + number
1905                         + '.';
1906                 if (allowUnknownEnumValues) {
1907                   logger.warning(unknownValueMsg);
1908                   return;
1909                 } else {
1910                   throw tokenizer.parseExceptionPreviousToken(
1911                       "Enum type \""
1912                           + enumType.getFullName()
1913                           + "\" has no value with number "
1914                           + number
1915                           + '.');
1916                 }
1917               }
1918             } else {
1919               final String id = tokenizer.consumeIdentifier();
1920               value = enumType.findValueByName(id);
1921               if (value == null) {
1922                 String unknownValueMsg =
1923                     "Enum type \""
1924                         + enumType.getFullName()
1925                         + "\" has no value named \""
1926                         + id
1927                         + "\".";
1928                 if (allowUnknownEnumValues) {
1929                   logger.warning(unknownValueMsg);
1930                   return;
1931                 } else {
1932                   throw tokenizer.parseExceptionPreviousToken(unknownValueMsg);
1933                 }
1934               }
1935             }
1936 
1937             break;
1938 
1939           case MESSAGE:
1940           case GROUP:
1941             throw new RuntimeException("Can't get here.");
1942         }
1943       }
1944 
1945       if (field.isRepeated()) {
1946         // TODO(b/29122459): If field.isMapField() and FORBID_SINGULAR_OVERWRITES mode,
1947         //     check for duplicate map keys here.
1948         target.addRepeatedField(field, value);
1949       } else {
1950         target.setField(field, value);
1951       }
1952     }
1953 
1954 
1955     /** Skips the next field including the field's name and value. */
skipField(Tokenizer tokenizer)1956     private void skipField(Tokenizer tokenizer) throws ParseException {
1957       if (tokenizer.tryConsume("[")) {
1958         // Extension name.
1959         do {
1960           tokenizer.consumeIdentifier();
1961         } while (tokenizer.tryConsume("."));
1962         tokenizer.consume("]");
1963       } else {
1964         tokenizer.consumeIdentifier();
1965       }
1966 
1967       // Try to guess the type of this field.
1968       // If this field is not a message, there should be a ":" between the
1969       // field name and the field value and also the field value should not
1970       // start with "{" or "<" which indicates the beginning of a message body.
1971       // If there is no ":" or there is a "{" or "<" after ":", this field has
1972       // to be a message or the input is ill-formed.
1973       if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") && !tokenizer.lookingAt("{")) {
1974         skipFieldValue(tokenizer);
1975       } else {
1976         skipFieldMessage(tokenizer);
1977       }
1978       // For historical reasons, fields may optionally be separated by commas or
1979       // semicolons.
1980       if (!tokenizer.tryConsume(";")) {
1981         tokenizer.tryConsume(",");
1982       }
1983     }
1984 
1985     /**
1986      * Skips the whole body of a message including the beginning delimiter and the ending delimiter.
1987      */
skipFieldMessage(Tokenizer tokenizer)1988     private void skipFieldMessage(Tokenizer tokenizer) throws ParseException {
1989       final String delimiter;
1990       if (tokenizer.tryConsume("<")) {
1991         delimiter = ">";
1992       } else {
1993         tokenizer.consume("{");
1994         delimiter = "}";
1995       }
1996       while (!tokenizer.lookingAt(">") && !tokenizer.lookingAt("}")) {
1997         skipField(tokenizer);
1998       }
1999       tokenizer.consume(delimiter);
2000     }
2001 
2002     /** Skips a field value. */
skipFieldValue(Tokenizer tokenizer)2003     private void skipFieldValue(Tokenizer tokenizer) throws ParseException {
2004       if (tokenizer.tryConsumeString()) {
2005         while (tokenizer.tryConsumeString()) {}
2006         return;
2007       }
2008       if (!tokenizer.tryConsumeIdentifier() // includes enum & boolean
2009           && !tokenizer.tryConsumeInt64() // includes int32
2010           && !tokenizer.tryConsumeUInt64() // includes uint32
2011           && !tokenizer.tryConsumeDouble()
2012           && !tokenizer.tryConsumeFloat()) {
2013         throw tokenizer.parseException("Invalid field value: " + tokenizer.currentToken);
2014       }
2015     }
2016   }
2017 
2018   // =================================================================
2019   // Utility functions
2020   //
2021   // Some of these methods are package-private because Descriptors.java uses
2022   // them.
2023 
2024   /**
2025    * Escapes bytes in the format used in protocol buffer text format, which is the same as the
2026    * format used for C string literals. All bytes that are not printable 7-bit ASCII characters are
2027    * escaped, as well as backslash, single-quote, and double-quote characters. Characters for which
2028    * no defined short-hand escape sequence is defined will be escaped using 3-digit octal sequences.
2029    */
escapeBytes(ByteString input)2030   public static String escapeBytes(ByteString input) {
2031     return TextFormatEscaper.escapeBytes(input);
2032   }
2033 
2034   /** Like {@link #escapeBytes(ByteString)}, but used for byte array. */
escapeBytes(byte[] input)2035   public static String escapeBytes(byte[] input) {
2036     return TextFormatEscaper.escapeBytes(input);
2037   }
2038 
2039   /**
2040    * Un-escape a byte sequence as escaped using {@link #escapeBytes(ByteString)}. Two-digit hex
2041    * escapes (starting with "\x") are also recognized.
2042    */
unescapeBytes(final CharSequence charString)2043   public static ByteString unescapeBytes(final CharSequence charString)
2044       throws InvalidEscapeSequenceException {
2045     // First convert the Java character sequence to UTF-8 bytes.
2046     ByteString input = ByteString.copyFromUtf8(charString.toString());
2047     // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
2048     // escapes can all be expressed with ASCII characters, so it is safe to
2049     // operate on bytes here.
2050     //
2051     // Unescaping the input byte array will result in a byte sequence that's no
2052     // longer than the input.  That's because each escape sequence is between
2053     // two and four bytes long and stands for a single byte.
2054     final byte[] result = new byte[input.size()];
2055     int pos = 0;
2056     for (int i = 0; i < input.size(); i++) {
2057       byte c = input.byteAt(i);
2058       if (c == '\\') {
2059         if (i + 1 < input.size()) {
2060           ++i;
2061           c = input.byteAt(i);
2062           if (isOctal(c)) {
2063             // Octal escape.
2064             int code = digitValue(c);
2065             if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
2066               ++i;
2067               code = code * 8 + digitValue(input.byteAt(i));
2068             }
2069             if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
2070               ++i;
2071               code = code * 8 + digitValue(input.byteAt(i));
2072             }
2073             // TODO: Check that 0 <= code && code <= 0xFF.
2074             result[pos++] = (byte) code;
2075           } else {
2076             switch (c) {
2077               case 'a':
2078                 result[pos++] = 0x07;
2079                 break;
2080               case 'b':
2081                 result[pos++] = '\b';
2082                 break;
2083               case 'f':
2084                 result[pos++] = '\f';
2085                 break;
2086               case 'n':
2087                 result[pos++] = '\n';
2088                 break;
2089               case 'r':
2090                 result[pos++] = '\r';
2091                 break;
2092               case 't':
2093                 result[pos++] = '\t';
2094                 break;
2095               case 'v':
2096                 result[pos++] = 0x0b;
2097                 break;
2098               case '\\':
2099                 result[pos++] = '\\';
2100                 break;
2101               case '\'':
2102                 result[pos++] = '\'';
2103                 break;
2104               case '"':
2105                 result[pos++] = '\"';
2106                 break;
2107 
2108               case 'x':
2109                 // hex escape
2110                 int code = 0;
2111                 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
2112                   ++i;
2113                   code = digitValue(input.byteAt(i));
2114                 } else {
2115                   throw new InvalidEscapeSequenceException(
2116                       "Invalid escape sequence: '\\x' with no digits");
2117                 }
2118                 if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
2119                   ++i;
2120                   code = code * 16 + digitValue(input.byteAt(i));
2121                 }
2122                 result[pos++] = (byte) code;
2123                 break;
2124 
2125               default:
2126                 throw new InvalidEscapeSequenceException(
2127                     "Invalid escape sequence: '\\" + (char) c + '\'');
2128             }
2129           }
2130         } else {
2131           throw new InvalidEscapeSequenceException(
2132               "Invalid escape sequence: '\\' at end of string.");
2133         }
2134       } else {
2135         result[pos++] = c;
2136       }
2137     }
2138 
2139     return result.length == pos
2140         ? ByteString.wrap(result) // This reference has not been out of our control.
2141         : ByteString.copyFrom(result, 0, pos);
2142   }
2143 
2144   /**
2145    * Thrown by {@link TextFormat#unescapeBytes} and {@link TextFormat#unescapeText} when an invalid
2146    * escape sequence is seen.
2147    */
2148   public static class InvalidEscapeSequenceException extends IOException {
2149     private static final long serialVersionUID = -8164033650142593304L;
2150 
InvalidEscapeSequenceException(final String description)2151     InvalidEscapeSequenceException(final String description) {
2152       super(description);
2153     }
2154   }
2155 
2156   /**
2157    * Like {@link #escapeBytes(ByteString)}, but escapes a text string. Non-ASCII characters are
2158    * first encoded as UTF-8, then each byte is escaped individually as a 3-digit octal escape. Yes,
2159    * it's weird.
2160    */
escapeText(final String input)2161   static String escapeText(final String input) {
2162     return escapeBytes(ByteString.copyFromUtf8(input));
2163   }
2164 
2165   /** Escape double quotes and backslashes in a String for emittingUnicode output of a message. */
escapeDoubleQuotesAndBackslashes(final String input)2166   public static String escapeDoubleQuotesAndBackslashes(final String input) {
2167     return TextFormatEscaper.escapeDoubleQuotesAndBackslashes(input);
2168   }
2169 
2170   /**
2171    * Un-escape a text string as escaped using {@link #escapeText(String)}. Two-digit hex escapes
2172    * (starting with "\x") are also recognized.
2173    */
unescapeText(final String input)2174   static String unescapeText(final String input) throws InvalidEscapeSequenceException {
2175     return unescapeBytes(input).toStringUtf8();
2176   }
2177 
2178   /** Is this an octal digit? */
isOctal(final byte c)2179   private static boolean isOctal(final byte c) {
2180     return '0' <= c && c <= '7';
2181   }
2182 
2183   /** Is this a hex digit? */
isHex(final byte c)2184   private static boolean isHex(final byte c) {
2185     return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F');
2186   }
2187 
2188   /**
2189    * Interpret a character as a digit (in any base up to 36) and return the numeric value. This is
2190    * like {@code Character.digit()} but we don't accept non-ASCII digits.
2191    */
digitValue(final byte c)2192   private static int digitValue(final byte c) {
2193     if ('0' <= c && c <= '9') {
2194       return c - '0';
2195     } else if ('a' <= c && c <= 'z') {
2196       return c - 'a' + 10;
2197     } else {
2198       return c - 'A' + 10;
2199     }
2200   }
2201 
2202   /**
2203    * Parse a 32-bit signed integer from the text. Unlike the Java standard {@code
2204    * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal
2205    * and octal numbers, respectively.
2206    */
parseInt32(final String text)2207   static int parseInt32(final String text) throws NumberFormatException {
2208     return (int) parseInteger(text, true, false);
2209   }
2210 
2211   /**
2212    * Parse a 32-bit unsigned integer from the text. Unlike the Java standard {@code
2213    * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal
2214    * and octal numbers, respectively. The result is coerced to a (signed) {@code int} when returned
2215    * since Java has no unsigned integer type.
2216    */
parseUInt32(final String text)2217   static int parseUInt32(final String text) throws NumberFormatException {
2218     return (int) parseInteger(text, false, false);
2219   }
2220 
2221   /**
2222    * Parse a 64-bit signed integer from the text. Unlike the Java standard {@code
2223    * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal
2224    * and octal numbers, respectively.
2225    */
parseInt64(final String text)2226   static long parseInt64(final String text) throws NumberFormatException {
2227     return parseInteger(text, true, true);
2228   }
2229 
2230   /**
2231    * Parse a 64-bit unsigned integer from the text. Unlike the Java standard {@code
2232    * Integer.parseInt()}, this function recognizes the prefixes "0x" and "0" to signify hexadecimal
2233    * and octal numbers, respectively. The result is coerced to a (signed) {@code long} when returned
2234    * since Java has no unsigned long type.
2235    */
parseUInt64(final String text)2236   static long parseUInt64(final String text) throws NumberFormatException {
2237     return parseInteger(text, false, true);
2238   }
2239 
parseInteger(final String text, final boolean isSigned, final boolean isLong)2240   private static long parseInteger(final String text, final boolean isSigned, final boolean isLong)
2241       throws NumberFormatException {
2242     int pos = 0;
2243 
2244     boolean negative = false;
2245     if (text.startsWith("-", pos)) {
2246       if (!isSigned) {
2247         throw new NumberFormatException("Number must be positive: " + text);
2248       }
2249       ++pos;
2250       negative = true;
2251     }
2252 
2253     int radix = 10;
2254     if (text.startsWith("0x", pos)) {
2255       pos += 2;
2256       radix = 16;
2257     } else if (text.startsWith("0", pos)) {
2258       radix = 8;
2259     }
2260 
2261     final String numberText = text.substring(pos);
2262 
2263     long result = 0;
2264     if (numberText.length() < 16) {
2265       // Can safely assume no overflow.
2266       result = Long.parseLong(numberText, radix);
2267       if (negative) {
2268         result = -result;
2269       }
2270 
2271       // Check bounds.
2272       // No need to check for 64-bit numbers since they'd have to be 16 chars
2273       // or longer to overflow.
2274       if (!isLong) {
2275         if (isSigned) {
2276           if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) {
2277             throw new NumberFormatException(
2278                 "Number out of range for 32-bit signed integer: " + text);
2279           }
2280         } else {
2281           if (result >= (1L << 32) || result < 0) {
2282             throw new NumberFormatException(
2283                 "Number out of range for 32-bit unsigned integer: " + text);
2284           }
2285         }
2286       }
2287     } else {
2288       BigInteger bigValue = new BigInteger(numberText, radix);
2289       if (negative) {
2290         bigValue = bigValue.negate();
2291       }
2292 
2293       // Check bounds.
2294       if (!isLong) {
2295         if (isSigned) {
2296           if (bigValue.bitLength() > 31) {
2297             throw new NumberFormatException(
2298                 "Number out of range for 32-bit signed integer: " + text);
2299           }
2300         } else {
2301           if (bigValue.bitLength() > 32) {
2302             throw new NumberFormatException(
2303                 "Number out of range for 32-bit unsigned integer: " + text);
2304           }
2305         }
2306       } else {
2307         if (isSigned) {
2308           if (bigValue.bitLength() > 63) {
2309             throw new NumberFormatException(
2310                 "Number out of range for 64-bit signed integer: " + text);
2311           }
2312         } else {
2313           if (bigValue.bitLength() > 64) {
2314             throw new NumberFormatException(
2315                 "Number out of range for 64-bit unsigned integer: " + text);
2316           }
2317         }
2318       }
2319 
2320       result = bigValue.longValue();
2321     }
2322 
2323     return result;
2324   }
2325 }
2326