• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // http://code.google.com/p/protobuf/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: jschorr@google.com (Joseph Schorr)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <float.h>
36 #include <math.h>
37 #include <stdio.h>
38 #include <stack>
39 #include <limits>
40 
41 #include <google/protobuf/text_format.h>
42 
43 #include <google/protobuf/descriptor.h>
44 #include <google/protobuf/io/coded_stream.h>
45 #include <google/protobuf/io/zero_copy_stream.h>
46 #include <google/protobuf/io/zero_copy_stream_impl.h>
47 #include <google/protobuf/unknown_field_set.h>
48 #include <google/protobuf/descriptor.pb.h>
49 #include <google/protobuf/io/tokenizer.h>
50 #include <google/protobuf/stubs/strutil.h>
51 
52 namespace google {
53 namespace protobuf {
54 
DebugString() const55 string Message::DebugString() const {
56   string debug_string;
57 
58   TextFormat::PrintToString(*this, &debug_string);
59 
60   return debug_string;
61 }
62 
ShortDebugString() const63 string Message::ShortDebugString() const {
64   string debug_string;
65 
66   TextFormat::Printer printer;
67   printer.SetSingleLineMode(true);
68 
69   printer.PrintToString(*this, &debug_string);
70   // Single line mode currently might have an extra space at the end.
71   if (debug_string.size() > 0 &&
72       debug_string[debug_string.size() - 1] == ' ') {
73     debug_string.resize(debug_string.size() - 1);
74   }
75 
76   return debug_string;
77 }
78 
Utf8DebugString() const79 string Message::Utf8DebugString() const {
80   string debug_string;
81 
82   TextFormat::Printer printer;
83   printer.SetUseUtf8StringEscaping(true);
84 
85   printer.PrintToString(*this, &debug_string);
86 
87   return debug_string;
88 }
89 
PrintDebugString() const90 void Message::PrintDebugString() const {
91   printf("%s", DebugString().c_str());
92 }
93 
94 
95 // ===========================================================================
// Internal class for parsing an ASCII representation of a Protocol Message.
// This class makes use of the Protocol Message compiler's tokenizer found
// in //google/protobuf/io/tokenizer.h. Note that this class's Parse
// method is *not* thread-safe and should only be used in a single thread at
// a time.
101 
// Makes code slightly more readable.  The meaning of "DO(foo)" is
// "Execute foo and fail if it fails.", where failure is indicated by
// returning false. Borrowed from parser.cc (Thanks Kenton!).
//
// The "if (...) {} else return false" shape lets a use of the macro be
// terminated with a semicolon like an ordinary statement, and keeps the
// macro's own if/else paired together when it is placed inside a caller's
// unbraced if/else.  It may only be used in functions returning bool.
#define DO(STATEMENT) if (STATEMENT) {} else return false
106 
107 class TextFormat::Parser::ParserImpl {
108  public:
109 
110   // Determines if repeated values for a non-repeated field are
111   // permitted, e.g., the string "foo: 1 foo: 2" for a
112   // required/optional field named "foo".
113   enum SingularOverwritePolicy {
114     ALLOW_SINGULAR_OVERWRITES = 0,   // the last value is retained
115     FORBID_SINGULAR_OVERWRITES = 1,  // an error is issued
116   };
117 
ParserImpl(const Descriptor * root_message_type,io::ZeroCopyInputStream * input_stream,io::ErrorCollector * error_collector,SingularOverwritePolicy singular_overwrite_policy)118   ParserImpl(const Descriptor* root_message_type,
119              io::ZeroCopyInputStream* input_stream,
120              io::ErrorCollector* error_collector,
121              SingularOverwritePolicy singular_overwrite_policy)
122     : error_collector_(error_collector),
123       tokenizer_error_collector_(this),
124       tokenizer_(input_stream, &tokenizer_error_collector_),
125       root_message_type_(root_message_type),
126       singular_overwrite_policy_(singular_overwrite_policy),
127       had_errors_(false) {
128     // For backwards-compatibility with proto1, we need to allow the 'f' suffix
129     // for floats.
130     tokenizer_.set_allow_f_after_float(true);
131 
132     // '#' starts a comment.
133     tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);
134 
135     // Consume the starting token.
136     tokenizer_.Next();
137   }
~ParserImpl()138   ~ParserImpl() { }
139 
140   // Parses the ASCII representation specified in input and saves the
141   // information into the output pointer (a Message). Returns
142   // false if an error occurs (an error will also be logged to
143   // GOOGLE_LOG(ERROR)).
Parse(Message * output)144   bool Parse(Message* output) {
145     // Consume fields until we cannot do so anymore.
146     while(true) {
147       if (LookingAtType(io::Tokenizer::TYPE_END)) {
148         return !had_errors_;
149       }
150 
151       DO(ConsumeField(output));
152     }
153   }
154 
ParseField(const FieldDescriptor * field,Message * output)155   bool ParseField(const FieldDescriptor* field, Message* output) {
156     bool suc;
157     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
158       suc = ConsumeFieldMessage(output, output->GetReflection(), field);
159     } else {
160       suc = ConsumeFieldValue(output, output->GetReflection(), field);
161     }
162     return suc && LookingAtType(io::Tokenizer::TYPE_END);
163   }
164 
ReportError(int line,int col,const string & message)165   void ReportError(int line, int col, const string& message) {
166     had_errors_ = true;
167     if (error_collector_ == NULL) {
168       if (line >= 0) {
169         GOOGLE_LOG(ERROR) << "Error parsing text-format "
170                    << root_message_type_->full_name()
171                    << ": " << (line + 1) << ":"
172                    << (col + 1) << ": " << message;
173       } else {
174         GOOGLE_LOG(ERROR) << "Error parsing text-format "
175                    << root_message_type_->full_name()
176                    << ": " << message;
177       }
178     } else {
179       error_collector_->AddError(line, col, message);
180     }
181   }
182 
ReportWarning(int line,int col,const string & message)183   void ReportWarning(int line, int col, const string& message) {
184     if (error_collector_ == NULL) {
185       if (line >= 0) {
186         GOOGLE_LOG(WARNING) << "Warning parsing text-format "
187                      << root_message_type_->full_name()
188                      << ": " << (line + 1) << ":"
189                      << (col + 1) << ": " << message;
190       } else {
191         GOOGLE_LOG(WARNING) << "Warning parsing text-format "
192                      << root_message_type_->full_name()
193                      << ": " << message;
194       }
195     } else {
196       error_collector_->AddWarning(line, col, message);
197     }
198   }
199 
200  private:
201   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl);
202 
203   // Reports an error with the given message with information indicating
204   // the position (as derived from the current token).
ReportError(const string & message)205   void ReportError(const string& message) {
206     ReportError(tokenizer_.current().line, tokenizer_.current().column,
207                 message);
208   }
209 
210   // Reports a warning with the given message with information indicating
211   // the position (as derived from the current token).
ReportWarning(const string & message)212   void ReportWarning(const string& message) {
213     ReportWarning(tokenizer_.current().line, tokenizer_.current().column,
214                   message);
215   }
216 
217   // Consumes the specified message with the given starting delimeter.
218   // This method checks to see that the end delimeter at the conclusion of
219   // the consumption matches the starting delimeter passed in here.
ConsumeMessage(Message * message,const string delimeter)220   bool ConsumeMessage(Message* message, const string delimeter) {
221     while (!LookingAt(">") &&  !LookingAt("}")) {
222       DO(ConsumeField(message));
223     }
224 
225     // Confirm that we have a valid ending delimeter.
226     DO(Consume(delimeter));
227 
228     return true;
229   }
230 
231   // Consumes the current field (as returned by the tokenizer) on the
232   // passed in message.
ConsumeField(Message * message)233   bool ConsumeField(Message* message) {
234     const Reflection* reflection = message->GetReflection();
235     const Descriptor* descriptor = message->GetDescriptor();
236 
237     string field_name;
238 
239     const FieldDescriptor* field = NULL;
240 
241     if (TryConsume("[")) {
242       // Extension.
243       DO(ConsumeIdentifier(&field_name));
244       while (TryConsume(".")) {
245         string part;
246         DO(ConsumeIdentifier(&part));
247         field_name += ".";
248         field_name += part;
249       }
250       DO(Consume("]"));
251 
252       field = reflection->FindKnownExtensionByName(field_name);
253 
254       if (field == NULL) {
255         ReportError("Extension \"" + field_name + "\" is not defined or "
256                     "is not an extension of \"" +
257                     descriptor->full_name() + "\".");
258         return false;
259       }
260     } else {
261       DO(ConsumeIdentifier(&field_name));
262 
263       field = descriptor->FindFieldByName(field_name);
264       // Group names are expected to be capitalized as they appear in the
265       // .proto file, which actually matches their type names, not their field
266       // names.
267       if (field == NULL) {
268         string lower_field_name = field_name;
269         LowerString(&lower_field_name);
270         field = descriptor->FindFieldByName(lower_field_name);
271         // If the case-insensitive match worked but the field is NOT a group,
272         if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
273           field = NULL;
274         }
275       }
276       // Again, special-case group names as described above.
277       if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
278           && field->message_type()->name() != field_name) {
279         field = NULL;
280       }
281 
282       if (field == NULL) {
283         ReportError("Message type \"" + descriptor->full_name() +
284                     "\" has no field named \"" + field_name + "\".");
285         return false;
286       }
287     }
288 
289     // Fail if the field is not repeated and it has already been specified.
290     if ((singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) &&
291         !field->is_repeated() && reflection->HasField(*message, field)) {
292       ReportError("Non-repeated field \"" + field_name +
293                   "\" is specified multiple times.");
294       return false;
295     }
296 
297     // Perform special handling for embedded message types.
298     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
299       // ':' is optional here.
300       TryConsume(":");
301       DO(ConsumeFieldMessage(message, reflection, field));
302     } else {
303       DO(Consume(":"));
304       DO(ConsumeFieldValue(message, reflection, field));
305     }
306 
307     if (field->options().deprecated()) {
308       ReportWarning("text format contains deprecated field \""
309                     + field_name + "\"");
310     }
311 
312     return true;
313   }
314 
ConsumeFieldMessage(Message * message,const Reflection * reflection,const FieldDescriptor * field)315   bool ConsumeFieldMessage(Message* message,
316                            const Reflection* reflection,
317                            const FieldDescriptor* field) {
318     string delimeter;
319     if (TryConsume("<")) {
320       delimeter = ">";
321     } else {
322       DO(Consume("{"));
323       delimeter = "}";
324     }
325 
326     if (field->is_repeated()) {
327       DO(ConsumeMessage(reflection->AddMessage(message, field), delimeter));
328     } else {
329       DO(ConsumeMessage(reflection->MutableMessage(message, field),
330                         delimeter));
331     }
332     return true;
333   }
334 
ConsumeFieldValue(Message * message,const Reflection * reflection,const FieldDescriptor * field)335   bool ConsumeFieldValue(Message* message,
336                          const Reflection* reflection,
337                          const FieldDescriptor* field) {
338 
339 // Define an easy to use macro for setting fields. This macro checks
340 // to see if the field is repeated (in which case we need to use the Add
341 // methods or not (in which case we need to use the Set methods).
342 #define SET_FIELD(CPPTYPE, VALUE)                                  \
343         if (field->is_repeated()) {                                \
344           reflection->Add##CPPTYPE(message, field, VALUE);         \
345         } else {                                                   \
346           reflection->Set##CPPTYPE(message, field, VALUE);         \
347         }                                                          \
348 
349     switch(field->cpp_type()) {
350       case FieldDescriptor::CPPTYPE_INT32: {
351         int64 value;
352         DO(ConsumeSignedInteger(&value, kint32max));
353         SET_FIELD(Int32, static_cast<int32>(value));
354         break;
355       }
356 
357       case FieldDescriptor::CPPTYPE_UINT32: {
358         uint64 value;
359         DO(ConsumeUnsignedInteger(&value, kuint32max));
360         SET_FIELD(UInt32, static_cast<uint32>(value));
361         break;
362       }
363 
364       case FieldDescriptor::CPPTYPE_INT64: {
365         int64 value;
366         DO(ConsumeSignedInteger(&value, kint64max));
367         SET_FIELD(Int64, value);
368         break;
369       }
370 
371       case FieldDescriptor::CPPTYPE_UINT64: {
372         uint64 value;
373         DO(ConsumeUnsignedInteger(&value, kuint64max));
374         SET_FIELD(UInt64, value);
375         break;
376       }
377 
378       case FieldDescriptor::CPPTYPE_FLOAT: {
379         double value;
380         DO(ConsumeDouble(&value));
381         SET_FIELD(Float, static_cast<float>(value));
382         break;
383       }
384 
385       case FieldDescriptor::CPPTYPE_DOUBLE: {
386         double value;
387         DO(ConsumeDouble(&value));
388         SET_FIELD(Double, value);
389         break;
390       }
391 
392       case FieldDescriptor::CPPTYPE_STRING: {
393         string value;
394         DO(ConsumeString(&value));
395         SET_FIELD(String, value);
396         break;
397       }
398 
399       case FieldDescriptor::CPPTYPE_BOOL: {
400         string value;
401         DO(ConsumeIdentifier(&value));
402 
403         if (value == "true") {
404           SET_FIELD(Bool, true);
405         } else if (value == "false") {
406           SET_FIELD(Bool, false);
407         } else {
408           ReportError("Invalid value for boolean field \"" + field->name()
409                       + "\". Value: \"" + value  + "\".");
410           return false;
411         }
412         break;
413       }
414 
415       case FieldDescriptor::CPPTYPE_ENUM: {
416         string value;
417         DO(ConsumeIdentifier(&value));
418 
419         // Find the enumeration value.
420         const EnumDescriptor* enum_type = field->enum_type();
421         const EnumValueDescriptor* enum_value
422             = enum_type->FindValueByName(value);
423 
424         if (enum_value == NULL) {
425           ReportError("Unknown enumeration value of \"" + value  + "\" for "
426                       "field \"" + field->name() + "\".");
427           return false;
428         }
429 
430         SET_FIELD(Enum, enum_value);
431         break;
432       }
433 
434       case FieldDescriptor::CPPTYPE_MESSAGE: {
435         // We should never get here. Put here instead of a default
436         // so that if new types are added, we get a nice compiler warning.
437         GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
438         break;
439       }
440     }
441 #undef SET_FIELD
442     return true;
443   }
444 
445   // Returns true if the current token's text is equal to that specified.
LookingAt(const string & text)446   bool LookingAt(const string& text) {
447     return tokenizer_.current().text == text;
448   }
449 
450   // Returns true if the current token's type is equal to that specified.
LookingAtType(io::Tokenizer::TokenType token_type)451   bool LookingAtType(io::Tokenizer::TokenType token_type) {
452     return tokenizer_.current().type == token_type;
453   }
454 
455   // Consumes an identifier and saves its value in the identifier parameter.
456   // Returns false if the token is not of type IDENTFIER.
ConsumeIdentifier(string * identifier)457   bool ConsumeIdentifier(string* identifier) {
458     if (!LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
459       ReportError("Expected identifier.");
460       return false;
461     }
462 
463     *identifier = tokenizer_.current().text;
464 
465     tokenizer_.Next();
466     return true;
467   }
468 
469   // Consumes a string and saves its value in the text parameter.
470   // Returns false if the token is not of type STRING.
ConsumeString(string * text)471   bool ConsumeString(string* text) {
472     if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
473       ReportError("Expected string.");
474       return false;
475     }
476 
477     text->clear();
478     while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
479       io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
480 
481       tokenizer_.Next();
482     }
483 
484     return true;
485   }
486 
487   // Consumes a uint64 and saves its value in the value parameter.
488   // Returns false if the token is not of type INTEGER.
ConsumeUnsignedInteger(uint64 * value,uint64 max_value)489   bool ConsumeUnsignedInteger(uint64* value, uint64 max_value) {
490     if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
491       ReportError("Expected integer.");
492       return false;
493     }
494 
495     if (!io::Tokenizer::ParseInteger(tokenizer_.current().text,
496                                      max_value, value)) {
497       ReportError("Integer out of range.");
498       return false;
499     }
500 
501     tokenizer_.Next();
502     return true;
503   }
504 
505   // Consumes an int64 and saves its value in the value parameter.
506   // Note that since the tokenizer does not support negative numbers,
507   // we actually may consume an additional token (for the minus sign) in this
508   // method. Returns false if the token is not an integer
509   // (signed or otherwise).
ConsumeSignedInteger(int64 * value,uint64 max_value)510   bool ConsumeSignedInteger(int64* value, uint64 max_value) {
511     bool negative = false;
512 
513     if (TryConsume("-")) {
514       negative = true;
515       // Two's complement always allows one more negative integer than
516       // positive.
517       ++max_value;
518     }
519 
520     uint64 unsigned_value;
521 
522     DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
523 
524     *value = static_cast<int64>(unsigned_value);
525 
526     if (negative) {
527       *value = -*value;
528     }
529 
530     return true;
531   }
532 
533   // Consumes a double and saves its value in the value parameter.
534   // Note that since the tokenizer does not support negative numbers,
535   // we actually may consume an additional token (for the minus sign) in this
536   // method. Returns false if the token is not a double
537   // (signed or otherwise).
ConsumeDouble(double * value)538   bool ConsumeDouble(double* value) {
539     bool negative = false;
540 
541     if (TryConsume("-")) {
542       negative = true;
543     }
544 
545     // A double can actually be an integer, according to the tokenizer.
546     // Therefore, we must check both cases here.
547     if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
548       // We have found an integer value for the double.
549       uint64 integer_value;
550       DO(ConsumeUnsignedInteger(&integer_value, kuint64max));
551 
552       *value = static_cast<double>(integer_value);
553     } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
554       // We have found a float value for the double.
555       *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
556 
557       // Mark the current token as consumed.
558       tokenizer_.Next();
559     } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
560       string text = tokenizer_.current().text;
561       LowerString(&text);
562       if (text == "inf" || text == "infinity") {
563         *value = std::numeric_limits<double>::infinity();
564         tokenizer_.Next();
565       } else if (text == "nan") {
566         *value = std::numeric_limits<double>::quiet_NaN();
567         tokenizer_.Next();
568       } else {
569         ReportError("Expected double.");
570         return false;
571       }
572     } else {
573       ReportError("Expected double.");
574       return false;
575     }
576 
577     if (negative) {
578       *value = -*value;
579     }
580 
581     return true;
582   }
583 
584   // Consumes a token and confirms that it matches that specified in the
585   // value parameter. Returns false if the token found does not match that
586   // which was specified.
Consume(const string & value)587   bool Consume(const string& value) {
588     const string& current_value = tokenizer_.current().text;
589 
590     if (current_value != value) {
591       ReportError("Expected \"" + value + "\", found \"" + current_value
592                   + "\".");
593       return false;
594     }
595 
596     tokenizer_.Next();
597 
598     return true;
599   }
600 
601   // Attempts to consume the supplied value. Returns false if a the
602   // token found does not match the value specified.
TryConsume(const string & value)603   bool TryConsume(const string& value) {
604     if (tokenizer_.current().text == value) {
605       tokenizer_.Next();
606       return true;
607     } else {
608       return false;
609     }
610   }
611 
612   // An internal instance of the Tokenizer's error collector, used to
613   // collect any base-level parse errors and feed them to the ParserImpl.
614   class ParserErrorCollector : public io::ErrorCollector {
615    public:
ParserErrorCollector(TextFormat::Parser::ParserImpl * parser)616     explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) :
617         parser_(parser) { }
618 
~ParserErrorCollector()619     virtual ~ParserErrorCollector() { };
620 
AddError(int line,int column,const string & message)621     virtual void AddError(int line, int column, const string& message) {
622       parser_->ReportError(line, column, message);
623     }
624 
AddWarning(int line,int column,const string & message)625     virtual void AddWarning(int line, int column, const string& message) {
626       parser_->ReportWarning(line, column, message);
627     }
628 
629    private:
630     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector);
631     TextFormat::Parser::ParserImpl* parser_;
632   };
633 
634   io::ErrorCollector* error_collector_;
635   ParserErrorCollector tokenizer_error_collector_;
636   io::Tokenizer tokenizer_;
637   const Descriptor* root_message_type_;
638   SingularOverwritePolicy singular_overwrite_policy_;
639   bool had_errors_;
640 };
641 
642 #undef DO
643 
644 // ===========================================================================
645 // Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
646 // from the Printer found in //google/protobuf/io/printer.h
647 class TextFormat::Printer::TextGenerator {
648  public:
TextGenerator(io::ZeroCopyOutputStream * output,int initial_indent_level)649   explicit TextGenerator(io::ZeroCopyOutputStream* output,
650                          int initial_indent_level)
651     : output_(output),
652       buffer_(NULL),
653       buffer_size_(0),
654       at_start_of_line_(true),
655       failed_(false),
656       indent_(""),
657       initial_indent_level_(initial_indent_level) {
658     indent_.resize(initial_indent_level_ * 2, ' ');
659   }
660 
~TextGenerator()661   ~TextGenerator() {
662     // Only BackUp() if we're sure we've successfully called Next() at least
663     // once.
664     if (buffer_size_ > 0) {
665       output_->BackUp(buffer_size_);
666     }
667   }
668 
669   // Indent text by two spaces.  After calling Indent(), two spaces will be
670   // inserted at the beginning of each line of text.  Indent() may be called
671   // multiple times to produce deeper indents.
Indent()672   void Indent() {
673     indent_ += "  ";
674   }
675 
676   // Reduces the current indent level by two spaces, or crashes if the indent
677   // level is zero.
Outdent()678   void Outdent() {
679     if (indent_.empty() ||
680         indent_.size() < initial_indent_level_ * 2) {
681       GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
682       return;
683     }
684 
685     indent_.resize(indent_.size() - 2);
686   }
687 
688   // Print text to the output stream.
Print(const string & str)689   void Print(const string& str) {
690     Print(str.data(), str.size());
691   }
692 
693   // Print text to the output stream.
Print(const char * text)694   void Print(const char* text) {
695     Print(text, strlen(text));
696   }
697 
698   // Print text to the output stream.
Print(const char * text,int size)699   void Print(const char* text, int size) {
700     int pos = 0;  // The number of bytes we've written so far.
701 
702     for (int i = 0; i < size; i++) {
703       if (text[i] == '\n') {
704         // Saw newline.  If there is more text, we may need to insert an indent
705         // here.  So, write what we have so far, including the '\n'.
706         Write(text + pos, i - pos + 1);
707         pos = i + 1;
708 
709         // Setting this true will cause the next Write() to insert an indent
710         // first.
711         at_start_of_line_ = true;
712       }
713     }
714 
715     // Write the rest.
716     Write(text + pos, size - pos);
717   }
718 
719   // True if any write to the underlying stream failed.  (We don't just
720   // crash in this case because this is an I/O failure, not a programming
721   // error.)
failed() const722   bool failed() const { return failed_; }
723 
724  private:
725   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator);
726 
Write(const char * data,int size)727   void Write(const char* data, int size) {
728     if (failed_) return;
729     if (size == 0) return;
730 
731     if (at_start_of_line_) {
732       // Insert an indent.
733       at_start_of_line_ = false;
734       Write(indent_.data(), indent_.size());
735       if (failed_) return;
736     }
737 
738     while (size > buffer_size_) {
739       // Data exceeds space in the buffer.  Copy what we can and request a
740       // new buffer.
741       memcpy(buffer_, data, buffer_size_);
742       data += buffer_size_;
743       size -= buffer_size_;
744       void* void_buffer;
745       failed_ = !output_->Next(&void_buffer, &buffer_size_);
746       if (failed_) return;
747       buffer_ = reinterpret_cast<char*>(void_buffer);
748     }
749 
750     // Buffer is big enough to receive the data; copy it.
751     memcpy(buffer_, data, size);
752     buffer_ += size;
753     buffer_size_ -= size;
754   }
755 
756   io::ZeroCopyOutputStream* const output_;
757   char* buffer_;
758   int buffer_size_;
759   bool at_start_of_line_;
760   bool failed_;
761 
762   string indent_;
763   int initial_indent_level_;
764 };
765 
766 // ===========================================================================
767 
// Creates a parser with no error collector (error_collector_ is NULL) and
// with partial messages disallowed (allow_partial_ is false).
TextFormat::Parser::Parser()
  : error_collector_(NULL),
    allow_partial_(false) {}

// Nothing to release here.
TextFormat::Parser::~Parser() {}
773 
Parse(io::ZeroCopyInputStream * input,Message * output)774 bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
775                                Message* output) {
776   output->Clear();
777   ParserImpl parser(output->GetDescriptor(), input, error_collector_,
778                     ParserImpl::FORBID_SINGULAR_OVERWRITES);
779   return MergeUsingImpl(input, output, &parser);
780 }
781 
ParseFromString(const string & input,Message * output)782 bool TextFormat::Parser::ParseFromString(const string& input,
783                                          Message* output) {
784   io::ArrayInputStream input_stream(input.data(), input.size());
785   return Parse(&input_stream, output);
786 }
787 
Merge(io::ZeroCopyInputStream * input,Message * output)788 bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
789                                Message* output) {
790   ParserImpl parser(output->GetDescriptor(), input, error_collector_,
791                     ParserImpl::ALLOW_SINGULAR_OVERWRITES);
792   return MergeUsingImpl(input, output, &parser);
793 }
794 
MergeFromString(const string & input,Message * output)795 bool TextFormat::Parser::MergeFromString(const string& input,
796                                          Message* output) {
797   io::ArrayInputStream input_stream(input.data(), input.size());
798   return Merge(&input_stream, output);
799 }
800 
MergeUsingImpl(io::ZeroCopyInputStream * input,Message * output,ParserImpl * parser_impl)801 bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* input,
802                                         Message* output,
803                                         ParserImpl* parser_impl) {
804   if (!parser_impl->Parse(output)) return false;
805   if (!allow_partial_ && !output->IsInitialized()) {
806     vector<string> missing_fields;
807     output->FindInitializationErrors(&missing_fields);
808     parser_impl->ReportError(-1, 0, "Message missing required fields: " +
809                                     JoinStrings(missing_fields, ", "));
810     return false;
811   }
812   return true;
813 }
814 
ParseFieldValueFromString(const string & input,const FieldDescriptor * field,Message * output)815 bool TextFormat::Parser::ParseFieldValueFromString(
816     const string& input,
817     const FieldDescriptor* field,
818     Message* output) {
819   io::ArrayInputStream input_stream(input.data(), input.size());
820   ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
821                     ParserImpl::ALLOW_SINGULAR_OVERWRITES);
822   return parser.ParseField(field, output);
823 }
824 
Parse(io::ZeroCopyInputStream * input,Message * output)825 /* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
826                                     Message* output) {
827   return Parser().Parse(input, output);
828 }
829 
Merge(io::ZeroCopyInputStream * input,Message * output)830 /* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
831                                     Message* output) {
832   return Parser().Merge(input, output);
833 }
834 
ParseFromString(const string & input,Message * output)835 /* static */ bool TextFormat::ParseFromString(const string& input,
836                                               Message* output) {
837   return Parser().ParseFromString(input, output);
838 }
839 
MergeFromString(const string & input,Message * output)840 /* static */ bool TextFormat::MergeFromString(const string& input,
841                                               Message* output) {
842   return Parser().MergeFromString(input, output);
843 }
844 
845 // ===========================================================================
846 
Printer()847 TextFormat::Printer::Printer()
848   : initial_indent_level_(0),
849     single_line_mode_(false),
850     use_short_repeated_primitives_(false),
851     utf8_string_escaping_(false) {}
852 
~Printer()853 TextFormat::Printer::~Printer() {}
854 
PrintToString(const Message & message,string * output)855 bool TextFormat::Printer::PrintToString(const Message& message,
856                                         string* output) {
857   GOOGLE_DCHECK(output) << "output specified is NULL";
858 
859   output->clear();
860   io::StringOutputStream output_stream(output);
861 
862   bool result = Print(message, &output_stream);
863 
864   return result;
865 }
866 
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,string * output)867 bool TextFormat::Printer::PrintUnknownFieldsToString(
868     const UnknownFieldSet& unknown_fields,
869     string* output) {
870   GOOGLE_DCHECK(output) << "output specified is NULL";
871 
872   output->clear();
873   io::StringOutputStream output_stream(output);
874   return PrintUnknownFields(unknown_fields, &output_stream);
875 }
876 
Print(const Message & message,io::ZeroCopyOutputStream * output)877 bool TextFormat::Printer::Print(const Message& message,
878                                 io::ZeroCopyOutputStream* output) {
879   TextGenerator generator(output, initial_indent_level_);
880 
881   Print(message, generator);
882 
883   // Output false if the generator failed internally.
884   return !generator.failed();
885 }
886 
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output)887 bool TextFormat::Printer::PrintUnknownFields(
888     const UnknownFieldSet& unknown_fields,
889     io::ZeroCopyOutputStream* output) {
890   TextGenerator generator(output, initial_indent_level_);
891 
892   PrintUnknownFields(unknown_fields, generator);
893 
894   // Output false if the generator failed internally.
895   return !generator.failed();
896 }
897 
Print(const Message & message,TextGenerator & generator)898 void TextFormat::Printer::Print(const Message& message,
899                                 TextGenerator& generator) {
900   const Reflection* reflection = message.GetReflection();
901   vector<const FieldDescriptor*> fields;
902   reflection->ListFields(message, &fields);
903   for (int i = 0; i < fields.size(); i++) {
904     PrintField(message, reflection, fields[i], generator);
905   }
906   PrintUnknownFields(reflection->GetUnknownFields(message), generator);
907 }
908 
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,string * output)909 void TextFormat::Printer::PrintFieldValueToString(
910     const Message& message,
911     const FieldDescriptor* field,
912     int index,
913     string* output) {
914 
915   GOOGLE_DCHECK(output) << "output specified is NULL";
916 
917   output->clear();
918   io::StringOutputStream output_stream(output);
919   TextGenerator generator(&output_stream, initial_indent_level_);
920 
921   PrintFieldValue(message, message.GetReflection(), field, index, generator);
922 }
923 
PrintField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator & generator)924 void TextFormat::Printer::PrintField(const Message& message,
925                                      const Reflection* reflection,
926                                      const FieldDescriptor* field,
927                                      TextGenerator& generator) {
928   if (use_short_repeated_primitives_ &&
929       field->is_repeated() &&
930       field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
931       field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
932     PrintShortRepeatedField(message, reflection, field, generator);
933     return;
934   }
935 
936   int count = 0;
937 
938   if (field->is_repeated()) {
939     count = reflection->FieldSize(message, field);
940   } else if (reflection->HasField(message, field)) {
941     count = 1;
942   }
943 
944   for (int j = 0; j < count; ++j) {
945     PrintFieldName(message, reflection, field, generator);
946 
947     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
948       if (single_line_mode_) {
949         generator.Print(" { ");
950       } else {
951         generator.Print(" {\n");
952         generator.Indent();
953       }
954     } else {
955       generator.Print(": ");
956     }
957 
958     // Write the field value.
959     int field_index = j;
960     if (!field->is_repeated()) {
961       field_index = -1;
962     }
963 
964     PrintFieldValue(message, reflection, field, field_index, generator);
965 
966     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
967       if (single_line_mode_) {
968         generator.Print("} ");
969       } else {
970         generator.Outdent();
971         generator.Print("}\n");
972       }
973     } else {
974       if (single_line_mode_) {
975         generator.Print(" ");
976       } else {
977         generator.Print("\n");
978       }
979     }
980   }
981 }
982 
PrintShortRepeatedField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator & generator)983 void TextFormat::Printer::PrintShortRepeatedField(const Message& message,
984                                                   const Reflection* reflection,
985                                                   const FieldDescriptor* field,
986                                                   TextGenerator& generator) {
987   // Print primitive repeated field in short form.
988   PrintFieldName(message, reflection, field, generator);
989 
990   int size = reflection->FieldSize(message, field);
991   generator.Print(": [");
992   for (int i = 0; i < size; i++) {
993     if (i > 0) generator.Print(", ");
994     PrintFieldValue(message, reflection, field, i, generator);
995   }
996   if (single_line_mode_) {
997     generator.Print("] ");
998   } else {
999     generator.Print("]\n");
1000   }
1001 }
1002 
PrintFieldName(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator & generator)1003 void TextFormat::Printer::PrintFieldName(const Message& message,
1004                                          const Reflection* reflection,
1005                                          const FieldDescriptor* field,
1006                                          TextGenerator& generator) {
1007   if (field->is_extension()) {
1008     generator.Print("[");
1009     // We special-case MessageSet elements for compatibility with proto1.
1010     if (field->containing_type()->options().message_set_wire_format()
1011         && field->type() == FieldDescriptor::TYPE_MESSAGE
1012         && field->is_optional()
1013         && field->extension_scope() == field->message_type()) {
1014       generator.Print(field->message_type()->full_name());
1015     } else {
1016       generator.Print(field->full_name());
1017     }
1018     generator.Print("]");
1019   } else {
1020     if (field->type() == FieldDescriptor::TYPE_GROUP) {
1021       // Groups must be serialized with their original capitalization.
1022       generator.Print(field->message_type()->name());
1023     } else {
1024       generator.Print(field->name());
1025     }
1026   }
1027 }
1028 
PrintFieldValue(const Message & message,const Reflection * reflection,const FieldDescriptor * field,int index,TextGenerator & generator)1029 void TextFormat::Printer::PrintFieldValue(
1030     const Message& message,
1031     const Reflection* reflection,
1032     const FieldDescriptor* field,
1033     int index,
1034     TextGenerator& generator) {
1035   GOOGLE_DCHECK(field->is_repeated() || (index == -1))
1036       << "Index must be -1 for non-repeated fields";
1037 
1038   switch (field->cpp_type()) {
1039 #define OUTPUT_FIELD(CPPTYPE, METHOD, TO_STRING)                             \
1040       case FieldDescriptor::CPPTYPE_##CPPTYPE:                               \
1041         generator.Print(TO_STRING(field->is_repeated() ?                     \
1042           reflection->GetRepeated##METHOD(message, field, index) :           \
1043           reflection->Get##METHOD(message, field)));                         \
1044         break;                                                               \
1045 
1046       OUTPUT_FIELD( INT32,  Int32, SimpleItoa);
1047       OUTPUT_FIELD( INT64,  Int64, SimpleItoa);
1048       OUTPUT_FIELD(UINT32, UInt32, SimpleItoa);
1049       OUTPUT_FIELD(UINT64, UInt64, SimpleItoa);
1050       OUTPUT_FIELD( FLOAT,  Float, SimpleFtoa);
1051       OUTPUT_FIELD(DOUBLE, Double, SimpleDtoa);
1052 #undef OUTPUT_FIELD
1053 
1054       case FieldDescriptor::CPPTYPE_STRING: {
1055         string scratch;
1056         const string& value = field->is_repeated() ?
1057             reflection->GetRepeatedStringReference(
1058               message, field, index, &scratch) :
1059             reflection->GetStringReference(message, field, &scratch);
1060 
1061         generator.Print("\"");
1062         if (utf8_string_escaping_) {
1063           generator.Print(strings::Utf8SafeCEscape(value));
1064         } else {
1065           generator.Print(CEscape(value));
1066         }
1067         generator.Print("\"");
1068 
1069         break;
1070       }
1071 
1072       case FieldDescriptor::CPPTYPE_BOOL:
1073         if (field->is_repeated()) {
1074           generator.Print(reflection->GetRepeatedBool(message, field, index)
1075                           ? "true" : "false");
1076         } else {
1077           generator.Print(reflection->GetBool(message, field)
1078                           ? "true" : "false");
1079         }
1080         break;
1081 
1082       case FieldDescriptor::CPPTYPE_ENUM:
1083         generator.Print(field->is_repeated() ?
1084           reflection->GetRepeatedEnum(message, field, index)->name() :
1085           reflection->GetEnum(message, field)->name());
1086         break;
1087 
1088       case FieldDescriptor::CPPTYPE_MESSAGE:
1089         Print(field->is_repeated() ?
1090                 reflection->GetRepeatedMessage(message, field, index) :
1091                 reflection->GetMessage(message, field),
1092               generator);
1093         break;
1094   }
1095 }
1096 
Print(const Message & message,io::ZeroCopyOutputStream * output)1097 /* static */ bool TextFormat::Print(const Message& message,
1098                                     io::ZeroCopyOutputStream* output) {
1099   return Printer().Print(message, output);
1100 }
1101 
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output)1102 /* static */ bool TextFormat::PrintUnknownFields(
1103     const UnknownFieldSet& unknown_fields,
1104     io::ZeroCopyOutputStream* output) {
1105   return Printer().PrintUnknownFields(unknown_fields, output);
1106 }
1107 
PrintToString(const Message & message,string * output)1108 /* static */ bool TextFormat::PrintToString(
1109     const Message& message, string* output) {
1110   return Printer().PrintToString(message, output);
1111 }
1112 
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,string * output)1113 /* static */ bool TextFormat::PrintUnknownFieldsToString(
1114     const UnknownFieldSet& unknown_fields, string* output) {
1115   return Printer().PrintUnknownFieldsToString(unknown_fields, output);
1116 }
1117 
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,string * output)1118 /* static */ void TextFormat::PrintFieldValueToString(
1119     const Message& message,
1120     const FieldDescriptor* field,
1121     int index,
1122     string* output) {
1123   return Printer().PrintFieldValueToString(message, field, index, output);
1124 }
1125 
ParseFieldValueFromString(const string & input,const FieldDescriptor * field,Message * message)1126 /* static */ bool TextFormat::ParseFieldValueFromString(
1127     const string& input,
1128     const FieldDescriptor* field,
1129     Message* message) {
1130   return Parser().ParseFieldValueFromString(input, field, message);
1131 }
1132 
1133 // Prints an integer as hex with a fixed number of digits dependent on the
1134 // integer type.
1135 template<typename IntType>
PaddedHex(IntType value)1136 static string PaddedHex(IntType value) {
1137   string result;
1138   result.reserve(sizeof(value) * 2);
1139   for (int i = sizeof(value) * 2 - 1; i >= 0; i--) {
1140     result.push_back(int_to_hex_digit(value >> (i*4) & 0x0F));
1141   }
1142   return result;
1143 }
1144 
PrintUnknownFields(const UnknownFieldSet & unknown_fields,TextGenerator & generator)1145 void TextFormat::Printer::PrintUnknownFields(
1146     const UnknownFieldSet& unknown_fields, TextGenerator& generator) {
1147   for (int i = 0; i < unknown_fields.field_count(); i++) {
1148     const UnknownField& field = unknown_fields.field(i);
1149     string field_number = SimpleItoa(field.number());
1150 
1151     switch (field.type()) {
1152       case UnknownField::TYPE_VARINT:
1153         generator.Print(field_number);
1154         generator.Print(": ");
1155         generator.Print(SimpleItoa(field.varint()));
1156         if (single_line_mode_) {
1157           generator.Print(" ");
1158         } else {
1159           generator.Print("\n");
1160         }
1161         break;
1162       case UnknownField::TYPE_FIXED32: {
1163         generator.Print(field_number);
1164         generator.Print(": 0x");
1165         char buffer[kFastToBufferSize];
1166         generator.Print(FastHex32ToBuffer(field.fixed32(), buffer));
1167         if (single_line_mode_) {
1168           generator.Print(" ");
1169         } else {
1170           generator.Print("\n");
1171         }
1172         break;
1173       }
1174       case UnknownField::TYPE_FIXED64: {
1175         generator.Print(field_number);
1176         generator.Print(": 0x");
1177         char buffer[kFastToBufferSize];
1178         generator.Print(FastHex64ToBuffer(field.fixed64(), buffer));
1179         if (single_line_mode_) {
1180           generator.Print(" ");
1181         } else {
1182           generator.Print("\n");
1183         }
1184         break;
1185       }
1186       case UnknownField::TYPE_LENGTH_DELIMITED: {
1187         generator.Print(field_number);
1188         const string& value = field.length_delimited();
1189         UnknownFieldSet embedded_unknown_fields;
1190         if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) {
1191           // This field is parseable as a Message.
1192           // So it is probably an embedded message.
1193           if (single_line_mode_) {
1194             generator.Print(" { ");
1195           } else {
1196             generator.Print(" {\n");
1197             generator.Indent();
1198           }
1199           PrintUnknownFields(embedded_unknown_fields, generator);
1200           if (single_line_mode_) {
1201             generator.Print("} ");
1202           } else {
1203             generator.Outdent();
1204             generator.Print("}\n");
1205           }
1206         } else {
1207           // This field is not parseable as a Message.
1208           // So it is probably just a plain string.
1209           generator.Print(": \"");
1210           generator.Print(CEscape(value));
1211           generator.Print("\"");
1212           if (single_line_mode_) {
1213             generator.Print(" ");
1214           } else {
1215             generator.Print("\n");
1216           }
1217         }
1218         break;
1219       }
1220       case UnknownField::TYPE_GROUP:
1221         generator.Print(field_number);
1222         if (single_line_mode_) {
1223           generator.Print(" { ");
1224         } else {
1225           generator.Print(" {\n");
1226           generator.Indent();
1227         }
1228         PrintUnknownFields(field.group(), generator);
1229         if (single_line_mode_) {
1230           generator.Print("} ");
1231         } else {
1232           generator.Outdent();
1233           generator.Print("}\n");
1234         }
1235         break;
1236     }
1237   }
1238 }
1239 
1240 }  // namespace protobuf
1241 }  // namespace google
1242