• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: jschorr@google.com (Joseph Schorr)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/text_format.h>
36 
37 #include <float.h>
38 #include <stdio.h>
39 
40 #include <algorithm>
41 #include <atomic>
42 #include <climits>
43 #include <cmath>
44 #include <limits>
45 #include <vector>
46 
47 #include <google/protobuf/io/coded_stream.h>
48 #include <google/protobuf/io/tokenizer.h>
49 #include <google/protobuf/io/zero_copy_stream.h>
50 #include <google/protobuf/io/zero_copy_stream_impl.h>
51 #include <google/protobuf/stubs/strutil.h>
52 #include <google/protobuf/any.h>
53 #include <google/protobuf/descriptor.h>
54 #include <google/protobuf/descriptor.pb.h>
55 #include <google/protobuf/dynamic_message.h>
56 #include <google/protobuf/io/strtod.h>
57 #include <google/protobuf/map_field.h>
58 #include <google/protobuf/message.h>
59 #include <google/protobuf/repeated_field.h>
60 #include <google/protobuf/unknown_field_set.h>
61 #include <google/protobuf/wire_format_lite.h>
62 #include <google/protobuf/stubs/map_util.h>
63 #include <google/protobuf/stubs/stl_util.h>
64 
65 // Must be included last.
66 #include <google/protobuf/port_def.inc>
67 
68 #define DEBUG_STRING_SILENT_MARKER "\t "
69 
70 namespace google {
71 namespace protobuf {
72 
73 namespace {
74 
// Returns true when `str` starts with a "0x" or "0X" prefix. Only the first
// two characters are inspected; whatever follows is not validated.
inline bool IsHexNumber(const std::string& str) {
  if (str.size() < 2 || str[0] != '0') {
    return false;
  }
  const char radix_marker = str[1];
  return radix_marker == 'x' || radix_marker == 'X';
}
79 
// Returns true when `str` starts with '0' followed by an octal digit
// ('0'..'7'). Only the first two characters are inspected.
inline bool IsOctNumber(const std::string& str) {
  if (str.size() < 2 || str[0] != '0') {
    return false;
  }
  const char first_digit = str[1];
  return '0' <= first_digit && first_digit <= '7';
}
84 
85 }  // namespace
86 
87 namespace internal {
88 // Controls insertion of DEBUG_STRING_SILENT_MARKER.
89 PROTOBUF_EXPORT std::atomic<bool> enable_debug_text_format_marker;
90 }  // namespace internal
91 
DebugString() const92 std::string Message::DebugString() const {
93   std::string debug_string;
94 
95   TextFormat::Printer printer;
96   printer.SetExpandAny(true);
97   printer.SetInsertSilentMarker(internal::enable_debug_text_format_marker.load(
98       std::memory_order_relaxed));
99 
100   printer.PrintToString(*this, &debug_string);
101 
102   return debug_string;
103 }
104 
ShortDebugString() const105 std::string Message::ShortDebugString() const {
106   std::string debug_string;
107 
108   TextFormat::Printer printer;
109   printer.SetSingleLineMode(true);
110   printer.SetExpandAny(true);
111   printer.SetInsertSilentMarker(internal::enable_debug_text_format_marker.load(
112       std::memory_order_relaxed));
113 
114   printer.PrintToString(*this, &debug_string);
115   // Single line mode currently might have an extra space at the end.
116   if (!debug_string.empty() && debug_string[debug_string.size() - 1] == ' ') {
117     debug_string.resize(debug_string.size() - 1);
118   }
119 
120   return debug_string;
121 }
122 
Utf8DebugString() const123 std::string Message::Utf8DebugString() const {
124   std::string debug_string;
125 
126   TextFormat::Printer printer;
127   printer.SetUseUtf8StringEscaping(true);
128   printer.SetExpandAny(true);
129   printer.SetInsertSilentMarker(internal::enable_debug_text_format_marker.load(
130       std::memory_order_relaxed));
131 
132   printer.PrintToString(*this, &debug_string);
133 
134   return debug_string;
135 }
136 
PrintDebugString() const137 void Message::PrintDebugString() const { printf("%s", DebugString().c_str()); }
138 
139 
140 // ===========================================================================
141 // Implementation of the parse information tree class.
RecordLocation(const FieldDescriptor * field,TextFormat::ParseLocationRange range)142 void TextFormat::ParseInfoTree::RecordLocation(
143     const FieldDescriptor* field, TextFormat::ParseLocationRange range) {
144   locations_[field].push_back(range);
145 }
146 
CreateNested(const FieldDescriptor * field)147 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::CreateNested(
148     const FieldDescriptor* field) {
149   // Owned by us in the map.
150   auto& vec = nested_[field];
151   vec.emplace_back(new TextFormat::ParseInfoTree());
152   return vec.back().get();
153 }
154 
CheckFieldIndex(const FieldDescriptor * field,int index)155 void CheckFieldIndex(const FieldDescriptor* field, int index) {
156   if (field == nullptr) {
157     return;
158   }
159 
160   if (field->is_repeated() && index == -1) {
161     GOOGLE_LOG(DFATAL) << "Index must be in range of repeated field values. "
162                 << "Field: " << field->name();
163   } else if (!field->is_repeated() && index != -1) {
164     GOOGLE_LOG(DFATAL) << "Index must be -1 for singular fields."
165                 << "Field: " << field->name();
166   }
167 }
168 
GetLocationRange(const FieldDescriptor * field,int index) const169 TextFormat::ParseLocationRange TextFormat::ParseInfoTree::GetLocationRange(
170     const FieldDescriptor* field, int index) const {
171   CheckFieldIndex(field, index);
172   if (index == -1) {
173     index = 0;
174   }
175 
176   const std::vector<TextFormat::ParseLocationRange>* locations =
177       FindOrNull(locations_, field);
178   if (locations == nullptr ||
179       index >= static_cast<int64_t>(locations->size())) {
180     return TextFormat::ParseLocationRange();
181   }
182 
183   return (*locations)[index];
184 }
185 
GetTreeForNested(const FieldDescriptor * field,int index) const186 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::GetTreeForNested(
187     const FieldDescriptor* field, int index) const {
188   CheckFieldIndex(field, index);
189   if (index == -1) {
190     index = 0;
191   }
192 
193   auto it = nested_.find(field);
194   if (it == nested_.end() || index >= static_cast<int64_t>(it->second.size())) {
195     return nullptr;
196   }
197 
198   return it->second[index].get();
199 }
200 
201 namespace {
202 // These functions implement the behavior of the "default" TextFormat::Finder,
203 // they are defined as standalone to be called when finder_ is nullptr.
DefaultFinderFindExtension(Message * message,const std::string & name)204 const FieldDescriptor* DefaultFinderFindExtension(Message* message,
205                                                   const std::string& name) {
206   const Descriptor* descriptor = message->GetDescriptor();
207   return descriptor->file()->pool()->FindExtensionByPrintableName(descriptor,
208                                                                   name);
209 }
210 
DefaultFinderFindExtensionByNumber(const Descriptor * descriptor,int number)211 const FieldDescriptor* DefaultFinderFindExtensionByNumber(
212     const Descriptor* descriptor, int number) {
213   return descriptor->file()->pool()->FindExtensionByNumber(descriptor, number);
214 }
215 
DefaultFinderFindAnyType(const Message & message,const std::string & prefix,const std::string & name)216 const Descriptor* DefaultFinderFindAnyType(const Message& message,
217                                            const std::string& prefix,
218                                            const std::string& name) {
219   if (prefix != internal::kTypeGoogleApisComPrefix &&
220       prefix != internal::kTypeGoogleProdComPrefix) {
221     return nullptr;
222   }
223   return message.GetDescriptor()->file()->pool()->FindMessageTypeByName(name);
224 }
225 }  // namespace
226 
227 // ===========================================================================
228 // Internal class for parsing an ASCII representation of a Protocol Message.
229 // This class makes use of the Protocol Message compiler's tokenizer found
230 // in //net/proto2/io/public/tokenizer.h. Note that class's Parse
231 // method is *not* thread-safe and should only be used in a single thread at
232 // a time.
233 
// Shorthand that keeps the parsing code readable: "DO(foo)" evaluates foo
// once and makes the enclosing function return false when foo is false.
// The expansion is a complete if/else, so it cannot capture an `else` that
// follows it. Borrowed from parser.cc (Thanks Kenton!).
#define DO(STATEMENT)  \
  if (!(STATEMENT)) {  \
    return false;      \
  } else {             \
  }
242 
243 class TextFormat::Parser::ParserImpl {
244  public:
245   // Determines if repeated values for non-repeated fields and
246   // oneofs are permitted, e.g., the string "foo: 1 foo: 2" for a
247   // required/optional field named "foo", or "baz: 1 qux: 2"
248   // where "baz" and "qux" are members of the same oneof.
249   enum SingularOverwritePolicy {
250     ALLOW_SINGULAR_OVERWRITES = 0,   // the last value is retained
251     FORBID_SINGULAR_OVERWRITES = 1,  // an error is issued
252   };
253 
ParserImpl(const Descriptor * root_message_type,io::ZeroCopyInputStream * input_stream,io::ErrorCollector * error_collector,const TextFormat::Finder * finder,ParseInfoTree * parse_info_tree,SingularOverwritePolicy singular_overwrite_policy,bool allow_case_insensitive_field,bool allow_unknown_field,bool allow_unknown_extension,bool allow_unknown_enum,bool allow_field_number,bool allow_relaxed_whitespace,bool allow_partial,int recursion_limit)254   ParserImpl(const Descriptor* root_message_type,
255              io::ZeroCopyInputStream* input_stream,
256              io::ErrorCollector* error_collector,
257              const TextFormat::Finder* finder, ParseInfoTree* parse_info_tree,
258              SingularOverwritePolicy singular_overwrite_policy,
259              bool allow_case_insensitive_field, bool allow_unknown_field,
260              bool allow_unknown_extension, bool allow_unknown_enum,
261              bool allow_field_number, bool allow_relaxed_whitespace,
262              bool allow_partial, int recursion_limit)
263       : error_collector_(error_collector),
264         finder_(finder),
265         parse_info_tree_(parse_info_tree),
266         tokenizer_error_collector_(this),
267         tokenizer_(input_stream, &tokenizer_error_collector_),
268         root_message_type_(root_message_type),
269         singular_overwrite_policy_(singular_overwrite_policy),
270         allow_case_insensitive_field_(allow_case_insensitive_field),
271         allow_unknown_field_(allow_unknown_field),
272         allow_unknown_extension_(allow_unknown_extension),
273         allow_unknown_enum_(allow_unknown_enum),
274         allow_field_number_(allow_field_number),
275         allow_partial_(allow_partial),
276         initial_recursion_limit_(recursion_limit),
277         recursion_limit_(recursion_limit),
278         had_silent_marker_(false),
279         had_errors_(false) {
280     // For backwards-compatibility with proto1, we need to allow the 'f' suffix
281     // for floats.
282     tokenizer_.set_allow_f_after_float(true);
283 
284     // '#' starts a comment.
285     tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);
286 
287     if (allow_relaxed_whitespace) {
288       tokenizer_.set_require_space_after_number(false);
289       tokenizer_.set_allow_multiline_strings(true);
290     }
291 
292     // Consume the starting token.
293     tokenizer_.Next();
294   }
~ParserImpl()295   ~ParserImpl() {}
296 
297   // Parses the ASCII representation specified in input and saves the
298   // information into the output pointer (a Message). Returns
299   // false if an error occurs (an error will also be logged to
300   // GOOGLE_LOG(ERROR)).
Parse(Message * output)301   bool Parse(Message* output) {
302     // Consume fields until we cannot do so anymore.
303     while (true) {
304       if (LookingAtType(io::Tokenizer::TYPE_END)) {
305         // Ensures recursion limit properly unwinded, but only for success
306         // cases. This implicitly avoids the check when `Parse` returns false
307         // via `DO(...)`.
308         GOOGLE_DCHECK(had_errors_ || recursion_limit_ == initial_recursion_limit_)
309             << "Recursion limit at end of parse should be "
310             << initial_recursion_limit_ << ", but was " << recursion_limit_
311             << ". Difference of " << initial_recursion_limit_ - recursion_limit_
312             << " stack frames not accounted for stack unwind.";
313 
314         return !had_errors_;
315       }
316 
317       DO(ConsumeField(output));
318     }
319   }
320 
ParseField(const FieldDescriptor * field,Message * output)321   bool ParseField(const FieldDescriptor* field, Message* output) {
322     bool suc;
323     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
324       suc = ConsumeFieldMessage(output, output->GetReflection(), field);
325     } else {
326       suc = ConsumeFieldValue(output, output->GetReflection(), field);
327     }
328     return suc && LookingAtType(io::Tokenizer::TYPE_END);
329   }
330 
ReportError(int line,int col,const std::string & message)331   void ReportError(int line, int col, const std::string& message) {
332     had_errors_ = true;
333     if (error_collector_ == nullptr) {
334       if (line >= 0) {
335         GOOGLE_LOG(ERROR) << "Error parsing text-format "
336                    << root_message_type_->full_name() << ": " << (line + 1)
337                    << ":" << (col + 1) << ": " << message;
338       } else {
339         GOOGLE_LOG(ERROR) << "Error parsing text-format "
340                    << root_message_type_->full_name() << ": " << message;
341       }
342     } else {
343       error_collector_->AddError(line, col, message);
344     }
345   }
346 
ReportWarning(int line,int col,const std::string & message)347   void ReportWarning(int line, int col, const std::string& message) {
348     if (error_collector_ == nullptr) {
349       if (line >= 0) {
350         GOOGLE_LOG(WARNING) << "Warning parsing text-format "
351                      << root_message_type_->full_name() << ": " << (line + 1)
352                      << ":" << (col + 1) << ": " << message;
353       } else {
354         GOOGLE_LOG(WARNING) << "Warning parsing text-format "
355                      << root_message_type_->full_name() << ": " << message;
356       }
357     } else {
358       error_collector_->AddWarning(line, col, message);
359     }
360   }
361 
362  private:
363   static constexpr int32_t kint32max = std::numeric_limits<int32_t>::max();
364   static constexpr uint32_t kuint32max = std::numeric_limits<uint32_t>::max();
365   static constexpr int64_t kint64min = std::numeric_limits<int64_t>::min();
366   static constexpr int64_t kint64max = std::numeric_limits<int64_t>::max();
367   static constexpr uint64_t kuint64max = std::numeric_limits<uint64_t>::max();
368 
369   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl);
370 
371   // Reports an error with the given message with information indicating
372   // the position (as derived from the current token).
ReportError(const std::string & message)373   void ReportError(const std::string& message) {
374     ReportError(tokenizer_.current().line, tokenizer_.current().column,
375                 message);
376   }
377 
378   // Reports a warning with the given message with information indicating
379   // the position (as derived from the current token).
ReportWarning(const std::string & message)380   void ReportWarning(const std::string& message) {
381     ReportWarning(tokenizer_.current().line, tokenizer_.current().column,
382                   message);
383   }
384 
385   // Consumes the specified message with the given starting delimiter.
386   // This method checks to see that the end delimiter at the conclusion of
387   // the consumption matches the starting delimiter passed in here.
ConsumeMessage(Message * message,const std::string delimiter)388   bool ConsumeMessage(Message* message, const std::string delimiter) {
389     while (!LookingAt(">") && !LookingAt("}")) {
390       DO(ConsumeField(message));
391     }
392 
393     // Confirm that we have a valid ending delimiter.
394     DO(Consume(delimiter));
395     return true;
396   }
397 
398   // Consume either "<" or "{".
ConsumeMessageDelimiter(std::string * delimiter)399   bool ConsumeMessageDelimiter(std::string* delimiter) {
400     if (TryConsume("<")) {
401       *delimiter = ">";
402     } else {
403       DO(Consume("{"));
404       *delimiter = "}";
405     }
406     return true;
407   }
408 
409 
410   // Consumes the current field (as returned by the tokenizer) on the
411   // passed in message.
ConsumeField(Message * message)412   bool ConsumeField(Message* message) {
413     const Reflection* reflection = message->GetReflection();
414     const Descriptor* descriptor = message->GetDescriptor();
415 
416     std::string field_name;
417     bool reserved_field = false;
418     const FieldDescriptor* field = nullptr;
419     int start_line = tokenizer_.current().line;
420     int start_column = tokenizer_.current().column;
421 
422     const FieldDescriptor* any_type_url_field;
423     const FieldDescriptor* any_value_field;
424     if (internal::GetAnyFieldDescriptors(*message, &any_type_url_field,
425                                          &any_value_field) &&
426         TryConsume("[")) {
427       std::string full_type_name, prefix;
428       DO(ConsumeAnyTypeUrl(&full_type_name, &prefix));
429       std::string prefix_and_full_type_name =
430           StrCat(prefix, full_type_name);
431       DO(ConsumeBeforeWhitespace("]"));
432       TryConsumeWhitespace();
433       // ':' is optional between message labels and values.
434       if (TryConsumeBeforeWhitespace(":")) {
435         TryConsumeWhitespace();
436       }
437       std::string serialized_value;
438       const Descriptor* value_descriptor =
439           finder_ ? finder_->FindAnyType(*message, prefix, full_type_name)
440                   : DefaultFinderFindAnyType(*message, prefix, full_type_name);
441       if (value_descriptor == nullptr) {
442         ReportError("Could not find type \"" + prefix_and_full_type_name +
443                     "\" stored in google.protobuf.Any.");
444         return false;
445       }
446       DO(ConsumeAnyValue(value_descriptor, &serialized_value));
447       if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
448         // Fail if any_type_url_field has already been specified.
449         if ((!any_type_url_field->is_repeated() &&
450              reflection->HasField(*message, any_type_url_field)) ||
451             (!any_value_field->is_repeated() &&
452              reflection->HasField(*message, any_value_field))) {
453           ReportError("Non-repeated Any specified multiple times.");
454           return false;
455         }
456       }
457       reflection->SetString(message, any_type_url_field,
458                             prefix_and_full_type_name);
459       reflection->SetString(message, any_value_field, serialized_value);
460       return true;
461     }
462     if (TryConsume("[")) {
463       // Extension.
464       DO(ConsumeFullTypeName(&field_name));
465       DO(ConsumeBeforeWhitespace("]"));
466       TryConsumeWhitespace();
467 
468       field = finder_ ? finder_->FindExtension(message, field_name)
469                       : DefaultFinderFindExtension(message, field_name);
470 
471       if (field == nullptr) {
472         if (!allow_unknown_field_ && !allow_unknown_extension_) {
473           ReportError("Extension \"" + field_name +
474                       "\" is not defined or "
475                       "is not an extension of \"" +
476                       descriptor->full_name() + "\".");
477           return false;
478         } else {
479           ReportWarning("Ignoring extension \"" + field_name +
480                         "\" which is not defined or is not an extension of \"" +
481                         descriptor->full_name() + "\".");
482         }
483       }
484     } else {
485       DO(ConsumeIdentifierBeforeWhitespace(&field_name));
486       TryConsumeWhitespace();
487 
488       int32_t field_number;
489       if (allow_field_number_ && safe_strto32(field_name, &field_number)) {
490         if (descriptor->IsExtensionNumber(field_number)) {
491           field = finder_
492                       ? finder_->FindExtensionByNumber(descriptor, field_number)
493                       : DefaultFinderFindExtensionByNumber(descriptor,
494                                                            field_number);
495         } else if (descriptor->IsReservedNumber(field_number)) {
496           reserved_field = true;
497         } else {
498           field = descriptor->FindFieldByNumber(field_number);
499         }
500       } else {
501         field = descriptor->FindFieldByName(field_name);
502         // Group names are expected to be capitalized as they appear in the
503         // .proto file, which actually matches their type names, not their
504         // field names.
505         if (field == nullptr) {
506           std::string lower_field_name = field_name;
507           LowerString(&lower_field_name);
508           field = descriptor->FindFieldByName(lower_field_name);
509           // If the case-insensitive match worked but the field is NOT a group,
510           if (field != nullptr &&
511               field->type() != FieldDescriptor::TYPE_GROUP) {
512             field = nullptr;
513           }
514         }
515         // Again, special-case group names as described above.
516         if (field != nullptr && field->type() == FieldDescriptor::TYPE_GROUP &&
517             field->message_type()->name() != field_name) {
518           field = nullptr;
519         }
520 
521         if (field == nullptr && allow_case_insensitive_field_) {
522           std::string lower_field_name = field_name;
523           LowerString(&lower_field_name);
524           field = descriptor->FindFieldByLowercaseName(lower_field_name);
525         }
526 
527         if (field == nullptr) {
528           reserved_field = descriptor->IsReservedName(field_name);
529         }
530       }
531 
532       if (field == nullptr && !reserved_field) {
533         if (!allow_unknown_field_) {
534           ReportError("Message type \"" + descriptor->full_name() +
535                       "\" has no field named \"" + field_name + "\".");
536           return false;
537         } else {
538           ReportWarning("Message type \"" + descriptor->full_name() +
539                         "\" has no field named \"" + field_name + "\".");
540         }
541       }
542     }
543 
544     // Skips unknown or reserved fields.
545     if (field == nullptr) {
546       GOOGLE_CHECK(allow_unknown_field_ || allow_unknown_extension_ || reserved_field);
547 
548       // Try to guess the type of this field.
549       // If this field is not a message, there should be a ":" between the
550       // field name and the field value and also the field value should not
551       // start with "{" or "<" which indicates the beginning of a message body.
552       // If there is no ":" or there is a "{" or "<" after ":", this field has
553       // to be a message or the input is ill-formed.
554       if (TryConsumeBeforeWhitespace(":")) {
555         TryConsumeWhitespace();
556         if (!LookingAt("{") && !LookingAt("<")) {
557           return SkipFieldValue();
558         }
559       }
560       return SkipFieldMessage();
561     }
562 
563     if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
564       // Fail if the field is not repeated and it has already been specified.
565       if (!field->is_repeated() && reflection->HasField(*message, field)) {
566         ReportError("Non-repeated field \"" + field_name +
567                     "\" is specified multiple times.");
568         return false;
569       }
570       // Fail if the field is a member of a oneof and another member has already
571       // been specified.
572       const OneofDescriptor* oneof = field->containing_oneof();
573       if (oneof != nullptr && reflection->HasOneof(*message, oneof)) {
574         const FieldDescriptor* other_field =
575             reflection->GetOneofFieldDescriptor(*message, oneof);
576         ReportError("Field \"" + field_name +
577                     "\" is specified along with "
578                     "field \"" +
579                     other_field->name() +
580                     "\", another member "
581                     "of oneof \"" +
582                     oneof->name() + "\".");
583         return false;
584       }
585     }
586 
587     // Perform special handling for embedded message types.
588     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
589       // ':' is optional here.
590       bool consumed_semicolon = TryConsumeBeforeWhitespace(":");
591       if (consumed_semicolon) {
592         TryConsumeWhitespace();
593       }
594       if (consumed_semicolon && field->options().weak() &&
595           LookingAtType(io::Tokenizer::TYPE_STRING)) {
596         // we are getting a bytes string for a weak field.
597         std::string tmp;
598         DO(ConsumeString(&tmp));
599         MessageFactory* factory =
600             finder_ ? finder_->FindExtensionFactory(field) : nullptr;
601         reflection->MutableMessage(message, field, factory)
602             ->ParseFromString(tmp);
603         goto label_skip_parsing;
604       }
605     } else {
606       // ':' is required here.
607       DO(ConsumeBeforeWhitespace(":"));
608       TryConsumeWhitespace();
609     }
610 
611     if (field->is_repeated() && TryConsume("[")) {
612       // Short repeated format, e.g.  "foo: [1, 2, 3]".
613       if (!TryConsume("]")) {
614         // "foo: []" is treated as empty.
615         while (true) {
616           if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
617             // Perform special handling for embedded message types.
618             DO(ConsumeFieldMessage(message, reflection, field));
619           } else {
620             DO(ConsumeFieldValue(message, reflection, field));
621           }
622           if (TryConsume("]")) {
623             break;
624           }
625           DO(Consume(","));
626         }
627       }
628     } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
629       DO(ConsumeFieldMessage(message, reflection, field));
630     } else {
631       DO(ConsumeFieldValue(message, reflection, field));
632     }
633   label_skip_parsing:
634     // For historical reasons, fields may optionally be separated by commas or
635     // semicolons.
636     TryConsume(";") || TryConsume(",");
637 
638     if (field->options().deprecated()) {
639       ReportWarning("text format contains deprecated field \"" + field_name +
640                     "\"");
641     }
642 
643     // If a parse info tree exists, add the location for the parsed
644     // field.
645     if (parse_info_tree_ != nullptr) {
646       int end_line = tokenizer_.previous().line;
647       int end_column = tokenizer_.previous().end_column;
648 
649       RecordLocation(parse_info_tree_, field,
650                      ParseLocationRange(ParseLocation(start_line, start_column),
651                                         ParseLocation(end_line, end_column)));
652     }
653 
654     return true;
655   }
656 
657   // Skips the next field including the field's name and value.
SkipField()658   bool SkipField() {
659     std::string field_name;
660     if (TryConsume("[")) {
661       // Extension name or type URL.
662       DO(ConsumeTypeUrlOrFullTypeName(&field_name));
663       DO(ConsumeBeforeWhitespace("]"));
664     } else {
665       DO(ConsumeIdentifierBeforeWhitespace(&field_name));
666     }
667     TryConsumeWhitespace();
668 
669     // Try to guess the type of this field.
670     // If this field is not a message, there should be a ":" between the
671     // field name and the field value and also the field value should not
672     // start with "{" or "<" which indicates the beginning of a message body.
673     // If there is no ":" or there is a "{" or "<" after ":", this field has
674     // to be a message or the input is ill-formed.
675     if (TryConsumeBeforeWhitespace(":")) {
676       TryConsumeWhitespace();
677       if (!LookingAt("{") && !LookingAt("<")) {
678         DO(SkipFieldValue());
679       } else {
680         DO(SkipFieldMessage());
681       }
682     } else {
683       DO(SkipFieldMessage());
684     }
685     // For historical reasons, fields may optionally be separated by commas or
686     // semicolons.
687     TryConsume(";") || TryConsume(",");
688     return true;
689   }
690 
ConsumeFieldMessage(Message * message,const Reflection * reflection,const FieldDescriptor * field)691   bool ConsumeFieldMessage(Message* message, const Reflection* reflection,
692                            const FieldDescriptor* field) {
693     if (--recursion_limit_ < 0) {
694       ReportError(
695           StrCat("Message is too deep, the parser exceeded the "
696                        "configured recursion limit of ",
697                        initial_recursion_limit_, "."));
698       return false;
699     }
700     // If the parse information tree is not nullptr, create a nested one
701     // for the nested message.
702     ParseInfoTree* parent = parse_info_tree_;
703     if (parent != nullptr) {
704       parse_info_tree_ = CreateNested(parent, field);
705     }
706 
707     std::string delimiter;
708     DO(ConsumeMessageDelimiter(&delimiter));
709     MessageFactory* factory =
710         finder_ ? finder_->FindExtensionFactory(field) : nullptr;
711     if (field->is_repeated()) {
712       DO(ConsumeMessage(reflection->AddMessage(message, field, factory),
713                         delimiter));
714     } else {
715       DO(ConsumeMessage(reflection->MutableMessage(message, field, factory),
716                         delimiter));
717     }
718 
719     ++recursion_limit_;
720 
721     // Reset the parse information tree.
722     parse_info_tree_ = parent;
723     return true;
724   }
725 
726   // Skips the whole body of a message including the beginning delimiter and
727   // the ending delimiter.
SkipFieldMessage()728   bool SkipFieldMessage() {
729     if (--recursion_limit_ < 0) {
730       ReportError(
731           StrCat("Message is too deep, the parser exceeded the "
732                        "configured recursion limit of ",
733                        initial_recursion_limit_, "."));
734       return false;
735     }
736 
737     std::string delimiter;
738     DO(ConsumeMessageDelimiter(&delimiter));
739     while (!LookingAt(">") && !LookingAt("}")) {
740       DO(SkipField());
741     }
742     DO(Consume(delimiter));
743 
744     ++recursion_limit_;
745     return true;
746   }
747 
ConsumeFieldValue(Message * message,const Reflection * reflection,const FieldDescriptor * field)748   bool ConsumeFieldValue(Message* message, const Reflection* reflection,
749                          const FieldDescriptor* field) {
750 // Define an easy to use macro for setting fields. This macro checks
751 // to see if the field is repeated (in which case we need to use the Add
752 // methods or not (in which case we need to use the Set methods).
753 #define SET_FIELD(CPPTYPE, VALUE)                    \
754   if (field->is_repeated()) {                        \
755     reflection->Add##CPPTYPE(message, field, VALUE); \
756   } else {                                           \
757     reflection->Set##CPPTYPE(message, field, VALUE); \
758   }
759 
760     switch (field->cpp_type()) {
761       case FieldDescriptor::CPPTYPE_INT32: {
762         int64_t value;
763         DO(ConsumeSignedInteger(&value, kint32max));
764         SET_FIELD(Int32, static_cast<int32_t>(value));
765         break;
766       }
767 
768       case FieldDescriptor::CPPTYPE_UINT32: {
769         uint64_t value;
770         DO(ConsumeUnsignedInteger(&value, kuint32max));
771         SET_FIELD(UInt32, static_cast<uint32_t>(value));
772         break;
773       }
774 
775       case FieldDescriptor::CPPTYPE_INT64: {
776         int64_t value;
777         DO(ConsumeSignedInteger(&value, kint64max));
778         SET_FIELD(Int64, value);
779         break;
780       }
781 
782       case FieldDescriptor::CPPTYPE_UINT64: {
783         uint64_t value;
784         DO(ConsumeUnsignedInteger(&value, kuint64max));
785         SET_FIELD(UInt64, value);
786         break;
787       }
788 
789       case FieldDescriptor::CPPTYPE_FLOAT: {
790         double value;
791         DO(ConsumeDouble(&value));
792         SET_FIELD(Float, io::SafeDoubleToFloat(value));
793         break;
794       }
795 
796       case FieldDescriptor::CPPTYPE_DOUBLE: {
797         double value;
798         DO(ConsumeDouble(&value));
799         SET_FIELD(Double, value);
800         break;
801       }
802 
803       case FieldDescriptor::CPPTYPE_STRING: {
804         std::string value;
805         DO(ConsumeString(&value));
806         SET_FIELD(String, value);
807         break;
808       }
809 
810       case FieldDescriptor::CPPTYPE_BOOL: {
811         if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
812           uint64_t value;
813           DO(ConsumeUnsignedInteger(&value, 1));
814           SET_FIELD(Bool, value);
815         } else {
816           std::string value;
817           DO(ConsumeIdentifier(&value));
818           if (value == "true" || value == "True" || value == "t") {
819             SET_FIELD(Bool, true);
820           } else if (value == "false" || value == "False" || value == "f") {
821             SET_FIELD(Bool, false);
822           } else {
823             ReportError("Invalid value for boolean field \"" + field->name() +
824                         "\". Value: \"" + value + "\".");
825             return false;
826           }
827         }
828         break;
829       }
830 
831       case FieldDescriptor::CPPTYPE_ENUM: {
832         std::string value;
833         int64_t int_value = kint64max;
834         const EnumDescriptor* enum_type = field->enum_type();
835         const EnumValueDescriptor* enum_value = nullptr;
836 
837         if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
838           DO(ConsumeIdentifier(&value));
839           // Find the enumeration value.
840           enum_value = enum_type->FindValueByName(value);
841 
842         } else if (LookingAt("-") ||
843                    LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
844           DO(ConsumeSignedInteger(&int_value, kint32max));
845           value = StrCat(int_value);  // for error reporting
846           enum_value = enum_type->FindValueByNumber(int_value);
847         } else {
848           ReportError("Expected integer or identifier, got: " +
849                       tokenizer_.current().text);
850           return false;
851         }
852 
853         if (enum_value == nullptr) {
854           if (int_value != kint64max &&
855               reflection->SupportsUnknownEnumValues()) {
856             SET_FIELD(EnumValue, int_value);
857             return true;
858           } else if (!allow_unknown_enum_) {
859             ReportError("Unknown enumeration value of \"" + value +
860                         "\" for "
861                         "field \"" +
862                         field->name() + "\".");
863             return false;
864           } else {
865             ReportWarning("Unknown enumeration value of \"" + value +
866                           "\" for "
867                           "field \"" +
868                           field->name() + "\".");
869             return true;
870           }
871         }
872 
873         SET_FIELD(Enum, enum_value);
874         break;
875       }
876 
877       case FieldDescriptor::CPPTYPE_MESSAGE: {
878         // We should never get here. Put here instead of a default
879         // so that if new types are added, we get a nice compiler warning.
880         GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
881         break;
882       }
883     }
884 #undef SET_FIELD
885     return true;
886   }
887 
SkipFieldValue()888   bool SkipFieldValue() {
889     if (--recursion_limit_ < 0) {
890       ReportError(
891           StrCat("Message is too deep, the parser exceeded the "
892                        "configured recursion limit of ",
893                        initial_recursion_limit_, "."));
894       return false;
895     }
896 
897     if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
898       while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
899         tokenizer_.Next();
900       }
901       ++recursion_limit_;
902       return true;
903     }
904     if (TryConsume("[")) {
905       while (true) {
906         if (!LookingAt("{") && !LookingAt("<")) {
907           DO(SkipFieldValue());
908         } else {
909           DO(SkipFieldMessage());
910         }
911         if (TryConsume("]")) {
912           break;
913         }
914         DO(Consume(","));
915       }
916       ++recursion_limit_;
917       return true;
918     }
919     // Possible field values other than string:
920     //   12345        => TYPE_INTEGER
921     //   -12345       => TYPE_SYMBOL + TYPE_INTEGER
922     //   1.2345       => TYPE_FLOAT
923     //   -1.2345      => TYPE_SYMBOL + TYPE_FLOAT
924     //   inf          => TYPE_IDENTIFIER
925     //   -inf         => TYPE_SYMBOL + TYPE_IDENTIFIER
926     //   TYPE_INTEGER => TYPE_IDENTIFIER
927     // Divides them into two group, one with TYPE_SYMBOL
928     // and the other without:
929     //   Group one:
930     //     12345        => TYPE_INTEGER
931     //     1.2345       => TYPE_FLOAT
932     //     inf          => TYPE_IDENTIFIER
933     //     TYPE_INTEGER => TYPE_IDENTIFIER
934     //   Group two:
935     //     -12345       => TYPE_SYMBOL + TYPE_INTEGER
936     //     -1.2345      => TYPE_SYMBOL + TYPE_FLOAT
937     //     -inf         => TYPE_SYMBOL + TYPE_IDENTIFIER
938     // As we can see, the field value consists of an optional '-' and one of
939     // TYPE_INTEGER, TYPE_FLOAT and TYPE_IDENTIFIER.
940     bool has_minus = TryConsume("-");
941     if (!LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
942         !LookingAtType(io::Tokenizer::TYPE_FLOAT) &&
943         !LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
944       std::string text = tokenizer_.current().text;
945       ReportError("Cannot skip field value, unexpected token: " + text);
946       ++recursion_limit_;
947       return false;
948     }
949     // Combination of '-' and TYPE_IDENTIFIER may result in an invalid field
950     // value while other combinations all generate valid values.
951     // We check if the value of this combination is valid here.
952     // TYPE_IDENTIFIER after a '-' should be one of the float values listed
953     // below:
954     //   inf, inff, infinity, nan
955     if (has_minus && LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
956       std::string text = tokenizer_.current().text;
957       LowerString(&text);
958       if (text != "inf" &&
959           text != "infinity" && text != "nan") {
960         ReportError("Invalid float number: " + text);
961         ++recursion_limit_;
962         return false;
963       }
964     }
965     tokenizer_.Next();
966     ++recursion_limit_;
967     return true;
968   }
969 
970   // Returns true if the current token's text is equal to that specified.
LookingAt(const std::string & text)971   bool LookingAt(const std::string& text) {
972     return tokenizer_.current().text == text;
973   }
974 
975   // Returns true if the current token's type is equal to that specified.
LookingAtType(io::Tokenizer::TokenType token_type)976   bool LookingAtType(io::Tokenizer::TokenType token_type) {
977     return tokenizer_.current().type == token_type;
978   }
979 
980   // Consumes an identifier and saves its value in the identifier parameter.
981   // Returns false if the token is not of type IDENTIFIER.
ConsumeIdentifier(std::string * identifier)982   bool ConsumeIdentifier(std::string* identifier) {
983     if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
984       *identifier = tokenizer_.current().text;
985       tokenizer_.Next();
986       return true;
987     }
988 
989     // If allow_field_numer_ or allow_unknown_field_ is true, we should able
990     // to parse integer identifiers.
991     if ((allow_field_number_ || allow_unknown_field_ ||
992          allow_unknown_extension_) &&
993         LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
994       *identifier = tokenizer_.current().text;
995       tokenizer_.Next();
996       return true;
997     }
998 
999     ReportError("Expected identifier, got: " + tokenizer_.current().text);
1000     return false;
1001   }
1002 
1003   // Similar to `ConsumeIdentifier`, but any following whitespace token may
1004   // be reported.
ConsumeIdentifierBeforeWhitespace(std::string * identifier)1005   bool ConsumeIdentifierBeforeWhitespace(std::string* identifier) {
1006     tokenizer_.set_report_whitespace(true);
1007     bool result = ConsumeIdentifier(identifier);
1008     tokenizer_.set_report_whitespace(false);
1009     return result;
1010   }
1011 
1012   // Consume a string of form "<id1>.<id2>....<idN>".
ConsumeFullTypeName(std::string * name)1013   bool ConsumeFullTypeName(std::string* name) {
1014     DO(ConsumeIdentifier(name));
1015     while (TryConsume(".")) {
1016       std::string part;
1017       DO(ConsumeIdentifier(&part));
1018       *name += ".";
1019       *name += part;
1020     }
1021     return true;
1022   }
1023 
ConsumeTypeUrlOrFullTypeName(std::string * name)1024   bool ConsumeTypeUrlOrFullTypeName(std::string* name) {
1025     DO(ConsumeIdentifier(name));
1026     while (true) {
1027       std::string connector;
1028       if (TryConsume(".")) {
1029         connector = ".";
1030       } else if (TryConsume("/")) {
1031         connector = "/";
1032       } else {
1033         break;
1034       }
1035       std::string part;
1036       DO(ConsumeIdentifier(&part));
1037       *name += connector;
1038       *name += part;
1039     }
1040     return true;
1041   }
1042 
1043   // Consumes a string and saves its value in the text parameter.
1044   // Returns false if the token is not of type STRING.
ConsumeString(std::string * text)1045   bool ConsumeString(std::string* text) {
1046     if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
1047       ReportError("Expected string, got: " + tokenizer_.current().text);
1048       return false;
1049     }
1050 
1051     text->clear();
1052     while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1053       io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
1054 
1055       tokenizer_.Next();
1056     }
1057 
1058     return true;
1059   }
1060 
1061   // Consumes a uint64_t and saves its value in the value parameter.
1062   // Returns false if the token is not of type INTEGER.
ConsumeUnsignedInteger(uint64_t * value,uint64_t max_value)1063   bool ConsumeUnsignedInteger(uint64_t* value, uint64_t max_value) {
1064     if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
1065       ReportError("Expected integer, got: " + tokenizer_.current().text);
1066       return false;
1067     }
1068 
1069     if (!io::Tokenizer::ParseInteger(tokenizer_.current().text, max_value,
1070                                      value)) {
1071       ReportError("Integer out of range (" + tokenizer_.current().text + ")");
1072       return false;
1073     }
1074 
1075     tokenizer_.Next();
1076     return true;
1077   }
1078 
1079   // Consumes an int64_t and saves its value in the value parameter.
1080   // Note that since the tokenizer does not support negative numbers,
1081   // we actually may consume an additional token (for the minus sign) in this
1082   // method. Returns false if the token is not an integer
1083   // (signed or otherwise).
ConsumeSignedInteger(int64_t * value,uint64_t max_value)1084   bool ConsumeSignedInteger(int64_t* value, uint64_t max_value) {
1085     bool negative = false;
1086 
1087     if (TryConsume("-")) {
1088       negative = true;
1089       // Two's complement always allows one more negative integer than
1090       // positive.
1091       ++max_value;
1092     }
1093 
1094     uint64_t unsigned_value;
1095 
1096     DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
1097 
1098     if (negative) {
1099       if ((static_cast<uint64_t>(kint64max) + 1) == unsigned_value) {
1100         *value = kint64min;
1101       } else {
1102         *value = -static_cast<int64_t>(unsigned_value);
1103       }
1104     } else {
1105       *value = static_cast<int64_t>(unsigned_value);
1106     }
1107 
1108     return true;
1109   }
1110 
1111   // Consumes a double and saves its value in the value parameter.
1112   // Accepts decimal numbers only, rejects hex or oct numbers.
ConsumeUnsignedDecimalAsDouble(double * value,uint64_t max_value)1113   bool ConsumeUnsignedDecimalAsDouble(double* value, uint64_t max_value) {
1114     if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
1115       ReportError("Expected integer, got: " + tokenizer_.current().text);
1116       return false;
1117     }
1118 
1119     const std::string& text = tokenizer_.current().text;
1120     if (IsHexNumber(text) || IsOctNumber(text)) {
1121       ReportError("Expect a decimal number, got: " + text);
1122       return false;
1123     }
1124 
1125     uint64_t uint64_value;
1126     if (io::Tokenizer::ParseInteger(text, max_value, &uint64_value)) {
1127       *value = static_cast<double>(uint64_value);
1128     } else {
1129       // Uint64 overflow, attempt to parse as a double instead.
1130       *value = io::Tokenizer::ParseFloat(text);
1131     }
1132 
1133     tokenizer_.Next();
1134     return true;
1135   }
1136 
1137   // Consumes a double and saves its value in the value parameter.
1138   // Note that since the tokenizer does not support negative numbers,
1139   // we actually may consume an additional token (for the minus sign) in this
1140   // method. Returns false if the token is not a double
1141   // (signed or otherwise).
ConsumeDouble(double * value)1142   bool ConsumeDouble(double* value) {
1143     bool negative = false;
1144 
1145     if (TryConsume("-")) {
1146       negative = true;
1147     }
1148 
1149     // A double can actually be an integer, according to the tokenizer.
1150     // Therefore, we must check both cases here.
1151     if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
1152       // We have found an integer value for the double.
1153       DO(ConsumeUnsignedDecimalAsDouble(value, kuint64max));
1154     } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
1155       // We have found a float value for the double.
1156       *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
1157 
1158       // Mark the current token as consumed.
1159       tokenizer_.Next();
1160     } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1161       std::string text = tokenizer_.current().text;
1162       LowerString(&text);
1163       if (text == "inf" ||
1164           text == "infinity") {
1165         *value = std::numeric_limits<double>::infinity();
1166         tokenizer_.Next();
1167       } else if (text == "nan") {
1168         *value = std::numeric_limits<double>::quiet_NaN();
1169         tokenizer_.Next();
1170       } else {
1171         ReportError("Expected double, got: " + text);
1172         return false;
1173       }
1174     } else {
1175       ReportError("Expected double, got: " + tokenizer_.current().text);
1176       return false;
1177     }
1178 
1179     if (negative) {
1180       *value = -*value;
1181     }
1182 
1183     return true;
1184   }
1185 
1186   // Consumes Any::type_url value, of form "type.googleapis.com/full.type.Name"
1187   // or "type.googleprod.com/full.type.Name"
ConsumeAnyTypeUrl(std::string * full_type_name,std::string * prefix)1188   bool ConsumeAnyTypeUrl(std::string* full_type_name, std::string* prefix) {
1189     // TODO(saito) Extend Consume() to consume multiple tokens at once, so that
1190     // this code can be written as just DO(Consume(kGoogleApisTypePrefix)).
1191     DO(ConsumeIdentifier(prefix));
1192     while (TryConsume(".")) {
1193       std::string url;
1194       DO(ConsumeIdentifier(&url));
1195       *prefix += "." + url;
1196     }
1197     DO(Consume("/"));
1198     *prefix += "/";
1199     DO(ConsumeFullTypeName(full_type_name));
1200 
1201     return true;
1202   }
1203 
1204   // A helper function for reconstructing Any::value. Consumes a text of
1205   // full_type_name, then serializes it into serialized_value.
ConsumeAnyValue(const Descriptor * value_descriptor,std::string * serialized_value)1206   bool ConsumeAnyValue(const Descriptor* value_descriptor,
1207                        std::string* serialized_value) {
1208     DynamicMessageFactory factory;
1209     const Message* value_prototype = factory.GetPrototype(value_descriptor);
1210     if (value_prototype == nullptr) {
1211       return false;
1212     }
1213     std::unique_ptr<Message> value(value_prototype->New());
1214     std::string sub_delimiter;
1215     DO(ConsumeMessageDelimiter(&sub_delimiter));
1216     DO(ConsumeMessage(value.get(), sub_delimiter));
1217 
1218     if (allow_partial_) {
1219       value->AppendPartialToString(serialized_value);
1220     } else {
1221       if (!value->IsInitialized()) {
1222         ReportError(
1223             "Value of type \"" + value_descriptor->full_name() +
1224             "\" stored in google.protobuf.Any has missing required fields");
1225         return false;
1226       }
1227       value->AppendToString(serialized_value);
1228     }
1229     return true;
1230   }
1231 
1232   // Consumes a token and confirms that it matches that specified in the
1233   // value parameter. Returns false if the token found does not match that
1234   // which was specified.
Consume(const std::string & value)1235   bool Consume(const std::string& value) {
1236     const std::string& current_value = tokenizer_.current().text;
1237 
1238     if (current_value != value) {
1239       ReportError("Expected \"" + value + "\", found \"" + current_value +
1240                   "\".");
1241       return false;
1242     }
1243 
1244     tokenizer_.Next();
1245 
1246     return true;
1247   }
1248 
1249   // Similar to `Consume`, but the following token may be tokenized as
1250   // TYPE_WHITESPACE.
ConsumeBeforeWhitespace(const std::string & value)1251   bool ConsumeBeforeWhitespace(const std::string& value) {
1252     // Report whitespace after this token, but only once.
1253     tokenizer_.set_report_whitespace(true);
1254     bool result = Consume(value);
1255     tokenizer_.set_report_whitespace(false);
1256     return result;
1257   }
1258 
1259   // Attempts to consume the supplied value. Returns false if a the
1260   // token found does not match the value specified.
TryConsume(const std::string & value)1261   bool TryConsume(const std::string& value) {
1262     if (tokenizer_.current().text == value) {
1263       tokenizer_.Next();
1264       return true;
1265     } else {
1266       return false;
1267     }
1268   }
1269 
1270   // Similar to `TryConsume`, but the following token may be tokenized as
1271   // TYPE_WHITESPACE.
TryConsumeBeforeWhitespace(const std::string & value)1272   bool TryConsumeBeforeWhitespace(const std::string& value) {
1273     // Report whitespace after this token, but only once.
1274     tokenizer_.set_report_whitespace(true);
1275     bool result = TryConsume(value);
1276     tokenizer_.set_report_whitespace(false);
1277     return result;
1278   }
1279 
TryConsumeWhitespace()1280   bool TryConsumeWhitespace() {
1281     had_silent_marker_ = false;
1282     if (LookingAtType(io::Tokenizer::TYPE_WHITESPACE)) {
1283       if (tokenizer_.current().text == " " DEBUG_STRING_SILENT_MARKER) {
1284         had_silent_marker_ = true;
1285       }
1286       tokenizer_.Next();
1287       return true;
1288     }
1289     return false;
1290   }
1291 
1292   // An internal instance of the Tokenizer's error collector, used to
1293   // collect any base-level parse errors and feed them to the ParserImpl.
1294   class ParserErrorCollector : public io::ErrorCollector {
1295    public:
ParserErrorCollector(TextFormat::Parser::ParserImpl * parser)1296     explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser)
1297         : parser_(parser) {}
1298 
~ParserErrorCollector()1299     ~ParserErrorCollector() override {}
1300 
AddError(int line,int column,const std::string & message)1301     void AddError(int line, int column, const std::string& message) override {
1302       parser_->ReportError(line, column, message);
1303     }
1304 
AddWarning(int line,int column,const std::string & message)1305     void AddWarning(int line, int column, const std::string& message) override {
1306       parser_->ReportWarning(line, column, message);
1307     }
1308 
1309    private:
1310     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector);
1311     TextFormat::Parser::ParserImpl* parser_;
1312   };
1313 
1314   io::ErrorCollector* error_collector_;
1315   const TextFormat::Finder* finder_;
1316   ParseInfoTree* parse_info_tree_;
1317   ParserErrorCollector tokenizer_error_collector_;
1318   io::Tokenizer tokenizer_;
1319   const Descriptor* root_message_type_;
1320   SingularOverwritePolicy singular_overwrite_policy_;
1321   const bool allow_case_insensitive_field_;
1322   const bool allow_unknown_field_;
1323   const bool allow_unknown_extension_;
1324   const bool allow_unknown_enum_;
1325   const bool allow_field_number_;
1326   const bool allow_partial_;
1327   const int initial_recursion_limit_;
1328   int recursion_limit_;
1329   bool had_silent_marker_;
1330   bool had_errors_;
1331 };
1332 
1333 // ===========================================================================
1334 // Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
1335 // from the Printer found in //net/proto2/io/public/printer.h
1336 class TextFormat::Printer::TextGenerator
1337     : public TextFormat::BaseTextGenerator {
1338  public:
TextGenerator(io::ZeroCopyOutputStream * output,int initial_indent_level)1339   explicit TextGenerator(io::ZeroCopyOutputStream* output,
1340                          int initial_indent_level)
1341       : output_(output),
1342         buffer_(nullptr),
1343         buffer_size_(0),
1344         at_start_of_line_(true),
1345         failed_(false),
1346         insert_silent_marker_(false),
1347         indent_level_(initial_indent_level),
1348         initial_indent_level_(initial_indent_level) {}
1349 
TextGenerator(io::ZeroCopyOutputStream * output,bool insert_silent_marker,int initial_indent_level)1350   explicit TextGenerator(io::ZeroCopyOutputStream* output,
1351                          bool insert_silent_marker, int initial_indent_level)
1352       : output_(output),
1353         buffer_(nullptr),
1354         buffer_size_(0),
1355         at_start_of_line_(true),
1356         failed_(false),
1357         insert_silent_marker_(insert_silent_marker),
1358         indent_level_(initial_indent_level),
1359         initial_indent_level_(initial_indent_level) {}
1360 
~TextGenerator()1361   ~TextGenerator() override {
1362     // Only BackUp() if we're sure we've successfully called Next() at least
1363     // once.
1364     if (!failed_) {
1365       output_->BackUp(buffer_size_);
1366     }
1367   }
1368 
1369   // Indent text by two spaces.  After calling Indent(), two spaces will be
1370   // inserted at the beginning of each line of text.  Indent() may be called
1371   // multiple times to produce deeper indents.
Indent()1372   void Indent() override { ++indent_level_; }
1373 
1374   // Reduces the current indent level by two spaces, or crashes if the indent
1375   // level is zero.
Outdent()1376   void Outdent() override {
1377     if (indent_level_ == 0 || indent_level_ < initial_indent_level_) {
1378       GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
1379       return;
1380     }
1381 
1382     --indent_level_;
1383   }
1384 
GetCurrentIndentationSize() const1385   size_t GetCurrentIndentationSize() const override {
1386     return 2 * indent_level_;
1387   }
1388 
1389   // Print text to the output stream.
Print(const char * text,size_t size)1390   void Print(const char* text, size_t size) override {
1391     if (indent_level_ > 0) {
1392       size_t pos = 0;  // The number of bytes we've written so far.
1393       for (size_t i = 0; i < size; i++) {
1394         if (text[i] == '\n') {
1395           // Saw newline.  If there is more text, we may need to insert an
1396           // indent here.  So, write what we have so far, including the '\n'.
1397           Write(text + pos, i - pos + 1);
1398           pos = i + 1;
1399 
1400           // Setting this true will cause the next Write() to insert an indent
1401           // first.
1402           at_start_of_line_ = true;
1403         }
1404       }
1405       // Write the rest.
1406       Write(text + pos, size - pos);
1407     } else {
1408       Write(text, size);
1409       if (size > 0 && text[size - 1] == '\n') {
1410         at_start_of_line_ = true;
1411       }
1412     }
1413   }
1414 
1415   // True if any write to the underlying stream failed.  (We don't just
1416   // crash in this case because this is an I/O failure, not a programming
1417   // error.)
failed() const1418   bool failed() const { return failed_; }
1419 
PrintMaybeWithMarker(StringPiece text)1420   void PrintMaybeWithMarker(StringPiece text) {
1421     Print(text.data(), text.size());
1422     if (ConsumeInsertSilentMarker()) {
1423       PrintLiteral(DEBUG_STRING_SILENT_MARKER);
1424     }
1425   }
1426 
PrintMaybeWithMarker(StringPiece text_head,StringPiece text_tail)1427   void PrintMaybeWithMarker(StringPiece text_head,
1428                             StringPiece text_tail) {
1429     Print(text_head.data(), text_head.size());
1430     if (ConsumeInsertSilentMarker()) {
1431       PrintLiteral(DEBUG_STRING_SILENT_MARKER);
1432     }
1433     Print(text_tail.data(), text_tail.size());
1434   }
1435 
1436  private:
1437   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator);
1438 
Write(const char * data,size_t size)1439   void Write(const char* data, size_t size) {
1440     if (failed_) return;
1441     if (size == 0) return;
1442 
1443     if (at_start_of_line_) {
1444       // Insert an indent.
1445       at_start_of_line_ = false;
1446       WriteIndent();
1447       if (failed_) return;
1448     }
1449 
1450     while (static_cast<int64_t>(size) > buffer_size_) {
1451       // Data exceeds space in the buffer.  Copy what we can and request a
1452       // new buffer.
1453       if (buffer_size_ > 0) {
1454         memcpy(buffer_, data, buffer_size_);
1455         data += buffer_size_;
1456         size -= buffer_size_;
1457       }
1458       void* void_buffer = nullptr;
1459       failed_ = !output_->Next(&void_buffer, &buffer_size_);
1460       if (failed_) return;
1461       buffer_ = reinterpret_cast<char*>(void_buffer);
1462     }
1463 
1464     // Buffer is big enough to receive the data; copy it.
1465     memcpy(buffer_, data, size);
1466     buffer_ += size;
1467     buffer_size_ -= size;
1468   }
1469 
WriteIndent()1470   void WriteIndent() {
1471     if (indent_level_ == 0) {
1472       return;
1473     }
1474     GOOGLE_DCHECK(!failed_);
1475     int size = GetCurrentIndentationSize();
1476 
1477     while (size > buffer_size_) {
1478       // Data exceeds space in the buffer. Write what we can and request a new
1479       // buffer.
1480       if (buffer_size_ > 0) {
1481         memset(buffer_, ' ', buffer_size_);
1482       }
1483       size -= buffer_size_;
1484       void* void_buffer;
1485       failed_ = !output_->Next(&void_buffer, &buffer_size_);
1486       if (failed_) return;
1487       buffer_ = reinterpret_cast<char*>(void_buffer);
1488     }
1489 
1490     // Buffer is big enough to receive the data; copy it.
1491     memset(buffer_, ' ', size);
1492     buffer_ += size;
1493     buffer_size_ -= size;
1494   }
1495 
1496   // Return the current value of insert_silent_marker_. If it is true, set it
1497   // to false as we assume that a silent marker is inserted after a call to this
1498   // function.
ConsumeInsertSilentMarker()1499   bool ConsumeInsertSilentMarker() {
1500     if (insert_silent_marker_) {
1501       insert_silent_marker_ = false;
1502       return true;
1503     }
1504     return false;
1505   }
1506 
1507   io::ZeroCopyOutputStream* const output_;
1508   char* buffer_;
1509   int buffer_size_;
1510   bool at_start_of_line_;
1511   bool failed_;
1512   // This flag is false when inserting silent marker is disabled or a silent
1513   // marker has been inserted.
1514   bool insert_silent_marker_;
1515 
1516   int indent_level_;
1517   int initial_indent_level_;
1518 };
1519 
1520 // ===========================================================================
1521 //  An internal field value printer that may insert a silent marker in
1522 //  DebugStrings.
1523 class TextFormat::Printer::DebugStringFieldValuePrinter
1524     : public TextFormat::FastFieldValuePrinter {
1525  public:
PrintMessageStart(const Message &,int,int,bool single_line_mode,BaseTextGenerator * generator) const1526   void PrintMessageStart(const Message& /*message*/, int /*field_index*/,
1527                          int /*field_count*/, bool single_line_mode,
1528                          BaseTextGenerator* generator) const override {
1529     // This is safe as only TextGenerator is used with
1530     // DebugStringFieldValuePrinter.
1531     TextGenerator* text_generator = static_cast<TextGenerator*>(generator);
1532     if (single_line_mode) {
1533       text_generator->PrintMaybeWithMarker(" ", "{ ");
1534     } else {
1535       text_generator->PrintMaybeWithMarker(" ", "{\n");
1536     }
1537   }
1538 };
1539 
1540 // ===========================================================================
1541 //  An internal field value printer that escape UTF8 strings.
1542 class TextFormat::Printer::FastFieldValuePrinterUtf8Escaping
1543     : public TextFormat::Printer::DebugStringFieldValuePrinter {
1544  public:
PrintString(const std::string & val,TextFormat::BaseTextGenerator * generator) const1545   void PrintString(const std::string& val,
1546                    TextFormat::BaseTextGenerator* generator) const override {
1547     generator->PrintLiteral("\"");
1548     generator->PrintString(strings::Utf8SafeCEscape(val));
1549     generator->PrintLiteral("\"");
1550   }
PrintBytes(const std::string & val,TextFormat::BaseTextGenerator * generator) const1551   void PrintBytes(const std::string& val,
1552                   TextFormat::BaseTextGenerator* generator) const override {
1553     return FastFieldValuePrinter::PrintString(val, generator);
1554   }
1555 };
1556 
1557 // ===========================================================================
1558 // Implementation of the default Finder for extensions.
~Finder()1559 TextFormat::Finder::~Finder() {}
1560 
FindExtension(Message * message,const std::string & name) const1561 const FieldDescriptor* TextFormat::Finder::FindExtension(
1562     Message* message, const std::string& name) const {
1563   return DefaultFinderFindExtension(message, name);
1564 }
1565 
FindExtensionByNumber(const Descriptor * descriptor,int number) const1566 const FieldDescriptor* TextFormat::Finder::FindExtensionByNumber(
1567     const Descriptor* descriptor, int number) const {
1568   return DefaultFinderFindExtensionByNumber(descriptor, number);
1569 }
1570 
FindAnyType(const Message & message,const std::string & prefix,const std::string & name) const1571 const Descriptor* TextFormat::Finder::FindAnyType(
1572     const Message& message, const std::string& prefix,
1573     const std::string& name) const {
1574   return DefaultFinderFindAnyType(message, prefix, name);
1575 }
1576 
FindExtensionFactory(const FieldDescriptor *) const1577 MessageFactory* TextFormat::Finder::FindExtensionFactory(
1578     const FieldDescriptor* /*field*/) const {
1579   return nullptr;
1580 }
1581 
1582 // ===========================================================================
1583 
Parser()1584 TextFormat::Parser::Parser()
1585     : error_collector_(nullptr),
1586       finder_(nullptr),
1587       parse_info_tree_(nullptr),
1588       allow_partial_(false),
1589       allow_case_insensitive_field_(false),
1590       allow_unknown_field_(false),
1591       allow_unknown_extension_(false),
1592       allow_unknown_enum_(false),
1593       allow_field_number_(false),
1594       allow_relaxed_whitespace_(false),
1595       allow_singular_overwrites_(false),
1596       recursion_limit_(std::numeric_limits<int>::max()) {}
1597 
~Parser()1598 TextFormat::Parser::~Parser() {}
1599 
1600 namespace {
1601 
CheckParseInputSize(StringPiece input,io::ErrorCollector * error_collector)1602 bool CheckParseInputSize(StringPiece input,
1603                          io::ErrorCollector* error_collector) {
1604   if (input.size() > INT_MAX) {
1605     error_collector->AddError(
1606         -1, 0,
1607         StrCat(
1608             "Input size too large: ", static_cast<int64_t>(input.size()),
1609             " bytes", " > ", INT_MAX, " bytes."));
1610     return false;
1611   }
1612   return true;
1613 }
1614 
1615 }  // namespace
1616 
Parse(io::ZeroCopyInputStream * input,Message * output)1617 bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
1618                                Message* output) {
1619   output->Clear();
1620 
1621   ParserImpl::SingularOverwritePolicy overwrites_policy =
1622       allow_singular_overwrites_ ? ParserImpl::ALLOW_SINGULAR_OVERWRITES
1623                                  : ParserImpl::FORBID_SINGULAR_OVERWRITES;
1624 
1625   ParserImpl parser(output->GetDescriptor(), input, error_collector_, finder_,
1626                     parse_info_tree_, overwrites_policy,
1627                     allow_case_insensitive_field_, allow_unknown_field_,
1628                     allow_unknown_extension_, allow_unknown_enum_,
1629                     allow_field_number_, allow_relaxed_whitespace_,
1630                     allow_partial_, recursion_limit_);
1631   return MergeUsingImpl(input, output, &parser);
1632 }
1633 
ParseFromString(ConstStringParam input,Message * output)1634 bool TextFormat::Parser::ParseFromString(ConstStringParam input,
1635                                          Message* output) {
1636   DO(CheckParseInputSize(input, error_collector_));
1637   io::ArrayInputStream input_stream(input.data(), input.size());
1638   return Parse(&input_stream, output);
1639 }
1640 
Merge(io::ZeroCopyInputStream * input,Message * output)1641 bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
1642                                Message* output) {
1643   ParserImpl parser(output->GetDescriptor(), input, error_collector_, finder_,
1644                     parse_info_tree_, ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1645                     allow_case_insensitive_field_, allow_unknown_field_,
1646                     allow_unknown_extension_, allow_unknown_enum_,
1647                     allow_field_number_, allow_relaxed_whitespace_,
1648                     allow_partial_, recursion_limit_);
1649   return MergeUsingImpl(input, output, &parser);
1650 }
1651 
MergeFromString(ConstStringParam input,Message * output)1652 bool TextFormat::Parser::MergeFromString(ConstStringParam input,
1653                                          Message* output) {
1654   DO(CheckParseInputSize(input, error_collector_));
1655   io::ArrayInputStream input_stream(input.data(), input.size());
1656   return Merge(&input_stream, output);
1657 }
1658 
MergeUsingImpl(io::ZeroCopyInputStream *,Message * output,ParserImpl * parser_impl)1659 bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* /* input */,
1660                                         Message* output,
1661                                         ParserImpl* parser_impl) {
1662   if (!parser_impl->Parse(output)) return false;
1663   if (!allow_partial_ && !output->IsInitialized()) {
1664     std::vector<std::string> missing_fields;
1665     output->FindInitializationErrors(&missing_fields);
1666     parser_impl->ReportError(-1, 0,
1667                              "Message missing required fields: " +
1668                                  Join(missing_fields, ", "));
1669     return false;
1670   }
1671   return true;
1672 }
1673 
ParseFieldValueFromString(const std::string & input,const FieldDescriptor * field,Message * output)1674 bool TextFormat::Parser::ParseFieldValueFromString(const std::string& input,
1675                                                    const FieldDescriptor* field,
1676                                                    Message* output) {
1677   io::ArrayInputStream input_stream(input.data(), input.size());
1678   ParserImpl parser(
1679       output->GetDescriptor(), &input_stream, error_collector_, finder_,
1680       parse_info_tree_, ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1681       allow_case_insensitive_field_, allow_unknown_field_,
1682       allow_unknown_extension_, allow_unknown_enum_, allow_field_number_,
1683       allow_relaxed_whitespace_, allow_partial_, recursion_limit_);
1684   return parser.ParseField(field, output);
1685 }
1686 
Parse(io::ZeroCopyInputStream * input,Message * output)1687 /* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
1688                                     Message* output) {
1689   return Parser().Parse(input, output);
1690 }
1691 
Merge(io::ZeroCopyInputStream * input,Message * output)1692 /* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
1693                                     Message* output) {
1694   return Parser().Merge(input, output);
1695 }
1696 
ParseFromString(ConstStringParam input,Message * output)1697 /* static */ bool TextFormat::ParseFromString(ConstStringParam input,
1698                                               Message* output) {
1699   return Parser().ParseFromString(input, output);
1700 }
1701 
MergeFromString(ConstStringParam input,Message * output)1702 /* static */ bool TextFormat::MergeFromString(ConstStringParam input,
1703                                               Message* output) {
1704   return Parser().MergeFromString(input, output);
1705 }
1706 
1707 #undef DO
1708 
1709 // ===========================================================================
1710 
~BaseTextGenerator()1711 TextFormat::BaseTextGenerator::~BaseTextGenerator() {}
1712 
1713 namespace {
1714 
1715 // A BaseTextGenerator that writes to a string.
1716 class StringBaseTextGenerator : public TextFormat::BaseTextGenerator {
1717  public:
Print(const char * text,size_t size)1718   void Print(const char* text, size_t size) override {
1719     output_.append(text, size);
1720   }
1721 
1722 // Some compilers do not support ref-qualifiers even in C++11 mode.
1723 // Disable the optimization for now and revisit it later.
1724 #if 0  // LANG_CXX11
1725   std::string Consume() && { return std::move(output_); }
1726 #else  // !LANG_CXX11
Get()1727   const std::string& Get() { return output_; }
1728 #endif  // LANG_CXX11
1729 
1730  private:
1731   std::string output_;
1732 };
1733 
1734 }  // namespace
1735 
1736 // The default implementation for FieldValuePrinter. We just delegate the
1737 // implementation to the default FastFieldValuePrinter to avoid duplicating the
1738 // logic.
FieldValuePrinter()1739 TextFormat::FieldValuePrinter::FieldValuePrinter() {}
~FieldValuePrinter()1740 TextFormat::FieldValuePrinter::~FieldValuePrinter() {}
1741 
// FORWARD_IMPL(fn, args...) expands to a complete function body: it calls
// delegate_.fn(args..., &generator) with a local StringBaseTextGenerator and
// returns the text that the delegate printed into it.
//
// The ref-qualified Consume() variant is compiled out (see
// StringBaseTextGenerator), so the active definition returns a copy obtained
// through Get().
#if 0  // LANG_CXX11
#define FORWARD_IMPL(fn, ...)            \
  StringBaseTextGenerator generator;     \
  delegate_.fn(__VA_ARGS__, &generator); \
  return std::move(generator).Consume()
#else  // !LANG_CXX11
#define FORWARD_IMPL(fn, ...)            \
  StringBaseTextGenerator generator;     \
  delegate_.fn(__VA_ARGS__, &generator); \
  return generator.Get()
#endif  // LANG_CXX11
1753 
PrintBool(bool val) const1754 std::string TextFormat::FieldValuePrinter::PrintBool(bool val) const {
1755   FORWARD_IMPL(PrintBool, val);
1756 }
PrintInt32(int32_t val) const1757 std::string TextFormat::FieldValuePrinter::PrintInt32(int32_t val) const {
1758   FORWARD_IMPL(PrintInt32, val);
1759 }
PrintUInt32(uint32_t val) const1760 std::string TextFormat::FieldValuePrinter::PrintUInt32(uint32_t val) const {
1761   FORWARD_IMPL(PrintUInt32, val);
1762 }
PrintInt64(int64_t val) const1763 std::string TextFormat::FieldValuePrinter::PrintInt64(int64_t val) const {
1764   FORWARD_IMPL(PrintInt64, val);
1765 }
PrintUInt64(uint64_t val) const1766 std::string TextFormat::FieldValuePrinter::PrintUInt64(uint64_t val) const {
1767   FORWARD_IMPL(PrintUInt64, val);
1768 }
PrintFloat(float val) const1769 std::string TextFormat::FieldValuePrinter::PrintFloat(float val) const {
1770   FORWARD_IMPL(PrintFloat, val);
1771 }
PrintDouble(double val) const1772 std::string TextFormat::FieldValuePrinter::PrintDouble(double val) const {
1773   FORWARD_IMPL(PrintDouble, val);
1774 }
PrintString(const std::string & val) const1775 std::string TextFormat::FieldValuePrinter::PrintString(
1776     const std::string& val) const {
1777   FORWARD_IMPL(PrintString, val);
1778 }
PrintBytes(const std::string & val) const1779 std::string TextFormat::FieldValuePrinter::PrintBytes(
1780     const std::string& val) const {
1781   return PrintString(val);
1782 }
PrintEnum(int32_t val,const std::string & name) const1783 std::string TextFormat::FieldValuePrinter::PrintEnum(
1784     int32_t val, const std::string& name) const {
1785   FORWARD_IMPL(PrintEnum, val, name);
1786 }
PrintFieldName(const Message & message,const Reflection * reflection,const FieldDescriptor * field) const1787 std::string TextFormat::FieldValuePrinter::PrintFieldName(
1788     const Message& message, const Reflection* reflection,
1789     const FieldDescriptor* field) const {
1790   FORWARD_IMPL(PrintFieldName, message, reflection, field);
1791 }
PrintMessageStart(const Message & message,int field_index,int field_count,bool single_line_mode) const1792 std::string TextFormat::FieldValuePrinter::PrintMessageStart(
1793     const Message& message, int field_index, int field_count,
1794     bool single_line_mode) const {
1795   FORWARD_IMPL(PrintMessageStart, message, field_index, field_count,
1796                single_line_mode);
1797 }
PrintMessageEnd(const Message & message,int field_index,int field_count,bool single_line_mode) const1798 std::string TextFormat::FieldValuePrinter::PrintMessageEnd(
1799     const Message& message, int field_index, int field_count,
1800     bool single_line_mode) const {
1801   FORWARD_IMPL(PrintMessageEnd, message, field_index, field_count,
1802                single_line_mode);
1803 }
1804 #undef FORWARD_IMPL
1805 
FastFieldValuePrinter()1806 TextFormat::FastFieldValuePrinter::FastFieldValuePrinter() {}
~FastFieldValuePrinter()1807 TextFormat::FastFieldValuePrinter::~FastFieldValuePrinter() {}
PrintBool(bool val,BaseTextGenerator * generator) const1808 void TextFormat::FastFieldValuePrinter::PrintBool(
1809     bool val, BaseTextGenerator* generator) const {
1810   if (val) {
1811     generator->PrintLiteral("true");
1812   } else {
1813     generator->PrintLiteral("false");
1814   }
1815 }
PrintInt32(int32_t val,BaseTextGenerator * generator) const1816 void TextFormat::FastFieldValuePrinter::PrintInt32(
1817     int32_t val, BaseTextGenerator* generator) const {
1818   generator->PrintString(StrCat(val));
1819 }
PrintUInt32(uint32_t val,BaseTextGenerator * generator) const1820 void TextFormat::FastFieldValuePrinter::PrintUInt32(
1821     uint32_t val, BaseTextGenerator* generator) const {
1822   generator->PrintString(StrCat(val));
1823 }
PrintInt64(int64_t val,BaseTextGenerator * generator) const1824 void TextFormat::FastFieldValuePrinter::PrintInt64(
1825     int64_t val, BaseTextGenerator* generator) const {
1826   generator->PrintString(StrCat(val));
1827 }
PrintUInt64(uint64_t val,BaseTextGenerator * generator) const1828 void TextFormat::FastFieldValuePrinter::PrintUInt64(
1829     uint64_t val, BaseTextGenerator* generator) const {
1830   generator->PrintString(StrCat(val));
1831 }
PrintFloat(float val,BaseTextGenerator * generator) const1832 void TextFormat::FastFieldValuePrinter::PrintFloat(
1833     float val, BaseTextGenerator* generator) const {
1834   generator->PrintString(!std::isnan(val) ? SimpleFtoa(val) : "nan");
1835 }
PrintDouble(double val,BaseTextGenerator * generator) const1836 void TextFormat::FastFieldValuePrinter::PrintDouble(
1837     double val, BaseTextGenerator* generator) const {
1838   generator->PrintString(!std::isnan(val) ? SimpleDtoa(val) : "nan");
1839 }
PrintEnum(int32_t,const std::string & name,BaseTextGenerator * generator) const1840 void TextFormat::FastFieldValuePrinter::PrintEnum(
1841     int32_t /*val*/, const std::string& name,
1842     BaseTextGenerator* generator) const {
1843   generator->PrintString(name);
1844 }
1845 
PrintString(const std::string & val,BaseTextGenerator * generator) const1846 void TextFormat::FastFieldValuePrinter::PrintString(
1847     const std::string& val, BaseTextGenerator* generator) const {
1848   generator->PrintLiteral("\"");
1849   generator->PrintString(CEscape(val));
1850   generator->PrintLiteral("\"");
1851 }
PrintBytes(const std::string & val,BaseTextGenerator * generator) const1852 void TextFormat::FastFieldValuePrinter::PrintBytes(
1853     const std::string& val, BaseTextGenerator* generator) const {
1854   PrintString(val, generator);
1855 }
PrintFieldName(const Message & message,int,int,const Reflection * reflection,const FieldDescriptor * field,BaseTextGenerator * generator) const1856 void TextFormat::FastFieldValuePrinter::PrintFieldName(
1857     const Message& message, int /*field_index*/, int /*field_count*/,
1858     const Reflection* reflection, const FieldDescriptor* field,
1859     BaseTextGenerator* generator) const {
1860   PrintFieldName(message, reflection, field, generator);
1861 }
PrintFieldName(const Message &,const Reflection *,const FieldDescriptor * field,BaseTextGenerator * generator) const1862 void TextFormat::FastFieldValuePrinter::PrintFieldName(
1863     const Message& /*message*/, const Reflection* /*reflection*/,
1864     const FieldDescriptor* field, BaseTextGenerator* generator) const {
1865   if (field->is_extension()) {
1866     generator->PrintLiteral("[");
1867     generator->PrintString(field->PrintableNameForExtension());
1868     generator->PrintLiteral("]");
1869   } else if (field->type() == FieldDescriptor::TYPE_GROUP) {
1870     // Groups must be serialized with their original capitalization.
1871     generator->PrintString(field->message_type()->name());
1872   } else {
1873     generator->PrintString(field->name());
1874   }
1875 }
PrintMessageStart(const Message &,int,int,bool single_line_mode,BaseTextGenerator * generator) const1876 void TextFormat::FastFieldValuePrinter::PrintMessageStart(
1877     const Message& /*message*/, int /*field_index*/, int /*field_count*/,
1878     bool single_line_mode, BaseTextGenerator* generator) const {
1879   if (single_line_mode) {
1880     generator->PrintLiteral(" { ");
1881   } else {
1882     generator->PrintLiteral(" {\n");
1883   }
1884 }
PrintMessageContent(const Message &,int,int,bool,BaseTextGenerator *) const1885 bool TextFormat::FastFieldValuePrinter::PrintMessageContent(
1886     const Message& /*message*/, int /*field_index*/, int /*field_count*/,
1887     bool /*single_line_mode*/, BaseTextGenerator* /*generator*/) const {
1888   return false;  // Use the default printing function.
1889 }
PrintMessageEnd(const Message &,int,int,bool single_line_mode,BaseTextGenerator * generator) const1890 void TextFormat::FastFieldValuePrinter::PrintMessageEnd(
1891     const Message& /*message*/, int /*field_index*/, int /*field_count*/,
1892     bool single_line_mode, BaseTextGenerator* generator) const {
1893   if (single_line_mode) {
1894     generator->PrintLiteral("} ");
1895   } else {
1896     generator->PrintLiteral("}\n");
1897   }
1898 }
1899 
1900 namespace {
1901 
1902 // A legacy compatibility wrapper. Takes ownership of the delegate.
1903 class FieldValuePrinterWrapper : public TextFormat::FastFieldValuePrinter {
1904  public:
FieldValuePrinterWrapper(const TextFormat::FieldValuePrinter * delegate)1905   explicit FieldValuePrinterWrapper(
1906       const TextFormat::FieldValuePrinter* delegate)
1907       : delegate_(delegate) {}
1908 
SetDelegate(const TextFormat::FieldValuePrinter * delegate)1909   void SetDelegate(const TextFormat::FieldValuePrinter* delegate) {
1910     delegate_.reset(delegate);
1911   }
1912 
PrintBool(bool val,TextFormat::BaseTextGenerator * generator) const1913   void PrintBool(bool val,
1914                  TextFormat::BaseTextGenerator* generator) const override {
1915     generator->PrintString(delegate_->PrintBool(val));
1916   }
PrintInt32(int32_t val,TextFormat::BaseTextGenerator * generator) const1917   void PrintInt32(int32_t val,
1918                   TextFormat::BaseTextGenerator* generator) const override {
1919     generator->PrintString(delegate_->PrintInt32(val));
1920   }
PrintUInt32(uint32_t val,TextFormat::BaseTextGenerator * generator) const1921   void PrintUInt32(uint32_t val,
1922                    TextFormat::BaseTextGenerator* generator) const override {
1923     generator->PrintString(delegate_->PrintUInt32(val));
1924   }
PrintInt64(int64_t val,TextFormat::BaseTextGenerator * generator) const1925   void PrintInt64(int64_t val,
1926                   TextFormat::BaseTextGenerator* generator) const override {
1927     generator->PrintString(delegate_->PrintInt64(val));
1928   }
PrintUInt64(uint64_t val,TextFormat::BaseTextGenerator * generator) const1929   void PrintUInt64(uint64_t val,
1930                    TextFormat::BaseTextGenerator* generator) const override {
1931     generator->PrintString(delegate_->PrintUInt64(val));
1932   }
PrintFloat(float val,TextFormat::BaseTextGenerator * generator) const1933   void PrintFloat(float val,
1934                   TextFormat::BaseTextGenerator* generator) const override {
1935     generator->PrintString(delegate_->PrintFloat(val));
1936   }
PrintDouble(double val,TextFormat::BaseTextGenerator * generator) const1937   void PrintDouble(double val,
1938                    TextFormat::BaseTextGenerator* generator) const override {
1939     generator->PrintString(delegate_->PrintDouble(val));
1940   }
PrintString(const std::string & val,TextFormat::BaseTextGenerator * generator) const1941   void PrintString(const std::string& val,
1942                    TextFormat::BaseTextGenerator* generator) const override {
1943     generator->PrintString(delegate_->PrintString(val));
1944   }
PrintBytes(const std::string & val,TextFormat::BaseTextGenerator * generator) const1945   void PrintBytes(const std::string& val,
1946                   TextFormat::BaseTextGenerator* generator) const override {
1947     generator->PrintString(delegate_->PrintBytes(val));
1948   }
PrintEnum(int32_t val,const std::string & name,TextFormat::BaseTextGenerator * generator) const1949   void PrintEnum(int32_t val, const std::string& name,
1950                  TextFormat::BaseTextGenerator* generator) const override {
1951     generator->PrintString(delegate_->PrintEnum(val, name));
1952   }
PrintFieldName(const Message & message,int,int,const Reflection * reflection,const FieldDescriptor * field,TextFormat::BaseTextGenerator * generator) const1953   void PrintFieldName(const Message& message, int /*field_index*/,
1954                       int /*field_count*/, const Reflection* reflection,
1955                       const FieldDescriptor* field,
1956                       TextFormat::BaseTextGenerator* generator) const override {
1957     generator->PrintString(
1958         delegate_->PrintFieldName(message, reflection, field));
1959   }
PrintFieldName(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextFormat::BaseTextGenerator * generator) const1960   void PrintFieldName(const Message& message, const Reflection* reflection,
1961                       const FieldDescriptor* field,
1962                       TextFormat::BaseTextGenerator* generator) const override {
1963     generator->PrintString(
1964         delegate_->PrintFieldName(message, reflection, field));
1965   }
PrintMessageStart(const Message & message,int field_index,int field_count,bool single_line_mode,TextFormat::BaseTextGenerator * generator) const1966   void PrintMessageStart(
1967       const Message& message, int field_index, int field_count,
1968       bool single_line_mode,
1969       TextFormat::BaseTextGenerator* generator) const override {
1970     generator->PrintString(delegate_->PrintMessageStart(
1971         message, field_index, field_count, single_line_mode));
1972   }
PrintMessageEnd(const Message & message,int field_index,int field_count,bool single_line_mode,TextFormat::BaseTextGenerator * generator) const1973   void PrintMessageEnd(
1974       const Message& message, int field_index, int field_count,
1975       bool single_line_mode,
1976       TextFormat::BaseTextGenerator* generator) const override {
1977     generator->PrintString(delegate_->PrintMessageEnd(
1978         message, field_index, field_count, single_line_mode));
1979   }
1980 
1981  private:
1982   std::unique_ptr<const TextFormat::FieldValuePrinter> delegate_;
1983 };
1984 
1985 }  // namespace
1986 
1987 const char* const TextFormat::Printer::kDoNotParse =
1988     "DO NOT PARSE: fields may be stripped and missing.\n";
1989 
Printer()1990 TextFormat::Printer::Printer()
1991     : initial_indent_level_(0),
1992       single_line_mode_(false),
1993       use_field_number_(false),
1994       use_short_repeated_primitives_(false),
1995       insert_silent_marker_(false),
1996       hide_unknown_fields_(false),
1997       print_message_fields_in_index_order_(false),
1998       expand_any_(false),
1999       truncate_string_field_longer_than_(0LL),
2000       finder_(nullptr) {
2001   SetUseUtf8StringEscaping(false);
2002 }
2003 
SetUseUtf8StringEscaping(bool as_utf8)2004 void TextFormat::Printer::SetUseUtf8StringEscaping(bool as_utf8) {
2005   SetDefaultFieldValuePrinter(as_utf8 ? new FastFieldValuePrinterUtf8Escaping()
2006                                       : new DebugStringFieldValuePrinter());
2007 }
2008 
SetDefaultFieldValuePrinter(const FieldValuePrinter * printer)2009 void TextFormat::Printer::SetDefaultFieldValuePrinter(
2010     const FieldValuePrinter* printer) {
2011   default_field_value_printer_.reset(new FieldValuePrinterWrapper(printer));
2012 }
2013 
SetDefaultFieldValuePrinter(const FastFieldValuePrinter * printer)2014 void TextFormat::Printer::SetDefaultFieldValuePrinter(
2015     const FastFieldValuePrinter* printer) {
2016   default_field_value_printer_.reset(printer);
2017 }
2018 
RegisterFieldValuePrinter(const FieldDescriptor * field,const FieldValuePrinter * printer)2019 bool TextFormat::Printer::RegisterFieldValuePrinter(
2020     const FieldDescriptor* field, const FieldValuePrinter* printer) {
2021   if (field == nullptr || printer == nullptr) {
2022     return false;
2023   }
2024   std::unique_ptr<FieldValuePrinterWrapper> wrapper(
2025       new FieldValuePrinterWrapper(nullptr));
2026   auto pair = custom_printers_.insert(std::make_pair(field, nullptr));
2027   if (pair.second) {
2028     wrapper->SetDelegate(printer);
2029     pair.first->second = std::move(wrapper);
2030     return true;
2031   } else {
2032     return false;
2033   }
2034 }
2035 
RegisterFieldValuePrinter(const FieldDescriptor * field,const FastFieldValuePrinter * printer)2036 bool TextFormat::Printer::RegisterFieldValuePrinter(
2037     const FieldDescriptor* field, const FastFieldValuePrinter* printer) {
2038   if (field == nullptr || printer == nullptr) {
2039     return false;
2040   }
2041   auto pair = custom_printers_.insert(std::make_pair(field, nullptr));
2042   if (pair.second) {
2043     pair.first->second.reset(printer);
2044     return true;
2045   } else {
2046     return false;
2047   }
2048 }
2049 
RegisterMessagePrinter(const Descriptor * descriptor,const MessagePrinter * printer)2050 bool TextFormat::Printer::RegisterMessagePrinter(
2051     const Descriptor* descriptor, const MessagePrinter* printer) {
2052   if (descriptor == nullptr || printer == nullptr) {
2053     return false;
2054   }
2055   auto pair =
2056       custom_message_printers_.insert(std::make_pair(descriptor, nullptr));
2057   if (pair.second) {
2058     pair.first->second.reset(printer);
2059     return true;
2060   } else {
2061     return false;
2062   }
2063 }
2064 
PrintToString(const Message & message,std::string * output) const2065 bool TextFormat::Printer::PrintToString(const Message& message,
2066                                         std::string* output) const {
2067   GOOGLE_DCHECK(output) << "output specified is nullptr";
2068 
2069   output->clear();
2070   io::StringOutputStream output_stream(output);
2071 
2072   return Print(message, &output_stream);
2073 }
2074 
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,std::string * output) const2075 bool TextFormat::Printer::PrintUnknownFieldsToString(
2076     const UnknownFieldSet& unknown_fields, std::string* output) const {
2077   GOOGLE_DCHECK(output) << "output specified is nullptr";
2078 
2079   output->clear();
2080   io::StringOutputStream output_stream(output);
2081   return PrintUnknownFields(unknown_fields, &output_stream);
2082 }
2083 
Print(const Message & message,io::ZeroCopyOutputStream * output) const2084 bool TextFormat::Printer::Print(const Message& message,
2085                                 io::ZeroCopyOutputStream* output) const {
2086   TextGenerator generator(output, insert_silent_marker_, initial_indent_level_);
2087 
2088   Print(message, &generator);
2089 
2090   // Output false if the generator failed internally.
2091   return !generator.failed();
2092 }
2093 
// Recursion budget passed to PrintUnknownFields: the maximum depth to which
// length-delimited unknown fields are heuristically printed as messages.
static constexpr int kUnknownFieldRecursionLimit = 10;
2097 
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output) const2098 bool TextFormat::Printer::PrintUnknownFields(
2099     const UnknownFieldSet& unknown_fields,
2100     io::ZeroCopyOutputStream* output) const {
2101   TextGenerator generator(output, initial_indent_level_);
2102 
2103   PrintUnknownFields(unknown_fields, &generator, kUnknownFieldRecursionLimit);
2104 
2105   // Output false if the generator failed internally.
2106   return !generator.failed();
2107 }
2108 
2109 namespace {
2110 // Comparison functor for sorting FieldDescriptors by field index.
2111 // Normal fields have higher precedence than extensions.
2112 struct FieldIndexSorter {
operator ()google::protobuf::__anon048cea3c0611::FieldIndexSorter2113   bool operator()(const FieldDescriptor* left,
2114                   const FieldDescriptor* right) const {
2115     if (left->is_extension() && right->is_extension()) {
2116       return left->number() < right->number();
2117     } else if (left->is_extension()) {
2118       return false;
2119     } else if (right->is_extension()) {
2120       return true;
2121     } else {
2122       return left->index() < right->index();
2123     }
2124   }
2125 };
2126 
2127 }  // namespace
2128 
PrintAny(const Message & message,TextGenerator * generator) const2129 bool TextFormat::Printer::PrintAny(const Message& message,
2130                                    TextGenerator* generator) const {
2131   const FieldDescriptor* type_url_field;
2132   const FieldDescriptor* value_field;
2133   if (!internal::GetAnyFieldDescriptors(message, &type_url_field,
2134                                         &value_field)) {
2135     return false;
2136   }
2137 
2138   const Reflection* reflection = message.GetReflection();
2139 
2140   // Extract the full type name from the type_url field.
2141   const std::string& type_url = reflection->GetString(message, type_url_field);
2142   std::string url_prefix;
2143   std::string full_type_name;
2144   if (!internal::ParseAnyTypeUrl(type_url, &url_prefix, &full_type_name)) {
2145     return false;
2146   }
2147 
2148   // Print the "value" in text.
2149   const Descriptor* value_descriptor =
2150       finder_ ? finder_->FindAnyType(message, url_prefix, full_type_name)
2151               : DefaultFinderFindAnyType(message, url_prefix, full_type_name);
2152   if (value_descriptor == nullptr) {
2153     GOOGLE_LOG(WARNING) << "Can't print proto content: proto type " << type_url
2154                  << " not found";
2155     return false;
2156   }
2157   DynamicMessageFactory factory;
2158   std::unique_ptr<Message> value_message(
2159       factory.GetPrototype(value_descriptor)->New());
2160   std::string serialized_value = reflection->GetString(message, value_field);
2161   if (!value_message->ParseFromString(serialized_value)) {
2162     GOOGLE_LOG(WARNING) << type_url << ": failed to parse contents";
2163     return false;
2164   }
2165   generator->PrintLiteral("[");
2166   generator->PrintString(type_url);
2167   generator->PrintLiteral("]");
2168   const FastFieldValuePrinter* printer = GetFieldPrinter(value_field);
2169   printer->PrintMessageStart(message, -1, 0, single_line_mode_, generator);
2170   generator->Indent();
2171   Print(*value_message, generator);
2172   generator->Outdent();
2173   printer->PrintMessageEnd(message, -1, 0, single_line_mode_, generator);
2174   return true;
2175 }
2176 
Print(const Message & message,TextGenerator * generator) const2177 void TextFormat::Printer::Print(const Message& message,
2178                                 TextGenerator* generator) const {
2179   const Reflection* reflection = message.GetReflection();
2180   if (!reflection) {
2181     // This message does not provide any way to describe its structure.
2182     // Parse it again in an UnknownFieldSet, and display this instead.
2183     UnknownFieldSet unknown_fields;
2184     {
2185       std::string serialized = message.SerializeAsString();
2186       io::ArrayInputStream input(serialized.data(), serialized.size());
2187       unknown_fields.ParseFromZeroCopyStream(&input);
2188     }
2189     PrintUnknownFields(unknown_fields, generator, kUnknownFieldRecursionLimit);
2190     return;
2191   }
2192   const Descriptor* descriptor = message.GetDescriptor();
2193   auto itr = custom_message_printers_.find(descriptor);
2194   if (itr != custom_message_printers_.end()) {
2195     itr->second->Print(message, single_line_mode_, generator);
2196     return;
2197   }
2198   if (descriptor->full_name() == internal::kAnyFullTypeName && expand_any_ &&
2199       PrintAny(message, generator)) {
2200     return;
2201   }
2202   std::vector<const FieldDescriptor*> fields;
2203   if (descriptor->options().map_entry()) {
2204     fields.push_back(descriptor->field(0));
2205     fields.push_back(descriptor->field(1));
2206   } else {
2207     reflection->ListFieldsOmitStripped(message, &fields);
2208     if (reflection->IsMessageStripped(message.GetDescriptor())) {
2209       generator->Print(kDoNotParse, std::strlen(kDoNotParse));
2210     }
2211   }
2212 
2213   if (print_message_fields_in_index_order_) {
2214     std::sort(fields.begin(), fields.end(), FieldIndexSorter());
2215   }
2216   for (const FieldDescriptor* field : fields) {
2217     PrintField(message, reflection, field, generator);
2218   }
2219   if (!hide_unknown_fields_) {
2220     PrintUnknownFields(reflection->GetUnknownFields(message), generator,
2221                        kUnknownFieldRecursionLimit);
2222   }
2223 }
2224 
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,std::string * output) const2225 void TextFormat::Printer::PrintFieldValueToString(const Message& message,
2226                                                   const FieldDescriptor* field,
2227                                                   int index,
2228                                                   std::string* output) const {
2229   GOOGLE_DCHECK(output) << "output specified is nullptr";
2230 
2231   output->clear();
2232   io::StringOutputStream output_stream(output);
2233   TextGenerator generator(&output_stream, initial_indent_level_);
2234 
2235   PrintFieldValue(message, message.GetReflection(), field, index, &generator);
2236 }
2237 
2238 class MapEntryMessageComparator {
2239  public:
MapEntryMessageComparator(const Descriptor * descriptor)2240   explicit MapEntryMessageComparator(const Descriptor* descriptor)
2241       : field_(descriptor->field(0)) {}
2242 
operator ()(const Message * a,const Message * b)2243   bool operator()(const Message* a, const Message* b) {
2244     const Reflection* reflection = a->GetReflection();
2245     switch (field_->cpp_type()) {
2246       case FieldDescriptor::CPPTYPE_BOOL: {
2247         bool first = reflection->GetBool(*a, field_);
2248         bool second = reflection->GetBool(*b, field_);
2249         return first < second;
2250       }
2251       case FieldDescriptor::CPPTYPE_INT32: {
2252         int32_t first = reflection->GetInt32(*a, field_);
2253         int32_t second = reflection->GetInt32(*b, field_);
2254         return first < second;
2255       }
2256       case FieldDescriptor::CPPTYPE_INT64: {
2257         int64_t first = reflection->GetInt64(*a, field_);
2258         int64_t second = reflection->GetInt64(*b, field_);
2259         return first < second;
2260       }
2261       case FieldDescriptor::CPPTYPE_UINT32: {
2262         uint32_t first = reflection->GetUInt32(*a, field_);
2263         uint32_t second = reflection->GetUInt32(*b, field_);
2264         return first < second;
2265       }
2266       case FieldDescriptor::CPPTYPE_UINT64: {
2267         uint64_t first = reflection->GetUInt64(*a, field_);
2268         uint64_t second = reflection->GetUInt64(*b, field_);
2269         return first < second;
2270       }
2271       case FieldDescriptor::CPPTYPE_STRING: {
2272         std::string first = reflection->GetString(*a, field_);
2273         std::string second = reflection->GetString(*b, field_);
2274         return first < second;
2275       }
2276       default:
2277         GOOGLE_LOG(DFATAL) << "Invalid key for map field.";
2278         return true;
2279     }
2280   }
2281 
2282  private:
2283   const FieldDescriptor* field_;
2284 };
2285 
2286 namespace internal {
2287 class MapFieldPrinterHelper {
2288  public:
2289   // DynamicMapSorter::Sort cannot be used because it enforces syncing with
2290   // repeated field.
2291   static bool SortMap(const Message& message, const Reflection* reflection,
2292                       const FieldDescriptor* field,
2293                       std::vector<const Message*>* sorted_map_field);
2294   static void CopyKey(const MapKey& key, Message* message,
2295                       const FieldDescriptor* field_desc);
2296   static void CopyValue(const MapValueRef& value, Message* message,
2297                         const FieldDescriptor* field_desc);
2298 };
2299 
2300 // Returns true if elements contained in sorted_map_field need to be released.
SortMap(const Message & message,const Reflection * reflection,const FieldDescriptor * field,std::vector<const Message * > * sorted_map_field)2301 bool MapFieldPrinterHelper::SortMap(
2302     const Message& message, const Reflection* reflection,
2303     const FieldDescriptor* field,
2304     std::vector<const Message*>* sorted_map_field) {
2305   bool need_release = false;
2306   const MapFieldBase& base = *reflection->GetMapData(message, field);
2307 
2308   if (base.IsRepeatedFieldValid()) {
2309     const RepeatedPtrField<Message>& map_field =
2310         reflection->GetRepeatedPtrFieldInternal<Message>(message, field);
2311     for (int i = 0; i < map_field.size(); ++i) {
2312       sorted_map_field->push_back(
2313           const_cast<RepeatedPtrField<Message>*>(&map_field)->Mutable(i));
2314     }
2315   } else {
2316     // TODO(teboring): For performance, instead of creating map entry message
2317     // for each element, just store map keys and sort them.
2318     const Descriptor* map_entry_desc = field->message_type();
2319     const Message* prototype =
2320         reflection->GetMessageFactory()->GetPrototype(map_entry_desc);
2321     for (MapIterator iter =
2322              reflection->MapBegin(const_cast<Message*>(&message), field);
2323          iter != reflection->MapEnd(const_cast<Message*>(&message), field);
2324          ++iter) {
2325       Message* map_entry_message = prototype->New();
2326       CopyKey(iter.GetKey(), map_entry_message, map_entry_desc->field(0));
2327       CopyValue(iter.GetValueRef(), map_entry_message,
2328                 map_entry_desc->field(1));
2329       sorted_map_field->push_back(map_entry_message);
2330     }
2331     need_release = true;
2332   }
2333 
2334   MapEntryMessageComparator comparator(field->message_type());
2335   std::stable_sort(sorted_map_field->begin(), sorted_map_field->end(),
2336                    comparator);
2337   return need_release;
2338 }
2339 
CopyKey(const MapKey & key,Message * message,const FieldDescriptor * field_desc)2340 void MapFieldPrinterHelper::CopyKey(const MapKey& key, Message* message,
2341                                     const FieldDescriptor* field_desc) {
2342   const Reflection* reflection = message->GetReflection();
2343   switch (field_desc->cpp_type()) {
2344     case FieldDescriptor::CPPTYPE_DOUBLE:
2345     case FieldDescriptor::CPPTYPE_FLOAT:
2346     case FieldDescriptor::CPPTYPE_ENUM:
2347     case FieldDescriptor::CPPTYPE_MESSAGE:
2348       GOOGLE_LOG(ERROR) << "Not supported.";
2349       break;
2350     case FieldDescriptor::CPPTYPE_STRING:
2351       reflection->SetString(message, field_desc, key.GetStringValue());
2352       return;
2353     case FieldDescriptor::CPPTYPE_INT64:
2354       reflection->SetInt64(message, field_desc, key.GetInt64Value());
2355       return;
2356     case FieldDescriptor::CPPTYPE_INT32:
2357       reflection->SetInt32(message, field_desc, key.GetInt32Value());
2358       return;
2359     case FieldDescriptor::CPPTYPE_UINT64:
2360       reflection->SetUInt64(message, field_desc, key.GetUInt64Value());
2361       return;
2362     case FieldDescriptor::CPPTYPE_UINT32:
2363       reflection->SetUInt32(message, field_desc, key.GetUInt32Value());
2364       return;
2365     case FieldDescriptor::CPPTYPE_BOOL:
2366       reflection->SetBool(message, field_desc, key.GetBoolValue());
2367       return;
2368   }
2369 }
2370 
CopyValue(const MapValueRef & value,Message * message,const FieldDescriptor * field_desc)2371 void MapFieldPrinterHelper::CopyValue(const MapValueRef& value,
2372                                       Message* message,
2373                                       const FieldDescriptor* field_desc) {
2374   const Reflection* reflection = message->GetReflection();
2375   switch (field_desc->cpp_type()) {
2376     case FieldDescriptor::CPPTYPE_DOUBLE:
2377       reflection->SetDouble(message, field_desc, value.GetDoubleValue());
2378       return;
2379     case FieldDescriptor::CPPTYPE_FLOAT:
2380       reflection->SetFloat(message, field_desc, value.GetFloatValue());
2381       return;
2382     case FieldDescriptor::CPPTYPE_ENUM:
2383       reflection->SetEnumValue(message, field_desc, value.GetEnumValue());
2384       return;
2385     case FieldDescriptor::CPPTYPE_MESSAGE: {
2386       Message* sub_message = value.GetMessageValue().New();
2387       sub_message->CopyFrom(value.GetMessageValue());
2388       reflection->SetAllocatedMessage(message, sub_message, field_desc);
2389       return;
2390     }
2391     case FieldDescriptor::CPPTYPE_STRING:
2392       reflection->SetString(message, field_desc, value.GetStringValue());
2393       return;
2394     case FieldDescriptor::CPPTYPE_INT64:
2395       reflection->SetInt64(message, field_desc, value.GetInt64Value());
2396       return;
2397     case FieldDescriptor::CPPTYPE_INT32:
2398       reflection->SetInt32(message, field_desc, value.GetInt32Value());
2399       return;
2400     case FieldDescriptor::CPPTYPE_UINT64:
2401       reflection->SetUInt64(message, field_desc, value.GetUInt64Value());
2402       return;
2403     case FieldDescriptor::CPPTYPE_UINT32:
2404       reflection->SetUInt32(message, field_desc, value.GetUInt32Value());
2405       return;
2406     case FieldDescriptor::CPPTYPE_BOOL:
2407       reflection->SetBool(message, field_desc, value.GetBoolValue());
2408       return;
2409   }
2410 }
2411 }  // namespace internal
2412 
PrintField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator * generator) const2413 void TextFormat::Printer::PrintField(const Message& message,
2414                                      const Reflection* reflection,
2415                                      const FieldDescriptor* field,
2416                                      TextGenerator* generator) const {
2417   if (use_short_repeated_primitives_ && field->is_repeated() &&
2418       field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
2419       field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
2420     PrintShortRepeatedField(message, reflection, field, generator);
2421     return;
2422   }
2423 
2424   int count = 0;
2425 
2426   if (field->is_repeated()) {
2427     count = reflection->FieldSize(message, field);
2428   } else if (reflection->HasField(message, field) ||
2429              field->containing_type()->options().map_entry()) {
2430     count = 1;
2431   }
2432 
2433   std::vector<const Message*> sorted_map_field;
2434   bool need_release = false;
2435   bool is_map = field->is_map();
2436   if (is_map) {
2437     need_release = internal::MapFieldPrinterHelper::SortMap(
2438         message, reflection, field, &sorted_map_field);
2439   }
2440 
2441   for (int j = 0; j < count; ++j) {
2442     const int field_index = field->is_repeated() ? j : -1;
2443 
2444     PrintFieldName(message, field_index, count, reflection, field, generator);
2445 
2446     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2447       const FastFieldValuePrinter* printer = GetFieldPrinter(field);
2448       const Message& sub_message =
2449           field->is_repeated()
2450               ? (is_map ? *sorted_map_field[j]
2451                         : reflection->GetRepeatedMessage(message, field, j))
2452               : reflection->GetMessage(message, field);
2453       printer->PrintMessageStart(sub_message, field_index, count,
2454                                  single_line_mode_, generator);
2455       generator->Indent();
2456       if (!printer->PrintMessageContent(sub_message, field_index, count,
2457                                         single_line_mode_, generator)) {
2458         Print(sub_message, generator);
2459       }
2460       generator->Outdent();
2461       printer->PrintMessageEnd(sub_message, field_index, count,
2462                                single_line_mode_, generator);
2463     } else {
2464       generator->PrintMaybeWithMarker(": ");
2465       // Write the field value.
2466       PrintFieldValue(message, reflection, field, field_index, generator);
2467       if (single_line_mode_) {
2468         generator->PrintLiteral(" ");
2469       } else {
2470         generator->PrintLiteral("\n");
2471       }
2472     }
2473   }
2474 
2475   if (need_release) {
2476     for (const Message* message_to_delete : sorted_map_field) {
2477       delete message_to_delete;
2478     }
2479   }
2480 }
2481 
PrintShortRepeatedField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator * generator) const2482 void TextFormat::Printer::PrintShortRepeatedField(
2483     const Message& message, const Reflection* reflection,
2484     const FieldDescriptor* field, TextGenerator* generator) const {
2485   // Print primitive repeated field in short form.
2486   int size = reflection->FieldSize(message, field);
2487   PrintFieldName(message, /*field_index=*/-1, /*field_count=*/size, reflection,
2488                  field, generator);
2489   generator->PrintMaybeWithMarker(": ", "[");
2490   for (int i = 0; i < size; i++) {
2491     if (i > 0) generator->PrintLiteral(", ");
2492     PrintFieldValue(message, reflection, field, i, generator);
2493   }
2494   if (single_line_mode_) {
2495     generator->PrintLiteral("] ");
2496   } else {
2497     generator->PrintLiteral("]\n");
2498   }
2499 }
2500 
PrintFieldName(const Message & message,int field_index,int field_count,const Reflection * reflection,const FieldDescriptor * field,TextGenerator * generator) const2501 void TextFormat::Printer::PrintFieldName(const Message& message,
2502                                          int field_index, int field_count,
2503                                          const Reflection* reflection,
2504                                          const FieldDescriptor* field,
2505                                          TextGenerator* generator) const {
2506   // if use_field_number_ is true, prints field number instead
2507   // of field name.
2508   if (use_field_number_) {
2509     generator->PrintString(StrCat(field->number()));
2510     return;
2511   }
2512 
2513   const FastFieldValuePrinter* printer = GetFieldPrinter(field);
2514   printer->PrintFieldName(message, field_index, field_count, reflection, field,
2515                           generator);
2516 }
2517 
PrintFieldValue(const Message & message,const Reflection * reflection,const FieldDescriptor * field,int index,TextGenerator * generator) const2518 void TextFormat::Printer::PrintFieldValue(const Message& message,
2519                                           const Reflection* reflection,
2520                                           const FieldDescriptor* field,
2521                                           int index,
2522                                           TextGenerator* generator) const {
2523   GOOGLE_DCHECK(field->is_repeated() || (index == -1))
2524       << "Index must be -1 for non-repeated fields";
2525 
2526   const FastFieldValuePrinter* printer = GetFieldPrinter(field);
2527 
2528   switch (field->cpp_type()) {
2529 #define OUTPUT_FIELD(CPPTYPE, METHOD)                                \
2530   case FieldDescriptor::CPPTYPE_##CPPTYPE:                           \
2531     printer->Print##METHOD(                                          \
2532         field->is_repeated()                                         \
2533             ? reflection->GetRepeated##METHOD(message, field, index) \
2534             : reflection->Get##METHOD(message, field),               \
2535         generator);                                                  \
2536     break
2537 
2538     OUTPUT_FIELD(INT32, Int32);
2539     OUTPUT_FIELD(INT64, Int64);
2540     OUTPUT_FIELD(UINT32, UInt32);
2541     OUTPUT_FIELD(UINT64, UInt64);
2542     OUTPUT_FIELD(FLOAT, Float);
2543     OUTPUT_FIELD(DOUBLE, Double);
2544     OUTPUT_FIELD(BOOL, Bool);
2545 #undef OUTPUT_FIELD
2546 
2547     case FieldDescriptor::CPPTYPE_STRING: {
2548       std::string scratch;
2549       const std::string& value =
2550           field->is_repeated()
2551               ? reflection->GetRepeatedStringReference(message, field, index,
2552                                                        &scratch)
2553               : reflection->GetStringReference(message, field, &scratch);
2554       const std::string* value_to_print = &value;
2555       std::string truncated_value;
2556       if (truncate_string_field_longer_than_ > 0 &&
2557           static_cast<size_t>(truncate_string_field_longer_than_) <
2558               value.size()) {
2559         truncated_value = value.substr(0, truncate_string_field_longer_than_) +
2560                           "...<truncated>...";
2561         value_to_print = &truncated_value;
2562       }
2563       if (field->type() == FieldDescriptor::TYPE_STRING) {
2564         printer->PrintString(*value_to_print, generator);
2565       } else {
2566         GOOGLE_DCHECK_EQ(field->type(), FieldDescriptor::TYPE_BYTES);
2567         printer->PrintBytes(*value_to_print, generator);
2568       }
2569       break;
2570     }
2571 
2572     case FieldDescriptor::CPPTYPE_ENUM: {
2573       int enum_value =
2574           field->is_repeated()
2575               ? reflection->GetRepeatedEnumValue(message, field, index)
2576               : reflection->GetEnumValue(message, field);
2577       const EnumValueDescriptor* enum_desc =
2578           field->enum_type()->FindValueByNumber(enum_value);
2579       if (enum_desc != nullptr) {
2580         printer->PrintEnum(enum_value, enum_desc->name(), generator);
2581       } else {
2582         // Ordinarily, enum_desc should not be null, because proto2 has the
2583         // invariant that set enum field values must be in-range, but with the
2584         // new integer-based API for enums (or the RepeatedField<int> loophole),
2585         // it is possible for the user to force an unknown integer value.  So we
2586         // simply use the integer value itself as the enum value name in this
2587         // case.
2588         printer->PrintEnum(enum_value, StrCat(enum_value), generator);
2589       }
2590       break;
2591     }
2592 
2593     case FieldDescriptor::CPPTYPE_MESSAGE:
2594       Print(field->is_repeated()
2595                 ? reflection->GetRepeatedMessage(message, field, index)
2596                 : reflection->GetMessage(message, field),
2597             generator);
2598       break;
2599   }
2600 }
2601 
Print(const Message & message,io::ZeroCopyOutputStream * output)2602 /* static */ bool TextFormat::Print(const Message& message,
2603                                     io::ZeroCopyOutputStream* output) {
2604   return Printer().Print(message, output);
2605 }
2606 
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output)2607 /* static */ bool TextFormat::PrintUnknownFields(
2608     const UnknownFieldSet& unknown_fields, io::ZeroCopyOutputStream* output) {
2609   return Printer().PrintUnknownFields(unknown_fields, output);
2610 }
2611 
PrintToString(const Message & message,std::string * output)2612 /* static */ bool TextFormat::PrintToString(const Message& message,
2613                                             std::string* output) {
2614   return Printer().PrintToString(message, output);
2615 }
2616 
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,std::string * output)2617 /* static */ bool TextFormat::PrintUnknownFieldsToString(
2618     const UnknownFieldSet& unknown_fields, std::string* output) {
2619   return Printer().PrintUnknownFieldsToString(unknown_fields, output);
2620 }
2621 
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,std::string * output)2622 /* static */ void TextFormat::PrintFieldValueToString(
2623     const Message& message, const FieldDescriptor* field, int index,
2624     std::string* output) {
2625   return Printer().PrintFieldValueToString(message, field, index, output);
2626 }
2627 
ParseFieldValueFromString(const std::string & input,const FieldDescriptor * field,Message * message)2628 /* static */ bool TextFormat::ParseFieldValueFromString(
2629     const std::string& input, const FieldDescriptor* field, Message* message) {
2630   return Parser().ParseFieldValueFromString(input, field, message);
2631 }
2632 
PrintUnknownFields(const UnknownFieldSet & unknown_fields,TextGenerator * generator,int recursion_budget) const2633 void TextFormat::Printer::PrintUnknownFields(
2634     const UnknownFieldSet& unknown_fields, TextGenerator* generator,
2635     int recursion_budget) const {
2636   for (int i = 0; i < unknown_fields.field_count(); i++) {
2637     const UnknownField& field = unknown_fields.field(i);
2638     std::string field_number = StrCat(field.number());
2639 
2640     switch (field.type()) {
2641       case UnknownField::TYPE_VARINT:
2642         generator->PrintString(field_number);
2643         generator->PrintMaybeWithMarker(": ");
2644         generator->PrintString(StrCat(field.varint()));
2645         if (single_line_mode_) {
2646           generator->PrintLiteral(" ");
2647         } else {
2648           generator->PrintLiteral("\n");
2649         }
2650         break;
2651       case UnknownField::TYPE_FIXED32: {
2652         generator->PrintString(field_number);
2653         generator->PrintMaybeWithMarker(": ", "0x");
2654         generator->PrintString(
2655             StrCat(strings::Hex(field.fixed32(), strings::ZERO_PAD_8)));
2656         if (single_line_mode_) {
2657           generator->PrintLiteral(" ");
2658         } else {
2659           generator->PrintLiteral("\n");
2660         }
2661         break;
2662       }
2663       case UnknownField::TYPE_FIXED64: {
2664         generator->PrintString(field_number);
2665         generator->PrintMaybeWithMarker(": ", "0x");
2666         generator->PrintString(
2667             StrCat(strings::Hex(field.fixed64(), strings::ZERO_PAD_16)));
2668         if (single_line_mode_) {
2669           generator->PrintLiteral(" ");
2670         } else {
2671           generator->PrintLiteral("\n");
2672         }
2673         break;
2674       }
2675       case UnknownField::TYPE_LENGTH_DELIMITED: {
2676         generator->PrintString(field_number);
2677         const std::string& value = field.length_delimited();
2678         // We create a CodedInputStream so that we can adhere to our recursion
2679         // budget when we attempt to parse the data. UnknownFieldSet parsing is
2680         // recursive because of groups.
2681         io::CodedInputStream input_stream(
2682             reinterpret_cast<const uint8_t*>(value.data()), value.size());
2683         input_stream.SetRecursionLimit(recursion_budget);
2684         UnknownFieldSet embedded_unknown_fields;
2685         if (!value.empty() && recursion_budget > 0 &&
2686             embedded_unknown_fields.ParseFromCodedStream(&input_stream)) {
2687           // This field is parseable as a Message.
2688           // So it is probably an embedded message.
2689           if (single_line_mode_) {
2690             generator->PrintMaybeWithMarker(" ", "{ ");
2691           } else {
2692             generator->PrintMaybeWithMarker(" ", "{\n");
2693             generator->Indent();
2694           }
2695           PrintUnknownFields(embedded_unknown_fields, generator,
2696                              recursion_budget - 1);
2697           if (single_line_mode_) {
2698             generator->PrintLiteral("} ");
2699           } else {
2700             generator->Outdent();
2701             generator->PrintLiteral("}\n");
2702           }
2703         } else {
2704           // This field is not parseable as a Message (or we ran out of
2705           // recursion budget). So it is probably just a plain string.
2706           generator->PrintMaybeWithMarker(": ", "\"");
2707           generator->PrintString(CEscape(value));
2708           if (single_line_mode_) {
2709             generator->PrintLiteral("\" ");
2710           } else {
2711             generator->PrintLiteral("\"\n");
2712           }
2713         }
2714         break;
2715       }
2716       case UnknownField::TYPE_GROUP:
2717         generator->PrintString(field_number);
2718         if (single_line_mode_) {
2719           generator->PrintMaybeWithMarker(" ", "{ ");
2720         } else {
2721           generator->PrintMaybeWithMarker(" ", "{\n");
2722           generator->Indent();
2723         }
2724         // For groups, we recurse without checking the budget. This is OK,
2725         // because if the groups were too deeply nested then we would have
2726         // already rejected the message when we originally parsed it.
2727         PrintUnknownFields(field.group(), generator, recursion_budget - 1);
2728         if (single_line_mode_) {
2729           generator->PrintLiteral("} ");
2730         } else {
2731           generator->Outdent();
2732           generator->PrintLiteral("}\n");
2733         }
2734         break;
2735     }
2736   }
2737 }
2738 
2739 }  // namespace protobuf
2740 }  // namespace google
2741 
2742 #include <google/protobuf/port_undef.inc>
2743