• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // Author: jschorr@google.com (Joseph Schorr)
9 //  Based on original Protocol Buffers design by
10 //  Sanjay Ghemawat, Jeff Dean, and others.
11 
12 #include "google/protobuf/text_format.h"
13 
14 #include <stdio.h>
15 
16 #include <algorithm>
17 #include <atomic>
18 #include <climits>
19 #include <cmath>
20 #include <cstddef>
21 #include <cstdint>
22 #include <limits>
23 #include <memory>
24 #include <string>
25 #include <utility>
26 #include <vector>
27 
28 #include "absl/container/btree_set.h"
29 #include "absl/log/absl_check.h"
30 #include "absl/strings/ascii.h"
31 #include "absl/strings/cord.h"
32 #include "absl/strings/escaping.h"
33 #include "absl/strings/match.h"
34 #include "absl/strings/numbers.h"
35 #include "absl/strings/str_cat.h"
36 #include "absl/strings/str_format.h"
37 #include "absl/strings/str_join.h"
38 #include "absl/strings/string_view.h"
39 #include "google/protobuf/any.h"
40 #include "google/protobuf/descriptor.h"
41 #include "google/protobuf/descriptor.pb.h"
42 #include "google/protobuf/dynamic_message.h"
43 #include "google/protobuf/io/coded_stream.h"
44 #include "google/protobuf/io/strtod.h"
45 #include "google/protobuf/io/tokenizer.h"
46 #include "google/protobuf/io/zero_copy_stream.h"
47 #include "google/protobuf/io/zero_copy_stream_impl_lite.h"
48 #include "google/protobuf/map_field.h"
49 #include "google/protobuf/message.h"
50 #include "google/protobuf/reflection_mode.h"
51 #include "google/protobuf/repeated_field.h"
52 #include "google/protobuf/unknown_field_set.h"
53 #include "google/protobuf/wire_format_lite.h"
54 #include "utf8_validity.h"
55 
56 // Must be included last.
57 #include "google/protobuf/port_def.inc"
58 
59 namespace google {
60 namespace protobuf {
61 
62 using internal::FieldReporterLevel;
63 using internal::ReflectionMode;
64 using internal::ScopedReflectionMode;
65 
66 namespace {
67 
// Placeholder text "[REDACTED]".
// NOTE(review): presumably emitted in place of a redacted field's value; the
// use site is outside this part of the file -- confirm.
const absl::string_view kFieldValueReplacement = "[REDACTED]";
69 
// Returns true when `str` is at least two characters long and starts with a
// hexadecimal prefix ("0x" or "0X"). The remaining characters are not checked.
inline bool IsHexNumber(const std::string& str) {
  if (str.size() < 2 || str[0] != '0') {
    return false;
  }
  const char marker = str[1];
  return marker == 'x' || marker == 'X';
}
74 
// Returns true when `str` is at least two characters long, starts with '0',
// and its second character is an octal digit ('0'..'7'). The remaining
// characters are not checked.
inline bool IsOctNumber(const std::string& str) {
  if (str.size() < 2 || str[0] != '0') {
    return false;
  }
  const char digit = str[1];
  return '0' <= digit && digit <= '7';
}
79 
// Process-wide tally of the fields that have been redacted in AbslStringify.
std::atomic<int64_t> num_redacted_field{0};

// Adds one to the redacted-field tally. Relaxed ordering is used: the value is
// only a counter and is not used here to publish other data.
inline void IncrementRedactedFieldCounter() {
  num_redacted_field.fetch_add(int64_t{1}, std::memory_order_relaxed);
}
86 
// Removes at most one trailing ' ' from `debug_string`, in place.
// Single line mode currently might leave an extra space at the end.
inline void TrimTrailingSpace(std::string& debug_string) {
  if (debug_string.empty()) {
    return;
  }
  if (debug_string.back() != ' ') {
    return;
  }
  debug_string.pop_back();
}
93 
94 }  // namespace
95 
96 namespace internal {
// Marker strings associated with debug-string output. The regular marker is
// the empty string; the "for detection" variant is a tab followed by a space.
// NOTE(review): where these are emitted is outside this part of the file --
// confirm against the printer implementation.
const char kDebugStringSilentMarker[] = "";
const char kDebugStringSilentMarkerForDetection[] = "\t ";

// Controls insertion of a marker making debug strings non-parseable, and
// redacting annotated fields in Protobuf's DebugString APIs.
// Defaults to false; the DebugString family below reads it with relaxed
// memory ordering.
PROTOBUF_EXPORT std::atomic<bool> enable_debug_string_safe_format{false};
103 
GetRedactedFieldCount()104 int64_t GetRedactedFieldCount() {
105   return num_redacted_field.load(std::memory_order_relaxed);
106 }
107 
// Output flavor requested from StringifyMessage():
//   kNone  - no extra printer option is set,
//   kShort - single-line mode (one trailing space is trimmed afterwards),
//   kUTF8  - UTF-8 string escaping is enabled.
enum class Option { kNone, kShort, kUTF8 };
109 
StringifyMessage(const Message & message,Option option,FieldReporterLevel reporter_level,bool enable_safe_format)110 std::string StringifyMessage(const Message& message, Option option,
111                              FieldReporterLevel reporter_level,
112                              bool enable_safe_format) {
113   // Indicate all scoped reflection calls are from DebugString function.
114   ScopedReflectionMode scope(ReflectionMode::kDebugString);
115 
116   TextFormat::Printer printer;
117   internal::FieldReporterLevel reporter = reporter_level;
118   switch (option) {
119     case Option::kShort:
120       printer.SetSingleLineMode(true);
121       break;
122     case Option::kUTF8:
123       printer.SetUseUtf8StringEscaping(true);
124       break;
125     case Option::kNone:
126       break;
127   }
128   printer.SetExpandAny(true);
129   printer.SetRedactDebugString(enable_safe_format);
130   printer.SetRandomizeDebugString(enable_safe_format);
131   printer.SetReportSensitiveFields(reporter);
132   std::string result;
133   printer.PrintToString(message, &result);
134 
135   if (option == Option::kShort) {
136     TrimTrailingSpace(result);
137   }
138 
139   return result;
140 }
141 
StringifyMessage(const Message & message)142 PROTOBUF_EXPORT std::string StringifyMessage(const Message& message) {
143   return StringifyMessage(message, Option::kNone,
144                           FieldReporterLevel::kAbslStringify, true);
145 }
146 }  // namespace internal
147 
DebugString() const148 std::string Message::DebugString() const {
149   bool enable_safe_format =
150       internal::enable_debug_string_safe_format.load(std::memory_order_relaxed);
151   if (enable_safe_format) {
152     return StringifyMessage(*this, internal::Option::kNone,
153                             FieldReporterLevel::kDebugString, true);
154   }
155   // Indicate all scoped reflection calls are from DebugString function.
156   ScopedReflectionMode scope(ReflectionMode::kDebugString);
157   std::string debug_string;
158 
159   TextFormat::Printer printer;
160   printer.SetExpandAny(true);
161   printer.SetInsertSilentMarker(true);
162   printer.SetReportSensitiveFields(FieldReporterLevel::kDebugString);
163 
164   printer.PrintToString(*this, &debug_string);
165 
166   return debug_string;
167 }
168 
ShortDebugString() const169 std::string Message::ShortDebugString() const {
170   bool enable_safe_format =
171       internal::enable_debug_string_safe_format.load(std::memory_order_relaxed);
172   if (enable_safe_format) {
173     return StringifyMessage(*this, internal::Option::kShort,
174                             FieldReporterLevel::kShortDebugString, true);
175   }
176   // Indicate all scoped reflection calls are from DebugString function.
177   ScopedReflectionMode scope(ReflectionMode::kDebugString);
178   std::string debug_string;
179 
180   TextFormat::Printer printer;
181   printer.SetSingleLineMode(true);
182   printer.SetExpandAny(true);
183   printer.SetInsertSilentMarker(true);
184   printer.SetReportSensitiveFields(FieldReporterLevel::kShortDebugString);
185 
186   printer.PrintToString(*this, &debug_string);
187   TrimTrailingSpace(debug_string);
188 
189   return debug_string;
190 }
191 
Utf8DebugString() const192 std::string Message::Utf8DebugString() const {
193   bool enable_safe_format =
194       internal::enable_debug_string_safe_format.load(std::memory_order_relaxed);
195   if (enable_safe_format) {
196     return StringifyMessage(*this, internal::Option::kUTF8,
197                             FieldReporterLevel::kUtf8DebugString, true);
198   }
199   // Indicate all scoped reflection calls are from DebugString function.
200   ScopedReflectionMode scope(ReflectionMode::kDebugString);
201   std::string debug_string;
202 
203   TextFormat::Printer printer;
204   printer.SetUseUtf8StringEscaping(true);
205   printer.SetExpandAny(true);
206   printer.SetInsertSilentMarker(true);
207   printer.SetReportSensitiveFields(FieldReporterLevel::kUtf8DebugString);
208 
209   printer.PrintToString(*this, &debug_string);
210 
211   return debug_string;
212 }
213 
PrintDebugString() const214 void Message::PrintDebugString() const { printf("%s", DebugString().c_str()); }
215 
ShortFormat(const Message & message)216 PROTOBUF_EXPORT std::string ShortFormat(const Message& message) {
217   return internal::StringifyMessage(message, internal::Option::kShort,
218                                     FieldReporterLevel::kShortFormat, true);
219 }
220 
Utf8Format(const Message & message)221 PROTOBUF_EXPORT std::string Utf8Format(const Message& message) {
222   return internal::StringifyMessage(message, internal::Option::kUTF8,
223                                     FieldReporterLevel::kUtf8Format, true);
224 }
225 
226 
227 // ===========================================================================
228 // Implementation of the parse information tree class.
RecordLocation(const FieldDescriptor * field,TextFormat::ParseLocationRange range)229 void TextFormat::ParseInfoTree::RecordLocation(
230     const FieldDescriptor* field, TextFormat::ParseLocationRange range) {
231   locations_[field].push_back(range);
232 }
233 
CreateNested(const FieldDescriptor * field)234 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::CreateNested(
235     const FieldDescriptor* field) {
236   // Owned by us in the map.
237   auto& vec = nested_[field];
238   vec.emplace_back(new TextFormat::ParseInfoTree());
239   return vec.back().get();
240 }
241 
CheckFieldIndex(const FieldDescriptor * field,int index)242 void CheckFieldIndex(const FieldDescriptor* field, int index) {
243   if (field == nullptr) {
244     return;
245   }
246 
247   if (field->is_repeated() && index == -1) {
248     ABSL_DLOG(FATAL) << "Index must be in range of repeated field values. "
249                      << "Field: " << field->name();
250   } else if (!field->is_repeated() && index != -1) {
251     ABSL_DLOG(FATAL) << "Index must be -1 for singular fields."
252                      << "Field: " << field->name();
253   }
254 }
255 
GetLocationRange(const FieldDescriptor * field,int index) const256 TextFormat::ParseLocationRange TextFormat::ParseInfoTree::GetLocationRange(
257     const FieldDescriptor* field, int index) const {
258   CheckFieldIndex(field, index);
259   if (index == -1) {
260     index = 0;
261   }
262 
263   auto it = locations_.find(field);
264   if (it == locations_.end() ||
265       index >= static_cast<int64_t>(it->second.size())) {
266     return TextFormat::ParseLocationRange();
267   }
268 
269   return it->second[static_cast<size_t>(index)];
270 }
271 
GetTreeForNested(const FieldDescriptor * field,int index) const272 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::GetTreeForNested(
273     const FieldDescriptor* field, int index) const {
274   CheckFieldIndex(field, index);
275   if (index == -1) {
276     index = 0;
277   }
278 
279   auto it = nested_.find(field);
280   if (it == nested_.end() || index >= static_cast<int64_t>(it->second.size())) {
281     return nullptr;
282   }
283 
284   return it->second[static_cast<size_t>(index)].get();
285 }
286 
287 namespace {
288 // These functions implement the behavior of the "default" TextFormat::Finder,
289 // they are defined as standalone to be called when finder_ is nullptr.
DefaultFinderFindExtension(Message * message,const std::string & name)290 const FieldDescriptor* DefaultFinderFindExtension(Message* message,
291                                                   const std::string& name) {
292   const Descriptor* descriptor = message->GetDescriptor();
293   return descriptor->file()->pool()->FindExtensionByPrintableName(descriptor,
294                                                                   name);
295 }
296 
DefaultFinderFindExtensionByNumber(const Descriptor * descriptor,int number)297 const FieldDescriptor* DefaultFinderFindExtensionByNumber(
298     const Descriptor* descriptor, int number) {
299   return descriptor->file()->pool()->FindExtensionByNumber(descriptor, number);
300 }
301 
DefaultFinderFindAnyType(const Message & message,const std::string & prefix,const std::string & name)302 const Descriptor* DefaultFinderFindAnyType(const Message& message,
303                                            const std::string& prefix,
304                                            const std::string& name) {
305   if (prefix != internal::kTypeGoogleApisComPrefix &&
306       prefix != internal::kTypeGoogleProdComPrefix) {
307     return nullptr;
308   }
309   return message.GetDescriptor()->file()->pool()->FindMessageTypeByName(name);
310 }
311 }  // namespace
312 
GetUnsetFieldId(const Message & message,const FieldDescriptor & fd)313 auto TextFormat::Parser::UnsetFieldsMetadata::GetUnsetFieldId(
314     const Message& message, const FieldDescriptor& fd) -> Id {
315   return {&message, &fd};
316 }
317 
318 // ===========================================================================
319 // Internal class for parsing an ASCII representation of a Protocol Message.
320 // This class makes use of the Protocol Message compiler's tokenizer found
321 // in //third_party/protobuf/io/tokenizer.h. Note that class's Parse
322 // method is *not* thread-safe and should only be used in a single thread at
323 // a time.
324 
// Makes code slightly more readable.  The meaning of "DO(foo)" is
// "Execute foo and fail if it fails.", where failure is indicated by
// returning false. Borrowed from parser.cc (Thanks Kenton!).
//
// Only usable inside functions that return bool: on failure the macro
// returns false from the *enclosing* function.
// NOTE(review): the empty-then / else shape is presumably chosen so the macro
// does not capture a following `else` at the call site -- confirm before
// simplifying.
#define DO(STATEMENT) \
  if (STATEMENT) {    \
  } else {            \
    return false;     \
  }
333 
334 class TextFormat::Parser::ParserImpl {
335  public:
  // Determines if repeated values for non-repeated fields and
  // oneofs are permitted, e.g., the string "foo: 1 foo: 2" for a
  // required/optional field named "foo", or "baz: 1 bar: 2"
  // where "baz" and "bar" are members of the same oneof.
  // ConsumeField() checks this policy before storing a value.
  enum SingularOverwritePolicy {
    ALLOW_SINGULAR_OVERWRITES = 0,   // the last value is retained
    FORBID_SINGULAR_OVERWRITES = 1,  // an error is issued
  };
344 
ParserImpl(const Descriptor * root_message_type,io::ZeroCopyInputStream * input_stream,io::ErrorCollector * error_collector,const TextFormat::Finder * finder,ParseInfoTree * parse_info_tree,SingularOverwritePolicy singular_overwrite_policy,bool allow_case_insensitive_field,bool allow_unknown_field,bool allow_unknown_extension,bool allow_unknown_enum,bool allow_field_number,bool allow_relaxed_whitespace,bool allow_partial,int recursion_limit,UnsetFieldsMetadata * no_op_fields)345   ParserImpl(const Descriptor* root_message_type,
346              io::ZeroCopyInputStream* input_stream,
347              io::ErrorCollector* error_collector,
348              const TextFormat::Finder* finder, ParseInfoTree* parse_info_tree,
349              SingularOverwritePolicy singular_overwrite_policy,
350              bool allow_case_insensitive_field, bool allow_unknown_field,
351              bool allow_unknown_extension, bool allow_unknown_enum,
352              bool allow_field_number, bool allow_relaxed_whitespace,
353              bool allow_partial, int recursion_limit,
354              UnsetFieldsMetadata* no_op_fields)
355       : error_collector_(error_collector),
356         finder_(finder),
357         parse_info_tree_(parse_info_tree),
358         tokenizer_error_collector_(this),
359         tokenizer_(input_stream, &tokenizer_error_collector_),
360         root_message_type_(root_message_type),
361         singular_overwrite_policy_(singular_overwrite_policy),
362         allow_case_insensitive_field_(allow_case_insensitive_field),
363         allow_unknown_field_(allow_unknown_field),
364         allow_unknown_extension_(allow_unknown_extension),
365         allow_unknown_enum_(allow_unknown_enum),
366         allow_field_number_(allow_field_number),
367         allow_partial_(allow_partial),
368         initial_recursion_limit_(recursion_limit),
369         recursion_limit_(recursion_limit),
370         had_silent_marker_(false),
371         had_errors_(false),
372         no_op_fields_(no_op_fields) {
373     // For backwards-compatibility with proto1, we need to allow the 'f' suffix
374     // for floats.
375     tokenizer_.set_allow_f_after_float(true);
376 
377     // '#' starts a comment.
378     tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);
379 
380     if (allow_relaxed_whitespace) {
381       tokenizer_.set_require_space_after_number(false);
382       tokenizer_.set_allow_multiline_strings(true);
383     }
384 
385     // Consume the starting token.
386     tokenizer_.Next();
387   }
388   ParserImpl(const ParserImpl&) = delete;
389   ParserImpl& operator=(const ParserImpl&) = delete;
~ParserImpl()390   ~ParserImpl() {}
391 
392   // Parses the ASCII representation specified in input and saves the
393   // information into the output pointer (a Message). Returns
394   // false if an error occurs (an error will also be logged to
395   // ABSL_LOG(ERROR)).
Parse(Message * output)396   bool Parse(Message* output) {
397     // Consume fields until we cannot do so anymore.
398     while (true) {
399       if (LookingAtType(io::Tokenizer::TYPE_END)) {
400         // Ensures recursion limit properly unwinded, but only for success
401         // cases. This implicitly avoids the check when `Parse` returns false
402         // via `DO(...)`.
403         ABSL_DCHECK(had_errors_ || recursion_limit_ == initial_recursion_limit_)
404             << "Recursion limit at end of parse should be "
405             << initial_recursion_limit_ << ", but was " << recursion_limit_
406             << ". Difference of " << initial_recursion_limit_ - recursion_limit_
407             << " stack frames not accounted for stack unwind.";
408 
409         return !had_errors_;
410       }
411 
412       DO(ConsumeField(output));
413     }
414   }
415 
ParseField(const FieldDescriptor * field,Message * output)416   bool ParseField(const FieldDescriptor* field, Message* output) {
417     bool suc;
418     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
419       suc = ConsumeFieldMessage(output, output->GetReflection(), field);
420     } else {
421       suc = ConsumeFieldValue(output, output->GetReflection(), field);
422     }
423     return suc && LookingAtType(io::Tokenizer::TYPE_END);
424   }
425 
ReportError(int line,int col,absl::string_view message)426   void ReportError(int line, int col, absl::string_view message) {
427     had_errors_ = true;
428     if (error_collector_ == nullptr) {
429       if (line >= 0) {
430         ABSL_LOG(ERROR) << "Error parsing text-format "
431                         << root_message_type_->full_name() << ": " << (line + 1)
432                         << ":" << (col + 1) << ": " << message;
433       } else {
434         ABSL_LOG(ERROR) << "Error parsing text-format "
435                         << root_message_type_->full_name() << ": " << message;
436       }
437     } else {
438       error_collector_->RecordError(line, col, message);
439     }
440   }
441 
ReportWarning(int line,int col,const absl::string_view message)442   void ReportWarning(int line, int col, const absl::string_view message) {
443     if (error_collector_ == nullptr) {
444       if (line >= 0) {
445         ABSL_LOG_EVERY_POW_2(WARNING)
446             << "Warning parsing text-format " << root_message_type_->full_name()
447             << ": " << (line + 1) << ":" << (col + 1) << " (N = " << COUNTER
448             << "): " << message;
449       } else {
450         ABSL_LOG_EVERY_POW_2(WARNING)
451             << "Warning parsing text-format " << root_message_type_->full_name()
452             << " (N = " << COUNTER << "): " << message;
453       }
454     } else {
455       error_collector_->RecordWarning(line, col, message);
456     }
457   }
458 
459  private:
  // Shorthand names for the extreme values of the fixed-width integer types.
  // NOTE(review): presumably used for range checks in the integer-consuming
  // helpers, which are outside this part of the file -- confirm.
  static constexpr int32_t kint32max = std::numeric_limits<int32_t>::max();
  static constexpr uint32_t kuint32max = std::numeric_limits<uint32_t>::max();
  static constexpr int64_t kint64min = std::numeric_limits<int64_t>::min();
  static constexpr int64_t kint64max = std::numeric_limits<int64_t>::max();
  static constexpr uint64_t kuint64max = std::numeric_limits<uint64_t>::max();
465 
466   // Reports an error with the given message with information indicating
467   // the position (as derived from the current token).
ReportError(absl::string_view message)468   void ReportError(absl::string_view message) {
469     ReportError(tokenizer_.current().line, tokenizer_.current().column,
470                 message);
471   }
472 
473   // Reports a warning with the given message with information indicating
474   // the position (as derived from the current token).
ReportWarning(absl::string_view message)475   void ReportWarning(absl::string_view message) {
476     ReportWarning(tokenizer_.current().line, tokenizer_.current().column,
477                   message);
478   }
479 
480   // Consumes the specified message with the given starting delimiter.
481   // This method checks to see that the end delimiter at the conclusion of
482   // the consumption matches the starting delimiter passed in here.
ConsumeMessage(Message * message,const std::string delimiter)483   bool ConsumeMessage(Message* message, const std::string delimiter) {
484     while (!LookingAt(">") && !LookingAt("}")) {
485       DO(ConsumeField(message));
486     }
487 
488     // Confirm that we have a valid ending delimiter.
489     DO(Consume(delimiter));
490     return true;
491   }
492 
493   // Consume either "<" or "{".
ConsumeMessageDelimiter(std::string * delimiter)494   bool ConsumeMessageDelimiter(std::string* delimiter) {
495     if (TryConsume("<")) {
496       *delimiter = ">";
497     } else {
498       DO(Consume("{"));
499       *delimiter = "}";
500     }
501     return true;
502   }
503 
  // Consumes the current field (as returned by the tokenizer) on the
  // passed in message.
  //
  // Three spellings of the field "name" are handled:
  //   1. "[type_url]" on a message for which GetAnyFieldDescriptors()
  //      succeeds: the expanded form of google.protobuf.Any. The value is
  //      parsed and stored into the Any's type_url and value fields.
  //   2. "[full.name]": an extension, resolved through the Finder (or the
  //      default finder when finder_ is null).
  //   3. An identifier: a regular field name or, when allow_field_number_ is
  //      set, a field number.
  //
  // Unknown and reserved fields are skipped when the corresponding allow_*
  // options permit it; otherwise an error is reported. Returns false when the
  // field could not be consumed (an error has been reported in the cases
  // handled directly here), true otherwise.
  bool ConsumeField(Message* message) {
    const Reflection* reflection = message->GetReflection();
    const Descriptor* descriptor = message->GetDescriptor();

    std::string field_name;
    bool reserved_field = false;
    const FieldDescriptor* field = nullptr;
    // Remember where the field starts so its full range can be recorded in
    // the parse info tree at the end.
    int start_line = tokenizer_.current().line;
    int start_column = tokenizer_.current().column;

    const FieldDescriptor* any_type_url_field;
    const FieldDescriptor* any_value_field;
    // Case 1: expanded Any. Note "[" is only consumed when the message is an
    // Any; otherwise the extension branch below gets to consume it.
    if (internal::GetAnyFieldDescriptors(*message, &any_type_url_field,
                                         &any_value_field) &&
        TryConsume("[")) {
      std::string full_type_name, prefix;
      DO(ConsumeAnyTypeUrl(&full_type_name, &prefix));
      std::string prefix_and_full_type_name =
          absl::StrCat(prefix, full_type_name);
      DO(ConsumeBeforeWhitespace("]"));
      TryConsumeWhitespace();
      // ':' is optional between message labels and values.
      if (TryConsumeBeforeWhitespace(":")) {
        TryConsumeWhitespace();
      }
      std::string serialized_value;
      const Descriptor* value_descriptor =
          finder_ ? finder_->FindAnyType(*message, prefix, full_type_name)
                  : DefaultFinderFindAnyType(*message, prefix, full_type_name);
      if (value_descriptor == nullptr) {
        ReportError(absl::StrCat("Could not find type \"",
                                 prefix_and_full_type_name,
                                 "\" stored in google.protobuf.Any."));
        return false;
      }
      DO(ConsumeAnyValue(value_descriptor, &serialized_value));
      if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
        // Fail if any_type_url_field has already been specified.
        if ((!any_type_url_field->is_repeated() &&
             reflection->HasField(*message, any_type_url_field)) ||
            (!any_value_field->is_repeated() &&
             reflection->HasField(*message, any_value_field))) {
          ReportError("Non-repeated Any specified multiple times.");
          return false;
        }
      }
      reflection->SetString(message, any_type_url_field,
                            std::move(prefix_and_full_type_name));
      reflection->SetString(message, any_value_field,
                            std::move(serialized_value));
      // The Any case returns here: it skips the optional separator handling
      // and the location recording at the end of this function.
      return true;
    }
    if (TryConsume("[")) {
      // Extension.
      DO(ConsumeFullTypeName(&field_name));
      DO(ConsumeBeforeWhitespace("]"));
      TryConsumeWhitespace();

      field = finder_ ? finder_->FindExtension(message, field_name)
                      : DefaultFinderFindExtension(message, field_name);

      if (field == nullptr) {
        // Unknown extensions are tolerated when either unknown fields or
        // unknown extensions are allowed; a warning is still emitted.
        if (!allow_unknown_field_ && !allow_unknown_extension_) {
          ReportError(absl::StrCat("Extension \"", field_name,
                                   "\" is not defined or "
                                   "is not an extension of \"",
                                   descriptor->full_name(), "\"."));
          return false;
        } else {
          ReportWarning(absl::StrCat(
              "Ignoring extension \"", field_name,
              "\" which is not defined or is not an extension of \"",
              descriptor->full_name(), "\"."));
        }
      }
    } else {
      DO(ConsumeIdentifierBeforeWhitespace(&field_name));
      TryConsumeWhitespace();

      int32_t field_number;
      // When field numbers are allowed and the identifier parses as an
      // integer, resolve it by number: extension range, reserved number, or
      // regular field.
      if (allow_field_number_ && absl::SimpleAtoi(field_name, &field_number)) {
        if (descriptor->IsExtensionNumber(field_number)) {
          field = finder_
                      ? finder_->FindExtensionByNumber(descriptor, field_number)
                      : DefaultFinderFindExtensionByNumber(descriptor,
                                                           field_number);
        } else if (descriptor->IsReservedNumber(field_number)) {
          reserved_field = true;
        } else {
          field = descriptor->FindFieldByNumber(field_number);
        }
      } else {
        field = descriptor->FindFieldByName(field_name);
        // Group-like delimited fields will accept both the capitalized type
        // names as well.
        if (field == nullptr) {
          std::string lower_field_name = field_name;
          absl::AsciiStrToLower(&lower_field_name);
          field = descriptor->FindFieldByName(lower_field_name);
          // If the case-insensitive match worked but the field is NOT a group,
          // discard the match.
          if (field != nullptr && !internal::cpp::IsGroupLike(*field)) {
            field = nullptr;
          }
          // A group-like field is only accepted under its message type's
          // exact name, not under any other capitalization.
          if (field != nullptr && field->message_type()->name() != field_name) {
            field = nullptr;
          }
        }

        // Optional fully case-insensitive lookup.
        if (field == nullptr && allow_case_insensitive_field_) {
          std::string lower_field_name = field_name;
          absl::AsciiStrToLower(&lower_field_name);
          field = descriptor->FindFieldByLowercaseName(lower_field_name);
        }

        if (field == nullptr) {
          reserved_field = descriptor->IsReservedName(field_name);
        }
      }
      // Reserved fields are skipped without any diagnostic; other unknown
      // fields produce an error, or a warning when unknown fields are allowed.
      if (field == nullptr && !reserved_field) {
        if (!allow_unknown_field_) {
          ReportError(absl::StrCat("Message type \"", descriptor->full_name(),
                                   "\" has no field named \"", field_name,
                                   "\"."));
          return false;
        } else {
          ReportWarning(absl::StrCat("Message type \"", descriptor->full_name(),
                                     "\" has no field named \"", field_name,
                                     "\"."));
        }
      }
    }

    // Skips unknown or reserved fields.
    if (field == nullptr) {
      // Every path that leaves `field` null without returning has one of
      // these three conditions true.
      ABSL_CHECK(allow_unknown_field_ || allow_unknown_extension_ ||
                 reserved_field);

      // Try to guess the type of this field.
      // If this field is not a message, there should be a ":" between the
      // field name and the field value and also the field value should not
      // start with "{" or "<" which indicates the beginning of a message body.
      // If there is no ":" or there is a "{" or "<" after ":", this field has
      // to be a message or the input is ill-formed.
      if (TryConsumeBeforeWhitespace(":")) {
        TryConsumeWhitespace();
        if (!LookingAt("{") && !LookingAt("<")) {
          return SkipFieldValue();
        }
      }
      return SkipFieldMessage();
    }

    if (field->options().deprecated()) {
      ReportWarning(absl::StrCat("text format contains deprecated field \"",
                                 field_name, "\""));
    }

    if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
      // Fail if the field is not repeated and it has already been specified.
      if (!field->is_repeated() && reflection->HasField(*message, field)) {
        ReportError(absl::StrCat("Non-repeated field \"", field_name,
                                 "\" is specified multiple times."));
        return false;
      }
      // Fail if the field is a member of a oneof and another member has already
      // been specified.
      const OneofDescriptor* oneof = field->containing_oneof();
      if (oneof != nullptr && reflection->HasOneof(*message, oneof)) {
        const FieldDescriptor* other_field =
            reflection->GetOneofFieldDescriptor(*message, oneof);
        ReportError(absl::StrCat("Field \"", field_name,
                                 "\" is specified along with "
                                 "field \"",
                                 other_field->name(),
                                 "\", another member "
                                 "of oneof \"",
                                 oneof->name(), "\"."));
        return false;
      }
    }

    // Perform special handling for embedded message types.
    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
      // ':' is optional here.
      // (Despite its name, `consumed_semicolon` records whether a ':' was
      // consumed.)
      bool consumed_semicolon = TryConsumeBeforeWhitespace(":");
      if (consumed_semicolon) {
        TryConsumeWhitespace();
      }
      if (consumed_semicolon && field->options().weak() &&
          LookingAtType(io::Tokenizer::TYPE_STRING)) {
        // we are getting a bytes string for a weak field.
        std::string tmp;
        DO(ConsumeString(&tmp));
        MessageFactory* factory =
            finder_ ? finder_->FindExtensionFactory(field) : nullptr;
        // NOTE(review): the result of ParseFromString() is ignored, so a
        // malformed payload is not reported here -- confirm this is intended.
        reflection->MutableMessage(message, field, factory)
            ->ParseFromString(tmp);
        goto label_skip_parsing;
      }
    } else {
      // ':' is required here.
      DO(ConsumeBeforeWhitespace(":"));
      TryConsumeWhitespace();
    }

    if (field->is_repeated() && TryConsume("[")) {
      // Short repeated format, e.g.  "foo: [1, 2, 3]".
      if (!TryConsume("]")) {
        // "foo: []" is treated as empty.
        // Otherwise read comma-separated elements until the closing "]".
        while (true) {
          if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
            // Perform special handling for embedded message types.
            DO(ConsumeFieldMessage(message, reflection, field));
          } else {
            DO(ConsumeFieldValue(message, reflection, field));
          }
          if (TryConsume("]")) {
            break;
          }
          DO(Consume(","));
        }
      }
    } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
      DO(ConsumeFieldMessage(message, reflection, field));
    } else {
      DO(ConsumeFieldValue(message, reflection, field));
    }
  label_skip_parsing:
    // For historical reasons, fields may optionally be separated by commas or
    // semicolons.
    // At most one separator is consumed; the result is deliberately unused.
    TryConsume(";") || TryConsume(",");

    // If a parse info tree exists, add the location for the parsed
    // field.
    if (parse_info_tree_ != nullptr) {
      // The range ends at the end of the last token consumed for this field.
      int end_line = tokenizer_.previous().line;
      int end_column = tokenizer_.previous().end_column;

      RecordLocation(parse_info_tree_, field,
                     ParseLocationRange(ParseLocation(start_line, start_column),
                                        ParseLocation(end_line, end_column)));
    }

    return true;
  }
751 
752   // Skips the next field including the field's name and value.
SkipField()753   bool SkipField() {
754     std::string field_name;
755     if (TryConsume("[")) {
756       // Extension name or type URL.
757       DO(ConsumeTypeUrlOrFullTypeName(&field_name));
758       DO(ConsumeBeforeWhitespace("]"));
759     } else {
760       DO(ConsumeIdentifierBeforeWhitespace(&field_name));
761     }
762     TryConsumeWhitespace();
763 
764     // Try to guess the type of this field.
765     // If this field is not a message, there should be a ":" between the
766     // field name and the field value and also the field value should not
767     // start with "{" or "<" which indicates the beginning of a message body.
768     // If there is no ":" or there is a "{" or "<" after ":", this field has
769     // to be a message or the input is ill-formed.
770     if (TryConsumeBeforeWhitespace(":")) {
771       TryConsumeWhitespace();
772       if (!LookingAt("{") && !LookingAt("<")) {
773         DO(SkipFieldValue());
774       } else {
775         DO(SkipFieldMessage());
776       }
777     } else {
778       DO(SkipFieldMessage());
779     }
780     // For historical reasons, fields may optionally be separated by commas or
781     // semicolons.
782     TryConsume(";") || TryConsume(",");
783     return true;
784   }
785 
ConsumeFieldMessage(Message * message,const Reflection * reflection,const FieldDescriptor * field)786   bool ConsumeFieldMessage(Message* message, const Reflection* reflection,
787                            const FieldDescriptor* field) {
788     if (--recursion_limit_ < 0) {
789       ReportError(
790           absl::StrCat("Message is too deep, the parser exceeded the "
791                        "configured recursion limit of ",
792                        initial_recursion_limit_, "."));
793       return false;
794     }
795     // If the parse information tree is not nullptr, create a nested one
796     // for the nested message.
797     ParseInfoTree* parent = parse_info_tree_;
798     if (parent != nullptr) {
799       parse_info_tree_ = CreateNested(parent, field);
800     }
801 
802     std::string delimiter;
803     DO(ConsumeMessageDelimiter(&delimiter));
804     MessageFactory* factory =
805         finder_ ? finder_->FindExtensionFactory(field) : nullptr;
806     if (field->is_repeated()) {
807       DO(ConsumeMessage(reflection->AddMessage(message, field, factory),
808                         delimiter));
809     } else {
810       DO(ConsumeMessage(reflection->MutableMessage(message, field, factory),
811                         delimiter));
812     }
813 
814     ++recursion_limit_;
815 
816     // Reset the parse information tree.
817     parse_info_tree_ = parent;
818     return true;
819   }
820 
821   // Skips the whole body of a message including the beginning delimiter and
822   // the ending delimiter.
SkipFieldMessage()823   bool SkipFieldMessage() {
824     if (--recursion_limit_ < 0) {
825       ReportError(
826           absl::StrCat("Message is too deep, the parser exceeded the "
827                        "configured recursion limit of ",
828                        initial_recursion_limit_, "."));
829       return false;
830     }
831 
832     std::string delimiter;
833     DO(ConsumeMessageDelimiter(&delimiter));
834     while (!LookingAt(">") && !LookingAt("}")) {
835       DO(SkipField());
836     }
837     DO(Consume(delimiter));
838 
839     ++recursion_limit_;
840     return true;
841   }
842 
ConsumeFieldValue(Message * message,const Reflection * reflection,const FieldDescriptor * field)843   bool ConsumeFieldValue(Message* message, const Reflection* reflection,
844                          const FieldDescriptor* field) {
845 // Define an easy to use macro for setting fields. This macro checks
846 // to see if the field is repeated (in which case we need to use the Add
847 // methods or not (in which case we need to use the Set methods).
848 // When checking for no-op operations, We verify that both the existing value in
849 // the message and the new value are the default. If the existing field value is
850 // not the default, setting it to the default should not be treated as a no-op.
851 // The pointer of this is kept in no_op_fields_ for bookkeeping.
852 #define SET_FIELD(CPPTYPE, CPPTYPELCASE, VALUE)                    \
853   if (field->is_repeated()) {                                      \
854     reflection->Add##CPPTYPE(message, field, VALUE);               \
855   } else {                                                         \
856     if (no_op_fields_ && !field->has_presence() &&                 \
857         field->default_value_##CPPTYPELCASE() ==                   \
858             reflection->Get##CPPTYPE(*message, field) &&           \
859         field->default_value_##CPPTYPELCASE() == VALUE) {          \
860       no_op_fields_->ids_.insert(                                  \
861           UnsetFieldsMetadata::GetUnsetFieldId(*message, *field)); \
862     } else {                                                       \
863       reflection->Set##CPPTYPE(message, field, std::move(VALUE));  \
864     }                                                              \
865   }
866 
867     switch (field->cpp_type()) {
868       case FieldDescriptor::CPPTYPE_INT32: {
869         int64_t value;
870         DO(ConsumeSignedInteger(&value, kint32max));
871         SET_FIELD(Int32, int32, static_cast<int32_t>(value));
872         break;
873       }
874 
875       case FieldDescriptor::CPPTYPE_UINT32: {
876         uint64_t value;
877         DO(ConsumeUnsignedInteger(&value, kuint32max));
878         SET_FIELD(UInt32, uint32, static_cast<uint32_t>(value));
879         break;
880       }
881 
882       case FieldDescriptor::CPPTYPE_INT64: {
883         int64_t value;
884         DO(ConsumeSignedInteger(&value, kint64max));
885         SET_FIELD(Int64, int64, value);
886         break;
887       }
888 
889       case FieldDescriptor::CPPTYPE_UINT64: {
890         uint64_t value;
891         DO(ConsumeUnsignedInteger(&value, kuint64max));
892         SET_FIELD(UInt64, uint64, value);
893         break;
894       }
895 
896       case FieldDescriptor::CPPTYPE_FLOAT: {
897         double value;
898         DO(ConsumeDouble(&value));
899         SET_FIELD(Float, float, io::SafeDoubleToFloat(value));
900         break;
901       }
902 
903       case FieldDescriptor::CPPTYPE_DOUBLE: {
904         double value;
905         DO(ConsumeDouble(&value));
906         SET_FIELD(Double, double, value);
907         break;
908       }
909 
910       case FieldDescriptor::CPPTYPE_STRING: {
911         std::string value;
912         DO(ConsumeString(&value));
913         SET_FIELD(String, string, std::move(value));
914         break;
915       }
916 
917       case FieldDescriptor::CPPTYPE_BOOL: {
918         if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
919           uint64_t value;
920           DO(ConsumeUnsignedInteger(&value, 1));
921           SET_FIELD(Bool, bool, static_cast<bool>(value));
922         } else {
923           std::string value;
924           DO(ConsumeIdentifier(&value));
925           if (value == "true" || value == "True" || value == "t") {
926             SET_FIELD(Bool, bool, true);
927           } else if (value == "false" || value == "False" || value == "f") {
928             SET_FIELD(Bool, bool, false);
929           } else {
930             ReportError(absl::StrCat("Invalid value for boolean field \"",
931                                      field->name(), "\". Value: \"", value,
932                                      "\"."));
933             return false;
934           }
935         }
936         break;
937       }
938 
939       case FieldDescriptor::CPPTYPE_ENUM: {
940         std::string value;
941         int64_t int_value = kint64max;
942         const EnumDescriptor* enum_type = field->enum_type();
943         const EnumValueDescriptor* enum_value = nullptr;
944 
945         if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
946           DO(ConsumeIdentifier(&value));
947           // Find the enumeration value.
948           enum_value = enum_type->FindValueByName(value);
949 
950         } else if (LookingAt("-") ||
951                    LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
952           DO(ConsumeSignedInteger(&int_value, kint32max));
953           value = absl::StrCat(int_value);  // for error reporting
954           enum_value = enum_type->FindValueByNumber(int_value);
955         } else {
956           ReportError(absl::StrCat("Expected integer or identifier, got: ",
957                                    tokenizer_.current().text));
958           return false;
959         }
960 
961         if (enum_value == nullptr) {
962           if (int_value != kint64max &&
963               !field->legacy_enum_field_treated_as_closed()) {
964             SET_FIELD(EnumValue, int64, int_value);
965             return true;
966           } else if (!allow_unknown_enum_) {
967             ReportError(absl::StrCat("Unknown enumeration value of \"", value,
968                                      "\" for field \"", field->name(), "\"."));
969             return false;
970           } else {
971             ReportWarning(absl::StrCat("Unknown enumeration value of \"", value,
972                                        "\" for field \"", field->name(),
973                                        "\"."));
974             return true;
975           }
976         }
977 
978         SET_FIELD(Enum, enum, enum_value);
979         break;
980       }
981 
982       case FieldDescriptor::CPPTYPE_MESSAGE: {
983         // We should never get here. Put here instead of a default
984         // so that if new types are added, we get a nice compiler warning.
985         ABSL_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
986         break;
987       }
988     }
989 #undef SET_FIELD
990     return true;
991   }
992 
SkipFieldValue()993   bool SkipFieldValue() {
994     if (--recursion_limit_ < 0) {
995       ReportError(
996           absl::StrCat("Message is too deep, the parser exceeded the "
997                        "configured recursion limit of ",
998                        initial_recursion_limit_, "."));
999       return false;
1000     }
1001 
1002     if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1003       while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1004         tokenizer_.Next();
1005       }
1006       ++recursion_limit_;
1007       return true;
1008     }
1009     if (TryConsume("[")) {
1010       if (!TryConsume("]")) {
1011         while (true) {
1012           if (!LookingAt("{") && !LookingAt("<")) {
1013             DO(SkipFieldValue());
1014           } else {
1015             DO(SkipFieldMessage());
1016           }
1017           if (TryConsume("]")) {
1018             break;
1019           }
1020           DO(Consume(","));
1021         }
1022       }
1023       ++recursion_limit_;
1024       return true;
1025     }
1026     // Possible field values other than string:
1027     //   12345        => TYPE_INTEGER
1028     //   -12345       => TYPE_SYMBOL + TYPE_INTEGER
1029     //   1.2345       => TYPE_FLOAT
1030     //   -1.2345      => TYPE_SYMBOL + TYPE_FLOAT
1031     //   inf          => TYPE_IDENTIFIER
1032     //   -inf         => TYPE_SYMBOL + TYPE_IDENTIFIER
1033     //   TYPE_INTEGER => TYPE_IDENTIFIER
1034     // Divides them into two group, one with TYPE_SYMBOL
1035     // and the other without:
1036     //   Group one:
1037     //     12345        => TYPE_INTEGER
1038     //     1.2345       => TYPE_FLOAT
1039     //     inf          => TYPE_IDENTIFIER
1040     //     TYPE_INTEGER => TYPE_IDENTIFIER
1041     //   Group two:
1042     //     -12345       => TYPE_SYMBOL + TYPE_INTEGER
1043     //     -1.2345      => TYPE_SYMBOL + TYPE_FLOAT
1044     //     -inf         => TYPE_SYMBOL + TYPE_IDENTIFIER
1045     // As we can see, the field value consists of an optional '-' and one of
1046     // TYPE_INTEGER, TYPE_FLOAT and TYPE_IDENTIFIER.
1047     bool has_minus = TryConsume("-");
1048     if (!LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
1049         !LookingAtType(io::Tokenizer::TYPE_FLOAT) &&
1050         !LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1051       std::string text = tokenizer_.current().text;
1052       ReportError(
1053           absl::StrCat("Cannot skip field value, unexpected token: ", text));
1054       ++recursion_limit_;
1055       return false;
1056     }
1057     // Combination of '-' and TYPE_IDENTIFIER may result in an invalid field
1058     // value while other combinations all generate valid values.
1059     // We check if the value of this combination is valid here.
1060     // TYPE_IDENTIFIER after a '-' should be one of the float values listed
1061     // below:
1062     //   inf, inff, infinity, nan
1063     if (has_minus && LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1064       std::string text = tokenizer_.current().text;
1065       absl::AsciiStrToLower(&text);
1066       if (text != "inf" &&
1067           text != "infinity" && text != "nan") {
1068         ReportError(absl::StrCat("Invalid float number: ", text));
1069         ++recursion_limit_;
1070         return false;
1071       }
1072     }
1073     tokenizer_.Next();
1074     ++recursion_limit_;
1075     return true;
1076   }
1077 
1078   // Returns true if the current token's text is equal to that specified.
LookingAt(const std::string & text)1079   bool LookingAt(const std::string& text) {
1080     return tokenizer_.current().text == text;
1081   }
1082 
1083   // Returns true if the current token's type is equal to that specified.
LookingAtType(io::Tokenizer::TokenType token_type)1084   bool LookingAtType(io::Tokenizer::TokenType token_type) {
1085     return tokenizer_.current().type == token_type;
1086   }
1087 
1088   // Consumes an identifier and saves its value in the identifier parameter.
1089   // Returns false if the token is not of type IDENTIFIER.
ConsumeIdentifier(std::string * identifier)1090   bool ConsumeIdentifier(std::string* identifier) {
1091     if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1092       *identifier = tokenizer_.current().text;
1093       tokenizer_.Next();
1094       return true;
1095     }
1096 
1097     // If allow_field_number_ or allow_unknown_field_ is true, we should able
1098     // to parse integer identifiers.
1099     if ((allow_field_number_ || allow_unknown_field_ ||
1100          allow_unknown_extension_) &&
1101         LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
1102       *identifier = tokenizer_.current().text;
1103       tokenizer_.Next();
1104       return true;
1105     }
1106 
1107     ReportError(
1108         absl::StrCat("Expected identifier, got: ", tokenizer_.current().text));
1109     return false;
1110   }
1111 
1112   // Similar to `ConsumeIdentifier`, but any following whitespace token may
1113   // be reported.
ConsumeIdentifierBeforeWhitespace(std::string * identifier)1114   bool ConsumeIdentifierBeforeWhitespace(std::string* identifier) {
1115     tokenizer_.set_report_whitespace(true);
1116     bool result = ConsumeIdentifier(identifier);
1117     tokenizer_.set_report_whitespace(false);
1118     return result;
1119   }
1120 
1121   // Consume a string of form "<id1>.<id2>....<idN>".
ConsumeFullTypeName(std::string * name)1122   bool ConsumeFullTypeName(std::string* name) {
1123     DO(ConsumeIdentifier(name));
1124     while (TryConsume(".")) {
1125       std::string part;
1126       DO(ConsumeIdentifier(&part));
1127       absl::StrAppend(name, ".", part);
1128     }
1129     return true;
1130   }
1131 
ConsumeTypeUrlOrFullTypeName(std::string * name)1132   bool ConsumeTypeUrlOrFullTypeName(std::string* name) {
1133     DO(ConsumeIdentifier(name));
1134     while (true) {
1135       std::string connector;
1136       if (TryConsume(".")) {
1137         connector = ".";
1138       } else if (TryConsume("/")) {
1139         connector = "/";
1140       } else {
1141         break;
1142       }
1143       std::string part;
1144       DO(ConsumeIdentifier(&part));
1145       *name += connector;
1146       *name += part;
1147     }
1148     return true;
1149   }
1150 
1151   // Consumes a string and saves its value in the text parameter.
1152   // Returns false if the token is not of type STRING.
ConsumeString(std::string * text)1153   bool ConsumeString(std::string* text) {
1154     if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
1155       ReportError(
1156           absl::StrCat("Expected string, got: ", tokenizer_.current().text));
1157       return false;
1158     }
1159 
1160     text->clear();
1161     while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1162       io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
1163 
1164       tokenizer_.Next();
1165     }
1166 
1167     return true;
1168   }
1169 
1170   // Consumes a uint64_t and saves its value in the value parameter.
1171   // Returns false if the token is not of type INTEGER.
ConsumeUnsignedInteger(uint64_t * value,uint64_t max_value)1172   bool ConsumeUnsignedInteger(uint64_t* value, uint64_t max_value) {
1173     if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
1174       ReportError(
1175           absl::StrCat("Expected integer, got: ", tokenizer_.current().text));
1176       return false;
1177     }
1178 
1179     if (!io::Tokenizer::ParseInteger(tokenizer_.current().text, max_value,
1180                                      value)) {
1181       ReportError(absl::StrCat("Integer out of range (",
1182                                tokenizer_.current().text, ")"));
1183       return false;
1184     }
1185 
1186     tokenizer_.Next();
1187     return true;
1188   }
1189 
1190   // Consumes an int64_t and saves its value in the value parameter.
1191   // Note that since the tokenizer does not support negative numbers,
1192   // we actually may consume an additional token (for the minus sign) in this
1193   // method. Returns false if the token is not an integer
1194   // (signed or otherwise).
ConsumeSignedInteger(int64_t * value,uint64_t max_value)1195   bool ConsumeSignedInteger(int64_t* value, uint64_t max_value) {
1196     bool negative = false;
1197 
1198     if (TryConsume("-")) {
1199       negative = true;
1200       // Two's complement always allows one more negative integer than
1201       // positive.
1202       ++max_value;
1203     }
1204 
1205     uint64_t unsigned_value;
1206 
1207     DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
1208 
1209     if (negative) {
1210       if ((static_cast<uint64_t>(kint64max) + 1) == unsigned_value) {
1211         *value = kint64min;
1212       } else {
1213         *value = -static_cast<int64_t>(unsigned_value);
1214       }
1215     } else {
1216       *value = static_cast<int64_t>(unsigned_value);
1217     }
1218 
1219     return true;
1220   }
1221 
1222   // Consumes a double and saves its value in the value parameter.
1223   // Accepts decimal numbers only, rejects hex or oct numbers.
ConsumeUnsignedDecimalAsDouble(double * value,uint64_t max_value)1224   bool ConsumeUnsignedDecimalAsDouble(double* value, uint64_t max_value) {
1225     if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
1226       ReportError(
1227           absl::StrCat("Expected integer, got: ", tokenizer_.current().text));
1228       return false;
1229     }
1230 
1231     const std::string& text = tokenizer_.current().text;
1232     if (IsHexNumber(text) || IsOctNumber(text)) {
1233       ReportError(absl::StrCat("Expect a decimal number, got: ", text));
1234       return false;
1235     }
1236 
1237     uint64_t uint64_value;
1238     if (io::Tokenizer::ParseInteger(text, max_value, &uint64_value)) {
1239       *value = static_cast<double>(uint64_value);
1240     } else {
1241       // Uint64 overflow, attempt to parse as a double instead.
1242       *value = io::Tokenizer::ParseFloat(text);
1243     }
1244 
1245     tokenizer_.Next();
1246     return true;
1247   }
1248 
1249   // Consumes a double and saves its value in the value parameter.
1250   // Note that since the tokenizer does not support negative numbers,
1251   // we actually may consume an additional token (for the minus sign) in this
1252   // method. Returns false if the token is not a double
1253   // (signed or otherwise).
ConsumeDouble(double * value)1254   bool ConsumeDouble(double* value) {
1255     bool negative = false;
1256 
1257     if (TryConsume("-")) {
1258       negative = true;
1259     }
1260 
1261     // A double can actually be an integer, according to the tokenizer.
1262     // Therefore, we must check both cases here.
1263     if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
1264       // We have found an integer value for the double.
1265       DO(ConsumeUnsignedDecimalAsDouble(value, kuint64max));
1266     } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
1267       // We have found a float value for the double.
1268       *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
1269 
1270       // Mark the current token as consumed.
1271       tokenizer_.Next();
1272     } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1273       std::string text = tokenizer_.current().text;
1274       absl::AsciiStrToLower(&text);
1275       if (text == "inf" ||
1276           text == "infinity") {
1277         *value = std::numeric_limits<double>::infinity();
1278         tokenizer_.Next();
1279       } else if (text == "nan") {
1280         *value = std::numeric_limits<double>::quiet_NaN();
1281         tokenizer_.Next();
1282       } else {
1283         ReportError(absl::StrCat("Expected double, got: ", text));
1284         return false;
1285       }
1286     } else {
1287       ReportError(
1288           absl::StrCat("Expected double, got: ", tokenizer_.current().text));
1289       return false;
1290     }
1291 
1292     if (negative) {
1293       *value = -*value;
1294     }
1295 
1296     return true;
1297   }
1298 
1299   // Consumes Any::type_url value, of form "type.googleapis.com/full.type.Name"
1300   // or "type.googleprod.com/full.type.Name"
ConsumeAnyTypeUrl(std::string * full_type_name,std::string * prefix)1301   bool ConsumeAnyTypeUrl(std::string* full_type_name, std::string* prefix) {
1302     // TODO Extend Consume() to consume multiple tokens at once, so that
1303     // this code can be written as just DO(Consume(kGoogleApisTypePrefix)).
1304     DO(ConsumeIdentifier(prefix));
1305     while (TryConsume(".")) {
1306       std::string url;
1307       DO(ConsumeIdentifier(&url));
1308       absl::StrAppend(prefix, ".", url);
1309     }
1310     DO(Consume("/"));
1311     absl::StrAppend(prefix, "/");
1312     DO(ConsumeFullTypeName(full_type_name));
1313 
1314     return true;
1315   }
1316 
1317   // A helper function for reconstructing Any::value. Consumes a text of
1318   // full_type_name, then serializes it into serialized_value.
ConsumeAnyValue(const Descriptor * value_descriptor,std::string * serialized_value)1319   bool ConsumeAnyValue(const Descriptor* value_descriptor,
1320                        std::string* serialized_value) {
1321     DynamicMessageFactory factory;
1322     const Message* value_prototype = factory.GetPrototype(value_descriptor);
1323     if (value_prototype == nullptr) {
1324       return false;
1325     }
1326     std::unique_ptr<Message> value(value_prototype->New());
1327     std::string sub_delimiter;
1328     DO(ConsumeMessageDelimiter(&sub_delimiter));
1329     DO(ConsumeMessage(value.get(), sub_delimiter));
1330 
1331     if (allow_partial_) {
1332       value->AppendPartialToString(serialized_value);
1333     } else {
1334       if (!value->IsInitialized()) {
1335         ReportError(absl::StrCat(
1336             "Value of type \"", value_descriptor->full_name(),
1337             "\" stored in google.protobuf.Any has missing required fields"));
1338         return false;
1339       }
1340       value->AppendToString(serialized_value);
1341     }
1342     return true;
1343   }
1344 
1345   // Consumes a token and confirms that it matches that specified in the
1346   // value parameter. Returns false if the token found does not match that
1347   // which was specified.
Consume(const std::string & value)1348   bool Consume(const std::string& value) {
1349     const std::string& current_value = tokenizer_.current().text;
1350 
1351     if (current_value != value) {
1352       ReportError(absl::StrCat("Expected \"", value, "\", found \"",
1353                                current_value, "\"."));
1354       return false;
1355     }
1356 
1357     tokenizer_.Next();
1358 
1359     return true;
1360   }
1361 
1362   // Similar to `Consume`, but the following token may be tokenized as
1363   // TYPE_WHITESPACE.
ConsumeBeforeWhitespace(const std::string & value)1364   bool ConsumeBeforeWhitespace(const std::string& value) {
1365     // Report whitespace after this token, but only once.
1366     tokenizer_.set_report_whitespace(true);
1367     bool result = Consume(value);
1368     tokenizer_.set_report_whitespace(false);
1369     return result;
1370   }
1371 
1372   // Attempts to consume the supplied value. Returns false if the token found
1373   // does not match the value specified.
TryConsume(const std::string & value)1374   bool TryConsume(const std::string& value) {
1375     if (tokenizer_.current().text == value) {
1376       tokenizer_.Next();
1377       return true;
1378     } else {
1379       return false;
1380     }
1381   }
1382 
1383   // Similar to `TryConsume`, but the following token may be tokenized as
1384   // TYPE_WHITESPACE.
TryConsumeBeforeWhitespace(const std::string & value)1385   bool TryConsumeBeforeWhitespace(const std::string& value) {
1386     // Report whitespace after this token, but only once.
1387     tokenizer_.set_report_whitespace(true);
1388     bool result = TryConsume(value);
1389     tokenizer_.set_report_whitespace(false);
1390     return result;
1391   }
1392 
TryConsumeWhitespace()1393   bool TryConsumeWhitespace() {
1394     had_silent_marker_ = false;
1395     if (LookingAtType(io::Tokenizer::TYPE_WHITESPACE)) {
1396       if (tokenizer_.current().text ==
1397           absl::StrCat(" ", internal::kDebugStringSilentMarkerForDetection)) {
1398         had_silent_marker_ = true;
1399       }
1400       tokenizer_.Next();
1401       return true;
1402     }
1403     return false;
1404   }
1405 
1406   // An internal instance of the Tokenizer's error collector, used to
1407   // collect any base-level parse errors and feed them to the ParserImpl.
1408   class ParserErrorCollector : public io::ErrorCollector {
1409    public:
ParserErrorCollector(TextFormat::Parser::ParserImpl * parser)1410     explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser)
1411         : parser_(parser) {}
1412 
1413     ParserErrorCollector(const ParserErrorCollector&) = delete;
1414     ParserErrorCollector& operator=(const ParserErrorCollector&) = delete;
~ParserErrorCollector()1415     ~ParserErrorCollector() override {}
1416 
RecordError(int line,int column,absl::string_view message)1417     void RecordError(int line, int column, absl::string_view message) override {
1418       parser_->ReportError(line, column, message);
1419     }
1420 
RecordWarning(int line,int column,absl::string_view message)1421     void RecordWarning(int line, int column,
1422                        absl::string_view message) override {
1423       parser_->ReportWarning(line, column, message);
1424     }
1425 
1426    private:
1427     TextFormat::Parser::ParserImpl* parser_;
1428   };
1429 
// NOTE(review): presumably the sink that ReportError/ReportWarning forward
// to; those functions are not visible in this part of the file -- confirm.
1430   io::ErrorCollector* error_collector_;
// Optional (may be null). When set, it is asked for the MessageFactory to use
// for a message field via FindExtensionFactory().
1431   const TextFormat::Finder* finder_;
// Optional (may be null). When set, the source range of each parsed field is
// recorded in it; ConsumeFieldMessage temporarily points it at a nested tree.
1432   ParseInfoTree* parse_info_tree_;
// Forwards diagnostics to this parser. Declared before tokenizer_;
// NOTE(review): presumably because tokenizer_ is constructed with a pointer
// to it -- confirm against the constructor.
1433   ParserErrorCollector tokenizer_error_collector_;
// Token source for the whole parse; current() is the lookahead token.
1434   io::Tokenizer tokenizer_;
// NOTE(review): presumably the descriptor of the top-level message being
// parsed; not used in the visible part of the file -- confirm.
1435   const Descriptor* root_message_type_;
// NOTE(review): presumably controls whether a singular field may appear more
// than once; not used in the visible part of the file -- confirm.
1436   SingularOverwritePolicy singular_overwrite_policy_;
// NOTE(review): presumably enables case-insensitive field-name lookup; not
// used in the visible part of the file -- confirm.
1437   const bool allow_case_insensitive_field_;
// The next three flags, like allow_field_number_, each let
// ConsumeIdentifier() accept an INTEGER token as a name.
1438   const bool allow_unknown_field_;
1439   const bool allow_unknown_extension_;
// When true, an unrecognized enum value produces a warning instead of an
// error in ConsumeFieldValue().
1440   const bool allow_unknown_enum_;
1441   const bool allow_field_number_;
// When true, ConsumeAnyValue() serializes Any payloads without requiring all
// required fields to be set.
1442   const bool allow_partial_;
// The configured limit; only used for error messages in the visible code.
1443   const int initial_recursion_limit_;
// Remaining nesting budget: decremented on entry to the recursive
// Consume*/Skip* functions and incremented again when they succeed.
1444   int recursion_limit_;
// Set by TryConsumeWhitespace(): true when the whitespace it just consumed
// was the silent-marker sequence.
1445   bool had_silent_marker_;
// NOTE(review): presumably set once any error has been reported; not written
// in the visible part of the file -- confirm.
1446   bool had_errors_;
// Optional (null by default). When set, ConsumeFieldValue() records here the
// ids of fields whose assignment was a no-op instead of performing it.
1447   UnsetFieldsMetadata* no_op_fields_{};
1448 
1449 };
1450 
1451 // ===========================================================================
1452 // Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
1453 // from the Printer found in //third_party/protobuf/io/printer.h
1454 class TextFormat::Printer::TextGenerator
1455     : public TextFormat::BaseTextGenerator {
1456  public:
TextGenerator(io::ZeroCopyOutputStream * output,int initial_indent_level)1457   explicit TextGenerator(io::ZeroCopyOutputStream* output,
1458                          int initial_indent_level)
1459       : output_(output),
1460         buffer_(nullptr),
1461         buffer_size_(0),
1462         at_start_of_line_(true),
1463         failed_(false),
1464         insert_silent_marker_(false),
1465         indent_level_(initial_indent_level),
1466         initial_indent_level_(initial_indent_level) {}
1467 
TextGenerator(io::ZeroCopyOutputStream * output,bool insert_silent_marker,int initial_indent_level)1468   explicit TextGenerator(io::ZeroCopyOutputStream* output,
1469                          bool insert_silent_marker, int initial_indent_level)
1470       : output_(output),
1471         buffer_(nullptr),
1472         buffer_size_(0),
1473         at_start_of_line_(true),
1474         failed_(false),
1475         insert_silent_marker_(insert_silent_marker),
1476         indent_level_(initial_indent_level),
1477         initial_indent_level_(initial_indent_level) {}
1478 
1479   TextGenerator(const TextGenerator&) = delete;
1480   TextGenerator& operator=(const TextGenerator&) = delete;
~TextGenerator()1481   ~TextGenerator() override {
1482     // Only BackUp() if we're sure we've successfully called Next() at least
1483     // once.
1484     if (!failed_) {
1485       output_->BackUp(buffer_size_);
1486     }
1487   }
1488 
1489   // Indent text by two spaces.  After calling Indent(), two spaces will be
1490   // inserted at the beginning of each line of text.  Indent() may be called
1491   // multiple times to produce deeper indents.
Indent()1492   void Indent() override { ++indent_level_; }
1493 
1494   // Reduces the current indent level by two spaces, or crashes if the indent
1495   // level is zero.
Outdent()1496   void Outdent() override {
1497     if (indent_level_ == 0 || indent_level_ < initial_indent_level_) {
1498       ABSL_DLOG(FATAL) << " Outdent() without matching Indent().";
1499       return;
1500     }
1501 
1502     --indent_level_;
1503   }
1504 
GetCurrentIndentationSize() const1505   size_t GetCurrentIndentationSize() const override {
1506     return 2 * indent_level_;
1507   }
1508 
1509   // Print text to the output stream.
Print(const char * text,size_t size)1510   void Print(const char* text, size_t size) override {
1511     if (indent_level_ > 0) {
1512       size_t pos = 0;  // The number of bytes we've written so far.
1513       for (size_t i = 0; i < size; i++) {
1514         if (text[i] == '\n') {
1515           // Saw newline.  If there is more text, we may need to insert an
1516           // indent here.  So, write what we have so far, including the '\n'.
1517           Write(text + pos, i - pos + 1);
1518           pos = i + 1;
1519 
1520           // Setting this true will cause the next Write() to insert an indent
1521           // first.
1522           at_start_of_line_ = true;
1523         }
1524       }
1525       // Write the rest.
1526       Write(text + pos, size - pos);
1527     } else {
1528       Write(text, size);
1529       if (size > 0 && text[size - 1] == '\n') {
1530         at_start_of_line_ = true;
1531       }
1532     }
1533   }
1534 
1535   // True if any write to the underlying stream failed.  (We don't just
1536   // crash in this case because this is an I/O failure, not a programming
1537   // error.)
failed() const1538   bool failed() const { return failed_; }
1539 
PrintMaybeWithMarker(MarkerToken,absl::string_view text)1540   void PrintMaybeWithMarker(MarkerToken, absl::string_view text) override {
1541     Print(text.data(), text.size());
1542     if (ConsumeInsertSilentMarker()) {
1543       PrintLiteral(internal::kDebugStringSilentMarker);
1544     }
1545   }
1546 
PrintMaybeWithMarker(MarkerToken,absl::string_view text_head,absl::string_view text_tail)1547   void PrintMaybeWithMarker(MarkerToken, absl::string_view text_head,
1548                             absl::string_view text_tail) override {
1549     Print(text_head.data(), text_head.size());
1550     if (ConsumeInsertSilentMarker()) {
1551       PrintLiteral(internal::kDebugStringSilentMarker);
1552     }
1553     Print(text_tail.data(), text_tail.size());
1554   }
1555 
1556  private:
Write(const char * data,size_t size)1557   void Write(const char* data, size_t size) {
1558     if (failed_) return;
1559     if (size == 0) return;
1560 
1561     if (at_start_of_line_) {
1562       // Insert an indent.
1563       at_start_of_line_ = false;
1564       WriteIndent();
1565       if (failed_) return;
1566     }
1567 
1568     while (static_cast<int64_t>(size) > buffer_size_) {
1569       // Data exceeds space in the buffer.  Copy what we can and request a
1570       // new buffer.
1571       if (buffer_size_ > 0) {
1572         memcpy(buffer_, data, buffer_size_);
1573         data += buffer_size_;
1574         size -= buffer_size_;
1575       }
1576       void* void_buffer = nullptr;
1577       failed_ = !output_->Next(&void_buffer, &buffer_size_);
1578       if (failed_) return;
1579       buffer_ = reinterpret_cast<char*>(void_buffer);
1580     }
1581 
1582     // Buffer is big enough to receive the data; copy it.
1583     memcpy(buffer_, data, size);
1584     buffer_ += size;
1585     buffer_size_ -= size;
1586   }
1587 
WriteIndent()1588   void WriteIndent() {
1589     if (indent_level_ == 0) {
1590       return;
1591     }
1592     ABSL_DCHECK(!failed_);
1593     int size = GetCurrentIndentationSize();
1594 
1595     while (size > buffer_size_) {
1596       // Data exceeds space in the buffer. Write what we can and request a new
1597       // buffer.
1598       if (buffer_size_ > 0) {
1599         memset(buffer_, ' ', buffer_size_);
1600       }
1601       size -= buffer_size_;
1602       void* void_buffer;
1603       failed_ = !output_->Next(&void_buffer, &buffer_size_);
1604       if (failed_) return;
1605       buffer_ = reinterpret_cast<char*>(void_buffer);
1606     }
1607 
1608     // Buffer is big enough to receive the data; copy it.
1609     memset(buffer_, ' ', size);
1610     buffer_ += size;
1611     buffer_size_ -= size;
1612   }
1613 
1614   // Return the current value of insert_silent_marker_. If it is true, set it
1615   // to false as we assume that a silent marker is inserted after a call to this
1616   // function.
ConsumeInsertSilentMarker()1617   bool ConsumeInsertSilentMarker() {
1618     if (insert_silent_marker_) {
1619       insert_silent_marker_ = false;
1620       return true;
1621     }
1622     return false;
1623   }
1624 
1625   io::ZeroCopyOutputStream* const output_;
1626   char* buffer_;
1627   int buffer_size_;
1628   bool at_start_of_line_;
1629   bool failed_;
1630   // This flag is false when inserting silent marker is disabled or a silent
1631   // marker has been inserted.
1632   bool insert_silent_marker_;
1633 
1634   int indent_level_;
1635   int initial_indent_level_;
1636 };
1637 
1638 // ===========================================================================
1639 //  An internal field value printer that may insert a silent marker in
1640 //  DebugStrings.
1641 class TextFormat::Printer::DebugStringFieldValuePrinter
1642     : public TextFormat::FastFieldValuePrinter {
1643  public:
PrintMessageStart(const Message &,int,int,bool single_line_mode,BaseTextGenerator * generator) const1644   void PrintMessageStart(const Message& /*message*/, int /*field_index*/,
1645                          int /*field_count*/, bool single_line_mode,
1646                          BaseTextGenerator* generator) const override {
1647     if (single_line_mode) {
1648       generator->PrintMaybeWithMarker(MarkerToken(), " ", "{ ");
1649     } else {
1650       generator->PrintMaybeWithMarker(MarkerToken(), " ", "{\n");
1651     }
1652   }
1653 };
1654 
1655 namespace {
1656 
1657 // Returns true if `ch` needs to be escaped in TextFormat, independent of any
1658 // UTF-8 validity issues.
DefinitelyNeedsEscape(unsigned char ch)1659 bool DefinitelyNeedsEscape(unsigned char ch) {
1660   if (ch >= 0x80) {
1661     return false;  // High byte; no escapes necessary if UTF-8 is valid.
1662   }
1663 
1664   if (!absl::ascii_isprint(ch)) {
1665     return true;  // Unprintable characters need escape.
1666   }
1667 
1668   switch (ch) {
1669     case '\"':
1670     case '\'':
1671     case '\\':
1672       // These characters need escapes despite being printable.
1673       return true;
1674   }
1675 
1676   return false;
1677 }
1678 
1679 // Returns true if this is a high byte that requires UTF-8 validation.  If the
1680 // UTF-8 validation fails, we must escape the byte.
bool NeedsUtf8Validation(unsigned char ch) {
  // A byte is "high" exactly when its top bit is set.
  return (ch & 0x80u) != 0;
}
1682 
1683 // Returns the number of bytes in the prefix of `val` that do not need escaping.
1684 // This is like utf8_range::SpanStructurallyValid(), except that it also
1685 // terminates at any ASCII char that needs to be escaped in TextFormat (any char
1686 // that has `DefinitelyNeedsEscape(ch) == true`).
1687 //
1688 // If we could get a variant of utf8_range::SpanStructurallyValid() that could
1689 // terminate on any of these chars, that might be more efficient, but it would
1690 // be much more complicated to modify that heavily SIMD code.
SkipPassthroughBytes(absl::string_view val)1691 size_t SkipPassthroughBytes(absl::string_view val) {
1692   for (size_t i = 0; i < val.size(); i++) {
1693     unsigned char uc = val[i];
1694     if (DefinitelyNeedsEscape(uc)) return i;
1695     if (NeedsUtf8Validation(uc)) {
1696       // Find the end of this region of consecutive high bytes, so that we only
1697       // give high bytes to the UTF-8 checker.  This avoids needing to perform
1698       // a second scan of the ASCII characters looking for characters that
1699       // need escaping.
1700       //
1701       // We assume that high bytes are less frequent than plain, printable ASCII
1702       // bytes, so we accept the double-scan of high bytes.
1703       size_t end = i + 1;
1704       for (; end < val.size(); end++) {
1705         if (!NeedsUtf8Validation(val[end])) break;
1706       }
1707       size_t n = end - i;
1708       size_t ok = utf8_range::SpanStructurallyValid(val.substr(i, n));
1709       if (ok != n) return i + ok;
1710       i += ok - 1;
1711     }
1712   }
1713   return val.size();
1714 }
1715 
1716 }  // namespace
1717 
HardenedPrintString(absl::string_view src,TextFormat::BaseTextGenerator * generator)1718 void TextFormat::Printer::HardenedPrintString(
1719     absl::string_view src, TextFormat::BaseTextGenerator* generator) {
1720   // Print as UTF-8, while guarding against any invalid UTF-8 in the string
1721   // field.
1722   //
1723   // If in the future we have a guaranteed invariant that invalid UTF-8 will
1724   // never be present, we could avoid the UTF-8 check here.
1725 
1726   generator->PrintLiteral("\"");
1727   while (!src.empty()) {
1728     size_t n = SkipPassthroughBytes(src);
1729     if (n != 0) {
1730       generator->PrintString(src.substr(0, n));
1731       src.remove_prefix(n);
1732       if (src.empty()) break;
1733     }
1734 
1735     // If repeated calls to CEscape() and PrintString() are expensive, we could
1736     // consider batching them, at the cost of some complexity.
1737     generator->PrintString(absl::CEscape(src.substr(0, 1)));
1738     src.remove_prefix(1);
1739   }
1740   generator->PrintLiteral("\"");
1741 }
1742 
1743 // ===========================================================================
//  An internal field value printer that escapes UTF-8 strings.
1745 class TextFormat::Printer::FastFieldValuePrinterUtf8Escaping
1746     : public TextFormat::Printer::DebugStringFieldValuePrinter {
1747  public:
PrintString(const std::string & val,TextFormat::BaseTextGenerator * generator) const1748   void PrintString(const std::string& val,
1749                    TextFormat::BaseTextGenerator* generator) const override {
1750     TextFormat::Printer::HardenedPrintString(val, generator);
1751   }
PrintBytes(const std::string & val,TextFormat::BaseTextGenerator * generator) const1752   void PrintBytes(const std::string& val,
1753                   TextFormat::BaseTextGenerator* generator) const override {
1754     return FastFieldValuePrinter::PrintString(val, generator);
1755   }
1756 };
1757 
1758 // ===========================================================================
1759 // Implementation of the default Finder for extensions.
~Finder()1760 TextFormat::Finder::~Finder() {}
1761 
FindExtension(Message * message,const std::string & name) const1762 const FieldDescriptor* TextFormat::Finder::FindExtension(
1763     Message* message, const std::string& name) const {
1764   return DefaultFinderFindExtension(message, name);
1765 }
1766 
FindExtensionByNumber(const Descriptor * descriptor,int number) const1767 const FieldDescriptor* TextFormat::Finder::FindExtensionByNumber(
1768     const Descriptor* descriptor, int number) const {
1769   return DefaultFinderFindExtensionByNumber(descriptor, number);
1770 }
1771 
FindAnyType(const Message & message,const std::string & prefix,const std::string & name) const1772 const Descriptor* TextFormat::Finder::FindAnyType(
1773     const Message& message, const std::string& prefix,
1774     const std::string& name) const {
1775   return DefaultFinderFindAnyType(message, prefix, name);
1776 }
1777 
FindExtensionFactory(const FieldDescriptor *) const1778 MessageFactory* TextFormat::Finder::FindExtensionFactory(
1779     const FieldDescriptor* /*field*/) const {
1780   return nullptr;
1781 }
1782 
1783 // ===========================================================================
1784 
Parser()1785 TextFormat::Parser::Parser()
1786     : error_collector_(nullptr),
1787       finder_(nullptr),
1788       parse_info_tree_(nullptr),
1789       allow_partial_(false),
1790       allow_case_insensitive_field_(false),
1791       allow_unknown_field_(false),
1792       allow_unknown_extension_(false),
1793       allow_unknown_enum_(false),
1794       allow_field_number_(false),
1795       allow_relaxed_whitespace_(false),
1796       allow_singular_overwrites_(false),
1797       recursion_limit_(std::numeric_limits<int>::max()) {}
1798 
~Parser()1799 TextFormat::Parser::~Parser() {}
1800 
1801 namespace {
1802 
1803 template <typename T>
CheckParseInputSize(T & input,io::ErrorCollector * error_collector)1804 bool CheckParseInputSize(T& input, io::ErrorCollector* error_collector) {
1805   if (input.size() > INT_MAX) {
1806     error_collector->RecordError(
1807         -1, 0,
1808         absl::StrCat(
1809             "Input size too large: ", static_cast<int64_t>(input.size()),
1810             " bytes", " > ", INT_MAX, " bytes."));
1811     return false;
1812   }
1813   return true;
1814 }
1815 
1816 }  // namespace
1817 
Parse(io::ZeroCopyInputStream * input,Message * output)1818 bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
1819                                Message* output) {
1820   output->Clear();
1821 
1822   ParserImpl::SingularOverwritePolicy overwrites_policy =
1823       allow_singular_overwrites_ ? ParserImpl::ALLOW_SINGULAR_OVERWRITES
1824                                  : ParserImpl::FORBID_SINGULAR_OVERWRITES;
1825 
1826   ParserImpl parser(output->GetDescriptor(), input, error_collector_, finder_,
1827                     parse_info_tree_, overwrites_policy,
1828                     allow_case_insensitive_field_, allow_unknown_field_,
1829                     allow_unknown_extension_, allow_unknown_enum_,
1830                     allow_field_number_, allow_relaxed_whitespace_,
1831                     allow_partial_, recursion_limit_, no_op_fields_);
1832   return MergeUsingImpl(input, output, &parser);
1833 }
1834 
ParseFromString(absl::string_view input,Message * output)1835 bool TextFormat::Parser::ParseFromString(absl::string_view input,
1836                                          Message* output) {
1837   DO(CheckParseInputSize(input, error_collector_));
1838   io::ArrayInputStream input_stream(input.data(), input.size());
1839   return Parse(&input_stream, output);
1840 }
1841 
ParseFromCord(const absl::Cord & input,Message * output)1842 bool TextFormat::Parser::ParseFromCord(const absl::Cord& input,
1843                                        Message* output) {
1844   DO(CheckParseInputSize(input, error_collector_));
1845   io::CordInputStream input_stream(&input);
1846   return Parse(&input_stream, output);
1847 }
1848 
Merge(io::ZeroCopyInputStream * input,Message * output)1849 bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
1850                                Message* output) {
1851   ParserImpl parser(output->GetDescriptor(), input, error_collector_, finder_,
1852                     parse_info_tree_, ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1853                     allow_case_insensitive_field_, allow_unknown_field_,
1854                     allow_unknown_extension_, allow_unknown_enum_,
1855                     allow_field_number_, allow_relaxed_whitespace_,
1856                     allow_partial_, recursion_limit_, no_op_fields_);
1857   return MergeUsingImpl(input, output, &parser);
1858 }
1859 
MergeFromString(absl::string_view input,Message * output)1860 bool TextFormat::Parser::MergeFromString(absl::string_view input,
1861                                          Message* output) {
1862   DO(CheckParseInputSize(input, error_collector_));
1863   io::ArrayInputStream input_stream(input.data(), input.size());
1864   return Merge(&input_stream, output);
1865 }
1866 
MergeUsingImpl(io::ZeroCopyInputStream *,Message * output,ParserImpl * parser_impl)1867 bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* /* input */,
1868                                         Message* output,
1869                                         ParserImpl* parser_impl) {
1870   if (!parser_impl->Parse(output)) return false;
1871   if (!allow_partial_ && !output->IsInitialized()) {
1872     std::vector<std::string> missing_fields;
1873     output->FindInitializationErrors(&missing_fields);
1874     parser_impl->ReportError(-1, 0,
1875                              absl::StrCat("Message missing required fields: ",
1876                                           absl::StrJoin(missing_fields, ", ")));
1877     return false;
1878   }
1879   return true;
1880 }
1881 
ParseFieldValueFromString(absl::string_view input,const FieldDescriptor * field,Message * output)1882 bool TextFormat::Parser::ParseFieldValueFromString(absl::string_view input,
1883                                                    const FieldDescriptor* field,
1884                                                    Message* output) {
1885   io::ArrayInputStream input_stream(input.data(), input.size());
1886   ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
1887                     finder_, parse_info_tree_,
1888                     ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1889                     allow_case_insensitive_field_, allow_unknown_field_,
1890                     allow_unknown_extension_, allow_unknown_enum_,
1891                     allow_field_number_, allow_relaxed_whitespace_,
1892                     allow_partial_, recursion_limit_, no_op_fields_);
1893   return parser.ParseField(field, output);
1894 }
1895 
Parse(io::ZeroCopyInputStream * input,Message * output)1896 /* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
1897                                     Message* output) {
1898   return Parser().Parse(input, output);
1899 }
1900 
Merge(io::ZeroCopyInputStream * input,Message * output)1901 /* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
1902                                     Message* output) {
1903   return Parser().Merge(input, output);
1904 }
1905 
ParseFromString(absl::string_view input,Message * output)1906 /* static */ bool TextFormat::ParseFromString(absl::string_view input,
1907                                               Message* output) {
1908   return Parser().ParseFromString(input, output);
1909 }
1910 
ParseFromCord(const absl::Cord & input,Message * output)1911 /* static */ bool TextFormat::ParseFromCord(const absl::Cord& input,
1912                                             Message* output) {
1913   return Parser().ParseFromCord(input, output);
1914 }
1915 
MergeFromString(absl::string_view input,Message * output)1916 /* static */ bool TextFormat::MergeFromString(absl::string_view input,
1917                                               Message* output) {
1918   return Parser().MergeFromString(input, output);
1919 }
1920 
1921 #undef DO
1922 
1923 // ===========================================================================
1924 
~BaseTextGenerator()1925 TextFormat::BaseTextGenerator::~BaseTextGenerator() {}
1926 
1927 namespace {
1928 
1929 // A BaseTextGenerator that writes to a string.
1930 class StringBaseTextGenerator : public TextFormat::BaseTextGenerator {
1931  public:
Print(const char * text,size_t size)1932   void Print(const char* text, size_t size) override {
1933     output_.append(text, size);
1934   }
1935 
Consume()1936   std::string Consume() && { return std::move(output_); }
1937 
1938  private:
1939   std::string output_;
1940 };
1941 
1942 }  // namespace
1943 
1944 // The default implementation for FieldValuePrinter. We just delegate the
1945 // implementation to the default FastFieldValuePrinter to avoid duplicating the
1946 // logic.
FieldValuePrinter()1947 TextFormat::FieldValuePrinter::FieldValuePrinter() {}
~FieldValuePrinter()1948 TextFormat::FieldValuePrinter::~FieldValuePrinter() {}
1949 
1950 #define FORWARD_IMPL(fn, ...)            \
1951   StringBaseTextGenerator generator;     \
1952   delegate_.fn(__VA_ARGS__, &generator); \
1953   return std::move(generator).Consume()
1954 
PrintBool(bool val) const1955 std::string TextFormat::FieldValuePrinter::PrintBool(bool val) const {
1956   FORWARD_IMPL(PrintBool, val);
1957 }
PrintInt32(int32_t val) const1958 std::string TextFormat::FieldValuePrinter::PrintInt32(int32_t val) const {
1959   FORWARD_IMPL(PrintInt32, val);
1960 }
PrintUInt32(uint32_t val) const1961 std::string TextFormat::FieldValuePrinter::PrintUInt32(uint32_t val) const {
1962   FORWARD_IMPL(PrintUInt32, val);
1963 }
PrintInt64(int64_t val) const1964 std::string TextFormat::FieldValuePrinter::PrintInt64(int64_t val) const {
1965   FORWARD_IMPL(PrintInt64, val);
1966 }
PrintUInt64(uint64_t val) const1967 std::string TextFormat::FieldValuePrinter::PrintUInt64(uint64_t val) const {
1968   FORWARD_IMPL(PrintUInt64, val);
1969 }
PrintFloat(float val) const1970 std::string TextFormat::FieldValuePrinter::PrintFloat(float val) const {
1971   FORWARD_IMPL(PrintFloat, val);
1972 }
PrintDouble(double val) const1973 std::string TextFormat::FieldValuePrinter::PrintDouble(double val) const {
1974   FORWARD_IMPL(PrintDouble, val);
1975 }
PrintString(const std::string & val) const1976 std::string TextFormat::FieldValuePrinter::PrintString(
1977     const std::string& val) const {
1978   FORWARD_IMPL(PrintString, val);
1979 }
PrintBytes(const std::string & val) const1980 std::string TextFormat::FieldValuePrinter::PrintBytes(
1981     const std::string& val) const {
1982   return PrintString(val);
1983 }
PrintEnum(int32_t val,const std::string & name) const1984 std::string TextFormat::FieldValuePrinter::PrintEnum(
1985     int32_t val, const std::string& name) const {
1986   FORWARD_IMPL(PrintEnum, val, name);
1987 }
PrintFieldName(const Message & message,const Reflection * reflection,const FieldDescriptor * field) const1988 std::string TextFormat::FieldValuePrinter::PrintFieldName(
1989     const Message& message, const Reflection* reflection,
1990     const FieldDescriptor* field) const {
1991   FORWARD_IMPL(PrintFieldName, message, reflection, field);
1992 }
PrintMessageStart(const Message & message,int field_index,int field_count,bool single_line_mode) const1993 std::string TextFormat::FieldValuePrinter::PrintMessageStart(
1994     const Message& message, int field_index, int field_count,
1995     bool single_line_mode) const {
1996   FORWARD_IMPL(PrintMessageStart, message, field_index, field_count,
1997                single_line_mode);
1998 }
PrintMessageEnd(const Message & message,int field_index,int field_count,bool single_line_mode) const1999 std::string TextFormat::FieldValuePrinter::PrintMessageEnd(
2000     const Message& message, int field_index, int field_count,
2001     bool single_line_mode) const {
2002   FORWARD_IMPL(PrintMessageEnd, message, field_index, field_count,
2003                single_line_mode);
2004 }
2005 #undef FORWARD_IMPL
2006 
FastFieldValuePrinter()2007 TextFormat::FastFieldValuePrinter::FastFieldValuePrinter() {}
~FastFieldValuePrinter()2008 TextFormat::FastFieldValuePrinter::~FastFieldValuePrinter() {}
PrintBool(bool val,BaseTextGenerator * generator) const2009 void TextFormat::FastFieldValuePrinter::PrintBool(
2010     bool val, BaseTextGenerator* generator) const {
2011   if (val) {
2012     generator->PrintLiteral("true");
2013   } else {
2014     generator->PrintLiteral("false");
2015   }
2016 }
PrintInt32(int32_t val,BaseTextGenerator * generator) const2017 void TextFormat::FastFieldValuePrinter::PrintInt32(
2018     int32_t val, BaseTextGenerator* generator) const {
2019   generator->PrintString(absl::StrCat(val));
2020 }
PrintUInt32(uint32_t val,BaseTextGenerator * generator) const2021 void TextFormat::FastFieldValuePrinter::PrintUInt32(
2022     uint32_t val, BaseTextGenerator* generator) const {
2023   generator->PrintString(absl::StrCat(val));
2024 }
PrintInt64(int64_t val,BaseTextGenerator * generator) const2025 void TextFormat::FastFieldValuePrinter::PrintInt64(
2026     int64_t val, BaseTextGenerator* generator) const {
2027   generator->PrintString(absl::StrCat(val));
2028 }
PrintUInt64(uint64_t val,BaseTextGenerator * generator) const2029 void TextFormat::FastFieldValuePrinter::PrintUInt64(
2030     uint64_t val, BaseTextGenerator* generator) const {
2031   generator->PrintString(absl::StrCat(val));
2032 }
PrintFloat(float val,BaseTextGenerator * generator) const2033 void TextFormat::FastFieldValuePrinter::PrintFloat(
2034     float val, BaseTextGenerator* generator) const {
2035   generator->PrintString(!std::isnan(val) ? io::SimpleFtoa(val) : "nan");
2036 }
PrintDouble(double val,BaseTextGenerator * generator) const2037 void TextFormat::FastFieldValuePrinter::PrintDouble(
2038     double val, BaseTextGenerator* generator) const {
2039   generator->PrintString(!std::isnan(val) ? io::SimpleDtoa(val) : "nan");
2040 }
PrintEnum(int32_t,const std::string & name,BaseTextGenerator * generator) const2041 void TextFormat::FastFieldValuePrinter::PrintEnum(
2042     int32_t /*val*/, const std::string& name,
2043     BaseTextGenerator* generator) const {
2044   generator->PrintString(name);
2045 }
2046 
PrintString(const std::string & val,BaseTextGenerator * generator) const2047 void TextFormat::FastFieldValuePrinter::PrintString(
2048     const std::string& val, BaseTextGenerator* generator) const {
2049   generator->PrintLiteral("\"");
2050   if (!val.empty()) {
2051     generator->PrintString(absl::CEscape(val));
2052   }
2053   generator->PrintLiteral("\"");
2054 }
PrintBytes(const std::string & val,BaseTextGenerator * generator) const2055 void TextFormat::FastFieldValuePrinter::PrintBytes(
2056     const std::string& val, BaseTextGenerator* generator) const {
2057   PrintString(val, generator);
2058 }
PrintFieldName(const Message & message,int,int,const Reflection * reflection,const FieldDescriptor * field,BaseTextGenerator * generator) const2059 void TextFormat::FastFieldValuePrinter::PrintFieldName(
2060     const Message& message, int /*field_index*/, int /*field_count*/,
2061     const Reflection* reflection, const FieldDescriptor* field,
2062     BaseTextGenerator* generator) const {
2063   PrintFieldName(message, reflection, field, generator);
2064 }
PrintFieldName(const Message &,const Reflection *,const FieldDescriptor * field,BaseTextGenerator * generator) const2065 void TextFormat::FastFieldValuePrinter::PrintFieldName(
2066     const Message& /*message*/, const Reflection* /*reflection*/,
2067     const FieldDescriptor* field, BaseTextGenerator* generator) const {
2068   if (field->is_extension()) {
2069     generator->PrintLiteral("[");
2070     generator->PrintString(field->PrintableNameForExtension());
2071     generator->PrintLiteral("]");
2072   } else if (internal::cpp::IsGroupLike(*field)) {
2073     // Groups must be serialized with their original capitalization.
2074     generator->PrintString(field->message_type()->name());
2075   } else {
2076     generator->PrintString(field->name());
2077   }
2078 }
PrintMessageStart(const Message &,int,int,bool single_line_mode,BaseTextGenerator * generator) const2079 void TextFormat::FastFieldValuePrinter::PrintMessageStart(
2080     const Message& /*message*/, int /*field_index*/, int /*field_count*/,
2081     bool single_line_mode, BaseTextGenerator* generator) const {
2082   if (single_line_mode) {
2083     generator->PrintLiteral(" { ");
2084   } else {
2085     generator->PrintLiteral(" {\n");
2086   }
2087 }
PrintMessageContent(const Message &,int,int,bool,BaseTextGenerator *) const2088 bool TextFormat::FastFieldValuePrinter::PrintMessageContent(
2089     const Message& /*message*/, int /*field_index*/, int /*field_count*/,
2090     bool /*single_line_mode*/, BaseTextGenerator* /*generator*/) const {
2091   return false;  // Use the default printing function.
2092 }
PrintMessageEnd(const Message &,int,int,bool single_line_mode,BaseTextGenerator * generator) const2093 void TextFormat::FastFieldValuePrinter::PrintMessageEnd(
2094     const Message& /*message*/, int /*field_index*/, int /*field_count*/,
2095     bool single_line_mode, BaseTextGenerator* generator) const {
2096   if (single_line_mode) {
2097     generator->PrintLiteral("} ");
2098   } else {
2099     generator->PrintLiteral("}\n");
2100   }
2101 }
2102 
2103 namespace {
2104 
2105 // A legacy compatibility wrapper. Takes ownership of the delegate.
2106 class FieldValuePrinterWrapper : public TextFormat::FastFieldValuePrinter {
2107  public:
FieldValuePrinterWrapper(const TextFormat::FieldValuePrinter * delegate)2108   explicit FieldValuePrinterWrapper(
2109       const TextFormat::FieldValuePrinter* delegate)
2110       : delegate_(delegate) {}
2111 
SetDelegate(const TextFormat::FieldValuePrinter * delegate)2112   void SetDelegate(const TextFormat::FieldValuePrinter* delegate) {
2113     delegate_.reset(delegate);
2114   }
2115 
PrintBool(bool val,TextFormat::BaseTextGenerator * generator) const2116   void PrintBool(bool val,
2117                  TextFormat::BaseTextGenerator* generator) const override {
2118     generator->PrintString(delegate_->PrintBool(val));
2119   }
PrintInt32(int32_t val,TextFormat::BaseTextGenerator * generator) const2120   void PrintInt32(int32_t val,
2121                   TextFormat::BaseTextGenerator* generator) const override {
2122     generator->PrintString(delegate_->PrintInt32(val));
2123   }
PrintUInt32(uint32_t val,TextFormat::BaseTextGenerator * generator) const2124   void PrintUInt32(uint32_t val,
2125                    TextFormat::BaseTextGenerator* generator) const override {
2126     generator->PrintString(delegate_->PrintUInt32(val));
2127   }
PrintInt64(int64_t val,TextFormat::BaseTextGenerator * generator) const2128   void PrintInt64(int64_t val,
2129                   TextFormat::BaseTextGenerator* generator) const override {
2130     generator->PrintString(delegate_->PrintInt64(val));
2131   }
PrintUInt64(uint64_t val,TextFormat::BaseTextGenerator * generator) const2132   void PrintUInt64(uint64_t val,
2133                    TextFormat::BaseTextGenerator* generator) const override {
2134     generator->PrintString(delegate_->PrintUInt64(val));
2135   }
PrintFloat(float val,TextFormat::BaseTextGenerator * generator) const2136   void PrintFloat(float val,
2137                   TextFormat::BaseTextGenerator* generator) const override {
2138     generator->PrintString(delegate_->PrintFloat(val));
2139   }
PrintDouble(double val,TextFormat::BaseTextGenerator * generator) const2140   void PrintDouble(double val,
2141                    TextFormat::BaseTextGenerator* generator) const override {
2142     generator->PrintString(delegate_->PrintDouble(val));
2143   }
PrintString(const std::string & val,TextFormat::BaseTextGenerator * generator) const2144   void PrintString(const std::string& val,
2145                    TextFormat::BaseTextGenerator* generator) const override {
2146     generator->PrintString(delegate_->PrintString(val));
2147   }
PrintBytes(const std::string & val,TextFormat::BaseTextGenerator * generator) const2148   void PrintBytes(const std::string& val,
2149                   TextFormat::BaseTextGenerator* generator) const override {
2150     generator->PrintString(delegate_->PrintBytes(val));
2151   }
PrintEnum(int32_t val,const std::string & name,TextFormat::BaseTextGenerator * generator) const2152   void PrintEnum(int32_t val, const std::string& name,
2153                  TextFormat::BaseTextGenerator* generator) const override {
2154     generator->PrintString(delegate_->PrintEnum(val, name));
2155   }
PrintFieldName(const Message & message,int,int,const Reflection * reflection,const FieldDescriptor * field,TextFormat::BaseTextGenerator * generator) const2156   void PrintFieldName(const Message& message, int /*field_index*/,
2157                       int /*field_count*/, const Reflection* reflection,
2158                       const FieldDescriptor* field,
2159                       TextFormat::BaseTextGenerator* generator) const override {
2160     generator->PrintString(
2161         delegate_->PrintFieldName(message, reflection, field));
2162   }
PrintFieldName(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextFormat::BaseTextGenerator * generator) const2163   void PrintFieldName(const Message& message, const Reflection* reflection,
2164                       const FieldDescriptor* field,
2165                       TextFormat::BaseTextGenerator* generator) const override {
2166     generator->PrintString(
2167         delegate_->PrintFieldName(message, reflection, field));
2168   }
PrintMessageStart(const Message & message,int field_index,int field_count,bool single_line_mode,TextFormat::BaseTextGenerator * generator) const2169   void PrintMessageStart(
2170       const Message& message, int field_index, int field_count,
2171       bool single_line_mode,
2172       TextFormat::BaseTextGenerator* generator) const override {
2173     generator->PrintString(delegate_->PrintMessageStart(
2174         message, field_index, field_count, single_line_mode));
2175   }
PrintMessageEnd(const Message & message,int field_index,int field_count,bool single_line_mode,TextFormat::BaseTextGenerator * generator) const2176   void PrintMessageEnd(
2177       const Message& message, int field_index, int field_count,
2178       bool single_line_mode,
2179       TextFormat::BaseTextGenerator* generator) const override {
2180     generator->PrintString(delegate_->PrintMessageEnd(
2181         message, field_index, field_count, single_line_mode));
2182   }
2183 
2184  private:
2185   std::unique_ptr<const TextFormat::FieldValuePrinter> delegate_;
2186 };
2187 
2188 }  // namespace
2189 
Printer()2190 TextFormat::Printer::Printer()
2191     : initial_indent_level_(0),
2192       single_line_mode_(false),
2193       use_field_number_(false),
2194       use_short_repeated_primitives_(false),
2195       insert_silent_marker_(false),
2196       redact_debug_string_(false),
2197       randomize_debug_string_(false),
2198       report_sensitive_fields_(internal::FieldReporterLevel::kNoReport),
2199       hide_unknown_fields_(false),
2200       print_message_fields_in_index_order_(false),
2201       expand_any_(false),
2202       truncate_string_field_longer_than_(0LL),
2203       finder_(nullptr) {
2204   SetUseUtf8StringEscaping(false);
2205 }
2206 
SetUseUtf8StringEscaping(bool as_utf8)2207 void TextFormat::Printer::SetUseUtf8StringEscaping(bool as_utf8) {
2208   SetDefaultFieldValuePrinter(as_utf8 ? new FastFieldValuePrinterUtf8Escaping()
2209                                       : new DebugStringFieldValuePrinter());
2210 }
2211 
SetDefaultFieldValuePrinter(const FieldValuePrinter * printer)2212 void TextFormat::Printer::SetDefaultFieldValuePrinter(
2213     const FieldValuePrinter* printer) {
2214   default_field_value_printer_.reset(new FieldValuePrinterWrapper(printer));
2215 }
2216 
SetDefaultFieldValuePrinter(const FastFieldValuePrinter * printer)2217 void TextFormat::Printer::SetDefaultFieldValuePrinter(
2218     const FastFieldValuePrinter* printer) {
2219   default_field_value_printer_.reset(printer);
2220 }
2221 
RegisterFieldValuePrinter(const FieldDescriptor * field,const FieldValuePrinter * printer)2222 bool TextFormat::Printer::RegisterFieldValuePrinter(
2223     const FieldDescriptor* field, const FieldValuePrinter* printer) {
2224   if (field == nullptr || printer == nullptr) {
2225     return false;
2226   }
2227   std::unique_ptr<FieldValuePrinterWrapper> wrapper(
2228       new FieldValuePrinterWrapper(nullptr));
2229   auto pair = custom_printers_.emplace(field, nullptr);
2230   if (pair.second) {
2231     wrapper->SetDelegate(printer);
2232     pair.first->second = std::move(wrapper);
2233     return true;
2234   } else {
2235     return false;
2236   }
2237 }
2238 
RegisterFieldValuePrinter(const FieldDescriptor * field,const FastFieldValuePrinter * printer)2239 bool TextFormat::Printer::RegisterFieldValuePrinter(
2240     const FieldDescriptor* field, const FastFieldValuePrinter* printer) {
2241   if (field == nullptr || printer == nullptr) {
2242     return false;
2243   }
2244   auto pair = custom_printers_.emplace(field, nullptr);
2245   if (pair.second) {
2246     pair.first->second.reset(printer);
2247     return true;
2248   } else {
2249     return false;
2250   }
2251 }
2252 
RegisterMessagePrinter(const Descriptor * descriptor,const MessagePrinter * printer)2253 bool TextFormat::Printer::RegisterMessagePrinter(
2254     const Descriptor* descriptor, const MessagePrinter* printer) {
2255   if (descriptor == nullptr || printer == nullptr) {
2256     return false;
2257   }
2258   auto pair = custom_message_printers_.emplace(descriptor, nullptr);
2259   if (pair.second) {
2260     pair.first->second.reset(printer);
2261     return true;
2262   } else {
2263     return false;
2264   }
2265 }
2266 
PrintToString(const Message & message,std::string * output) const2267 bool TextFormat::Printer::PrintToString(const Message& message,
2268                                         std::string* output) const {
2269   ABSL_DCHECK(output) << "output specified is nullptr";
2270 
2271   output->clear();
2272   io::StringOutputStream output_stream(output);
2273 
2274   return Print(message, &output_stream,
2275                internal::FieldReporterLevel::kMemberPrintToString);
2276 }
2277 
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,std::string * output) const2278 bool TextFormat::Printer::PrintUnknownFieldsToString(
2279     const UnknownFieldSet& unknown_fields, std::string* output) const {
2280   ABSL_DCHECK(output) << "output specified is nullptr";
2281 
2282   output->clear();
2283   io::StringOutputStream output_stream(output);
2284   return PrintUnknownFields(unknown_fields, &output_stream);
2285 }
2286 
Print(const Message & message,io::ZeroCopyOutputStream * output) const2287 bool TextFormat::Printer::Print(const Message& message,
2288                                 io::ZeroCopyOutputStream* output) const {
2289   return Print(message, output, internal::FieldReporterLevel::kPrintWithStream);
2290 }
2291 
Print(const Message & message,io::ZeroCopyOutputStream * output,internal::FieldReporterLevel reporter) const2292 bool TextFormat::Printer::Print(const Message& message,
2293                                 io::ZeroCopyOutputStream* output,
2294                                 internal::FieldReporterLevel reporter) const {
2295   TextGenerator generator(output, insert_silent_marker_, initial_indent_level_);
2296 
2297 
2298   Print(message, &generator);
2299 
2300   // Output false if the generator failed internally.
2301   return !generator.failed();
2302 }
2303 
2304 // Maximum recursion depth for heuristically printing out length-prefixed
2305 // unknown fields as messages.
2306 static constexpr int kUnknownFieldRecursionLimit = 10;
2307 
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output) const2308 bool TextFormat::Printer::PrintUnknownFields(
2309     const UnknownFieldSet& unknown_fields,
2310     io::ZeroCopyOutputStream* output) const {
2311   TextGenerator generator(output, initial_indent_level_);
2312 
2313   PrintUnknownFields(unknown_fields, &generator, kUnknownFieldRecursionLimit);
2314 
2315   // Output false if the generator failed internally.
2316   return !generator.failed();
2317 }
2318 
2319 namespace {
2320 // Comparison functor for sorting FieldDescriptors by field index.
2321 // Normal fields have higher precedence than extensions.
2322 struct FieldIndexSorter {
operator ()google::protobuf::__anonbeb475080711::FieldIndexSorter2323   bool operator()(const FieldDescriptor* left,
2324                   const FieldDescriptor* right) const {
2325     if (left->is_extension() && right->is_extension()) {
2326       return left->number() < right->number();
2327     } else if (left->is_extension()) {
2328       return false;
2329     } else if (right->is_extension()) {
2330       return true;
2331     } else {
2332       return left->index() < right->index();
2333     }
2334   }
2335 };
2336 
2337 }  // namespace
2338 
PrintAny(const Message & message,BaseTextGenerator * generator) const2339 bool TextFormat::Printer::PrintAny(const Message& message,
2340                                    BaseTextGenerator* generator) const {
2341   const FieldDescriptor* type_url_field;
2342   const FieldDescriptor* value_field;
2343   if (!internal::GetAnyFieldDescriptors(message, &type_url_field,
2344                                         &value_field)) {
2345     return false;
2346   }
2347 
2348   const Reflection* reflection = message.GetReflection();
2349 
2350   // Extract the full type name from the type_url field.
2351   const std::string& type_url = reflection->GetString(message, type_url_field);
2352   std::string url_prefix;
2353   std::string full_type_name;
2354 
2355   if (!internal::ParseAnyTypeUrl(type_url, &url_prefix, &full_type_name)) {
2356     return false;
2357   }
2358 
2359   // Print the "value" in text.
2360   const Descriptor* value_descriptor =
2361       finder_ ? finder_->FindAnyType(message, url_prefix, full_type_name)
2362               : DefaultFinderFindAnyType(message, url_prefix, full_type_name);
2363   if (value_descriptor == nullptr) {
2364     ABSL_LOG(WARNING) << "Can't print proto content: proto type " << type_url
2365                       << " not found";
2366     return false;
2367   }
2368   DynamicMessageFactory factory;
2369   std::unique_ptr<Message> value_message(
2370       factory.GetPrototype(value_descriptor)->New());
2371   std::string serialized_value = reflection->GetString(message, value_field);
2372   if (!value_message->ParseFromString(serialized_value)) {
2373     ABSL_LOG(WARNING) << type_url << ": failed to parse contents";
2374     return false;
2375   }
2376   generator->PrintLiteral("[");
2377   generator->PrintString(type_url);
2378   generator->PrintLiteral("]");
2379   const FastFieldValuePrinter* printer = GetFieldPrinter(value_field);
2380   printer->PrintMessageStart(message, -1, 0, single_line_mode_, generator);
2381   generator->Indent();
2382   Print(*value_message, generator);
2383   generator->Outdent();
2384   printer->PrintMessageEnd(message, -1, 0, single_line_mode_, generator);
2385   return true;
2386 }
2387 
Print(const Message & message,BaseTextGenerator * generator) const2388 void TextFormat::Printer::Print(const Message& message,
2389                                 BaseTextGenerator* generator) const {
2390   const Reflection* reflection = message.GetReflection();
2391   if (!reflection) {
2392     // This message does not provide any way to describe its structure.
2393     // Parse it again in an UnknownFieldSet, and display this instead.
2394     UnknownFieldSet unknown_fields;
2395     {
2396       std::string serialized = message.SerializeAsString();
2397       io::ArrayInputStream input(serialized.data(), serialized.size());
2398       unknown_fields.ParseFromZeroCopyStream(&input);
2399     }
2400     PrintUnknownFields(unknown_fields, generator, kUnknownFieldRecursionLimit);
2401     return;
2402   }
2403   const Descriptor* descriptor = message.GetDescriptor();
2404   auto itr = custom_message_printers_.find(descriptor);
2405   if (itr != custom_message_printers_.end()) {
2406     itr->second->Print(message, single_line_mode_, generator);
2407     return;
2408   }
2409   PrintMessage(message, generator);
2410 }
2411 
PrintMessage(const Message & message,BaseTextGenerator * generator) const2412 void TextFormat::Printer::PrintMessage(const Message& message,
2413                                        BaseTextGenerator* generator) const {
2414   if (generator == nullptr) {
2415     return;
2416   }
2417   const Descriptor* descriptor = message.GetDescriptor();
2418   if (descriptor->full_name() == internal::kAnyFullTypeName && expand_any_ &&
2419       PrintAny(message, generator)) {
2420     return;
2421   }
2422   const Reflection* reflection = message.GetReflection();
2423   std::vector<const FieldDescriptor*> fields;
2424   if (descriptor->options().map_entry()) {
2425     fields.push_back(descriptor->field(0));
2426     fields.push_back(descriptor->field(1));
2427   } else {
2428     reflection->ListFields(message, &fields);
2429   }
2430 
2431   if (print_message_fields_in_index_order_) {
2432     std::sort(fields.begin(), fields.end(), FieldIndexSorter());
2433   }
2434   for (const FieldDescriptor* field : fields) {
2435     PrintField(message, reflection, field, generator);
2436   }
2437   if (!hide_unknown_fields_) {
2438     PrintUnknownFields(reflection->GetUnknownFields(message), generator,
2439                        kUnknownFieldRecursionLimit);
2440   }
2441 }
2442 
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,std::string * output) const2443 void TextFormat::Printer::PrintFieldValueToString(const Message& message,
2444                                                   const FieldDescriptor* field,
2445                                                   int index,
2446                                                   std::string* output) const {
2447   ABSL_DCHECK(output) << "output specified is nullptr";
2448 
2449   output->clear();
2450   io::StringOutputStream output_stream(output);
2451   TextGenerator generator(&output_stream, initial_indent_level_);
2452 
2453   PrintFieldValue(message, message.GetReflection(), field, index, &generator);
2454 }
2455 
2456 class MapEntryMessageComparator {
2457  public:
MapEntryMessageComparator(const Descriptor * descriptor)2458   explicit MapEntryMessageComparator(const Descriptor* descriptor)
2459       : field_(descriptor->field(0)) {}
2460 
operator ()(const Message * a,const Message * b)2461   bool operator()(const Message* a, const Message* b) {
2462     const Reflection* reflection = a->GetReflection();
2463     switch (field_->cpp_type()) {
2464       case FieldDescriptor::CPPTYPE_BOOL: {
2465         bool first = reflection->GetBool(*a, field_);
2466         bool second = reflection->GetBool(*b, field_);
2467         return first < second;
2468       }
2469       case FieldDescriptor::CPPTYPE_INT32: {
2470         int32_t first = reflection->GetInt32(*a, field_);
2471         int32_t second = reflection->GetInt32(*b, field_);
2472         return first < second;
2473       }
2474       case FieldDescriptor::CPPTYPE_INT64: {
2475         int64_t first = reflection->GetInt64(*a, field_);
2476         int64_t second = reflection->GetInt64(*b, field_);
2477         return first < second;
2478       }
2479       case FieldDescriptor::CPPTYPE_UINT32: {
2480         uint32_t first = reflection->GetUInt32(*a, field_);
2481         uint32_t second = reflection->GetUInt32(*b, field_);
2482         return first < second;
2483       }
2484       case FieldDescriptor::CPPTYPE_UINT64: {
2485         uint64_t first = reflection->GetUInt64(*a, field_);
2486         uint64_t second = reflection->GetUInt64(*b, field_);
2487         return first < second;
2488       }
2489       case FieldDescriptor::CPPTYPE_STRING: {
2490         std::string first = reflection->GetString(*a, field_);
2491         std::string second = reflection->GetString(*b, field_);
2492         return first < second;
2493       }
2494       default:
2495         ABSL_DLOG(FATAL) << "Invalid key for map field.";
2496         return true;
2497     }
2498   }
2499 
2500  private:
2501   const FieldDescriptor* field_;
2502 };
2503 
2504 namespace internal {
2505 class MapFieldPrinterHelper {
2506  public:
2507   // DynamicMapSorter::Sort cannot be used because it enforces syncing with
2508   // repeated field.
2509   static bool SortMap(const Message& message, const Reflection* reflection,
2510                       const FieldDescriptor* field,
2511                       std::vector<const Message*>* sorted_map_field);
2512   static void CopyKey(const MapKey& key, Message* message,
2513                       const FieldDescriptor* field_desc);
2514   static void CopyValue(const MapValueRef& value, Message* message,
2515                         const FieldDescriptor* field_desc);
2516 };
2517 
2518 // Returns true if elements contained in sorted_map_field need to be released.
SortMap(const Message & message,const Reflection * reflection,const FieldDescriptor * field,std::vector<const Message * > * sorted_map_field)2519 bool MapFieldPrinterHelper::SortMap(
2520     const Message& message, const Reflection* reflection,
2521     const FieldDescriptor* field,
2522     std::vector<const Message*>* sorted_map_field) {
2523   bool need_release = false;
2524   const MapFieldBase& base = *reflection->GetMapData(message, field);
2525 
2526   if (base.IsRepeatedFieldValid()) {
2527     const RepeatedPtrField<Message>& map_field =
2528         reflection->GetRepeatedPtrFieldInternal<Message>(message, field);
2529     for (int i = 0; i < map_field.size(); ++i) {
2530       sorted_map_field->push_back(
2531           const_cast<RepeatedPtrField<Message>*>(&map_field)->Mutable(i));
2532     }
2533   } else {
2534     // TODO: For performance, instead of creating map entry message
2535     // for each element, just store map keys and sort them.
2536     const Descriptor* map_entry_desc = field->message_type();
2537     const Message* prototype =
2538         reflection->GetMessageFactory()->GetPrototype(map_entry_desc);
2539     for (MapIterator iter =
2540              reflection->MapBegin(const_cast<Message*>(&message), field);
2541          iter != reflection->MapEnd(const_cast<Message*>(&message), field);
2542          ++iter) {
2543       Message* map_entry_message = prototype->New();
2544       CopyKey(iter.GetKey(), map_entry_message, map_entry_desc->field(0));
2545       CopyValue(iter.GetValueRef(), map_entry_message,
2546                 map_entry_desc->field(1));
2547       sorted_map_field->push_back(map_entry_message);
2548     }
2549     need_release = true;
2550   }
2551 
2552   MapEntryMessageComparator comparator(field->message_type());
2553   std::stable_sort(sorted_map_field->begin(), sorted_map_field->end(),
2554                    comparator);
2555   return need_release;
2556 }
2557 
CopyKey(const MapKey & key,Message * message,const FieldDescriptor * field_desc)2558 void MapFieldPrinterHelper::CopyKey(const MapKey& key, Message* message,
2559                                     const FieldDescriptor* field_desc) {
2560   const Reflection* reflection = message->GetReflection();
2561   switch (field_desc->cpp_type()) {
2562     case FieldDescriptor::CPPTYPE_DOUBLE:
2563     case FieldDescriptor::CPPTYPE_FLOAT:
2564     case FieldDescriptor::CPPTYPE_ENUM:
2565     case FieldDescriptor::CPPTYPE_MESSAGE:
2566       ABSL_LOG(ERROR) << "Not supported.";
2567       break;
2568     case FieldDescriptor::CPPTYPE_STRING:
2569       reflection->SetString(message, field_desc,
2570                             std::string(key.GetStringValue()));
2571       return;
2572     case FieldDescriptor::CPPTYPE_INT64:
2573       reflection->SetInt64(message, field_desc, key.GetInt64Value());
2574       return;
2575     case FieldDescriptor::CPPTYPE_INT32:
2576       reflection->SetInt32(message, field_desc, key.GetInt32Value());
2577       return;
2578     case FieldDescriptor::CPPTYPE_UINT64:
2579       reflection->SetUInt64(message, field_desc, key.GetUInt64Value());
2580       return;
2581     case FieldDescriptor::CPPTYPE_UINT32:
2582       reflection->SetUInt32(message, field_desc, key.GetUInt32Value());
2583       return;
2584     case FieldDescriptor::CPPTYPE_BOOL:
2585       reflection->SetBool(message, field_desc, key.GetBoolValue());
2586       return;
2587   }
2588 }
2589 
CopyValue(const MapValueRef & value,Message * message,const FieldDescriptor * field_desc)2590 void MapFieldPrinterHelper::CopyValue(const MapValueRef& value,
2591                                       Message* message,
2592                                       const FieldDescriptor* field_desc) {
2593   const Reflection* reflection = message->GetReflection();
2594   switch (field_desc->cpp_type()) {
2595     case FieldDescriptor::CPPTYPE_DOUBLE:
2596       reflection->SetDouble(message, field_desc, value.GetDoubleValue());
2597       return;
2598     case FieldDescriptor::CPPTYPE_FLOAT:
2599       reflection->SetFloat(message, field_desc, value.GetFloatValue());
2600       return;
2601     case FieldDescriptor::CPPTYPE_ENUM:
2602       reflection->SetEnumValue(message, field_desc, value.GetEnumValue());
2603       return;
2604     case FieldDescriptor::CPPTYPE_MESSAGE: {
2605       Message* sub_message = value.GetMessageValue().New();
2606       sub_message->CopyFrom(value.GetMessageValue());
2607       reflection->SetAllocatedMessage(message, sub_message, field_desc);
2608       return;
2609     }
2610     case FieldDescriptor::CPPTYPE_STRING:
2611       reflection->SetString(message, field_desc, value.GetStringValue());
2612       return;
2613     case FieldDescriptor::CPPTYPE_INT64:
2614       reflection->SetInt64(message, field_desc, value.GetInt64Value());
2615       return;
2616     case FieldDescriptor::CPPTYPE_INT32:
2617       reflection->SetInt32(message, field_desc, value.GetInt32Value());
2618       return;
2619     case FieldDescriptor::CPPTYPE_UINT64:
2620       reflection->SetUInt64(message, field_desc, value.GetUInt64Value());
2621       return;
2622     case FieldDescriptor::CPPTYPE_UINT32:
2623       reflection->SetUInt32(message, field_desc, value.GetUInt32Value());
2624       return;
2625     case FieldDescriptor::CPPTYPE_BOOL:
2626       reflection->SetBool(message, field_desc, value.GetBoolValue());
2627       return;
2628   }
2629 }
2630 }  // namespace internal
2631 
PrintField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,BaseTextGenerator * generator) const2632 void TextFormat::Printer::PrintField(const Message& message,
2633                                      const Reflection* reflection,
2634                                      const FieldDescriptor* field,
2635                                      BaseTextGenerator* generator) const {
2636   if (use_short_repeated_primitives_ && field->is_repeated() &&
2637       field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
2638       field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
2639     PrintShortRepeatedField(message, reflection, field, generator);
2640     return;
2641   }
2642 
2643   int count = 0;
2644 
2645   if (field->is_repeated()) {
2646     count = reflection->FieldSize(message, field);
2647   } else if (reflection->HasField(message, field) ||
2648              field->containing_type()->options().map_entry()) {
2649     count = 1;
2650   }
2651 
2652   std::vector<const Message*> sorted_map_field;
2653   bool need_release = false;
2654   bool is_map = field->is_map();
2655   if (is_map) {
2656     need_release = internal::MapFieldPrinterHelper::SortMap(
2657         message, reflection, field, &sorted_map_field);
2658   }
2659 
2660   for (int j = 0; j < count; ++j) {
2661     const int field_index = field->is_repeated() ? j : -1;
2662 
2663     PrintFieldName(message, field_index, count, reflection, field, generator);
2664 
2665     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2666       if (TryRedactFieldValue(message, field, generator,
2667                               /*insert_value_separator=*/true)) {
2668         break;
2669       }
2670       const FastFieldValuePrinter* printer = GetFieldPrinter(field);
2671       const Message& sub_message =
2672           field->is_repeated()
2673               ? (is_map ? *sorted_map_field[j]
2674                         : reflection->GetRepeatedMessage(message, field, j))
2675               : reflection->GetMessage(message, field);
2676       printer->PrintMessageStart(sub_message, field_index, count,
2677                                  single_line_mode_, generator);
2678       generator->Indent();
2679       if (!printer->PrintMessageContent(sub_message, field_index, count,
2680                                         single_line_mode_, generator)) {
2681         Print(sub_message, generator);
2682       }
2683       generator->Outdent();
2684       printer->PrintMessageEnd(sub_message, field_index, count,
2685                                single_line_mode_, generator);
2686     } else {
2687       generator->PrintMaybeWithMarker(MarkerToken(), ": ");
2688       // Write the field value.
2689       PrintFieldValue(message, reflection, field, field_index, generator);
2690       if (single_line_mode_) {
2691         generator->PrintLiteral(" ");
2692       } else {
2693         generator->PrintLiteral("\n");
2694       }
2695     }
2696   }
2697 
2698   if (need_release) {
2699     for (const Message* message_to_delete : sorted_map_field) {
2700       delete message_to_delete;
2701     }
2702   }
2703 }
2704 
PrintShortRepeatedField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,BaseTextGenerator * generator) const2705 void TextFormat::Printer::PrintShortRepeatedField(
2706     const Message& message, const Reflection* reflection,
2707     const FieldDescriptor* field, BaseTextGenerator* generator) const {
2708   // Print primitive repeated field in short form.
2709   int size = reflection->FieldSize(message, field);
2710   PrintFieldName(message, /*field_index=*/-1, /*field_count=*/size, reflection,
2711                  field, generator);
2712   generator->PrintMaybeWithMarker(MarkerToken(), ": ", "[");
2713   for (int i = 0; i < size; i++) {
2714     if (i > 0) generator->PrintLiteral(", ");
2715     PrintFieldValue(message, reflection, field, i, generator);
2716   }
2717   if (single_line_mode_) {
2718     generator->PrintLiteral("] ");
2719   } else {
2720     generator->PrintLiteral("]\n");
2721   }
2722 }
2723 
PrintFieldName(const Message & message,int field_index,int field_count,const Reflection * reflection,const FieldDescriptor * field,BaseTextGenerator * generator) const2724 void TextFormat::Printer::PrintFieldName(const Message& message,
2725                                          int field_index, int field_count,
2726                                          const Reflection* reflection,
2727                                          const FieldDescriptor* field,
2728                                          BaseTextGenerator* generator) const {
2729   // if use_field_number_ is true, prints field number instead
2730   // of field name.
2731   if (use_field_number_) {
2732     generator->PrintString(absl::StrCat(field->number()));
2733     return;
2734   }
2735 
2736   const FastFieldValuePrinter* printer = GetFieldPrinter(field);
2737   printer->PrintFieldName(message, field_index, field_count, reflection, field,
2738                           generator);
2739 }
2740 
PrintFieldValue(const Message & message,const Reflection * reflection,const FieldDescriptor * field,int index,BaseTextGenerator * generator) const2741 void TextFormat::Printer::PrintFieldValue(const Message& message,
2742                                           const Reflection* reflection,
2743                                           const FieldDescriptor* field,
2744                                           int index,
2745                                           BaseTextGenerator* generator) const {
2746   ABSL_DCHECK(field->is_repeated() || (index == -1))
2747       << "Index must be -1 for non-repeated fields";
2748 
2749   const FastFieldValuePrinter* printer = GetFieldPrinter(field);
2750   if (TryRedactFieldValue(message, field, generator,
2751                           /*insert_value_separator=*/false)) {
2752     return;
2753   }
2754 
2755   switch (field->cpp_type()) {
2756 #define OUTPUT_FIELD(CPPTYPE, METHOD)                                \
2757   case FieldDescriptor::CPPTYPE_##CPPTYPE:                           \
2758     printer->Print##METHOD(                                          \
2759         field->is_repeated()                                         \
2760             ? reflection->GetRepeated##METHOD(message, field, index) \
2761             : reflection->Get##METHOD(message, field),               \
2762         generator);                                                  \
2763     break
2764 
2765     OUTPUT_FIELD(INT32, Int32);
2766     OUTPUT_FIELD(INT64, Int64);
2767     OUTPUT_FIELD(UINT32, UInt32);
2768     OUTPUT_FIELD(UINT64, UInt64);
2769     OUTPUT_FIELD(FLOAT, Float);
2770     OUTPUT_FIELD(DOUBLE, Double);
2771     OUTPUT_FIELD(BOOL, Bool);
2772 #undef OUTPUT_FIELD
2773 
2774     case FieldDescriptor::CPPTYPE_STRING: {
2775       std::string scratch;
2776       const std::string& value =
2777           field->is_repeated()
2778               ? reflection->GetRepeatedStringReference(message, field, index,
2779                                                        &scratch)
2780               : reflection->GetStringReference(message, field, &scratch);
2781       const std::string* value_to_print = &value;
2782       std::string truncated_value;
2783       if (truncate_string_field_longer_than_ > 0 &&
2784           static_cast<size_t>(truncate_string_field_longer_than_) <
2785               value.size()) {
2786         truncated_value = value.substr(0, truncate_string_field_longer_than_) +
2787                           "...<truncated>...";
2788         value_to_print = &truncated_value;
2789       }
2790       if (field->type() == FieldDescriptor::TYPE_STRING) {
2791         printer->PrintString(*value_to_print, generator);
2792       } else {
2793         ABSL_DCHECK_EQ(field->type(), FieldDescriptor::TYPE_BYTES);
2794         printer->PrintBytes(*value_to_print, generator);
2795       }
2796       break;
2797     }
2798 
2799     case FieldDescriptor::CPPTYPE_ENUM: {
2800       int enum_value =
2801           field->is_repeated()
2802               ? reflection->GetRepeatedEnumValue(message, field, index)
2803               : reflection->GetEnumValue(message, field);
2804       const EnumValueDescriptor* enum_desc =
2805           field->enum_type()->FindValueByNumber(enum_value);
2806       if (enum_desc != nullptr) {
2807         printer->PrintEnum(enum_value, internal::NameOfEnumAsString(enum_desc),
2808                            generator);
2809       } else {
2810         // Ordinarily, enum_desc should not be null, because proto2 has the
2811         // invariant that set enum field values must be in-range, but with the
2812         // new integer-based API for enums (or the RepeatedField<int> loophole),
2813         // it is possible for the user to force an unknown integer value.  So we
2814         // simply use the integer value itself as the enum value name in this
2815         // case.
2816         printer->PrintEnum(enum_value, absl::StrCat(enum_value), generator);
2817       }
2818       break;
2819     }
2820 
2821     case FieldDescriptor::CPPTYPE_MESSAGE:
2822       Print(field->is_repeated()
2823                 ? reflection->GetRepeatedMessage(message, field, index)
2824                 : reflection->GetMessage(message, field),
2825             generator);
2826       break;
2827   }
2828 }
2829 
Print(const Message & message,io::ZeroCopyOutputStream * output)2830 /* static */ bool TextFormat::Print(const Message& message,
2831                                     io::ZeroCopyOutputStream* output) {
2832   return Printer().Print(message, output);
2833 }
2834 
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output)2835 /* static */ bool TextFormat::PrintUnknownFields(
2836     const UnknownFieldSet& unknown_fields, io::ZeroCopyOutputStream* output) {
2837   return Printer().PrintUnknownFields(unknown_fields, output);
2838 }
2839 
PrintToString(const Message & message,std::string * output)2840 /* static */ bool TextFormat::PrintToString(const Message& message,
2841                                             std::string* output) {
2842   auto printer = Printer();
2843   return printer.PrintToString(message, output);
2844 }
2845 
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,std::string * output)2846 /* static */ bool TextFormat::PrintUnknownFieldsToString(
2847     const UnknownFieldSet& unknown_fields, std::string* output) {
2848   return Printer().PrintUnknownFieldsToString(unknown_fields, output);
2849 }
2850 
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,std::string * output)2851 /* static */ void TextFormat::PrintFieldValueToString(
2852     const Message& message, const FieldDescriptor* field, int index,
2853     std::string* output) {
2854   return Printer().PrintFieldValueToString(message, field, index, output);
2855 }
2856 
ParseFieldValueFromString(absl::string_view input,const FieldDescriptor * field,Message * message)2857 /* static */ bool TextFormat::ParseFieldValueFromString(
2858     absl::string_view input, const FieldDescriptor* field, Message* message) {
2859   return Parser().ParseFieldValueFromString(input, field, message);
2860 }
2861 
2862 template <typename... T>
OutOfLinePrintString(BaseTextGenerator * generator,const T &...values)2863 PROTOBUF_NOINLINE void TextFormat::OutOfLinePrintString(
2864     BaseTextGenerator* generator, const T&... values) {
2865   generator->PrintString(absl::StrCat(values...));
2866 }
2867 
PrintUnknownFields(const UnknownFieldSet & unknown_fields,BaseTextGenerator * generator,int recursion_budget) const2868 void TextFormat::Printer::PrintUnknownFields(
2869     const UnknownFieldSet& unknown_fields, BaseTextGenerator* generator,
2870     int recursion_budget) const {
2871   for (int i = 0; i < unknown_fields.field_count(); i++) {
2872     const UnknownField& field = unknown_fields.field(i);
2873 
2874     switch (field.type()) {
2875       case UnknownField::TYPE_VARINT:
2876         OutOfLinePrintString(generator, field.number());
2877         generator->PrintMaybeWithMarker(MarkerToken(), ": ");
2878         if (redact_debug_string_) {
2879           OutOfLinePrintString(generator, "UNKNOWN_VARINT ");
2880           OutOfLinePrintString(generator, kFieldValueReplacement);
2881         } else {
2882           OutOfLinePrintString(generator, field.varint());
2883         }
2884         if (single_line_mode_) {
2885           generator->PrintLiteral(" ");
2886         } else {
2887           generator->PrintLiteral("\n");
2888         }
2889         break;
2890       case UnknownField::TYPE_FIXED32: {
2891         OutOfLinePrintString(generator, field.number());
2892         if (redact_debug_string_) {
2893           generator->PrintMaybeWithMarker(MarkerToken(), ": ",
2894                                           "UNKNOWN_FIXED32 ");
2895           OutOfLinePrintString(generator, kFieldValueReplacement);
2896         } else {
2897           generator->PrintMaybeWithMarker(MarkerToken(), ": ", "0x");
2898           OutOfLinePrintString(generator,
2899                                absl::Hex(field.fixed32(), absl::kZeroPad8));
2900         }
2901         if (single_line_mode_) {
2902           generator->PrintLiteral(" ");
2903         } else {
2904           generator->PrintLiteral("\n");
2905         }
2906         break;
2907       }
2908       case UnknownField::TYPE_FIXED64: {
2909         OutOfLinePrintString(generator, field.number());
2910         if (redact_debug_string_) {
2911           generator->PrintMaybeWithMarker(MarkerToken(), ": ",
2912                                           "UNKNOWN_FIXED64 ");
2913           OutOfLinePrintString(generator, kFieldValueReplacement);
2914         } else {
2915           generator->PrintMaybeWithMarker(MarkerToken(), ": ", "0x");
2916           OutOfLinePrintString(generator,
2917                                absl::Hex(field.fixed64(), absl::kZeroPad16));
2918         }
2919         if (single_line_mode_) {
2920           generator->PrintLiteral(" ");
2921         } else {
2922           generator->PrintLiteral("\n");
2923         }
2924         break;
2925       }
2926       case UnknownField::TYPE_LENGTH_DELIMITED: {
2927         OutOfLinePrintString(generator, field.number());
2928         const absl::string_view value = field.length_delimited();
2929         // We create a CodedInputStream so that we can adhere to our recursion
2930         // budget when we attempt to parse the data. UnknownFieldSet parsing is
2931         // recursive because of groups.
2932         io::CodedInputStream input_stream(
2933             reinterpret_cast<const uint8_t*>(value.data()), value.size());
2934         input_stream.SetRecursionLimit(recursion_budget);
2935         UnknownFieldSet embedded_unknown_fields;
2936         if (!value.empty() && recursion_budget > 0 &&
2937             embedded_unknown_fields.ParseFromCodedStream(&input_stream)) {
2938           // This field is parseable as a Message.
2939           // So it is probably an embedded message.
2940           if (redact_debug_string_) {
2941             generator->PrintMaybeWithMarker(MarkerToken(), ": ",
2942                                             "UNKNOWN_MESSAGE ");
2943             OutOfLinePrintString(generator, kFieldValueReplacement);
2944             if (single_line_mode_) {
2945               generator->PrintLiteral(" ");
2946             } else {
2947               generator->PrintLiteral("\n");
2948             }
2949             break;
2950           }
2951           if (single_line_mode_) {
2952             generator->PrintMaybeWithMarker(MarkerToken(), " ", "{ ");
2953           } else {
2954             generator->PrintMaybeWithMarker(MarkerToken(), " ", "{\n");
2955             generator->Indent();
2956           }
2957           PrintUnknownFields(embedded_unknown_fields, generator,
2958                              recursion_budget - 1);
2959           if (single_line_mode_) {
2960             generator->PrintLiteral("} ");
2961           } else {
2962             generator->Outdent();
2963             generator->PrintLiteral("}\n");
2964           }
2965         } else {
2966           // This field is not parseable as a Message (or we ran out of
2967           // recursion budget). So it is probably just a plain string.
2968           if (redact_debug_string_) {
2969             generator->PrintMaybeWithMarker(MarkerToken(), ": ",
2970                                             "UNKNOWN_STRING ");
2971             OutOfLinePrintString(generator, kFieldValueReplacement);
2972             if (single_line_mode_) {
2973               generator->PrintLiteral(" ");
2974             } else {
2975               generator->PrintLiteral("\n");
2976             }
2977             break;
2978           }
2979           generator->PrintMaybeWithMarker(MarkerToken(), ": ", "\"");
2980           generator->PrintString(absl::CEscape(value));
2981           if (single_line_mode_) {
2982             generator->PrintLiteral("\" ");
2983           } else {
2984             generator->PrintLiteral("\"\n");
2985           }
2986         }
2987         break;
2988       }
2989       case UnknownField::TYPE_GROUP:
2990         OutOfLinePrintString(generator, field.number());
2991         if (redact_debug_string_) {
2992           generator->PrintMaybeWithMarker(MarkerToken(), ": ",
2993                                           "UNKNOWN_GROUP ");
2994           OutOfLinePrintString(generator, kFieldValueReplacement);
2995           if (single_line_mode_) {
2996             generator->PrintLiteral(" ");
2997           } else {
2998             generator->PrintLiteral("\n");
2999           }
3000           break;
3001         }
3002         if (single_line_mode_) {
3003           generator->PrintMaybeWithMarker(MarkerToken(), " ", "{ ");
3004         } else {
3005           generator->PrintMaybeWithMarker(MarkerToken(), " ", "{\n");
3006           generator->Indent();
3007         }
3008         // For groups, we recurse without checking the budget. This is OK,
3009         // because if the groups were too deeply nested then we would have
3010         // already rejected the message when we originally parsed it.
3011         PrintUnknownFields(field.group(), generator, recursion_budget - 1);
3012         if (single_line_mode_) {
3013           generator->PrintLiteral("} ");
3014         } else {
3015           generator->Outdent();
3016           generator->PrintLiteral("}\n");
3017         }
3018         break;
3019     }
3020   }
3021 }
3022 
3023 namespace internal {
3024 
3025 // Check if the field is sensitive and should be redacted.
ShouldRedactField(const FieldDescriptor * field)3026 bool ShouldRedactField(const FieldDescriptor* field) {
3027   if (field->options().debug_redact()) return true;
3028   return false;
3029 }
3030 
3031 }  // namespace internal
3032 
TryRedactFieldValue(const Message & message,const FieldDescriptor * field,BaseTextGenerator * generator,bool insert_value_separator) const3033 bool TextFormat::Printer::TryRedactFieldValue(
3034     const Message& message, const FieldDescriptor* field,
3035     BaseTextGenerator* generator, bool insert_value_separator) const {
3036   if (internal::ShouldRedactField(field)) {
3037     if (redact_debug_string_) {
3038       IncrementRedactedFieldCounter();
3039       if (insert_value_separator) {
3040         generator->PrintMaybeWithMarker(MarkerToken(), ": ");
3041       }
3042       generator->PrintString(kFieldValueReplacement);
3043       if (insert_value_separator) {
3044         if (single_line_mode_) {
3045           generator->PrintLiteral(" ");
3046         } else {
3047           generator->PrintLiteral("\n");
3048         }
3049       }
3050       return true;
3051     }
3052   }
3053   return false;
3054 }
3055 
3056 }  // namespace protobuf
3057 }  // namespace google
3058 
3059 #include "google/protobuf/port_undef.inc"
3060