1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google LLC. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 // Author: jschorr@google.com (Joseph Schorr)
9 // Based on original Protocol Buffers design by
10 // Sanjay Ghemawat, Jeff Dean, and others.
11
12 #include "google/protobuf/text_format.h"
13
14 #include <stdio.h>
15
16 #include <algorithm>
17 #include <atomic>
18 #include <climits>
19 #include <cmath>
20 #include <cstddef>
21 #include <cstdint>
22 #include <limits>
23 #include <memory>
24 #include <string>
25 #include <utility>
26 #include <vector>
27
28 #include "absl/container/btree_set.h"
29 #include "absl/log/absl_check.h"
30 #include "absl/strings/ascii.h"
31 #include "absl/strings/cord.h"
32 #include "absl/strings/escaping.h"
33 #include "absl/strings/match.h"
34 #include "absl/strings/numbers.h"
35 #include "absl/strings/str_cat.h"
36 #include "absl/strings/str_format.h"
37 #include "absl/strings/str_join.h"
38 #include "absl/strings/string_view.h"
39 #include "google/protobuf/any.h"
40 #include "google/protobuf/descriptor.h"
41 #include "google/protobuf/descriptor.pb.h"
42 #include "google/protobuf/dynamic_message.h"
43 #include "google/protobuf/io/coded_stream.h"
44 #include "google/protobuf/io/strtod.h"
45 #include "google/protobuf/io/tokenizer.h"
46 #include "google/protobuf/io/zero_copy_stream.h"
47 #include "google/protobuf/io/zero_copy_stream_impl_lite.h"
48 #include "google/protobuf/map_field.h"
49 #include "google/protobuf/message.h"
50 #include "google/protobuf/reflection_mode.h"
51 #include "google/protobuf/repeated_field.h"
52 #include "google/protobuf/unknown_field_set.h"
53 #include "google/protobuf/wire_format_lite.h"
54 #include "utf8_validity.h"
55
56 // Must be included last.
57 #include "google/protobuf/port_def.inc"
58
59 namespace google {
60 namespace protobuf {
61
62 using internal::FieldReporterLevel;
63 using internal::ReflectionMode;
64 using internal::ScopedReflectionMode;
65
66 namespace {
67
68 const absl::string_view kFieldValueReplacement = "[REDACTED]";
69
// Returns true when `str` starts with a hexadecimal prefix ("0x" or "0X").
// Only the two-character prefix is inspected; the digits that follow are not
// validated here.
inline bool IsHexNumber(const std::string& str) {
  if (str.size() < 2 || str[0] != '0') {
    return false;
  }
  const char marker = str[1];
  return marker == 'x' || marker == 'X';
}
74
// Returns true when `str` looks like an octal literal: at least two
// characters, a leading '0', and an octal digit ('0'..'7') right after it.
// Characters beyond the second are not validated here.
inline bool IsOctNumber(const std::string& str) {
  if (str.size() < 2 || str[0] != '0') {
    return false;
  }
  const char digit = str[1];
  return '0' <= digit && digit <= '7';
}
79
// Running count of the fields that were redacted in AbslStringify.
std::atomic<int64_t> num_redacted_field{0};

// Bumps the redacted-field count by one. Relaxed ordering is used: the value
// is only a tally and is not used to publish other data.
inline void IncrementRedactedFieldCounter() {
  constexpr int64_t kOne = 1;
  num_redacted_field.fetch_add(kOne, std::memory_order_relaxed);
}
86
// Drops at most one trailing ' ' from `debug_string`, in place.
// Single line mode currently might leave an extra space at the end; an empty
// string or one that does not end in a space is left untouched.
inline void TrimTrailingSpace(std::string& debug_string) {
  if (debug_string.empty()) {
    return;
  }
  if (debug_string.back() != ' ') {
    return;
  }
  debug_string.pop_back();
}
93
94 } // namespace
95
96 namespace internal {
// Silent-marker value: the empty string.
// NOTE(review): presumably what is emitted when no visible marker is wanted;
// the use sites are outside this view — confirm.
const char kDebugStringSilentMarker[] = "";
// Silent-marker value used for detection: a tab followed by a space.
// NOTE(review): the consumer of this value is outside this view — confirm.
const char kDebugStringSilentMarkerForDetection[] = "\t ";

// Controls insertion of a marker making debug strings non-parseable, and
// redacting annotated fields in Protobuf's DebugString APIs.
// Message::DebugString(), ShortDebugString() and Utf8DebugString() read this
// flag with relaxed ordering; when it is true they delegate to
// StringifyMessage() with the safe format enabled. Starts out false.
PROTOBUF_EXPORT std::atomic<bool> enable_debug_string_safe_format{false};
103
GetRedactedFieldCount()104 int64_t GetRedactedFieldCount() {
105 return num_redacted_field.load(std::memory_order_relaxed);
106 }
107
// Output variants understood by StringifyMessage().
enum class Option {
  kNone,   // No extra printer setting.
  kShort,  // Single-line mode; a trailing space is trimmed from the result.
  kUTF8,   // UTF-8 string escaping.
};
109
StringifyMessage(const Message & message,Option option,FieldReporterLevel reporter_level,bool enable_safe_format)110 std::string StringifyMessage(const Message& message, Option option,
111 FieldReporterLevel reporter_level,
112 bool enable_safe_format) {
113 // Indicate all scoped reflection calls are from DebugString function.
114 ScopedReflectionMode scope(ReflectionMode::kDebugString);
115
116 TextFormat::Printer printer;
117 internal::FieldReporterLevel reporter = reporter_level;
118 switch (option) {
119 case Option::kShort:
120 printer.SetSingleLineMode(true);
121 break;
122 case Option::kUTF8:
123 printer.SetUseUtf8StringEscaping(true);
124 break;
125 case Option::kNone:
126 break;
127 }
128 printer.SetExpandAny(true);
129 printer.SetRedactDebugString(enable_safe_format);
130 printer.SetRandomizeDebugString(enable_safe_format);
131 printer.SetReportSensitiveFields(reporter);
132 std::string result;
133 printer.PrintToString(message, &result);
134
135 if (option == Option::kShort) {
136 TrimTrailingSpace(result);
137 }
138
139 return result;
140 }
141
StringifyMessage(const Message & message)142 PROTOBUF_EXPORT std::string StringifyMessage(const Message& message) {
143 return StringifyMessage(message, Option::kNone,
144 FieldReporterLevel::kAbslStringify, true);
145 }
146 } // namespace internal
147
DebugString() const148 std::string Message::DebugString() const {
149 bool enable_safe_format =
150 internal::enable_debug_string_safe_format.load(std::memory_order_relaxed);
151 if (enable_safe_format) {
152 return StringifyMessage(*this, internal::Option::kNone,
153 FieldReporterLevel::kDebugString, true);
154 }
155 // Indicate all scoped reflection calls are from DebugString function.
156 ScopedReflectionMode scope(ReflectionMode::kDebugString);
157 std::string debug_string;
158
159 TextFormat::Printer printer;
160 printer.SetExpandAny(true);
161 printer.SetInsertSilentMarker(true);
162 printer.SetReportSensitiveFields(FieldReporterLevel::kDebugString);
163
164 printer.PrintToString(*this, &debug_string);
165
166 return debug_string;
167 }
168
ShortDebugString() const169 std::string Message::ShortDebugString() const {
170 bool enable_safe_format =
171 internal::enable_debug_string_safe_format.load(std::memory_order_relaxed);
172 if (enable_safe_format) {
173 return StringifyMessage(*this, internal::Option::kShort,
174 FieldReporterLevel::kShortDebugString, true);
175 }
176 // Indicate all scoped reflection calls are from DebugString function.
177 ScopedReflectionMode scope(ReflectionMode::kDebugString);
178 std::string debug_string;
179
180 TextFormat::Printer printer;
181 printer.SetSingleLineMode(true);
182 printer.SetExpandAny(true);
183 printer.SetInsertSilentMarker(true);
184 printer.SetReportSensitiveFields(FieldReporterLevel::kShortDebugString);
185
186 printer.PrintToString(*this, &debug_string);
187 TrimTrailingSpace(debug_string);
188
189 return debug_string;
190 }
191
Utf8DebugString() const192 std::string Message::Utf8DebugString() const {
193 bool enable_safe_format =
194 internal::enable_debug_string_safe_format.load(std::memory_order_relaxed);
195 if (enable_safe_format) {
196 return StringifyMessage(*this, internal::Option::kUTF8,
197 FieldReporterLevel::kUtf8DebugString, true);
198 }
199 // Indicate all scoped reflection calls are from DebugString function.
200 ScopedReflectionMode scope(ReflectionMode::kDebugString);
201 std::string debug_string;
202
203 TextFormat::Printer printer;
204 printer.SetUseUtf8StringEscaping(true);
205 printer.SetExpandAny(true);
206 printer.SetInsertSilentMarker(true);
207 printer.SetReportSensitiveFields(FieldReporterLevel::kUtf8DebugString);
208
209 printer.PrintToString(*this, &debug_string);
210
211 return debug_string;
212 }
213
PrintDebugString() const214 void Message::PrintDebugString() const { printf("%s", DebugString().c_str()); }
215
ShortFormat(const Message & message)216 PROTOBUF_EXPORT std::string ShortFormat(const Message& message) {
217 return internal::StringifyMessage(message, internal::Option::kShort,
218 FieldReporterLevel::kShortFormat, true);
219 }
220
Utf8Format(const Message & message)221 PROTOBUF_EXPORT std::string Utf8Format(const Message& message) {
222 return internal::StringifyMessage(message, internal::Option::kUTF8,
223 FieldReporterLevel::kUtf8Format, true);
224 }
225
226
227 // ===========================================================================
228 // Implementation of the parse information tree class.
RecordLocation(const FieldDescriptor * field,TextFormat::ParseLocationRange range)229 void TextFormat::ParseInfoTree::RecordLocation(
230 const FieldDescriptor* field, TextFormat::ParseLocationRange range) {
231 locations_[field].push_back(range);
232 }
233
CreateNested(const FieldDescriptor * field)234 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::CreateNested(
235 const FieldDescriptor* field) {
236 // Owned by us in the map.
237 auto& vec = nested_[field];
238 vec.emplace_back(new TextFormat::ParseInfoTree());
239 return vec.back().get();
240 }
241
CheckFieldIndex(const FieldDescriptor * field,int index)242 void CheckFieldIndex(const FieldDescriptor* field, int index) {
243 if (field == nullptr) {
244 return;
245 }
246
247 if (field->is_repeated() && index == -1) {
248 ABSL_DLOG(FATAL) << "Index must be in range of repeated field values. "
249 << "Field: " << field->name();
250 } else if (!field->is_repeated() && index != -1) {
251 ABSL_DLOG(FATAL) << "Index must be -1 for singular fields."
252 << "Field: " << field->name();
253 }
254 }
255
GetLocationRange(const FieldDescriptor * field,int index) const256 TextFormat::ParseLocationRange TextFormat::ParseInfoTree::GetLocationRange(
257 const FieldDescriptor* field, int index) const {
258 CheckFieldIndex(field, index);
259 if (index == -1) {
260 index = 0;
261 }
262
263 auto it = locations_.find(field);
264 if (it == locations_.end() ||
265 index >= static_cast<int64_t>(it->second.size())) {
266 return TextFormat::ParseLocationRange();
267 }
268
269 return it->second[static_cast<size_t>(index)];
270 }
271
GetTreeForNested(const FieldDescriptor * field,int index) const272 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::GetTreeForNested(
273 const FieldDescriptor* field, int index) const {
274 CheckFieldIndex(field, index);
275 if (index == -1) {
276 index = 0;
277 }
278
279 auto it = nested_.find(field);
280 if (it == nested_.end() || index >= static_cast<int64_t>(it->second.size())) {
281 return nullptr;
282 }
283
284 return it->second[static_cast<size_t>(index)].get();
285 }
286
287 namespace {
288 // These functions implement the behavior of the "default" TextFormat::Finder,
289 // they are defined as standalone to be called when finder_ is nullptr.
DefaultFinderFindExtension(Message * message,const std::string & name)290 const FieldDescriptor* DefaultFinderFindExtension(Message* message,
291 const std::string& name) {
292 const Descriptor* descriptor = message->GetDescriptor();
293 return descriptor->file()->pool()->FindExtensionByPrintableName(descriptor,
294 name);
295 }
296
DefaultFinderFindExtensionByNumber(const Descriptor * descriptor,int number)297 const FieldDescriptor* DefaultFinderFindExtensionByNumber(
298 const Descriptor* descriptor, int number) {
299 return descriptor->file()->pool()->FindExtensionByNumber(descriptor, number);
300 }
301
DefaultFinderFindAnyType(const Message & message,const std::string & prefix,const std::string & name)302 const Descriptor* DefaultFinderFindAnyType(const Message& message,
303 const std::string& prefix,
304 const std::string& name) {
305 if (prefix != internal::kTypeGoogleApisComPrefix &&
306 prefix != internal::kTypeGoogleProdComPrefix) {
307 return nullptr;
308 }
309 return message.GetDescriptor()->file()->pool()->FindMessageTypeByName(name);
310 }
311 } // namespace
312
GetUnsetFieldId(const Message & message,const FieldDescriptor & fd)313 auto TextFormat::Parser::UnsetFieldsMetadata::GetUnsetFieldId(
314 const Message& message, const FieldDescriptor& fd) -> Id {
315 return {&message, &fd};
316 }
317
318 // ===========================================================================
// Internal class for parsing an ASCII representation of a Protocol Message.
// This class makes use of the Protocol Message compiler's tokenizer found
// in //third_party/protobuf/io/tokenizer.h. Note that this class's Parse
// method is *not* thread-safe and should only be used in a single thread at
// a time.
324
// Makes code slightly more readable.  The meaning of "DO(foo)" is
// "Execute foo and fail if it fails.", where failure is indicated by
// returning false.  Borrowed from parser.cc (Thanks Kenton!).
//
// The macro expands to a complete if/else statement whose else-branch
// executes `return false;` in the enclosing function, so it is only usable
// inside functions whose return type accepts `false`. Any cleanup written
// after a DO(...) is skipped when the wrapped expression fails.
#define DO(STATEMENT) \
  if (STATEMENT) {    \
  } else {            \
    return false;     \
  }
333
334 class TextFormat::Parser::ParserImpl {
335 public:
// Policy for input that assigns a non-repeated field, or members of one
// oneof, more than once -- e.g. "foo: 1 foo: 2" for a required/optional
// field named "foo", or "baz: 1 bar: 2" where "baz" and "bar" belong to the
// same oneof.
enum SingularOverwritePolicy {
  // Repeats are accepted; the last value is retained.
  ALLOW_SINGULAR_OVERWRITES = 0,
  // Repeats are rejected; an error is issued.
  FORBID_SINGULAR_OVERWRITES = 1,
};
344
// Sets up a parser over `input_stream`.
//
// Every argument except `input_stream` and `allow_relaxed_whitespace` is
// stored in the member of the same name. `input_stream` is handed to the
// tokenizer, whose errors are routed through `tokenizer_error_collector_`
// (constructed with a pointer to this object). `recursion_limit` seeds both
// the remaining budget (`recursion_limit_`) and the value reported in error
// messages (`initial_recursion_limit_`).
//
// The constructor already advances the tokenizer once, so the first token is
// current when Parse()/ParseField() is called.
ParserImpl(const Descriptor* root_message_type,
           io::ZeroCopyInputStream* input_stream,
           io::ErrorCollector* error_collector,
           const TextFormat::Finder* finder, ParseInfoTree* parse_info_tree,
           SingularOverwritePolicy singular_overwrite_policy,
           bool allow_case_insensitive_field, bool allow_unknown_field,
           bool allow_unknown_extension, bool allow_unknown_enum,
           bool allow_field_number, bool allow_relaxed_whitespace,
           bool allow_partial, int recursion_limit,
           UnsetFieldsMetadata* no_op_fields)
    : error_collector_(error_collector),
      finder_(finder),
      parse_info_tree_(parse_info_tree),
      tokenizer_error_collector_(this),
      tokenizer_(input_stream, &tokenizer_error_collector_),
      root_message_type_(root_message_type),
      singular_overwrite_policy_(singular_overwrite_policy),
      allow_case_insensitive_field_(allow_case_insensitive_field),
      allow_unknown_field_(allow_unknown_field),
      allow_unknown_extension_(allow_unknown_extension),
      allow_unknown_enum_(allow_unknown_enum),
      allow_field_number_(allow_field_number),
      allow_partial_(allow_partial),
      initial_recursion_limit_(recursion_limit),
      recursion_limit_(recursion_limit),
      had_silent_marker_(false),
      had_errors_(false),
      no_op_fields_(no_op_fields) {
  // For backwards-compatibility with proto1, we need to allow the 'f' suffix
  // for floats.
  tokenizer_.set_allow_f_after_float(true);

  // '#' starts a comment.
  tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);

  // Relaxed whitespace: numbers need not be followed by a space and string
  // literals may span several lines.
  if (allow_relaxed_whitespace) {
    tokenizer_.set_require_space_after_number(false);
    tokenizer_.set_allow_multiline_strings(true);
  }

  // Consume the starting token.
  tokenizer_.Next();
}
388 ParserImpl(const ParserImpl&) = delete;
389 ParserImpl& operator=(const ParserImpl&) = delete;
~ParserImpl()390 ~ParserImpl() {}
391
392 // Parses the ASCII representation specified in input and saves the
393 // information into the output pointer (a Message). Returns
394 // false if an error occurs (an error will also be logged to
395 // ABSL_LOG(ERROR)).
Parse(Message * output)396 bool Parse(Message* output) {
397 // Consume fields until we cannot do so anymore.
398 while (true) {
399 if (LookingAtType(io::Tokenizer::TYPE_END)) {
400 // Ensures recursion limit properly unwinded, but only for success
401 // cases. This implicitly avoids the check when `Parse` returns false
402 // via `DO(...)`.
403 ABSL_DCHECK(had_errors_ || recursion_limit_ == initial_recursion_limit_)
404 << "Recursion limit at end of parse should be "
405 << initial_recursion_limit_ << ", but was " << recursion_limit_
406 << ". Difference of " << initial_recursion_limit_ - recursion_limit_
407 << " stack frames not accounted for stack unwind.";
408
409 return !had_errors_;
410 }
411
412 DO(ConsumeField(output));
413 }
414 }
415
ParseField(const FieldDescriptor * field,Message * output)416 bool ParseField(const FieldDescriptor* field, Message* output) {
417 bool suc;
418 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
419 suc = ConsumeFieldMessage(output, output->GetReflection(), field);
420 } else {
421 suc = ConsumeFieldValue(output, output->GetReflection(), field);
422 }
423 return suc && LookingAtType(io::Tokenizer::TYPE_END);
424 }
425
ReportError(int line,int col,absl::string_view message)426 void ReportError(int line, int col, absl::string_view message) {
427 had_errors_ = true;
428 if (error_collector_ == nullptr) {
429 if (line >= 0) {
430 ABSL_LOG(ERROR) << "Error parsing text-format "
431 << root_message_type_->full_name() << ": " << (line + 1)
432 << ":" << (col + 1) << ": " << message;
433 } else {
434 ABSL_LOG(ERROR) << "Error parsing text-format "
435 << root_message_type_->full_name() << ": " << message;
436 }
437 } else {
438 error_collector_->RecordError(line, col, message);
439 }
440 }
441
ReportWarning(int line,int col,const absl::string_view message)442 void ReportWarning(int line, int col, const absl::string_view message) {
443 if (error_collector_ == nullptr) {
444 if (line >= 0) {
445 ABSL_LOG_EVERY_POW_2(WARNING)
446 << "Warning parsing text-format " << root_message_type_->full_name()
447 << ": " << (line + 1) << ":" << (col + 1) << " (N = " << COUNTER
448 << "): " << message;
449 } else {
450 ABSL_LOG_EVERY_POW_2(WARNING)
451 << "Warning parsing text-format " << root_message_type_->full_name()
452 << " (N = " << COUNTER << "): " << message;
453 }
454 } else {
455 error_collector_->RecordWarning(line, col, message);
456 }
457 }
458
459 private:
// Extreme values of the fixed-width integer types, used as bounds when
// consuming integers.
static constexpr auto kint32max = std::numeric_limits<int32_t>::max();
static constexpr auto kuint32max = std::numeric_limits<uint32_t>::max();
static constexpr auto kint64min = std::numeric_limits<int64_t>::min();
static constexpr auto kint64max = std::numeric_limits<int64_t>::max();
static constexpr auto kuint64max = std::numeric_limits<uint64_t>::max();
465
466 // Reports an error with the given message with information indicating
467 // the position (as derived from the current token).
ReportError(absl::string_view message)468 void ReportError(absl::string_view message) {
469 ReportError(tokenizer_.current().line, tokenizer_.current().column,
470 message);
471 }
472
473 // Reports a warning with the given message with information indicating
474 // the position (as derived from the current token).
ReportWarning(absl::string_view message)475 void ReportWarning(absl::string_view message) {
476 ReportWarning(tokenizer_.current().line, tokenizer_.current().column,
477 message);
478 }
479
480 // Consumes the specified message with the given starting delimiter.
481 // This method checks to see that the end delimiter at the conclusion of
482 // the consumption matches the starting delimiter passed in here.
ConsumeMessage(Message * message,const std::string delimiter)483 bool ConsumeMessage(Message* message, const std::string delimiter) {
484 while (!LookingAt(">") && !LookingAt("}")) {
485 DO(ConsumeField(message));
486 }
487
488 // Confirm that we have a valid ending delimiter.
489 DO(Consume(delimiter));
490 return true;
491 }
492
493 // Consume either "<" or "{".
ConsumeMessageDelimiter(std::string * delimiter)494 bool ConsumeMessageDelimiter(std::string* delimiter) {
495 if (TryConsume("<")) {
496 *delimiter = ">";
497 } else {
498 DO(Consume("{"));
499 *delimiter = "}";
500 }
501 return true;
502 }
503
504 // Consumes the current field (as returned by the tokenizer) on the
505 // passed in message.
ConsumeField(Message * message)506 bool ConsumeField(Message* message) {
507 const Reflection* reflection = message->GetReflection();
508 const Descriptor* descriptor = message->GetDescriptor();
509
510 std::string field_name;
511 bool reserved_field = false;
512 const FieldDescriptor* field = nullptr;
513 int start_line = tokenizer_.current().line;
514 int start_column = tokenizer_.current().column;
515
516 const FieldDescriptor* any_type_url_field;
517 const FieldDescriptor* any_value_field;
518 if (internal::GetAnyFieldDescriptors(*message, &any_type_url_field,
519 &any_value_field) &&
520 TryConsume("[")) {
521 std::string full_type_name, prefix;
522 DO(ConsumeAnyTypeUrl(&full_type_name, &prefix));
523 std::string prefix_and_full_type_name =
524 absl::StrCat(prefix, full_type_name);
525 DO(ConsumeBeforeWhitespace("]"));
526 TryConsumeWhitespace();
527 // ':' is optional between message labels and values.
528 if (TryConsumeBeforeWhitespace(":")) {
529 TryConsumeWhitespace();
530 }
531 std::string serialized_value;
532 const Descriptor* value_descriptor =
533 finder_ ? finder_->FindAnyType(*message, prefix, full_type_name)
534 : DefaultFinderFindAnyType(*message, prefix, full_type_name);
535 if (value_descriptor == nullptr) {
536 ReportError(absl::StrCat("Could not find type \"",
537 prefix_and_full_type_name,
538 "\" stored in google.protobuf.Any."));
539 return false;
540 }
541 DO(ConsumeAnyValue(value_descriptor, &serialized_value));
542 if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
543 // Fail if any_type_url_field has already been specified.
544 if ((!any_type_url_field->is_repeated() &&
545 reflection->HasField(*message, any_type_url_field)) ||
546 (!any_value_field->is_repeated() &&
547 reflection->HasField(*message, any_value_field))) {
548 ReportError("Non-repeated Any specified multiple times.");
549 return false;
550 }
551 }
552 reflection->SetString(message, any_type_url_field,
553 std::move(prefix_and_full_type_name));
554 reflection->SetString(message, any_value_field,
555 std::move(serialized_value));
556 return true;
557 }
558 if (TryConsume("[")) {
559 // Extension.
560 DO(ConsumeFullTypeName(&field_name));
561 DO(ConsumeBeforeWhitespace("]"));
562 TryConsumeWhitespace();
563
564 field = finder_ ? finder_->FindExtension(message, field_name)
565 : DefaultFinderFindExtension(message, field_name);
566
567 if (field == nullptr) {
568 if (!allow_unknown_field_ && !allow_unknown_extension_) {
569 ReportError(absl::StrCat("Extension \"", field_name,
570 "\" is not defined or "
571 "is not an extension of \"",
572 descriptor->full_name(), "\"."));
573 return false;
574 } else {
575 ReportWarning(absl::StrCat(
576 "Ignoring extension \"", field_name,
577 "\" which is not defined or is not an extension of \"",
578 descriptor->full_name(), "\"."));
579 }
580 }
581 } else {
582 DO(ConsumeIdentifierBeforeWhitespace(&field_name));
583 TryConsumeWhitespace();
584
585 int32_t field_number;
586 if (allow_field_number_ && absl::SimpleAtoi(field_name, &field_number)) {
587 if (descriptor->IsExtensionNumber(field_number)) {
588 field = finder_
589 ? finder_->FindExtensionByNumber(descriptor, field_number)
590 : DefaultFinderFindExtensionByNumber(descriptor,
591 field_number);
592 } else if (descriptor->IsReservedNumber(field_number)) {
593 reserved_field = true;
594 } else {
595 field = descriptor->FindFieldByNumber(field_number);
596 }
597 } else {
598 field = descriptor->FindFieldByName(field_name);
599 // Group-like delimited fields will accept both the capitalized type
600 // names as well.
601 if (field == nullptr) {
602 std::string lower_field_name = field_name;
603 absl::AsciiStrToLower(&lower_field_name);
604 field = descriptor->FindFieldByName(lower_field_name);
605 // If the case-insensitive match worked but the field is NOT a group,
606 if (field != nullptr && !internal::cpp::IsGroupLike(*field)) {
607 field = nullptr;
608 }
609 if (field != nullptr && field->message_type()->name() != field_name) {
610 field = nullptr;
611 }
612 }
613
614 if (field == nullptr && allow_case_insensitive_field_) {
615 std::string lower_field_name = field_name;
616 absl::AsciiStrToLower(&lower_field_name);
617 field = descriptor->FindFieldByLowercaseName(lower_field_name);
618 }
619
620 if (field == nullptr) {
621 reserved_field = descriptor->IsReservedName(field_name);
622 }
623 }
624 if (field == nullptr && !reserved_field) {
625 if (!allow_unknown_field_) {
626 ReportError(absl::StrCat("Message type \"", descriptor->full_name(),
627 "\" has no field named \"", field_name,
628 "\"."));
629 return false;
630 } else {
631 ReportWarning(absl::StrCat("Message type \"", descriptor->full_name(),
632 "\" has no field named \"", field_name,
633 "\"."));
634 }
635 }
636 }
637
638 // Skips unknown or reserved fields.
639 if (field == nullptr) {
640 ABSL_CHECK(allow_unknown_field_ || allow_unknown_extension_ ||
641 reserved_field);
642
643 // Try to guess the type of this field.
644 // If this field is not a message, there should be a ":" between the
645 // field name and the field value and also the field value should not
646 // start with "{" or "<" which indicates the beginning of a message body.
647 // If there is no ":" or there is a "{" or "<" after ":", this field has
648 // to be a message or the input is ill-formed.
649 if (TryConsumeBeforeWhitespace(":")) {
650 TryConsumeWhitespace();
651 if (!LookingAt("{") && !LookingAt("<")) {
652 return SkipFieldValue();
653 }
654 }
655 return SkipFieldMessage();
656 }
657
658 if (field->options().deprecated()) {
659 ReportWarning(absl::StrCat("text format contains deprecated field \"",
660 field_name, "\""));
661 }
662
663 if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
664 // Fail if the field is not repeated and it has already been specified.
665 if (!field->is_repeated() && reflection->HasField(*message, field)) {
666 ReportError(absl::StrCat("Non-repeated field \"", field_name,
667 "\" is specified multiple times."));
668 return false;
669 }
670 // Fail if the field is a member of a oneof and another member has already
671 // been specified.
672 const OneofDescriptor* oneof = field->containing_oneof();
673 if (oneof != nullptr && reflection->HasOneof(*message, oneof)) {
674 const FieldDescriptor* other_field =
675 reflection->GetOneofFieldDescriptor(*message, oneof);
676 ReportError(absl::StrCat("Field \"", field_name,
677 "\" is specified along with "
678 "field \"",
679 other_field->name(),
680 "\", another member "
681 "of oneof \"",
682 oneof->name(), "\"."));
683 return false;
684 }
685 }
686
687 // Perform special handling for embedded message types.
688 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
689 // ':' is optional here.
690 bool consumed_semicolon = TryConsumeBeforeWhitespace(":");
691 if (consumed_semicolon) {
692 TryConsumeWhitespace();
693 }
694 if (consumed_semicolon && field->options().weak() &&
695 LookingAtType(io::Tokenizer::TYPE_STRING)) {
696 // we are getting a bytes string for a weak field.
697 std::string tmp;
698 DO(ConsumeString(&tmp));
699 MessageFactory* factory =
700 finder_ ? finder_->FindExtensionFactory(field) : nullptr;
701 reflection->MutableMessage(message, field, factory)
702 ->ParseFromString(tmp);
703 goto label_skip_parsing;
704 }
705 } else {
706 // ':' is required here.
707 DO(ConsumeBeforeWhitespace(":"));
708 TryConsumeWhitespace();
709 }
710
711 if (field->is_repeated() && TryConsume("[")) {
712 // Short repeated format, e.g. "foo: [1, 2, 3]".
713 if (!TryConsume("]")) {
714 // "foo: []" is treated as empty.
715 while (true) {
716 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
717 // Perform special handling for embedded message types.
718 DO(ConsumeFieldMessage(message, reflection, field));
719 } else {
720 DO(ConsumeFieldValue(message, reflection, field));
721 }
722 if (TryConsume("]")) {
723 break;
724 }
725 DO(Consume(","));
726 }
727 }
728 } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
729 DO(ConsumeFieldMessage(message, reflection, field));
730 } else {
731 DO(ConsumeFieldValue(message, reflection, field));
732 }
733 label_skip_parsing:
734 // For historical reasons, fields may optionally be separated by commas or
735 // semicolons.
736 TryConsume(";") || TryConsume(",");
737
738 // If a parse info tree exists, add the location for the parsed
739 // field.
740 if (parse_info_tree_ != nullptr) {
741 int end_line = tokenizer_.previous().line;
742 int end_column = tokenizer_.previous().end_column;
743
744 RecordLocation(parse_info_tree_, field,
745 ParseLocationRange(ParseLocation(start_line, start_column),
746 ParseLocation(end_line, end_column)));
747 }
748
749 return true;
750 }
751
752 // Skips the next field including the field's name and value.
SkipField()753 bool SkipField() {
754 std::string field_name;
755 if (TryConsume("[")) {
756 // Extension name or type URL.
757 DO(ConsumeTypeUrlOrFullTypeName(&field_name));
758 DO(ConsumeBeforeWhitespace("]"));
759 } else {
760 DO(ConsumeIdentifierBeforeWhitespace(&field_name));
761 }
762 TryConsumeWhitespace();
763
764 // Try to guess the type of this field.
765 // If this field is not a message, there should be a ":" between the
766 // field name and the field value and also the field value should not
767 // start with "{" or "<" which indicates the beginning of a message body.
768 // If there is no ":" or there is a "{" or "<" after ":", this field has
769 // to be a message or the input is ill-formed.
770 if (TryConsumeBeforeWhitespace(":")) {
771 TryConsumeWhitespace();
772 if (!LookingAt("{") && !LookingAt("<")) {
773 DO(SkipFieldValue());
774 } else {
775 DO(SkipFieldMessage());
776 }
777 } else {
778 DO(SkipFieldMessage());
779 }
780 // For historical reasons, fields may optionally be separated by commas or
781 // semicolons.
782 TryConsume(";") || TryConsume(",");
783 return true;
784 }
785
ConsumeFieldMessage(Message * message,const Reflection * reflection,const FieldDescriptor * field)786 bool ConsumeFieldMessage(Message* message, const Reflection* reflection,
787 const FieldDescriptor* field) {
788 if (--recursion_limit_ < 0) {
789 ReportError(
790 absl::StrCat("Message is too deep, the parser exceeded the "
791 "configured recursion limit of ",
792 initial_recursion_limit_, "."));
793 return false;
794 }
795 // If the parse information tree is not nullptr, create a nested one
796 // for the nested message.
797 ParseInfoTree* parent = parse_info_tree_;
798 if (parent != nullptr) {
799 parse_info_tree_ = CreateNested(parent, field);
800 }
801
802 std::string delimiter;
803 DO(ConsumeMessageDelimiter(&delimiter));
804 MessageFactory* factory =
805 finder_ ? finder_->FindExtensionFactory(field) : nullptr;
806 if (field->is_repeated()) {
807 DO(ConsumeMessage(reflection->AddMessage(message, field, factory),
808 delimiter));
809 } else {
810 DO(ConsumeMessage(reflection->MutableMessage(message, field, factory),
811 delimiter));
812 }
813
814 ++recursion_limit_;
815
816 // Reset the parse information tree.
817 parse_info_tree_ = parent;
818 return true;
819 }
820
821 // Skips the whole body of a message including the beginning delimiter and
822 // the ending delimiter.
SkipFieldMessage()823 bool SkipFieldMessage() {
824 if (--recursion_limit_ < 0) {
825 ReportError(
826 absl::StrCat("Message is too deep, the parser exceeded the "
827 "configured recursion limit of ",
828 initial_recursion_limit_, "."));
829 return false;
830 }
831
832 std::string delimiter;
833 DO(ConsumeMessageDelimiter(&delimiter));
834 while (!LookingAt(">") && !LookingAt("}")) {
835 DO(SkipField());
836 }
837 DO(Consume(delimiter));
838
839 ++recursion_limit_;
840 return true;
841 }
842
ConsumeFieldValue(Message * message,const Reflection * reflection,const FieldDescriptor * field)843 bool ConsumeFieldValue(Message* message, const Reflection* reflection,
844 const FieldDescriptor* field) {
845 // Define an easy to use macro for setting fields. This macro checks
846 // to see if the field is repeated (in which case we need to use the Add
847 // methods or not (in which case we need to use the Set methods).
848 // When checking for no-op operations, We verify that both the existing value in
849 // the message and the new value are the default. If the existing field value is
850 // not the default, setting it to the default should not be treated as a no-op.
851 // The pointer of this is kept in no_op_fields_ for bookkeeping.
852 #define SET_FIELD(CPPTYPE, CPPTYPELCASE, VALUE) \
853 if (field->is_repeated()) { \
854 reflection->Add##CPPTYPE(message, field, VALUE); \
855 } else { \
856 if (no_op_fields_ && !field->has_presence() && \
857 field->default_value_##CPPTYPELCASE() == \
858 reflection->Get##CPPTYPE(*message, field) && \
859 field->default_value_##CPPTYPELCASE() == VALUE) { \
860 no_op_fields_->ids_.insert( \
861 UnsetFieldsMetadata::GetUnsetFieldId(*message, *field)); \
862 } else { \
863 reflection->Set##CPPTYPE(message, field, std::move(VALUE)); \
864 } \
865 }
866
867 switch (field->cpp_type()) {
868 case FieldDescriptor::CPPTYPE_INT32: {
869 int64_t value;
870 DO(ConsumeSignedInteger(&value, kint32max));
871 SET_FIELD(Int32, int32, static_cast<int32_t>(value));
872 break;
873 }
874
875 case FieldDescriptor::CPPTYPE_UINT32: {
876 uint64_t value;
877 DO(ConsumeUnsignedInteger(&value, kuint32max));
878 SET_FIELD(UInt32, uint32, static_cast<uint32_t>(value));
879 break;
880 }
881
882 case FieldDescriptor::CPPTYPE_INT64: {
883 int64_t value;
884 DO(ConsumeSignedInteger(&value, kint64max));
885 SET_FIELD(Int64, int64, value);
886 break;
887 }
888
889 case FieldDescriptor::CPPTYPE_UINT64: {
890 uint64_t value;
891 DO(ConsumeUnsignedInteger(&value, kuint64max));
892 SET_FIELD(UInt64, uint64, value);
893 break;
894 }
895
896 case FieldDescriptor::CPPTYPE_FLOAT: {
897 double value;
898 DO(ConsumeDouble(&value));
899 SET_FIELD(Float, float, io::SafeDoubleToFloat(value));
900 break;
901 }
902
903 case FieldDescriptor::CPPTYPE_DOUBLE: {
904 double value;
905 DO(ConsumeDouble(&value));
906 SET_FIELD(Double, double, value);
907 break;
908 }
909
910 case FieldDescriptor::CPPTYPE_STRING: {
911 std::string value;
912 DO(ConsumeString(&value));
913 SET_FIELD(String, string, std::move(value));
914 break;
915 }
916
917 case FieldDescriptor::CPPTYPE_BOOL: {
918 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
919 uint64_t value;
920 DO(ConsumeUnsignedInteger(&value, 1));
921 SET_FIELD(Bool, bool, static_cast<bool>(value));
922 } else {
923 std::string value;
924 DO(ConsumeIdentifier(&value));
925 if (value == "true" || value == "True" || value == "t") {
926 SET_FIELD(Bool, bool, true);
927 } else if (value == "false" || value == "False" || value == "f") {
928 SET_FIELD(Bool, bool, false);
929 } else {
930 ReportError(absl::StrCat("Invalid value for boolean field \"",
931 field->name(), "\". Value: \"", value,
932 "\"."));
933 return false;
934 }
935 }
936 break;
937 }
938
939 case FieldDescriptor::CPPTYPE_ENUM: {
940 std::string value;
941 int64_t int_value = kint64max;
942 const EnumDescriptor* enum_type = field->enum_type();
943 const EnumValueDescriptor* enum_value = nullptr;
944
945 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
946 DO(ConsumeIdentifier(&value));
947 // Find the enumeration value.
948 enum_value = enum_type->FindValueByName(value);
949
950 } else if (LookingAt("-") ||
951 LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
952 DO(ConsumeSignedInteger(&int_value, kint32max));
953 value = absl::StrCat(int_value); // for error reporting
954 enum_value = enum_type->FindValueByNumber(int_value);
955 } else {
956 ReportError(absl::StrCat("Expected integer or identifier, got: ",
957 tokenizer_.current().text));
958 return false;
959 }
960
961 if (enum_value == nullptr) {
962 if (int_value != kint64max &&
963 !field->legacy_enum_field_treated_as_closed()) {
964 SET_FIELD(EnumValue, int64, int_value);
965 return true;
966 } else if (!allow_unknown_enum_) {
967 ReportError(absl::StrCat("Unknown enumeration value of \"", value,
968 "\" for field \"", field->name(), "\"."));
969 return false;
970 } else {
971 ReportWarning(absl::StrCat("Unknown enumeration value of \"", value,
972 "\" for field \"", field->name(),
973 "\"."));
974 return true;
975 }
976 }
977
978 SET_FIELD(Enum, enum, enum_value);
979 break;
980 }
981
982 case FieldDescriptor::CPPTYPE_MESSAGE: {
983 // We should never get here. Put here instead of a default
984 // so that if new types are added, we get a nice compiler warning.
985 ABSL_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
986 break;
987 }
988 }
989 #undef SET_FIELD
990 return true;
991 }
992
SkipFieldValue()993 bool SkipFieldValue() {
994 if (--recursion_limit_ < 0) {
995 ReportError(
996 absl::StrCat("Message is too deep, the parser exceeded the "
997 "configured recursion limit of ",
998 initial_recursion_limit_, "."));
999 return false;
1000 }
1001
1002 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1003 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1004 tokenizer_.Next();
1005 }
1006 ++recursion_limit_;
1007 return true;
1008 }
1009 if (TryConsume("[")) {
1010 if (!TryConsume("]")) {
1011 while (true) {
1012 if (!LookingAt("{") && !LookingAt("<")) {
1013 DO(SkipFieldValue());
1014 } else {
1015 DO(SkipFieldMessage());
1016 }
1017 if (TryConsume("]")) {
1018 break;
1019 }
1020 DO(Consume(","));
1021 }
1022 }
1023 ++recursion_limit_;
1024 return true;
1025 }
1026 // Possible field values other than string:
1027 // 12345 => TYPE_INTEGER
1028 // -12345 => TYPE_SYMBOL + TYPE_INTEGER
1029 // 1.2345 => TYPE_FLOAT
1030 // -1.2345 => TYPE_SYMBOL + TYPE_FLOAT
1031 // inf => TYPE_IDENTIFIER
1032 // -inf => TYPE_SYMBOL + TYPE_IDENTIFIER
1033 // TYPE_INTEGER => TYPE_IDENTIFIER
1034 // Divides them into two group, one with TYPE_SYMBOL
1035 // and the other without:
1036 // Group one:
1037 // 12345 => TYPE_INTEGER
1038 // 1.2345 => TYPE_FLOAT
1039 // inf => TYPE_IDENTIFIER
1040 // TYPE_INTEGER => TYPE_IDENTIFIER
1041 // Group two:
1042 // -12345 => TYPE_SYMBOL + TYPE_INTEGER
1043 // -1.2345 => TYPE_SYMBOL + TYPE_FLOAT
1044 // -inf => TYPE_SYMBOL + TYPE_IDENTIFIER
1045 // As we can see, the field value consists of an optional '-' and one of
1046 // TYPE_INTEGER, TYPE_FLOAT and TYPE_IDENTIFIER.
1047 bool has_minus = TryConsume("-");
1048 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
1049 !LookingAtType(io::Tokenizer::TYPE_FLOAT) &&
1050 !LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1051 std::string text = tokenizer_.current().text;
1052 ReportError(
1053 absl::StrCat("Cannot skip field value, unexpected token: ", text));
1054 ++recursion_limit_;
1055 return false;
1056 }
1057 // Combination of '-' and TYPE_IDENTIFIER may result in an invalid field
1058 // value while other combinations all generate valid values.
1059 // We check if the value of this combination is valid here.
1060 // TYPE_IDENTIFIER after a '-' should be one of the float values listed
1061 // below:
1062 // inf, inff, infinity, nan
1063 if (has_minus && LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1064 std::string text = tokenizer_.current().text;
1065 absl::AsciiStrToLower(&text);
1066 if (text != "inf" &&
1067 text != "infinity" && text != "nan") {
1068 ReportError(absl::StrCat("Invalid float number: ", text));
1069 ++recursion_limit_;
1070 return false;
1071 }
1072 }
1073 tokenizer_.Next();
1074 ++recursion_limit_;
1075 return true;
1076 }
1077
1078 // Returns true if the current token's text is equal to that specified.
LookingAt(const std::string & text)1079 bool LookingAt(const std::string& text) {
1080 return tokenizer_.current().text == text;
1081 }
1082
1083 // Returns true if the current token's type is equal to that specified.
LookingAtType(io::Tokenizer::TokenType token_type)1084 bool LookingAtType(io::Tokenizer::TokenType token_type) {
1085 return tokenizer_.current().type == token_type;
1086 }
1087
1088 // Consumes an identifier and saves its value in the identifier parameter.
1089 // Returns false if the token is not of type IDENTIFIER.
ConsumeIdentifier(std::string * identifier)1090 bool ConsumeIdentifier(std::string* identifier) {
1091 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1092 *identifier = tokenizer_.current().text;
1093 tokenizer_.Next();
1094 return true;
1095 }
1096
1097 // If allow_field_number_ or allow_unknown_field_ is true, we should able
1098 // to parse integer identifiers.
1099 if ((allow_field_number_ || allow_unknown_field_ ||
1100 allow_unknown_extension_) &&
1101 LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
1102 *identifier = tokenizer_.current().text;
1103 tokenizer_.Next();
1104 return true;
1105 }
1106
1107 ReportError(
1108 absl::StrCat("Expected identifier, got: ", tokenizer_.current().text));
1109 return false;
1110 }
1111
1112 // Similar to `ConsumeIdentifier`, but any following whitespace token may
1113 // be reported.
ConsumeIdentifierBeforeWhitespace(std::string * identifier)1114 bool ConsumeIdentifierBeforeWhitespace(std::string* identifier) {
1115 tokenizer_.set_report_whitespace(true);
1116 bool result = ConsumeIdentifier(identifier);
1117 tokenizer_.set_report_whitespace(false);
1118 return result;
1119 }
1120
1121 // Consume a string of form "<id1>.<id2>....<idN>".
ConsumeFullTypeName(std::string * name)1122 bool ConsumeFullTypeName(std::string* name) {
1123 DO(ConsumeIdentifier(name));
1124 while (TryConsume(".")) {
1125 std::string part;
1126 DO(ConsumeIdentifier(&part));
1127 absl::StrAppend(name, ".", part);
1128 }
1129 return true;
1130 }
1131
ConsumeTypeUrlOrFullTypeName(std::string * name)1132 bool ConsumeTypeUrlOrFullTypeName(std::string* name) {
1133 DO(ConsumeIdentifier(name));
1134 while (true) {
1135 std::string connector;
1136 if (TryConsume(".")) {
1137 connector = ".";
1138 } else if (TryConsume("/")) {
1139 connector = "/";
1140 } else {
1141 break;
1142 }
1143 std::string part;
1144 DO(ConsumeIdentifier(&part));
1145 *name += connector;
1146 *name += part;
1147 }
1148 return true;
1149 }
1150
1151 // Consumes a string and saves its value in the text parameter.
1152 // Returns false if the token is not of type STRING.
ConsumeString(std::string * text)1153 bool ConsumeString(std::string* text) {
1154 if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
1155 ReportError(
1156 absl::StrCat("Expected string, got: ", tokenizer_.current().text));
1157 return false;
1158 }
1159
1160 text->clear();
1161 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1162 io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
1163
1164 tokenizer_.Next();
1165 }
1166
1167 return true;
1168 }
1169
1170 // Consumes a uint64_t and saves its value in the value parameter.
1171 // Returns false if the token is not of type INTEGER.
ConsumeUnsignedInteger(uint64_t * value,uint64_t max_value)1172 bool ConsumeUnsignedInteger(uint64_t* value, uint64_t max_value) {
1173 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
1174 ReportError(
1175 absl::StrCat("Expected integer, got: ", tokenizer_.current().text));
1176 return false;
1177 }
1178
1179 if (!io::Tokenizer::ParseInteger(tokenizer_.current().text, max_value,
1180 value)) {
1181 ReportError(absl::StrCat("Integer out of range (",
1182 tokenizer_.current().text, ")"));
1183 return false;
1184 }
1185
1186 tokenizer_.Next();
1187 return true;
1188 }
1189
1190 // Consumes an int64_t and saves its value in the value parameter.
1191 // Note that since the tokenizer does not support negative numbers,
1192 // we actually may consume an additional token (for the minus sign) in this
1193 // method. Returns false if the token is not an integer
1194 // (signed or otherwise).
ConsumeSignedInteger(int64_t * value,uint64_t max_value)1195 bool ConsumeSignedInteger(int64_t* value, uint64_t max_value) {
1196 bool negative = false;
1197
1198 if (TryConsume("-")) {
1199 negative = true;
1200 // Two's complement always allows one more negative integer than
1201 // positive.
1202 ++max_value;
1203 }
1204
1205 uint64_t unsigned_value;
1206
1207 DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
1208
1209 if (negative) {
1210 if ((static_cast<uint64_t>(kint64max) + 1) == unsigned_value) {
1211 *value = kint64min;
1212 } else {
1213 *value = -static_cast<int64_t>(unsigned_value);
1214 }
1215 } else {
1216 *value = static_cast<int64_t>(unsigned_value);
1217 }
1218
1219 return true;
1220 }
1221
1222 // Consumes a double and saves its value in the value parameter.
1223 // Accepts decimal numbers only, rejects hex or oct numbers.
ConsumeUnsignedDecimalAsDouble(double * value,uint64_t max_value)1224 bool ConsumeUnsignedDecimalAsDouble(double* value, uint64_t max_value) {
1225 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
1226 ReportError(
1227 absl::StrCat("Expected integer, got: ", tokenizer_.current().text));
1228 return false;
1229 }
1230
1231 const std::string& text = tokenizer_.current().text;
1232 if (IsHexNumber(text) || IsOctNumber(text)) {
1233 ReportError(absl::StrCat("Expect a decimal number, got: ", text));
1234 return false;
1235 }
1236
1237 uint64_t uint64_value;
1238 if (io::Tokenizer::ParseInteger(text, max_value, &uint64_value)) {
1239 *value = static_cast<double>(uint64_value);
1240 } else {
1241 // Uint64 overflow, attempt to parse as a double instead.
1242 *value = io::Tokenizer::ParseFloat(text);
1243 }
1244
1245 tokenizer_.Next();
1246 return true;
1247 }
1248
1249 // Consumes a double and saves its value in the value parameter.
1250 // Note that since the tokenizer does not support negative numbers,
1251 // we actually may consume an additional token (for the minus sign) in this
1252 // method. Returns false if the token is not a double
1253 // (signed or otherwise).
ConsumeDouble(double * value)1254 bool ConsumeDouble(double* value) {
1255 bool negative = false;
1256
1257 if (TryConsume("-")) {
1258 negative = true;
1259 }
1260
1261 // A double can actually be an integer, according to the tokenizer.
1262 // Therefore, we must check both cases here.
1263 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
1264 // We have found an integer value for the double.
1265 DO(ConsumeUnsignedDecimalAsDouble(value, kuint64max));
1266 } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
1267 // We have found a float value for the double.
1268 *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
1269
1270 // Mark the current token as consumed.
1271 tokenizer_.Next();
1272 } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1273 std::string text = tokenizer_.current().text;
1274 absl::AsciiStrToLower(&text);
1275 if (text == "inf" ||
1276 text == "infinity") {
1277 *value = std::numeric_limits<double>::infinity();
1278 tokenizer_.Next();
1279 } else if (text == "nan") {
1280 *value = std::numeric_limits<double>::quiet_NaN();
1281 tokenizer_.Next();
1282 } else {
1283 ReportError(absl::StrCat("Expected double, got: ", text));
1284 return false;
1285 }
1286 } else {
1287 ReportError(
1288 absl::StrCat("Expected double, got: ", tokenizer_.current().text));
1289 return false;
1290 }
1291
1292 if (negative) {
1293 *value = -*value;
1294 }
1295
1296 return true;
1297 }
1298
1299 // Consumes Any::type_url value, of form "type.googleapis.com/full.type.Name"
1300 // or "type.googleprod.com/full.type.Name"
ConsumeAnyTypeUrl(std::string * full_type_name,std::string * prefix)1301 bool ConsumeAnyTypeUrl(std::string* full_type_name, std::string* prefix) {
1302 // TODO Extend Consume() to consume multiple tokens at once, so that
1303 // this code can be written as just DO(Consume(kGoogleApisTypePrefix)).
1304 DO(ConsumeIdentifier(prefix));
1305 while (TryConsume(".")) {
1306 std::string url;
1307 DO(ConsumeIdentifier(&url));
1308 absl::StrAppend(prefix, ".", url);
1309 }
1310 DO(Consume("/"));
1311 absl::StrAppend(prefix, "/");
1312 DO(ConsumeFullTypeName(full_type_name));
1313
1314 return true;
1315 }
1316
1317 // A helper function for reconstructing Any::value. Consumes a text of
1318 // full_type_name, then serializes it into serialized_value.
ConsumeAnyValue(const Descriptor * value_descriptor,std::string * serialized_value)1319 bool ConsumeAnyValue(const Descriptor* value_descriptor,
1320 std::string* serialized_value) {
1321 DynamicMessageFactory factory;
1322 const Message* value_prototype = factory.GetPrototype(value_descriptor);
1323 if (value_prototype == nullptr) {
1324 return false;
1325 }
1326 std::unique_ptr<Message> value(value_prototype->New());
1327 std::string sub_delimiter;
1328 DO(ConsumeMessageDelimiter(&sub_delimiter));
1329 DO(ConsumeMessage(value.get(), sub_delimiter));
1330
1331 if (allow_partial_) {
1332 value->AppendPartialToString(serialized_value);
1333 } else {
1334 if (!value->IsInitialized()) {
1335 ReportError(absl::StrCat(
1336 "Value of type \"", value_descriptor->full_name(),
1337 "\" stored in google.protobuf.Any has missing required fields"));
1338 return false;
1339 }
1340 value->AppendToString(serialized_value);
1341 }
1342 return true;
1343 }
1344
1345 // Consumes a token and confirms that it matches that specified in the
1346 // value parameter. Returns false if the token found does not match that
1347 // which was specified.
Consume(const std::string & value)1348 bool Consume(const std::string& value) {
1349 const std::string& current_value = tokenizer_.current().text;
1350
1351 if (current_value != value) {
1352 ReportError(absl::StrCat("Expected \"", value, "\", found \"",
1353 current_value, "\"."));
1354 return false;
1355 }
1356
1357 tokenizer_.Next();
1358
1359 return true;
1360 }
1361
1362 // Similar to `Consume`, but the following token may be tokenized as
1363 // TYPE_WHITESPACE.
ConsumeBeforeWhitespace(const std::string & value)1364 bool ConsumeBeforeWhitespace(const std::string& value) {
1365 // Report whitespace after this token, but only once.
1366 tokenizer_.set_report_whitespace(true);
1367 bool result = Consume(value);
1368 tokenizer_.set_report_whitespace(false);
1369 return result;
1370 }
1371
1372 // Attempts to consume the supplied value. Returns false if the token found
1373 // does not match the value specified.
TryConsume(const std::string & value)1374 bool TryConsume(const std::string& value) {
1375 if (tokenizer_.current().text == value) {
1376 tokenizer_.Next();
1377 return true;
1378 } else {
1379 return false;
1380 }
1381 }
1382
1383 // Similar to `TryConsume`, but the following token may be tokenized as
1384 // TYPE_WHITESPACE.
TryConsumeBeforeWhitespace(const std::string & value)1385 bool TryConsumeBeforeWhitespace(const std::string& value) {
1386 // Report whitespace after this token, but only once.
1387 tokenizer_.set_report_whitespace(true);
1388 bool result = TryConsume(value);
1389 tokenizer_.set_report_whitespace(false);
1390 return result;
1391 }
1392
TryConsumeWhitespace()1393 bool TryConsumeWhitespace() {
1394 had_silent_marker_ = false;
1395 if (LookingAtType(io::Tokenizer::TYPE_WHITESPACE)) {
1396 if (tokenizer_.current().text ==
1397 absl::StrCat(" ", internal::kDebugStringSilentMarkerForDetection)) {
1398 had_silent_marker_ = true;
1399 }
1400 tokenizer_.Next();
1401 return true;
1402 }
1403 return false;
1404 }
1405
1406 // An internal instance of the Tokenizer's error collector, used to
1407 // collect any base-level parse errors and feed them to the ParserImpl.
1408 class ParserErrorCollector : public io::ErrorCollector {
1409 public:
ParserErrorCollector(TextFormat::Parser::ParserImpl * parser)1410 explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser)
1411 : parser_(parser) {}
1412
1413 ParserErrorCollector(const ParserErrorCollector&) = delete;
1414 ParserErrorCollector& operator=(const ParserErrorCollector&) = delete;
~ParserErrorCollector()1415 ~ParserErrorCollector() override {}
1416
RecordError(int line,int column,absl::string_view message)1417 void RecordError(int line, int column, absl::string_view message) override {
1418 parser_->ReportError(line, column, message);
1419 }
1420
RecordWarning(int line,int column,absl::string_view message)1421 void RecordWarning(int line, int column,
1422 absl::string_view message) override {
1423 parser_->ReportWarning(line, column, message);
1424 }
1425
1426 private:
1427 TextFormat::Parser::ParserImpl* parser_;
1428 };
1429
// --- Parser state -----------------------------------------------------------
// NOTE(review): members are initialized in declaration order; keep
// tokenizer_error_collector_ ahead of tokenizer_ (presumably the tokenizer is
// constructed with a pointer to it — confirm against the constructor).

// Sink for diagnostics. NOTE(review): presumably may be null — confirm
// against ReportError / ReportWarning, which are outside this section.
io::ErrorCollector* error_collector_;
// Optional custom finder; when non-null it is asked for the extension factory
// used to create nested messages.
const TextFormat::Finder* finder_;
// Parse information tree for the message currently being parsed, or nullptr
// when no tree is recorded. Temporarily replaced by a nested tree while a
// sub-message is parsed.
ParseInfoTree* parse_info_tree_;
// Forwards tokenizer errors and warnings to this parser.
ParserErrorCollector tokenizer_error_collector_;
// Source of tokens for every Consume* / LookingAt* helper.
io::Tokenizer tokenizer_;
const Descriptor* root_message_type_;
SingularOverwritePolicy singular_overwrite_policy_;
const bool allow_case_insensitive_field_;
// Any of allow_unknown_field_, allow_unknown_extension_ or
// allow_field_number_ lets ConsumeIdentifier accept an integer token.
const bool allow_unknown_field_;
const bool allow_unknown_extension_;
// When set, an unknown enum value is reported as a warning and dropped
// instead of failing the parse.
const bool allow_unknown_enum_;
const bool allow_field_number_;
// When set, an Any payload is serialized without the IsInitialized() check.
const bool allow_partial_;
// The configured recursion limit; quoted in "Message is too deep" errors.
const int initial_recursion_limit_;
// Remaining nesting budget: decremented on entering a nested value or
// message and incremented again on the way out.
int recursion_limit_;
// Set by TryConsumeWhitespace(): true when the whitespace just consumed was
// the debug-string silent marker.
bool had_silent_marker_;
bool had_errors_;
// Optional record of singular fields whose parsed value was a no-op (field
// without presence, current and new value both equal to the default).
UnsetFieldsMetadata* no_op_fields_{};
1448
1449 };
1450
1451 // ===========================================================================
1452 // Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
1453 // from the Printer found in //third_party/protobuf/io/printer.h
1454 class TextFormat::Printer::TextGenerator
1455 : public TextFormat::BaseTextGenerator {
1456 public:
TextGenerator(io::ZeroCopyOutputStream * output,int initial_indent_level)1457 explicit TextGenerator(io::ZeroCopyOutputStream* output,
1458 int initial_indent_level)
1459 : output_(output),
1460 buffer_(nullptr),
1461 buffer_size_(0),
1462 at_start_of_line_(true),
1463 failed_(false),
1464 insert_silent_marker_(false),
1465 indent_level_(initial_indent_level),
1466 initial_indent_level_(initial_indent_level) {}
1467
TextGenerator(io::ZeroCopyOutputStream * output,bool insert_silent_marker,int initial_indent_level)1468 explicit TextGenerator(io::ZeroCopyOutputStream* output,
1469 bool insert_silent_marker, int initial_indent_level)
1470 : output_(output),
1471 buffer_(nullptr),
1472 buffer_size_(0),
1473 at_start_of_line_(true),
1474 failed_(false),
1475 insert_silent_marker_(insert_silent_marker),
1476 indent_level_(initial_indent_level),
1477 initial_indent_level_(initial_indent_level) {}
1478
1479 TextGenerator(const TextGenerator&) = delete;
1480 TextGenerator& operator=(const TextGenerator&) = delete;
~TextGenerator()1481 ~TextGenerator() override {
1482 // Only BackUp() if we're sure we've successfully called Next() at least
1483 // once.
1484 if (!failed_) {
1485 output_->BackUp(buffer_size_);
1486 }
1487 }
1488
1489 // Indent text by two spaces. After calling Indent(), two spaces will be
1490 // inserted at the beginning of each line of text. Indent() may be called
1491 // multiple times to produce deeper indents.
Indent()1492 void Indent() override { ++indent_level_; }
1493
1494 // Reduces the current indent level by two spaces, or crashes if the indent
1495 // level is zero.
Outdent()1496 void Outdent() override {
1497 if (indent_level_ == 0 || indent_level_ < initial_indent_level_) {
1498 ABSL_DLOG(FATAL) << " Outdent() without matching Indent().";
1499 return;
1500 }
1501
1502 --indent_level_;
1503 }
1504
GetCurrentIndentationSize() const1505 size_t GetCurrentIndentationSize() const override {
1506 return 2 * indent_level_;
1507 }
1508
1509 // Print text to the output stream.
Print(const char * text,size_t size)1510 void Print(const char* text, size_t size) override {
1511 if (indent_level_ > 0) {
1512 size_t pos = 0; // The number of bytes we've written so far.
1513 for (size_t i = 0; i < size; i++) {
1514 if (text[i] == '\n') {
1515 // Saw newline. If there is more text, we may need to insert an
1516 // indent here. So, write what we have so far, including the '\n'.
1517 Write(text + pos, i - pos + 1);
1518 pos = i + 1;
1519
1520 // Setting this true will cause the next Write() to insert an indent
1521 // first.
1522 at_start_of_line_ = true;
1523 }
1524 }
1525 // Write the rest.
1526 Write(text + pos, size - pos);
1527 } else {
1528 Write(text, size);
1529 if (size > 0 && text[size - 1] == '\n') {
1530 at_start_of_line_ = true;
1531 }
1532 }
1533 }
1534
1535 // True if any write to the underlying stream failed. (We don't just
1536 // crash in this case because this is an I/O failure, not a programming
1537 // error.)
failed() const1538 bool failed() const { return failed_; }
1539
PrintMaybeWithMarker(MarkerToken,absl::string_view text)1540 void PrintMaybeWithMarker(MarkerToken, absl::string_view text) override {
1541 Print(text.data(), text.size());
1542 if (ConsumeInsertSilentMarker()) {
1543 PrintLiteral(internal::kDebugStringSilentMarker);
1544 }
1545 }
1546
PrintMaybeWithMarker(MarkerToken,absl::string_view text_head,absl::string_view text_tail)1547 void PrintMaybeWithMarker(MarkerToken, absl::string_view text_head,
1548 absl::string_view text_tail) override {
1549 Print(text_head.data(), text_head.size());
1550 if (ConsumeInsertSilentMarker()) {
1551 PrintLiteral(internal::kDebugStringSilentMarker);
1552 }
1553 Print(text_tail.data(), text_tail.size());
1554 }
1555
1556 private:
Write(const char * data,size_t size)1557 void Write(const char* data, size_t size) {
1558 if (failed_) return;
1559 if (size == 0) return;
1560
1561 if (at_start_of_line_) {
1562 // Insert an indent.
1563 at_start_of_line_ = false;
1564 WriteIndent();
1565 if (failed_) return;
1566 }
1567
1568 while (static_cast<int64_t>(size) > buffer_size_) {
1569 // Data exceeds space in the buffer. Copy what we can and request a
1570 // new buffer.
1571 if (buffer_size_ > 0) {
1572 memcpy(buffer_, data, buffer_size_);
1573 data += buffer_size_;
1574 size -= buffer_size_;
1575 }
1576 void* void_buffer = nullptr;
1577 failed_ = !output_->Next(&void_buffer, &buffer_size_);
1578 if (failed_) return;
1579 buffer_ = reinterpret_cast<char*>(void_buffer);
1580 }
1581
1582 // Buffer is big enough to receive the data; copy it.
1583 memcpy(buffer_, data, size);
1584 buffer_ += size;
1585 buffer_size_ -= size;
1586 }
1587
WriteIndent()1588 void WriteIndent() {
1589 if (indent_level_ == 0) {
1590 return;
1591 }
1592 ABSL_DCHECK(!failed_);
1593 int size = GetCurrentIndentationSize();
1594
1595 while (size > buffer_size_) {
1596 // Data exceeds space in the buffer. Write what we can and request a new
1597 // buffer.
1598 if (buffer_size_ > 0) {
1599 memset(buffer_, ' ', buffer_size_);
1600 }
1601 size -= buffer_size_;
1602 void* void_buffer;
1603 failed_ = !output_->Next(&void_buffer, &buffer_size_);
1604 if (failed_) return;
1605 buffer_ = reinterpret_cast<char*>(void_buffer);
1606 }
1607
1608 // Buffer is big enough to receive the data; copy it.
1609 memset(buffer_, ' ', size);
1610 buffer_ += size;
1611 buffer_size_ -= size;
1612 }
1613
1614 // Return the current value of insert_silent_marker_. If it is true, set it
1615 // to false as we assume that a silent marker is inserted after a call to this
1616 // function.
ConsumeInsertSilentMarker()1617 bool ConsumeInsertSilentMarker() {
1618 if (insert_silent_marker_) {
1619 insert_silent_marker_ = false;
1620 return true;
1621 }
1622 return false;
1623 }
1624
1625 io::ZeroCopyOutputStream* const output_;
1626 char* buffer_;
1627 int buffer_size_;
1628 bool at_start_of_line_;
1629 bool failed_;
1630 // This flag is false when inserting silent marker is disabled or a silent
1631 // marker has been inserted.
1632 bool insert_silent_marker_;
1633
1634 int indent_level_;
1635 int initial_indent_level_;
1636 };
1637
1638 // ===========================================================================
1639 // An internal field value printer that may insert a silent marker in
1640 // DebugStrings.
1641 class TextFormat::Printer::DebugStringFieldValuePrinter
1642 : public TextFormat::FastFieldValuePrinter {
1643 public:
PrintMessageStart(const Message &,int,int,bool single_line_mode,BaseTextGenerator * generator) const1644 void PrintMessageStart(const Message& /*message*/, int /*field_index*/,
1645 int /*field_count*/, bool single_line_mode,
1646 BaseTextGenerator* generator) const override {
1647 if (single_line_mode) {
1648 generator->PrintMaybeWithMarker(MarkerToken(), " ", "{ ");
1649 } else {
1650 generator->PrintMaybeWithMarker(MarkerToken(), " ", "{\n");
1651 }
1652 }
1653 };
1654
1655 namespace {
1656
1657 // Returns true if `ch` needs to be escaped in TextFormat, independent of any
1658 // UTF-8 validity issues.
DefinitelyNeedsEscape(unsigned char ch)1659 bool DefinitelyNeedsEscape(unsigned char ch) {
1660 if (ch >= 0x80) {
1661 return false; // High byte; no escapes necessary if UTF-8 is valid.
1662 }
1663
1664 if (!absl::ascii_isprint(ch)) {
1665 return true; // Unprintable characters need escape.
1666 }
1667
1668 switch (ch) {
1669 case '\"':
1670 case '\'':
1671 case '\\':
1672 // These characters need escapes despite being printable.
1673 return true;
1674 }
1675
1676 return false;
1677 }
1678
// Returns true if `ch` is a high byte (top bit set, i.e. value above 127).
// Such bytes require UTF-8 validation; if that validation fails, the byte
// must be escaped.
bool NeedsUtf8Validation(unsigned char ch) { return (ch & 0x80u) != 0; }
1682
1683 // Returns the number of bytes in the prefix of `val` that do not need escaping.
1684 // This is like utf8_range::SpanStructurallyValid(), except that it also
1685 // terminates at any ASCII char that needs to be escaped in TextFormat (any char
1686 // that has `DefinitelyNeedsEscape(ch) == true`).
1687 //
1688 // If we could get a variant of utf8_range::SpanStructurallyValid() that could
1689 // terminate on any of these chars, that might be more efficient, but it would
1690 // be much more complicated to modify that heavily SIMD code.
SkipPassthroughBytes(absl::string_view val)1691 size_t SkipPassthroughBytes(absl::string_view val) {
1692 for (size_t i = 0; i < val.size(); i++) {
1693 unsigned char uc = val[i];
1694 if (DefinitelyNeedsEscape(uc)) return i;
1695 if (NeedsUtf8Validation(uc)) {
1696 // Find the end of this region of consecutive high bytes, so that we only
1697 // give high bytes to the UTF-8 checker. This avoids needing to perform
1698 // a second scan of the ASCII characters looking for characters that
1699 // need escaping.
1700 //
1701 // We assume that high bytes are less frequent than plain, printable ASCII
1702 // bytes, so we accept the double-scan of high bytes.
1703 size_t end = i + 1;
1704 for (; end < val.size(); end++) {
1705 if (!NeedsUtf8Validation(val[end])) break;
1706 }
1707 size_t n = end - i;
1708 size_t ok = utf8_range::SpanStructurallyValid(val.substr(i, n));
1709 if (ok != n) return i + ok;
1710 i += ok - 1;
1711 }
1712 }
1713 return val.size();
1714 }
1715
1716 } // namespace
1717
HardenedPrintString(absl::string_view src,TextFormat::BaseTextGenerator * generator)1718 void TextFormat::Printer::HardenedPrintString(
1719 absl::string_view src, TextFormat::BaseTextGenerator* generator) {
1720 // Print as UTF-8, while guarding against any invalid UTF-8 in the string
1721 // field.
1722 //
1723 // If in the future we have a guaranteed invariant that invalid UTF-8 will
1724 // never be present, we could avoid the UTF-8 check here.
1725
1726 generator->PrintLiteral("\"");
1727 while (!src.empty()) {
1728 size_t n = SkipPassthroughBytes(src);
1729 if (n != 0) {
1730 generator->PrintString(src.substr(0, n));
1731 src.remove_prefix(n);
1732 if (src.empty()) break;
1733 }
1734
1735 // If repeated calls to CEscape() and PrintString() are expensive, we could
1736 // consider batching them, at the cost of some complexity.
1737 generator->PrintString(absl::CEscape(src.substr(0, 1)));
1738 src.remove_prefix(1);
1739 }
1740 generator->PrintLiteral("\"");
1741 }
1742
1743 // ===========================================================================
1744 // An internal field value printer that escape UTF8 strings.
1745 class TextFormat::Printer::FastFieldValuePrinterUtf8Escaping
1746 : public TextFormat::Printer::DebugStringFieldValuePrinter {
1747 public:
PrintString(const std::string & val,TextFormat::BaseTextGenerator * generator) const1748 void PrintString(const std::string& val,
1749 TextFormat::BaseTextGenerator* generator) const override {
1750 TextFormat::Printer::HardenedPrintString(val, generator);
1751 }
PrintBytes(const std::string & val,TextFormat::BaseTextGenerator * generator) const1752 void PrintBytes(const std::string& val,
1753 TextFormat::BaseTextGenerator* generator) const override {
1754 return FastFieldValuePrinter::PrintString(val, generator);
1755 }
1756 };
1757
1758 // ===========================================================================
1759 // Implementation of the default Finder for extensions.
~Finder()1760 TextFormat::Finder::~Finder() {}
1761
FindExtension(Message * message,const std::string & name) const1762 const FieldDescriptor* TextFormat::Finder::FindExtension(
1763 Message* message, const std::string& name) const {
1764 return DefaultFinderFindExtension(message, name);
1765 }
1766
FindExtensionByNumber(const Descriptor * descriptor,int number) const1767 const FieldDescriptor* TextFormat::Finder::FindExtensionByNumber(
1768 const Descriptor* descriptor, int number) const {
1769 return DefaultFinderFindExtensionByNumber(descriptor, number);
1770 }
1771
FindAnyType(const Message & message,const std::string & prefix,const std::string & name) const1772 const Descriptor* TextFormat::Finder::FindAnyType(
1773 const Message& message, const std::string& prefix,
1774 const std::string& name) const {
1775 return DefaultFinderFindAnyType(message, prefix, name);
1776 }
1777
FindExtensionFactory(const FieldDescriptor *) const1778 MessageFactory* TextFormat::Finder::FindExtensionFactory(
1779 const FieldDescriptor* /*field*/) const {
1780 return nullptr;
1781 }
1782
1783 // ===========================================================================
1784
Parser()1785 TextFormat::Parser::Parser()
1786 : error_collector_(nullptr),
1787 finder_(nullptr),
1788 parse_info_tree_(nullptr),
1789 allow_partial_(false),
1790 allow_case_insensitive_field_(false),
1791 allow_unknown_field_(false),
1792 allow_unknown_extension_(false),
1793 allow_unknown_enum_(false),
1794 allow_field_number_(false),
1795 allow_relaxed_whitespace_(false),
1796 allow_singular_overwrites_(false),
1797 recursion_limit_(std::numeric_limits<int>::max()) {}
1798
~Parser()1799 TextFormat::Parser::~Parser() {}
1800
1801 namespace {
1802
1803 template <typename T>
CheckParseInputSize(T & input,io::ErrorCollector * error_collector)1804 bool CheckParseInputSize(T& input, io::ErrorCollector* error_collector) {
1805 if (input.size() > INT_MAX) {
1806 error_collector->RecordError(
1807 -1, 0,
1808 absl::StrCat(
1809 "Input size too large: ", static_cast<int64_t>(input.size()),
1810 " bytes", " > ", INT_MAX, " bytes."));
1811 return false;
1812 }
1813 return true;
1814 }
1815
1816 } // namespace
1817
Parse(io::ZeroCopyInputStream * input,Message * output)1818 bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
1819 Message* output) {
1820 output->Clear();
1821
1822 ParserImpl::SingularOverwritePolicy overwrites_policy =
1823 allow_singular_overwrites_ ? ParserImpl::ALLOW_SINGULAR_OVERWRITES
1824 : ParserImpl::FORBID_SINGULAR_OVERWRITES;
1825
1826 ParserImpl parser(output->GetDescriptor(), input, error_collector_, finder_,
1827 parse_info_tree_, overwrites_policy,
1828 allow_case_insensitive_field_, allow_unknown_field_,
1829 allow_unknown_extension_, allow_unknown_enum_,
1830 allow_field_number_, allow_relaxed_whitespace_,
1831 allow_partial_, recursion_limit_, no_op_fields_);
1832 return MergeUsingImpl(input, output, &parser);
1833 }
1834
ParseFromString(absl::string_view input,Message * output)1835 bool TextFormat::Parser::ParseFromString(absl::string_view input,
1836 Message* output) {
1837 DO(CheckParseInputSize(input, error_collector_));
1838 io::ArrayInputStream input_stream(input.data(), input.size());
1839 return Parse(&input_stream, output);
1840 }
1841
ParseFromCord(const absl::Cord & input,Message * output)1842 bool TextFormat::Parser::ParseFromCord(const absl::Cord& input,
1843 Message* output) {
1844 DO(CheckParseInputSize(input, error_collector_));
1845 io::CordInputStream input_stream(&input);
1846 return Parse(&input_stream, output);
1847 }
1848
Merge(io::ZeroCopyInputStream * input,Message * output)1849 bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
1850 Message* output) {
1851 ParserImpl parser(output->GetDescriptor(), input, error_collector_, finder_,
1852 parse_info_tree_, ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1853 allow_case_insensitive_field_, allow_unknown_field_,
1854 allow_unknown_extension_, allow_unknown_enum_,
1855 allow_field_number_, allow_relaxed_whitespace_,
1856 allow_partial_, recursion_limit_, no_op_fields_);
1857 return MergeUsingImpl(input, output, &parser);
1858 }
1859
MergeFromString(absl::string_view input,Message * output)1860 bool TextFormat::Parser::MergeFromString(absl::string_view input,
1861 Message* output) {
1862 DO(CheckParseInputSize(input, error_collector_));
1863 io::ArrayInputStream input_stream(input.data(), input.size());
1864 return Merge(&input_stream, output);
1865 }
1866
MergeUsingImpl(io::ZeroCopyInputStream *,Message * output,ParserImpl * parser_impl)1867 bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* /* input */,
1868 Message* output,
1869 ParserImpl* parser_impl) {
1870 if (!parser_impl->Parse(output)) return false;
1871 if (!allow_partial_ && !output->IsInitialized()) {
1872 std::vector<std::string> missing_fields;
1873 output->FindInitializationErrors(&missing_fields);
1874 parser_impl->ReportError(-1, 0,
1875 absl::StrCat("Message missing required fields: ",
1876 absl::StrJoin(missing_fields, ", ")));
1877 return false;
1878 }
1879 return true;
1880 }
1881
ParseFieldValueFromString(absl::string_view input,const FieldDescriptor * field,Message * output)1882 bool TextFormat::Parser::ParseFieldValueFromString(absl::string_view input,
1883 const FieldDescriptor* field,
1884 Message* output) {
1885 io::ArrayInputStream input_stream(input.data(), input.size());
1886 ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
1887 finder_, parse_info_tree_,
1888 ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1889 allow_case_insensitive_field_, allow_unknown_field_,
1890 allow_unknown_extension_, allow_unknown_enum_,
1891 allow_field_number_, allow_relaxed_whitespace_,
1892 allow_partial_, recursion_limit_, no_op_fields_);
1893 return parser.ParseField(field, output);
1894 }
1895
Parse(io::ZeroCopyInputStream * input,Message * output)1896 /* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
1897 Message* output) {
1898 return Parser().Parse(input, output);
1899 }
1900
Merge(io::ZeroCopyInputStream * input,Message * output)1901 /* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
1902 Message* output) {
1903 return Parser().Merge(input, output);
1904 }
1905
ParseFromString(absl::string_view input,Message * output)1906 /* static */ bool TextFormat::ParseFromString(absl::string_view input,
1907 Message* output) {
1908 return Parser().ParseFromString(input, output);
1909 }
1910
ParseFromCord(const absl::Cord & input,Message * output)1911 /* static */ bool TextFormat::ParseFromCord(const absl::Cord& input,
1912 Message* output) {
1913 return Parser().ParseFromCord(input, output);
1914 }
1915
MergeFromString(absl::string_view input,Message * output)1916 /* static */ bool TextFormat::MergeFromString(absl::string_view input,
1917 Message* output) {
1918 return Parser().MergeFromString(input, output);
1919 }
1920
1921 #undef DO
1922
1923 // ===========================================================================
1924
~BaseTextGenerator()1925 TextFormat::BaseTextGenerator::~BaseTextGenerator() {}
1926
1927 namespace {
1928
1929 // A BaseTextGenerator that writes to a string.
1930 class StringBaseTextGenerator : public TextFormat::BaseTextGenerator {
1931 public:
Print(const char * text,size_t size)1932 void Print(const char* text, size_t size) override {
1933 output_.append(text, size);
1934 }
1935
Consume()1936 std::string Consume() && { return std::move(output_); }
1937
1938 private:
1939 std::string output_;
1940 };
1941
1942 } // namespace
1943
1944 // The default implementation for FieldValuePrinter. We just delegate the
1945 // implementation to the default FastFieldValuePrinter to avoid duplicating the
1946 // logic.
FieldValuePrinter()1947 TextFormat::FieldValuePrinter::FieldValuePrinter() {}
~FieldValuePrinter()1948 TextFormat::FieldValuePrinter::~FieldValuePrinter() {}
1949
1950 #define FORWARD_IMPL(fn, ...) \
1951 StringBaseTextGenerator generator; \
1952 delegate_.fn(__VA_ARGS__, &generator); \
1953 return std::move(generator).Consume()
1954
PrintBool(bool val) const1955 std::string TextFormat::FieldValuePrinter::PrintBool(bool val) const {
1956 FORWARD_IMPL(PrintBool, val);
1957 }
PrintInt32(int32_t val) const1958 std::string TextFormat::FieldValuePrinter::PrintInt32(int32_t val) const {
1959 FORWARD_IMPL(PrintInt32, val);
1960 }
PrintUInt32(uint32_t val) const1961 std::string TextFormat::FieldValuePrinter::PrintUInt32(uint32_t val) const {
1962 FORWARD_IMPL(PrintUInt32, val);
1963 }
PrintInt64(int64_t val) const1964 std::string TextFormat::FieldValuePrinter::PrintInt64(int64_t val) const {
1965 FORWARD_IMPL(PrintInt64, val);
1966 }
PrintUInt64(uint64_t val) const1967 std::string TextFormat::FieldValuePrinter::PrintUInt64(uint64_t val) const {
1968 FORWARD_IMPL(PrintUInt64, val);
1969 }
PrintFloat(float val) const1970 std::string TextFormat::FieldValuePrinter::PrintFloat(float val) const {
1971 FORWARD_IMPL(PrintFloat, val);
1972 }
PrintDouble(double val) const1973 std::string TextFormat::FieldValuePrinter::PrintDouble(double val) const {
1974 FORWARD_IMPL(PrintDouble, val);
1975 }
PrintString(const std::string & val) const1976 std::string TextFormat::FieldValuePrinter::PrintString(
1977 const std::string& val) const {
1978 FORWARD_IMPL(PrintString, val);
1979 }
PrintBytes(const std::string & val) const1980 std::string TextFormat::FieldValuePrinter::PrintBytes(
1981 const std::string& val) const {
1982 return PrintString(val);
1983 }
PrintEnum(int32_t val,const std::string & name) const1984 std::string TextFormat::FieldValuePrinter::PrintEnum(
1985 int32_t val, const std::string& name) const {
1986 FORWARD_IMPL(PrintEnum, val, name);
1987 }
PrintFieldName(const Message & message,const Reflection * reflection,const FieldDescriptor * field) const1988 std::string TextFormat::FieldValuePrinter::PrintFieldName(
1989 const Message& message, const Reflection* reflection,
1990 const FieldDescriptor* field) const {
1991 FORWARD_IMPL(PrintFieldName, message, reflection, field);
1992 }
PrintMessageStart(const Message & message,int field_index,int field_count,bool single_line_mode) const1993 std::string TextFormat::FieldValuePrinter::PrintMessageStart(
1994 const Message& message, int field_index, int field_count,
1995 bool single_line_mode) const {
1996 FORWARD_IMPL(PrintMessageStart, message, field_index, field_count,
1997 single_line_mode);
1998 }
PrintMessageEnd(const Message & message,int field_index,int field_count,bool single_line_mode) const1999 std::string TextFormat::FieldValuePrinter::PrintMessageEnd(
2000 const Message& message, int field_index, int field_count,
2001 bool single_line_mode) const {
2002 FORWARD_IMPL(PrintMessageEnd, message, field_index, field_count,
2003 single_line_mode);
2004 }
2005 #undef FORWARD_IMPL
2006
FastFieldValuePrinter()2007 TextFormat::FastFieldValuePrinter::FastFieldValuePrinter() {}
~FastFieldValuePrinter()2008 TextFormat::FastFieldValuePrinter::~FastFieldValuePrinter() {}
PrintBool(bool val,BaseTextGenerator * generator) const2009 void TextFormat::FastFieldValuePrinter::PrintBool(
2010 bool val, BaseTextGenerator* generator) const {
2011 if (val) {
2012 generator->PrintLiteral("true");
2013 } else {
2014 generator->PrintLiteral("false");
2015 }
2016 }
PrintInt32(int32_t val,BaseTextGenerator * generator) const2017 void TextFormat::FastFieldValuePrinter::PrintInt32(
2018 int32_t val, BaseTextGenerator* generator) const {
2019 generator->PrintString(absl::StrCat(val));
2020 }
PrintUInt32(uint32_t val,BaseTextGenerator * generator) const2021 void TextFormat::FastFieldValuePrinter::PrintUInt32(
2022 uint32_t val, BaseTextGenerator* generator) const {
2023 generator->PrintString(absl::StrCat(val));
2024 }
PrintInt64(int64_t val,BaseTextGenerator * generator) const2025 void TextFormat::FastFieldValuePrinter::PrintInt64(
2026 int64_t val, BaseTextGenerator* generator) const {
2027 generator->PrintString(absl::StrCat(val));
2028 }
PrintUInt64(uint64_t val,BaseTextGenerator * generator) const2029 void TextFormat::FastFieldValuePrinter::PrintUInt64(
2030 uint64_t val, BaseTextGenerator* generator) const {
2031 generator->PrintString(absl::StrCat(val));
2032 }
PrintFloat(float val,BaseTextGenerator * generator) const2033 void TextFormat::FastFieldValuePrinter::PrintFloat(
2034 float val, BaseTextGenerator* generator) const {
2035 generator->PrintString(!std::isnan(val) ? io::SimpleFtoa(val) : "nan");
2036 }
PrintDouble(double val,BaseTextGenerator * generator) const2037 void TextFormat::FastFieldValuePrinter::PrintDouble(
2038 double val, BaseTextGenerator* generator) const {
2039 generator->PrintString(!std::isnan(val) ? io::SimpleDtoa(val) : "nan");
2040 }
PrintEnum(int32_t,const std::string & name,BaseTextGenerator * generator) const2041 void TextFormat::FastFieldValuePrinter::PrintEnum(
2042 int32_t /*val*/, const std::string& name,
2043 BaseTextGenerator* generator) const {
2044 generator->PrintString(name);
2045 }
2046
PrintString(const std::string & val,BaseTextGenerator * generator) const2047 void TextFormat::FastFieldValuePrinter::PrintString(
2048 const std::string& val, BaseTextGenerator* generator) const {
2049 generator->PrintLiteral("\"");
2050 if (!val.empty()) {
2051 generator->PrintString(absl::CEscape(val));
2052 }
2053 generator->PrintLiteral("\"");
2054 }
PrintBytes(const std::string & val,BaseTextGenerator * generator) const2055 void TextFormat::FastFieldValuePrinter::PrintBytes(
2056 const std::string& val, BaseTextGenerator* generator) const {
2057 PrintString(val, generator);
2058 }
PrintFieldName(const Message & message,int,int,const Reflection * reflection,const FieldDescriptor * field,BaseTextGenerator * generator) const2059 void TextFormat::FastFieldValuePrinter::PrintFieldName(
2060 const Message& message, int /*field_index*/, int /*field_count*/,
2061 const Reflection* reflection, const FieldDescriptor* field,
2062 BaseTextGenerator* generator) const {
2063 PrintFieldName(message, reflection, field, generator);
2064 }
PrintFieldName(const Message &,const Reflection *,const FieldDescriptor * field,BaseTextGenerator * generator) const2065 void TextFormat::FastFieldValuePrinter::PrintFieldName(
2066 const Message& /*message*/, const Reflection* /*reflection*/,
2067 const FieldDescriptor* field, BaseTextGenerator* generator) const {
2068 if (field->is_extension()) {
2069 generator->PrintLiteral("[");
2070 generator->PrintString(field->PrintableNameForExtension());
2071 generator->PrintLiteral("]");
2072 } else if (internal::cpp::IsGroupLike(*field)) {
2073 // Groups must be serialized with their original capitalization.
2074 generator->PrintString(field->message_type()->name());
2075 } else {
2076 generator->PrintString(field->name());
2077 }
2078 }
PrintMessageStart(const Message &,int,int,bool single_line_mode,BaseTextGenerator * generator) const2079 void TextFormat::FastFieldValuePrinter::PrintMessageStart(
2080 const Message& /*message*/, int /*field_index*/, int /*field_count*/,
2081 bool single_line_mode, BaseTextGenerator* generator) const {
2082 if (single_line_mode) {
2083 generator->PrintLiteral(" { ");
2084 } else {
2085 generator->PrintLiteral(" {\n");
2086 }
2087 }
PrintMessageContent(const Message &,int,int,bool,BaseTextGenerator *) const2088 bool TextFormat::FastFieldValuePrinter::PrintMessageContent(
2089 const Message& /*message*/, int /*field_index*/, int /*field_count*/,
2090 bool /*single_line_mode*/, BaseTextGenerator* /*generator*/) const {
2091 return false; // Use the default printing function.
2092 }
PrintMessageEnd(const Message &,int,int,bool single_line_mode,BaseTextGenerator * generator) const2093 void TextFormat::FastFieldValuePrinter::PrintMessageEnd(
2094 const Message& /*message*/, int /*field_index*/, int /*field_count*/,
2095 bool single_line_mode, BaseTextGenerator* generator) const {
2096 if (single_line_mode) {
2097 generator->PrintLiteral("} ");
2098 } else {
2099 generator->PrintLiteral("}\n");
2100 }
2101 }
2102
2103 namespace {
2104
2105 // A legacy compatibility wrapper. Takes ownership of the delegate.
2106 class FieldValuePrinterWrapper : public TextFormat::FastFieldValuePrinter {
2107 public:
FieldValuePrinterWrapper(const TextFormat::FieldValuePrinter * delegate)2108 explicit FieldValuePrinterWrapper(
2109 const TextFormat::FieldValuePrinter* delegate)
2110 : delegate_(delegate) {}
2111
SetDelegate(const TextFormat::FieldValuePrinter * delegate)2112 void SetDelegate(const TextFormat::FieldValuePrinter* delegate) {
2113 delegate_.reset(delegate);
2114 }
2115
PrintBool(bool val,TextFormat::BaseTextGenerator * generator) const2116 void PrintBool(bool val,
2117 TextFormat::BaseTextGenerator* generator) const override {
2118 generator->PrintString(delegate_->PrintBool(val));
2119 }
PrintInt32(int32_t val,TextFormat::BaseTextGenerator * generator) const2120 void PrintInt32(int32_t val,
2121 TextFormat::BaseTextGenerator* generator) const override {
2122 generator->PrintString(delegate_->PrintInt32(val));
2123 }
PrintUInt32(uint32_t val,TextFormat::BaseTextGenerator * generator) const2124 void PrintUInt32(uint32_t val,
2125 TextFormat::BaseTextGenerator* generator) const override {
2126 generator->PrintString(delegate_->PrintUInt32(val));
2127 }
PrintInt64(int64_t val,TextFormat::BaseTextGenerator * generator) const2128 void PrintInt64(int64_t val,
2129 TextFormat::BaseTextGenerator* generator) const override {
2130 generator->PrintString(delegate_->PrintInt64(val));
2131 }
PrintUInt64(uint64_t val,TextFormat::BaseTextGenerator * generator) const2132 void PrintUInt64(uint64_t val,
2133 TextFormat::BaseTextGenerator* generator) const override {
2134 generator->PrintString(delegate_->PrintUInt64(val));
2135 }
PrintFloat(float val,TextFormat::BaseTextGenerator * generator) const2136 void PrintFloat(float val,
2137 TextFormat::BaseTextGenerator* generator) const override {
2138 generator->PrintString(delegate_->PrintFloat(val));
2139 }
PrintDouble(double val,TextFormat::BaseTextGenerator * generator) const2140 void PrintDouble(double val,
2141 TextFormat::BaseTextGenerator* generator) const override {
2142 generator->PrintString(delegate_->PrintDouble(val));
2143 }
PrintString(const std::string & val,TextFormat::BaseTextGenerator * generator) const2144 void PrintString(const std::string& val,
2145 TextFormat::BaseTextGenerator* generator) const override {
2146 generator->PrintString(delegate_->PrintString(val));
2147 }
PrintBytes(const std::string & val,TextFormat::BaseTextGenerator * generator) const2148 void PrintBytes(const std::string& val,
2149 TextFormat::BaseTextGenerator* generator) const override {
2150 generator->PrintString(delegate_->PrintBytes(val));
2151 }
PrintEnum(int32_t val,const std::string & name,TextFormat::BaseTextGenerator * generator) const2152 void PrintEnum(int32_t val, const std::string& name,
2153 TextFormat::BaseTextGenerator* generator) const override {
2154 generator->PrintString(delegate_->PrintEnum(val, name));
2155 }
PrintFieldName(const Message & message,int,int,const Reflection * reflection,const FieldDescriptor * field,TextFormat::BaseTextGenerator * generator) const2156 void PrintFieldName(const Message& message, int /*field_index*/,
2157 int /*field_count*/, const Reflection* reflection,
2158 const FieldDescriptor* field,
2159 TextFormat::BaseTextGenerator* generator) const override {
2160 generator->PrintString(
2161 delegate_->PrintFieldName(message, reflection, field));
2162 }
PrintFieldName(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextFormat::BaseTextGenerator * generator) const2163 void PrintFieldName(const Message& message, const Reflection* reflection,
2164 const FieldDescriptor* field,
2165 TextFormat::BaseTextGenerator* generator) const override {
2166 generator->PrintString(
2167 delegate_->PrintFieldName(message, reflection, field));
2168 }
PrintMessageStart(const Message & message,int field_index,int field_count,bool single_line_mode,TextFormat::BaseTextGenerator * generator) const2169 void PrintMessageStart(
2170 const Message& message, int field_index, int field_count,
2171 bool single_line_mode,
2172 TextFormat::BaseTextGenerator* generator) const override {
2173 generator->PrintString(delegate_->PrintMessageStart(
2174 message, field_index, field_count, single_line_mode));
2175 }
PrintMessageEnd(const Message & message,int field_index,int field_count,bool single_line_mode,TextFormat::BaseTextGenerator * generator) const2176 void PrintMessageEnd(
2177 const Message& message, int field_index, int field_count,
2178 bool single_line_mode,
2179 TextFormat::BaseTextGenerator* generator) const override {
2180 generator->PrintString(delegate_->PrintMessageEnd(
2181 message, field_index, field_count, single_line_mode));
2182 }
2183
2184 private:
2185 std::unique_ptr<const TextFormat::FieldValuePrinter> delegate_;
2186 };
2187
2188 } // namespace
2189
Printer()2190 TextFormat::Printer::Printer()
2191 : initial_indent_level_(0),
2192 single_line_mode_(false),
2193 use_field_number_(false),
2194 use_short_repeated_primitives_(false),
2195 insert_silent_marker_(false),
2196 redact_debug_string_(false),
2197 randomize_debug_string_(false),
2198 report_sensitive_fields_(internal::FieldReporterLevel::kNoReport),
2199 hide_unknown_fields_(false),
2200 print_message_fields_in_index_order_(false),
2201 expand_any_(false),
2202 truncate_string_field_longer_than_(0LL),
2203 finder_(nullptr) {
2204 SetUseUtf8StringEscaping(false);
2205 }
2206
SetUseUtf8StringEscaping(bool as_utf8)2207 void TextFormat::Printer::SetUseUtf8StringEscaping(bool as_utf8) {
2208 SetDefaultFieldValuePrinter(as_utf8 ? new FastFieldValuePrinterUtf8Escaping()
2209 : new DebugStringFieldValuePrinter());
2210 }
2211
SetDefaultFieldValuePrinter(const FieldValuePrinter * printer)2212 void TextFormat::Printer::SetDefaultFieldValuePrinter(
2213 const FieldValuePrinter* printer) {
2214 default_field_value_printer_.reset(new FieldValuePrinterWrapper(printer));
2215 }
2216
SetDefaultFieldValuePrinter(const FastFieldValuePrinter * printer)2217 void TextFormat::Printer::SetDefaultFieldValuePrinter(
2218 const FastFieldValuePrinter* printer) {
2219 default_field_value_printer_.reset(printer);
2220 }
2221
RegisterFieldValuePrinter(const FieldDescriptor * field,const FieldValuePrinter * printer)2222 bool TextFormat::Printer::RegisterFieldValuePrinter(
2223 const FieldDescriptor* field, const FieldValuePrinter* printer) {
2224 if (field == nullptr || printer == nullptr) {
2225 return false;
2226 }
2227 std::unique_ptr<FieldValuePrinterWrapper> wrapper(
2228 new FieldValuePrinterWrapper(nullptr));
2229 auto pair = custom_printers_.emplace(field, nullptr);
2230 if (pair.second) {
2231 wrapper->SetDelegate(printer);
2232 pair.first->second = std::move(wrapper);
2233 return true;
2234 } else {
2235 return false;
2236 }
2237 }
2238
RegisterFieldValuePrinter(const FieldDescriptor * field,const FastFieldValuePrinter * printer)2239 bool TextFormat::Printer::RegisterFieldValuePrinter(
2240 const FieldDescriptor* field, const FastFieldValuePrinter* printer) {
2241 if (field == nullptr || printer == nullptr) {
2242 return false;
2243 }
2244 auto pair = custom_printers_.emplace(field, nullptr);
2245 if (pair.second) {
2246 pair.first->second.reset(printer);
2247 return true;
2248 } else {
2249 return false;
2250 }
2251 }
2252
RegisterMessagePrinter(const Descriptor * descriptor,const MessagePrinter * printer)2253 bool TextFormat::Printer::RegisterMessagePrinter(
2254 const Descriptor* descriptor, const MessagePrinter* printer) {
2255 if (descriptor == nullptr || printer == nullptr) {
2256 return false;
2257 }
2258 auto pair = custom_message_printers_.emplace(descriptor, nullptr);
2259 if (pair.second) {
2260 pair.first->second.reset(printer);
2261 return true;
2262 } else {
2263 return false;
2264 }
2265 }
2266
PrintToString(const Message & message,std::string * output) const2267 bool TextFormat::Printer::PrintToString(const Message& message,
2268 std::string* output) const {
2269 ABSL_DCHECK(output) << "output specified is nullptr";
2270
2271 output->clear();
2272 io::StringOutputStream output_stream(output);
2273
2274 return Print(message, &output_stream,
2275 internal::FieldReporterLevel::kMemberPrintToString);
2276 }
2277
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,std::string * output) const2278 bool TextFormat::Printer::PrintUnknownFieldsToString(
2279 const UnknownFieldSet& unknown_fields, std::string* output) const {
2280 ABSL_DCHECK(output) << "output specified is nullptr";
2281
2282 output->clear();
2283 io::StringOutputStream output_stream(output);
2284 return PrintUnknownFields(unknown_fields, &output_stream);
2285 }
2286
Print(const Message & message,io::ZeroCopyOutputStream * output) const2287 bool TextFormat::Printer::Print(const Message& message,
2288 io::ZeroCopyOutputStream* output) const {
2289 return Print(message, output, internal::FieldReporterLevel::kPrintWithStream);
2290 }
2291
Print(const Message & message,io::ZeroCopyOutputStream * output,internal::FieldReporterLevel reporter) const2292 bool TextFormat::Printer::Print(const Message& message,
2293 io::ZeroCopyOutputStream* output,
2294 internal::FieldReporterLevel reporter) const {
2295 TextGenerator generator(output, insert_silent_marker_, initial_indent_level_);
2296
2297
2298 Print(message, &generator);
2299
2300 // Output false if the generator failed internally.
2301 return !generator.failed();
2302 }
2303
2304 // Maximum recursion depth for heuristically printing out length-prefixed
2305 // unknown fields as messages.
2306 static constexpr int kUnknownFieldRecursionLimit = 10;
2307
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output) const2308 bool TextFormat::Printer::PrintUnknownFields(
2309 const UnknownFieldSet& unknown_fields,
2310 io::ZeroCopyOutputStream* output) const {
2311 TextGenerator generator(output, initial_indent_level_);
2312
2313 PrintUnknownFields(unknown_fields, &generator, kUnknownFieldRecursionLimit);
2314
2315 // Output false if the generator failed internally.
2316 return !generator.failed();
2317 }
2318
2319 namespace {
2320 // Comparison functor for sorting FieldDescriptors by field index.
2321 // Normal fields have higher precedence than extensions.
2322 struct FieldIndexSorter {
operator ()google::protobuf::__anonbeb475080711::FieldIndexSorter2323 bool operator()(const FieldDescriptor* left,
2324 const FieldDescriptor* right) const {
2325 if (left->is_extension() && right->is_extension()) {
2326 return left->number() < right->number();
2327 } else if (left->is_extension()) {
2328 return false;
2329 } else if (right->is_extension()) {
2330 return true;
2331 } else {
2332 return left->index() < right->index();
2333 }
2334 }
2335 };
2336
2337 } // namespace
2338
PrintAny(const Message & message,BaseTextGenerator * generator) const2339 bool TextFormat::Printer::PrintAny(const Message& message,
2340 BaseTextGenerator* generator) const {
2341 const FieldDescriptor* type_url_field;
2342 const FieldDescriptor* value_field;
2343 if (!internal::GetAnyFieldDescriptors(message, &type_url_field,
2344 &value_field)) {
2345 return false;
2346 }
2347
2348 const Reflection* reflection = message.GetReflection();
2349
2350 // Extract the full type name from the type_url field.
2351 const std::string& type_url = reflection->GetString(message, type_url_field);
2352 std::string url_prefix;
2353 std::string full_type_name;
2354
2355 if (!internal::ParseAnyTypeUrl(type_url, &url_prefix, &full_type_name)) {
2356 return false;
2357 }
2358
2359 // Print the "value" in text.
2360 const Descriptor* value_descriptor =
2361 finder_ ? finder_->FindAnyType(message, url_prefix, full_type_name)
2362 : DefaultFinderFindAnyType(message, url_prefix, full_type_name);
2363 if (value_descriptor == nullptr) {
2364 ABSL_LOG(WARNING) << "Can't print proto content: proto type " << type_url
2365 << " not found";
2366 return false;
2367 }
2368 DynamicMessageFactory factory;
2369 std::unique_ptr<Message> value_message(
2370 factory.GetPrototype(value_descriptor)->New());
2371 std::string serialized_value = reflection->GetString(message, value_field);
2372 if (!value_message->ParseFromString(serialized_value)) {
2373 ABSL_LOG(WARNING) << type_url << ": failed to parse contents";
2374 return false;
2375 }
2376 generator->PrintLiteral("[");
2377 generator->PrintString(type_url);
2378 generator->PrintLiteral("]");
2379 const FastFieldValuePrinter* printer = GetFieldPrinter(value_field);
2380 printer->PrintMessageStart(message, -1, 0, single_line_mode_, generator);
2381 generator->Indent();
2382 Print(*value_message, generator);
2383 generator->Outdent();
2384 printer->PrintMessageEnd(message, -1, 0, single_line_mode_, generator);
2385 return true;
2386 }
2387
Print(const Message & message,BaseTextGenerator * generator) const2388 void TextFormat::Printer::Print(const Message& message,
2389 BaseTextGenerator* generator) const {
2390 const Reflection* reflection = message.GetReflection();
2391 if (!reflection) {
2392 // This message does not provide any way to describe its structure.
2393 // Parse it again in an UnknownFieldSet, and display this instead.
2394 UnknownFieldSet unknown_fields;
2395 {
2396 std::string serialized = message.SerializeAsString();
2397 io::ArrayInputStream input(serialized.data(), serialized.size());
2398 unknown_fields.ParseFromZeroCopyStream(&input);
2399 }
2400 PrintUnknownFields(unknown_fields, generator, kUnknownFieldRecursionLimit);
2401 return;
2402 }
2403 const Descriptor* descriptor = message.GetDescriptor();
2404 auto itr = custom_message_printers_.find(descriptor);
2405 if (itr != custom_message_printers_.end()) {
2406 itr->second->Print(message, single_line_mode_, generator);
2407 return;
2408 }
2409 PrintMessage(message, generator);
2410 }
2411
PrintMessage(const Message & message,BaseTextGenerator * generator) const2412 void TextFormat::Printer::PrintMessage(const Message& message,
2413 BaseTextGenerator* generator) const {
2414 if (generator == nullptr) {
2415 return;
2416 }
2417 const Descriptor* descriptor = message.GetDescriptor();
2418 if (descriptor->full_name() == internal::kAnyFullTypeName && expand_any_ &&
2419 PrintAny(message, generator)) {
2420 return;
2421 }
2422 const Reflection* reflection = message.GetReflection();
2423 std::vector<const FieldDescriptor*> fields;
2424 if (descriptor->options().map_entry()) {
2425 fields.push_back(descriptor->field(0));
2426 fields.push_back(descriptor->field(1));
2427 } else {
2428 reflection->ListFields(message, &fields);
2429 }
2430
2431 if (print_message_fields_in_index_order_) {
2432 std::sort(fields.begin(), fields.end(), FieldIndexSorter());
2433 }
2434 for (const FieldDescriptor* field : fields) {
2435 PrintField(message, reflection, field, generator);
2436 }
2437 if (!hide_unknown_fields_) {
2438 PrintUnknownFields(reflection->GetUnknownFields(message), generator,
2439 kUnknownFieldRecursionLimit);
2440 }
2441 }
2442
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,std::string * output) const2443 void TextFormat::Printer::PrintFieldValueToString(const Message& message,
2444 const FieldDescriptor* field,
2445 int index,
2446 std::string* output) const {
2447 ABSL_DCHECK(output) << "output specified is nullptr";
2448
2449 output->clear();
2450 io::StringOutputStream output_stream(output);
2451 TextGenerator generator(&output_stream, initial_indent_level_);
2452
2453 PrintFieldValue(message, message.GetReflection(), field, index, &generator);
2454 }
2455
2456 class MapEntryMessageComparator {
2457 public:
MapEntryMessageComparator(const Descriptor * descriptor)2458 explicit MapEntryMessageComparator(const Descriptor* descriptor)
2459 : field_(descriptor->field(0)) {}
2460
operator ()(const Message * a,const Message * b)2461 bool operator()(const Message* a, const Message* b) {
2462 const Reflection* reflection = a->GetReflection();
2463 switch (field_->cpp_type()) {
2464 case FieldDescriptor::CPPTYPE_BOOL: {
2465 bool first = reflection->GetBool(*a, field_);
2466 bool second = reflection->GetBool(*b, field_);
2467 return first < second;
2468 }
2469 case FieldDescriptor::CPPTYPE_INT32: {
2470 int32_t first = reflection->GetInt32(*a, field_);
2471 int32_t second = reflection->GetInt32(*b, field_);
2472 return first < second;
2473 }
2474 case FieldDescriptor::CPPTYPE_INT64: {
2475 int64_t first = reflection->GetInt64(*a, field_);
2476 int64_t second = reflection->GetInt64(*b, field_);
2477 return first < second;
2478 }
2479 case FieldDescriptor::CPPTYPE_UINT32: {
2480 uint32_t first = reflection->GetUInt32(*a, field_);
2481 uint32_t second = reflection->GetUInt32(*b, field_);
2482 return first < second;
2483 }
2484 case FieldDescriptor::CPPTYPE_UINT64: {
2485 uint64_t first = reflection->GetUInt64(*a, field_);
2486 uint64_t second = reflection->GetUInt64(*b, field_);
2487 return first < second;
2488 }
2489 case FieldDescriptor::CPPTYPE_STRING: {
2490 std::string first = reflection->GetString(*a, field_);
2491 std::string second = reflection->GetString(*b, field_);
2492 return first < second;
2493 }
2494 default:
2495 ABSL_DLOG(FATAL) << "Invalid key for map field.";
2496 return true;
2497 }
2498 }
2499
2500 private:
2501 const FieldDescriptor* field_;
2502 };
2503
2504 namespace internal {
// Static helpers used when printing map fields: building a key-sorted list of
// map-entry messages and copying a map key / value into an entry message.
class MapFieldPrinterHelper {
 public:
  // DynamicMapSorter::Sort cannot be used because it enforces syncing with
  // repeated field.
  //
  // Appends one map-entry message per element of map `field` in `message` to
  // `sorted_map_field` and sorts the vector by key. Returns true if the
  // appended entries were allocated by this call and must be released by the
  // caller.
  static bool SortMap(const Message& message, const Reflection* reflection,
                      const FieldDescriptor* field,
                      std::vector<const Message*>* sorted_map_field);
  // Stores `key` into field `field_desc` of `message`, dispatching on the
  // field's C++ type.
  static void CopyKey(const MapKey& key, Message* message,
                      const FieldDescriptor* field_desc);
  // Stores `value` into field `field_desc` of `message`, dispatching on the
  // field's C++ type.
  static void CopyValue(const MapValueRef& value, Message* message,
                        const FieldDescriptor* field_desc);
};
2517
2518 // Returns true if elements contained in sorted_map_field need to be released.
SortMap(const Message & message,const Reflection * reflection,const FieldDescriptor * field,std::vector<const Message * > * sorted_map_field)2519 bool MapFieldPrinterHelper::SortMap(
2520 const Message& message, const Reflection* reflection,
2521 const FieldDescriptor* field,
2522 std::vector<const Message*>* sorted_map_field) {
2523 bool need_release = false;
2524 const MapFieldBase& base = *reflection->GetMapData(message, field);
2525
2526 if (base.IsRepeatedFieldValid()) {
2527 const RepeatedPtrField<Message>& map_field =
2528 reflection->GetRepeatedPtrFieldInternal<Message>(message, field);
2529 for (int i = 0; i < map_field.size(); ++i) {
2530 sorted_map_field->push_back(
2531 const_cast<RepeatedPtrField<Message>*>(&map_field)->Mutable(i));
2532 }
2533 } else {
2534 // TODO: For performance, instead of creating map entry message
2535 // for each element, just store map keys and sort them.
2536 const Descriptor* map_entry_desc = field->message_type();
2537 const Message* prototype =
2538 reflection->GetMessageFactory()->GetPrototype(map_entry_desc);
2539 for (MapIterator iter =
2540 reflection->MapBegin(const_cast<Message*>(&message), field);
2541 iter != reflection->MapEnd(const_cast<Message*>(&message), field);
2542 ++iter) {
2543 Message* map_entry_message = prototype->New();
2544 CopyKey(iter.GetKey(), map_entry_message, map_entry_desc->field(0));
2545 CopyValue(iter.GetValueRef(), map_entry_message,
2546 map_entry_desc->field(1));
2547 sorted_map_field->push_back(map_entry_message);
2548 }
2549 need_release = true;
2550 }
2551
2552 MapEntryMessageComparator comparator(field->message_type());
2553 std::stable_sort(sorted_map_field->begin(), sorted_map_field->end(),
2554 comparator);
2555 return need_release;
2556 }
2557
CopyKey(const MapKey & key,Message * message,const FieldDescriptor * field_desc)2558 void MapFieldPrinterHelper::CopyKey(const MapKey& key, Message* message,
2559 const FieldDescriptor* field_desc) {
2560 const Reflection* reflection = message->GetReflection();
2561 switch (field_desc->cpp_type()) {
2562 case FieldDescriptor::CPPTYPE_DOUBLE:
2563 case FieldDescriptor::CPPTYPE_FLOAT:
2564 case FieldDescriptor::CPPTYPE_ENUM:
2565 case FieldDescriptor::CPPTYPE_MESSAGE:
2566 ABSL_LOG(ERROR) << "Not supported.";
2567 break;
2568 case FieldDescriptor::CPPTYPE_STRING:
2569 reflection->SetString(message, field_desc,
2570 std::string(key.GetStringValue()));
2571 return;
2572 case FieldDescriptor::CPPTYPE_INT64:
2573 reflection->SetInt64(message, field_desc, key.GetInt64Value());
2574 return;
2575 case FieldDescriptor::CPPTYPE_INT32:
2576 reflection->SetInt32(message, field_desc, key.GetInt32Value());
2577 return;
2578 case FieldDescriptor::CPPTYPE_UINT64:
2579 reflection->SetUInt64(message, field_desc, key.GetUInt64Value());
2580 return;
2581 case FieldDescriptor::CPPTYPE_UINT32:
2582 reflection->SetUInt32(message, field_desc, key.GetUInt32Value());
2583 return;
2584 case FieldDescriptor::CPPTYPE_BOOL:
2585 reflection->SetBool(message, field_desc, key.GetBoolValue());
2586 return;
2587 }
2588 }
2589
CopyValue(const MapValueRef & value,Message * message,const FieldDescriptor * field_desc)2590 void MapFieldPrinterHelper::CopyValue(const MapValueRef& value,
2591 Message* message,
2592 const FieldDescriptor* field_desc) {
2593 const Reflection* reflection = message->GetReflection();
2594 switch (field_desc->cpp_type()) {
2595 case FieldDescriptor::CPPTYPE_DOUBLE:
2596 reflection->SetDouble(message, field_desc, value.GetDoubleValue());
2597 return;
2598 case FieldDescriptor::CPPTYPE_FLOAT:
2599 reflection->SetFloat(message, field_desc, value.GetFloatValue());
2600 return;
2601 case FieldDescriptor::CPPTYPE_ENUM:
2602 reflection->SetEnumValue(message, field_desc, value.GetEnumValue());
2603 return;
2604 case FieldDescriptor::CPPTYPE_MESSAGE: {
2605 Message* sub_message = value.GetMessageValue().New();
2606 sub_message->CopyFrom(value.GetMessageValue());
2607 reflection->SetAllocatedMessage(message, sub_message, field_desc);
2608 return;
2609 }
2610 case FieldDescriptor::CPPTYPE_STRING:
2611 reflection->SetString(message, field_desc, value.GetStringValue());
2612 return;
2613 case FieldDescriptor::CPPTYPE_INT64:
2614 reflection->SetInt64(message, field_desc, value.GetInt64Value());
2615 return;
2616 case FieldDescriptor::CPPTYPE_INT32:
2617 reflection->SetInt32(message, field_desc, value.GetInt32Value());
2618 return;
2619 case FieldDescriptor::CPPTYPE_UINT64:
2620 reflection->SetUInt64(message, field_desc, value.GetUInt64Value());
2621 return;
2622 case FieldDescriptor::CPPTYPE_UINT32:
2623 reflection->SetUInt32(message, field_desc, value.GetUInt32Value());
2624 return;
2625 case FieldDescriptor::CPPTYPE_BOOL:
2626 reflection->SetBool(message, field_desc, value.GetBoolValue());
2627 return;
2628 }
2629 }
2630 } // namespace internal
2631
PrintField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,BaseTextGenerator * generator) const2632 void TextFormat::Printer::PrintField(const Message& message,
2633 const Reflection* reflection,
2634 const FieldDescriptor* field,
2635 BaseTextGenerator* generator) const {
2636 if (use_short_repeated_primitives_ && field->is_repeated() &&
2637 field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
2638 field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
2639 PrintShortRepeatedField(message, reflection, field, generator);
2640 return;
2641 }
2642
2643 int count = 0;
2644
2645 if (field->is_repeated()) {
2646 count = reflection->FieldSize(message, field);
2647 } else if (reflection->HasField(message, field) ||
2648 field->containing_type()->options().map_entry()) {
2649 count = 1;
2650 }
2651
2652 std::vector<const Message*> sorted_map_field;
2653 bool need_release = false;
2654 bool is_map = field->is_map();
2655 if (is_map) {
2656 need_release = internal::MapFieldPrinterHelper::SortMap(
2657 message, reflection, field, &sorted_map_field);
2658 }
2659
2660 for (int j = 0; j < count; ++j) {
2661 const int field_index = field->is_repeated() ? j : -1;
2662
2663 PrintFieldName(message, field_index, count, reflection, field, generator);
2664
2665 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2666 if (TryRedactFieldValue(message, field, generator,
2667 /*insert_value_separator=*/true)) {
2668 break;
2669 }
2670 const FastFieldValuePrinter* printer = GetFieldPrinter(field);
2671 const Message& sub_message =
2672 field->is_repeated()
2673 ? (is_map ? *sorted_map_field[j]
2674 : reflection->GetRepeatedMessage(message, field, j))
2675 : reflection->GetMessage(message, field);
2676 printer->PrintMessageStart(sub_message, field_index, count,
2677 single_line_mode_, generator);
2678 generator->Indent();
2679 if (!printer->PrintMessageContent(sub_message, field_index, count,
2680 single_line_mode_, generator)) {
2681 Print(sub_message, generator);
2682 }
2683 generator->Outdent();
2684 printer->PrintMessageEnd(sub_message, field_index, count,
2685 single_line_mode_, generator);
2686 } else {
2687 generator->PrintMaybeWithMarker(MarkerToken(), ": ");
2688 // Write the field value.
2689 PrintFieldValue(message, reflection, field, field_index, generator);
2690 if (single_line_mode_) {
2691 generator->PrintLiteral(" ");
2692 } else {
2693 generator->PrintLiteral("\n");
2694 }
2695 }
2696 }
2697
2698 if (need_release) {
2699 for (const Message* message_to_delete : sorted_map_field) {
2700 delete message_to_delete;
2701 }
2702 }
2703 }
2704
PrintShortRepeatedField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,BaseTextGenerator * generator) const2705 void TextFormat::Printer::PrintShortRepeatedField(
2706 const Message& message, const Reflection* reflection,
2707 const FieldDescriptor* field, BaseTextGenerator* generator) const {
2708 // Print primitive repeated field in short form.
2709 int size = reflection->FieldSize(message, field);
2710 PrintFieldName(message, /*field_index=*/-1, /*field_count=*/size, reflection,
2711 field, generator);
2712 generator->PrintMaybeWithMarker(MarkerToken(), ": ", "[");
2713 for (int i = 0; i < size; i++) {
2714 if (i > 0) generator->PrintLiteral(", ");
2715 PrintFieldValue(message, reflection, field, i, generator);
2716 }
2717 if (single_line_mode_) {
2718 generator->PrintLiteral("] ");
2719 } else {
2720 generator->PrintLiteral("]\n");
2721 }
2722 }
2723
PrintFieldName(const Message & message,int field_index,int field_count,const Reflection * reflection,const FieldDescriptor * field,BaseTextGenerator * generator) const2724 void TextFormat::Printer::PrintFieldName(const Message& message,
2725 int field_index, int field_count,
2726 const Reflection* reflection,
2727 const FieldDescriptor* field,
2728 BaseTextGenerator* generator) const {
2729 // if use_field_number_ is true, prints field number instead
2730 // of field name.
2731 if (use_field_number_) {
2732 generator->PrintString(absl::StrCat(field->number()));
2733 return;
2734 }
2735
2736 const FastFieldValuePrinter* printer = GetFieldPrinter(field);
2737 printer->PrintFieldName(message, field_index, field_count, reflection, field,
2738 generator);
2739 }
2740
PrintFieldValue(const Message & message,const Reflection * reflection,const FieldDescriptor * field,int index,BaseTextGenerator * generator) const2741 void TextFormat::Printer::PrintFieldValue(const Message& message,
2742 const Reflection* reflection,
2743 const FieldDescriptor* field,
2744 int index,
2745 BaseTextGenerator* generator) const {
2746 ABSL_DCHECK(field->is_repeated() || (index == -1))
2747 << "Index must be -1 for non-repeated fields";
2748
2749 const FastFieldValuePrinter* printer = GetFieldPrinter(field);
2750 if (TryRedactFieldValue(message, field, generator,
2751 /*insert_value_separator=*/false)) {
2752 return;
2753 }
2754
2755 switch (field->cpp_type()) {
2756 #define OUTPUT_FIELD(CPPTYPE, METHOD) \
2757 case FieldDescriptor::CPPTYPE_##CPPTYPE: \
2758 printer->Print##METHOD( \
2759 field->is_repeated() \
2760 ? reflection->GetRepeated##METHOD(message, field, index) \
2761 : reflection->Get##METHOD(message, field), \
2762 generator); \
2763 break
2764
2765 OUTPUT_FIELD(INT32, Int32);
2766 OUTPUT_FIELD(INT64, Int64);
2767 OUTPUT_FIELD(UINT32, UInt32);
2768 OUTPUT_FIELD(UINT64, UInt64);
2769 OUTPUT_FIELD(FLOAT, Float);
2770 OUTPUT_FIELD(DOUBLE, Double);
2771 OUTPUT_FIELD(BOOL, Bool);
2772 #undef OUTPUT_FIELD
2773
2774 case FieldDescriptor::CPPTYPE_STRING: {
2775 std::string scratch;
2776 const std::string& value =
2777 field->is_repeated()
2778 ? reflection->GetRepeatedStringReference(message, field, index,
2779 &scratch)
2780 : reflection->GetStringReference(message, field, &scratch);
2781 const std::string* value_to_print = &value;
2782 std::string truncated_value;
2783 if (truncate_string_field_longer_than_ > 0 &&
2784 static_cast<size_t>(truncate_string_field_longer_than_) <
2785 value.size()) {
2786 truncated_value = value.substr(0, truncate_string_field_longer_than_) +
2787 "...<truncated>...";
2788 value_to_print = &truncated_value;
2789 }
2790 if (field->type() == FieldDescriptor::TYPE_STRING) {
2791 printer->PrintString(*value_to_print, generator);
2792 } else {
2793 ABSL_DCHECK_EQ(field->type(), FieldDescriptor::TYPE_BYTES);
2794 printer->PrintBytes(*value_to_print, generator);
2795 }
2796 break;
2797 }
2798
2799 case FieldDescriptor::CPPTYPE_ENUM: {
2800 int enum_value =
2801 field->is_repeated()
2802 ? reflection->GetRepeatedEnumValue(message, field, index)
2803 : reflection->GetEnumValue(message, field);
2804 const EnumValueDescriptor* enum_desc =
2805 field->enum_type()->FindValueByNumber(enum_value);
2806 if (enum_desc != nullptr) {
2807 printer->PrintEnum(enum_value, internal::NameOfEnumAsString(enum_desc),
2808 generator);
2809 } else {
2810 // Ordinarily, enum_desc should not be null, because proto2 has the
2811 // invariant that set enum field values must be in-range, but with the
2812 // new integer-based API for enums (or the RepeatedField<int> loophole),
2813 // it is possible for the user to force an unknown integer value. So we
2814 // simply use the integer value itself as the enum value name in this
2815 // case.
2816 printer->PrintEnum(enum_value, absl::StrCat(enum_value), generator);
2817 }
2818 break;
2819 }
2820
2821 case FieldDescriptor::CPPTYPE_MESSAGE:
2822 Print(field->is_repeated()
2823 ? reflection->GetRepeatedMessage(message, field, index)
2824 : reflection->GetMessage(message, field),
2825 generator);
2826 break;
2827 }
2828 }
2829
Print(const Message & message,io::ZeroCopyOutputStream * output)2830 /* static */ bool TextFormat::Print(const Message& message,
2831 io::ZeroCopyOutputStream* output) {
2832 return Printer().Print(message, output);
2833 }
2834
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output)2835 /* static */ bool TextFormat::PrintUnknownFields(
2836 const UnknownFieldSet& unknown_fields, io::ZeroCopyOutputStream* output) {
2837 return Printer().PrintUnknownFields(unknown_fields, output);
2838 }
2839
PrintToString(const Message & message,std::string * output)2840 /* static */ bool TextFormat::PrintToString(const Message& message,
2841 std::string* output) {
2842 auto printer = Printer();
2843 return printer.PrintToString(message, output);
2844 }
2845
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,std::string * output)2846 /* static */ bool TextFormat::PrintUnknownFieldsToString(
2847 const UnknownFieldSet& unknown_fields, std::string* output) {
2848 return Printer().PrintUnknownFieldsToString(unknown_fields, output);
2849 }
2850
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,std::string * output)2851 /* static */ void TextFormat::PrintFieldValueToString(
2852 const Message& message, const FieldDescriptor* field, int index,
2853 std::string* output) {
2854 return Printer().PrintFieldValueToString(message, field, index, output);
2855 }
2856
ParseFieldValueFromString(absl::string_view input,const FieldDescriptor * field,Message * message)2857 /* static */ bool TextFormat::ParseFieldValueFromString(
2858 absl::string_view input, const FieldDescriptor* field, Message* message) {
2859 return Parser().ParseFieldValueFromString(input, field, message);
2860 }
2861
2862 template <typename... T>
OutOfLinePrintString(BaseTextGenerator * generator,const T &...values)2863 PROTOBUF_NOINLINE void TextFormat::OutOfLinePrintString(
2864 BaseTextGenerator* generator, const T&... values) {
2865 generator->PrintString(absl::StrCat(values...));
2866 }
2867
PrintUnknownFields(const UnknownFieldSet & unknown_fields,BaseTextGenerator * generator,int recursion_budget) const2868 void TextFormat::Printer::PrintUnknownFields(
2869 const UnknownFieldSet& unknown_fields, BaseTextGenerator* generator,
2870 int recursion_budget) const {
2871 for (int i = 0; i < unknown_fields.field_count(); i++) {
2872 const UnknownField& field = unknown_fields.field(i);
2873
2874 switch (field.type()) {
2875 case UnknownField::TYPE_VARINT:
2876 OutOfLinePrintString(generator, field.number());
2877 generator->PrintMaybeWithMarker(MarkerToken(), ": ");
2878 if (redact_debug_string_) {
2879 OutOfLinePrintString(generator, "UNKNOWN_VARINT ");
2880 OutOfLinePrintString(generator, kFieldValueReplacement);
2881 } else {
2882 OutOfLinePrintString(generator, field.varint());
2883 }
2884 if (single_line_mode_) {
2885 generator->PrintLiteral(" ");
2886 } else {
2887 generator->PrintLiteral("\n");
2888 }
2889 break;
2890 case UnknownField::TYPE_FIXED32: {
2891 OutOfLinePrintString(generator, field.number());
2892 if (redact_debug_string_) {
2893 generator->PrintMaybeWithMarker(MarkerToken(), ": ",
2894 "UNKNOWN_FIXED32 ");
2895 OutOfLinePrintString(generator, kFieldValueReplacement);
2896 } else {
2897 generator->PrintMaybeWithMarker(MarkerToken(), ": ", "0x");
2898 OutOfLinePrintString(generator,
2899 absl::Hex(field.fixed32(), absl::kZeroPad8));
2900 }
2901 if (single_line_mode_) {
2902 generator->PrintLiteral(" ");
2903 } else {
2904 generator->PrintLiteral("\n");
2905 }
2906 break;
2907 }
2908 case UnknownField::TYPE_FIXED64: {
2909 OutOfLinePrintString(generator, field.number());
2910 if (redact_debug_string_) {
2911 generator->PrintMaybeWithMarker(MarkerToken(), ": ",
2912 "UNKNOWN_FIXED64 ");
2913 OutOfLinePrintString(generator, kFieldValueReplacement);
2914 } else {
2915 generator->PrintMaybeWithMarker(MarkerToken(), ": ", "0x");
2916 OutOfLinePrintString(generator,
2917 absl::Hex(field.fixed64(), absl::kZeroPad16));
2918 }
2919 if (single_line_mode_) {
2920 generator->PrintLiteral(" ");
2921 } else {
2922 generator->PrintLiteral("\n");
2923 }
2924 break;
2925 }
2926 case UnknownField::TYPE_LENGTH_DELIMITED: {
2927 OutOfLinePrintString(generator, field.number());
2928 const absl::string_view value = field.length_delimited();
2929 // We create a CodedInputStream so that we can adhere to our recursion
2930 // budget when we attempt to parse the data. UnknownFieldSet parsing is
2931 // recursive because of groups.
2932 io::CodedInputStream input_stream(
2933 reinterpret_cast<const uint8_t*>(value.data()), value.size());
2934 input_stream.SetRecursionLimit(recursion_budget);
2935 UnknownFieldSet embedded_unknown_fields;
2936 if (!value.empty() && recursion_budget > 0 &&
2937 embedded_unknown_fields.ParseFromCodedStream(&input_stream)) {
2938 // This field is parseable as a Message.
2939 // So it is probably an embedded message.
2940 if (redact_debug_string_) {
2941 generator->PrintMaybeWithMarker(MarkerToken(), ": ",
2942 "UNKNOWN_MESSAGE ");
2943 OutOfLinePrintString(generator, kFieldValueReplacement);
2944 if (single_line_mode_) {
2945 generator->PrintLiteral(" ");
2946 } else {
2947 generator->PrintLiteral("\n");
2948 }
2949 break;
2950 }
2951 if (single_line_mode_) {
2952 generator->PrintMaybeWithMarker(MarkerToken(), " ", "{ ");
2953 } else {
2954 generator->PrintMaybeWithMarker(MarkerToken(), " ", "{\n");
2955 generator->Indent();
2956 }
2957 PrintUnknownFields(embedded_unknown_fields, generator,
2958 recursion_budget - 1);
2959 if (single_line_mode_) {
2960 generator->PrintLiteral("} ");
2961 } else {
2962 generator->Outdent();
2963 generator->PrintLiteral("}\n");
2964 }
2965 } else {
2966 // This field is not parseable as a Message (or we ran out of
2967 // recursion budget). So it is probably just a plain string.
2968 if (redact_debug_string_) {
2969 generator->PrintMaybeWithMarker(MarkerToken(), ": ",
2970 "UNKNOWN_STRING ");
2971 OutOfLinePrintString(generator, kFieldValueReplacement);
2972 if (single_line_mode_) {
2973 generator->PrintLiteral(" ");
2974 } else {
2975 generator->PrintLiteral("\n");
2976 }
2977 break;
2978 }
2979 generator->PrintMaybeWithMarker(MarkerToken(), ": ", "\"");
2980 generator->PrintString(absl::CEscape(value));
2981 if (single_line_mode_) {
2982 generator->PrintLiteral("\" ");
2983 } else {
2984 generator->PrintLiteral("\"\n");
2985 }
2986 }
2987 break;
2988 }
2989 case UnknownField::TYPE_GROUP:
2990 OutOfLinePrintString(generator, field.number());
2991 if (redact_debug_string_) {
2992 generator->PrintMaybeWithMarker(MarkerToken(), ": ",
2993 "UNKNOWN_GROUP ");
2994 OutOfLinePrintString(generator, kFieldValueReplacement);
2995 if (single_line_mode_) {
2996 generator->PrintLiteral(" ");
2997 } else {
2998 generator->PrintLiteral("\n");
2999 }
3000 break;
3001 }
3002 if (single_line_mode_) {
3003 generator->PrintMaybeWithMarker(MarkerToken(), " ", "{ ");
3004 } else {
3005 generator->PrintMaybeWithMarker(MarkerToken(), " ", "{\n");
3006 generator->Indent();
3007 }
3008 // For groups, we recurse without checking the budget. This is OK,
3009 // because if the groups were too deeply nested then we would have
3010 // already rejected the message when we originally parsed it.
3011 PrintUnknownFields(field.group(), generator, recursion_budget - 1);
3012 if (single_line_mode_) {
3013 generator->PrintLiteral("} ");
3014 } else {
3015 generator->Outdent();
3016 generator->PrintLiteral("}\n");
3017 }
3018 break;
3019 }
3020 }
3021 }
3022
3023 namespace internal {
3024
3025 // Check if the field is sensitive and should be redacted.
ShouldRedactField(const FieldDescriptor * field)3026 bool ShouldRedactField(const FieldDescriptor* field) {
3027 if (field->options().debug_redact()) return true;
3028 return false;
3029 }
3030
3031 } // namespace internal
3032
TryRedactFieldValue(const Message & message,const FieldDescriptor * field,BaseTextGenerator * generator,bool insert_value_separator) const3033 bool TextFormat::Printer::TryRedactFieldValue(
3034 const Message& message, const FieldDescriptor* field,
3035 BaseTextGenerator* generator, bool insert_value_separator) const {
3036 if (internal::ShouldRedactField(field)) {
3037 if (redact_debug_string_) {
3038 IncrementRedactedFieldCounter();
3039 if (insert_value_separator) {
3040 generator->PrintMaybeWithMarker(MarkerToken(), ": ");
3041 }
3042 generator->PrintString(kFieldValueReplacement);
3043 if (insert_value_separator) {
3044 if (single_line_mode_) {
3045 generator->PrintLiteral(" ");
3046 } else {
3047 generator->PrintLiteral("\n");
3048 }
3049 }
3050 return true;
3051 }
3052 }
3053 return false;
3054 }
3055
3056 } // namespace protobuf
3057 } // namespace google
3058
3059 #include "google/protobuf/port_undef.inc"
3060