1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: jschorr@google.com (Joseph Schorr)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #include <google/protobuf/text_format.h>
36
37 #include <float.h>
38 #include <stdio.h>
39
40 #include <algorithm>
41 #include <atomic>
42 #include <climits>
43 #include <cmath>
44 #include <limits>
45 #include <vector>
46
47 #include <google/protobuf/io/coded_stream.h>
48 #include <google/protobuf/io/tokenizer.h>
49 #include <google/protobuf/io/zero_copy_stream.h>
50 #include <google/protobuf/io/zero_copy_stream_impl.h>
51 #include <google/protobuf/stubs/strutil.h>
52 #include <google/protobuf/any.h>
53 #include <google/protobuf/descriptor.h>
54 #include <google/protobuf/descriptor.pb.h>
55 #include <google/protobuf/dynamic_message.h>
56 #include <google/protobuf/io/strtod.h>
57 #include <google/protobuf/map_field.h>
58 #include <google/protobuf/message.h>
59 #include <google/protobuf/repeated_field.h>
60 #include <google/protobuf/unknown_field_set.h>
61 #include <google/protobuf/wire_format_lite.h>
62 #include <google/protobuf/stubs/map_util.h>
63 #include <google/protobuf/stubs/stl_util.h>
64
65 // Must be included last.
66 #include <google/protobuf/port_def.inc>
67
68 #define DEBUG_STRING_SILENT_MARKER "\t "
69
70 namespace google {
71 namespace protobuf {
72
73 namespace {
74
// Returns true when `str` starts with a hexadecimal prefix ("0x" or "0X").
// Only the two leading characters are inspected; the rest of the string is
// not validated.
inline bool IsHexNumber(const std::string& str) {
  if (str.size() < 2 || str[0] != '0') {
    return false;
  }
  const char marker = str[1];
  return marker == 'x' || marker == 'X';
}
79
// Returns true when `str` looks like an octal literal: a leading '0'
// immediately followed by an octal digit ('0'..'7'). Characters after the
// second one are not examined.
inline bool IsOctNumber(const std::string& str) {
  if (str.size() < 2) {
    return false;
  }
  if (str[0] != '0') {
    return false;
  }
  const char digit = str[1];
  return '0' <= digit && digit <= '7';
}
84
85 } // namespace
86
87 namespace internal {
88 // Controls insertion of DEBUG_STRING_SILENT_MARKER.
89 PROTOBUF_EXPORT std::atomic<bool> enable_debug_text_format_marker;
90 } // namespace internal
91
DebugString() const92 std::string Message::DebugString() const {
93 std::string debug_string;
94
95 TextFormat::Printer printer;
96 printer.SetExpandAny(true);
97 printer.SetInsertSilentMarker(internal::enable_debug_text_format_marker.load(
98 std::memory_order_relaxed));
99
100 printer.PrintToString(*this, &debug_string);
101
102 return debug_string;
103 }
104
ShortDebugString() const105 std::string Message::ShortDebugString() const {
106 std::string debug_string;
107
108 TextFormat::Printer printer;
109 printer.SetSingleLineMode(true);
110 printer.SetExpandAny(true);
111 printer.SetInsertSilentMarker(internal::enable_debug_text_format_marker.load(
112 std::memory_order_relaxed));
113
114 printer.PrintToString(*this, &debug_string);
115 // Single line mode currently might have an extra space at the end.
116 if (!debug_string.empty() && debug_string[debug_string.size() - 1] == ' ') {
117 debug_string.resize(debug_string.size() - 1);
118 }
119
120 return debug_string;
121 }
122
Utf8DebugString() const123 std::string Message::Utf8DebugString() const {
124 std::string debug_string;
125
126 TextFormat::Printer printer;
127 printer.SetUseUtf8StringEscaping(true);
128 printer.SetExpandAny(true);
129 printer.SetInsertSilentMarker(internal::enable_debug_text_format_marker.load(
130 std::memory_order_relaxed));
131
132 printer.PrintToString(*this, &debug_string);
133
134 return debug_string;
135 }
136
PrintDebugString() const137 void Message::PrintDebugString() const { printf("%s", DebugString().c_str()); }
138
139
140 // ===========================================================================
141 // Implementation of the parse information tree class.
RecordLocation(const FieldDescriptor * field,TextFormat::ParseLocationRange range)142 void TextFormat::ParseInfoTree::RecordLocation(
143 const FieldDescriptor* field, TextFormat::ParseLocationRange range) {
144 locations_[field].push_back(range);
145 }
146
CreateNested(const FieldDescriptor * field)147 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::CreateNested(
148 const FieldDescriptor* field) {
149 // Owned by us in the map.
150 auto& vec = nested_[field];
151 vec.emplace_back(new TextFormat::ParseInfoTree());
152 return vec.back().get();
153 }
154
CheckFieldIndex(const FieldDescriptor * field,int index)155 void CheckFieldIndex(const FieldDescriptor* field, int index) {
156 if (field == nullptr) {
157 return;
158 }
159
160 if (field->is_repeated() && index == -1) {
161 GOOGLE_LOG(DFATAL) << "Index must be in range of repeated field values. "
162 << "Field: " << field->name();
163 } else if (!field->is_repeated() && index != -1) {
164 GOOGLE_LOG(DFATAL) << "Index must be -1 for singular fields."
165 << "Field: " << field->name();
166 }
167 }
168
GetLocationRange(const FieldDescriptor * field,int index) const169 TextFormat::ParseLocationRange TextFormat::ParseInfoTree::GetLocationRange(
170 const FieldDescriptor* field, int index) const {
171 CheckFieldIndex(field, index);
172 if (index == -1) {
173 index = 0;
174 }
175
176 const std::vector<TextFormat::ParseLocationRange>* locations =
177 FindOrNull(locations_, field);
178 if (locations == nullptr ||
179 index >= static_cast<int64_t>(locations->size())) {
180 return TextFormat::ParseLocationRange();
181 }
182
183 return (*locations)[index];
184 }
185
GetTreeForNested(const FieldDescriptor * field,int index) const186 TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::GetTreeForNested(
187 const FieldDescriptor* field, int index) const {
188 CheckFieldIndex(field, index);
189 if (index == -1) {
190 index = 0;
191 }
192
193 auto it = nested_.find(field);
194 if (it == nested_.end() || index >= static_cast<int64_t>(it->second.size())) {
195 return nullptr;
196 }
197
198 return it->second[index].get();
199 }
200
201 namespace {
202 // These functions implement the behavior of the "default" TextFormat::Finder,
203 // they are defined as standalone to be called when finder_ is nullptr.
DefaultFinderFindExtension(Message * message,const std::string & name)204 const FieldDescriptor* DefaultFinderFindExtension(Message* message,
205 const std::string& name) {
206 const Descriptor* descriptor = message->GetDescriptor();
207 return descriptor->file()->pool()->FindExtensionByPrintableName(descriptor,
208 name);
209 }
210
DefaultFinderFindExtensionByNumber(const Descriptor * descriptor,int number)211 const FieldDescriptor* DefaultFinderFindExtensionByNumber(
212 const Descriptor* descriptor, int number) {
213 return descriptor->file()->pool()->FindExtensionByNumber(descriptor, number);
214 }
215
DefaultFinderFindAnyType(const Message & message,const std::string & prefix,const std::string & name)216 const Descriptor* DefaultFinderFindAnyType(const Message& message,
217 const std::string& prefix,
218 const std::string& name) {
219 if (prefix != internal::kTypeGoogleApisComPrefix &&
220 prefix != internal::kTypeGoogleProdComPrefix) {
221 return nullptr;
222 }
223 return message.GetDescriptor()->file()->pool()->FindMessageTypeByName(name);
224 }
225 } // namespace
226
227 // ===========================================================================
// Internal class for parsing an ASCII representation of a Protocol Message.
// This class makes use of the Protocol Message compiler's tokenizer found
// in //net/proto2/io/public/tokenizer.h. Note that this class's Parse
// method is *not* thread-safe and should only be used in a single thread at
// a time.
233
// Makes code slightly more readable.  The meaning of "DO(foo)" is
// "Execute foo and fail if it fails.", where failure is indicated by
// returning false. Borrowed from parser.cc (Thanks Kenton!).
//
// The macro may only be used inside functions returning bool (or a type
// constructible from `false`), because it expands to `return false;`.
// It is written as an if/else with an empty true-branch so that the macro's
// own `if` already has an `else`; an `else` written after DO(...) by the
// caller therefore cannot attach to it.
#define DO(STATEMENT) \
  if (STATEMENT) {    \
  } else {            \
    return false;     \
  }
242
243 class TextFormat::Parser::ParserImpl {
244 public:
// Determines if repeated values for non-repeated fields and
// oneofs are permitted, e.g., the string "foo: 1 foo: 2" for a
// required/optional field named "foo", or "baz: 1 qux: 2"
// where "baz" and "qux" are members of the same oneof.
//
// ConsumeField() consults this policy before storing a value; under
// FORBID_SINGULAR_OVERWRITES it reports an error and stops parsing.
enum SingularOverwritePolicy {
  ALLOW_SINGULAR_OVERWRITES = 0,   // the last value is retained
  FORBID_SINGULAR_OVERWRITES = 1,  // an error is issued
};
253
// Builds a parser over `input_stream` and primes the tokenizer with its
// first token.
//
// root_message_type: used only to name the message type in log output when
//     no error collector is supplied.
// error_collector: receives errors/warnings; when null they are logged.
// finder: optional lookup hooks; when null the DefaultFinder* functions
//     are used.
// parse_info_tree: optional; when non-null, field locations are recorded.
// The allow_* flags and singular_overwrite_policy are stored as-is and
// consulted while parsing. allow_relaxed_whitespace is not stored; it only
// configures the tokenizer here.
// recursion_limit: initial budget decremented for each nested message/value.
ParserImpl(const Descriptor* root_message_type,
           io::ZeroCopyInputStream* input_stream,
           io::ErrorCollector* error_collector,
           const TextFormat::Finder* finder, ParseInfoTree* parse_info_tree,
           SingularOverwritePolicy singular_overwrite_policy,
           bool allow_case_insensitive_field, bool allow_unknown_field,
           bool allow_unknown_extension, bool allow_unknown_enum,
           bool allow_field_number, bool allow_relaxed_whitespace,
           bool allow_partial, int recursion_limit)
    : error_collector_(error_collector),
      finder_(finder),
      parse_info_tree_(parse_info_tree),
      tokenizer_error_collector_(this),
      tokenizer_(input_stream, &tokenizer_error_collector_),
      root_message_type_(root_message_type),
      singular_overwrite_policy_(singular_overwrite_policy),
      allow_case_insensitive_field_(allow_case_insensitive_field),
      allow_unknown_field_(allow_unknown_field),
      allow_unknown_extension_(allow_unknown_extension),
      allow_unknown_enum_(allow_unknown_enum),
      allow_field_number_(allow_field_number),
      allow_partial_(allow_partial),
      initial_recursion_limit_(recursion_limit),
      recursion_limit_(recursion_limit),
      had_silent_marker_(false),
      had_errors_(false) {
  // For backwards-compatibility with proto1, we need to allow the 'f' suffix
  // for floats.
  tokenizer_.set_allow_f_after_float(true);

  // '#' starts a comment.
  tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);

  if (allow_relaxed_whitespace) {
    tokenizer_.set_require_space_after_number(false);
    tokenizer_.set_allow_multiline_strings(true);
  }

  // Consume the starting token. This must come after the tokenizer options
  // above have been applied, since it already tokenizes input.
  tokenizer_.Next();
}
// Nothing to release explicitly; members clean up after themselves.
~ParserImpl() = default;
296
297 // Parses the ASCII representation specified in input and saves the
298 // information into the output pointer (a Message). Returns
299 // false if an error occurs (an error will also be logged to
300 // GOOGLE_LOG(ERROR)).
Parse(Message * output)301 bool Parse(Message* output) {
302 // Consume fields until we cannot do so anymore.
303 while (true) {
304 if (LookingAtType(io::Tokenizer::TYPE_END)) {
305 // Ensures recursion limit properly unwinded, but only for success
306 // cases. This implicitly avoids the check when `Parse` returns false
307 // via `DO(...)`.
308 GOOGLE_DCHECK(had_errors_ || recursion_limit_ == initial_recursion_limit_)
309 << "Recursion limit at end of parse should be "
310 << initial_recursion_limit_ << ", but was " << recursion_limit_
311 << ". Difference of " << initial_recursion_limit_ - recursion_limit_
312 << " stack frames not accounted for stack unwind.";
313
314 return !had_errors_;
315 }
316
317 DO(ConsumeField(output));
318 }
319 }
320
ParseField(const FieldDescriptor * field,Message * output)321 bool ParseField(const FieldDescriptor* field, Message* output) {
322 bool suc;
323 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
324 suc = ConsumeFieldMessage(output, output->GetReflection(), field);
325 } else {
326 suc = ConsumeFieldValue(output, output->GetReflection(), field);
327 }
328 return suc && LookingAtType(io::Tokenizer::TYPE_END);
329 }
330
ReportError(int line,int col,const std::string & message)331 void ReportError(int line, int col, const std::string& message) {
332 had_errors_ = true;
333 if (error_collector_ == nullptr) {
334 if (line >= 0) {
335 GOOGLE_LOG(ERROR) << "Error parsing text-format "
336 << root_message_type_->full_name() << ": " << (line + 1)
337 << ":" << (col + 1) << ": " << message;
338 } else {
339 GOOGLE_LOG(ERROR) << "Error parsing text-format "
340 << root_message_type_->full_name() << ": " << message;
341 }
342 } else {
343 error_collector_->AddError(line, col, message);
344 }
345 }
346
ReportWarning(int line,int col,const std::string & message)347 void ReportWarning(int line, int col, const std::string& message) {
348 if (error_collector_ == nullptr) {
349 if (line >= 0) {
350 GOOGLE_LOG(WARNING) << "Warning parsing text-format "
351 << root_message_type_->full_name() << ": " << (line + 1)
352 << ":" << (col + 1) << ": " << message;
353 } else {
354 GOOGLE_LOG(WARNING) << "Warning parsing text-format "
355 << root_message_type_->full_name() << ": " << message;
356 }
357 } else {
358 error_collector_->AddWarning(line, col, message);
359 }
360 }
361
362 private:
// Integer limits passed as the bound argument to ConsumeSignedInteger /
// ConsumeUnsignedInteger when parsing numeric fields. kint64max also serves
// as the "no integer parsed" sentinel in the enum branch of
// ConsumeFieldValue.
static constexpr int32_t kint32max = std::numeric_limits<int32_t>::max();
static constexpr uint32_t kuint32max = std::numeric_limits<uint32_t>::max();
static constexpr int64_t kint64min = std::numeric_limits<int64_t>::min();
static constexpr int64_t kint64max = std::numeric_limits<int64_t>::max();
static constexpr uint64_t kuint64max = std::numeric_limits<uint64_t>::max();

// NOTE(review): presumably disables copy construction/assignment for
// ParserImpl — confirm against the macro definition.
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl);
370
371 // Reports an error with the given message with information indicating
372 // the position (as derived from the current token).
ReportError(const std::string & message)373 void ReportError(const std::string& message) {
374 ReportError(tokenizer_.current().line, tokenizer_.current().column,
375 message);
376 }
377
378 // Reports a warning with the given message with information indicating
379 // the position (as derived from the current token).
ReportWarning(const std::string & message)380 void ReportWarning(const std::string& message) {
381 ReportWarning(tokenizer_.current().line, tokenizer_.current().column,
382 message);
383 }
384
385 // Consumes the specified message with the given starting delimiter.
386 // This method checks to see that the end delimiter at the conclusion of
387 // the consumption matches the starting delimiter passed in here.
ConsumeMessage(Message * message,const std::string delimiter)388 bool ConsumeMessage(Message* message, const std::string delimiter) {
389 while (!LookingAt(">") && !LookingAt("}")) {
390 DO(ConsumeField(message));
391 }
392
393 // Confirm that we have a valid ending delimiter.
394 DO(Consume(delimiter));
395 return true;
396 }
397
398 // Consume either "<" or "{".
ConsumeMessageDelimiter(std::string * delimiter)399 bool ConsumeMessageDelimiter(std::string* delimiter) {
400 if (TryConsume("<")) {
401 *delimiter = ">";
402 } else {
403 DO(Consume("{"));
404 *delimiter = "}";
405 }
406 return true;
407 }
408
409
// Consumes the current field (as returned by the tokenizer) on the
// passed in message.
//
// Handles, in order:
//   1. The expanded Any form "[prefix/type.name] { ... }" when `message`
//      exposes Any type_url/value fields.
//   2. Extensions written as "[full.name]".
//   3. Regular fields named by identifier (or by number when
//      allow_field_number_ is set), including the group-name special case.
// Unknown or reserved fields are skipped when the corresponding allow_* flag
// permits it. Returns false on any parse error.
bool ConsumeField(Message* message) {
  const Reflection* reflection = message->GetReflection();
  const Descriptor* descriptor = message->GetDescriptor();

  std::string field_name;
  bool reserved_field = false;
  const FieldDescriptor* field = nullptr;
  // Remember where the field starts so its location range can be recorded
  // once it has been fully parsed.
  int start_line = tokenizer_.current().line;
  int start_column = tokenizer_.current().column;

  const FieldDescriptor* any_type_url_field;
  const FieldDescriptor* any_value_field;
  // Expanded Any: only attempted when GetAnyFieldDescriptors succeeds for
  // this message; the "[" is consumed only in that case (short-circuit).
  if (internal::GetAnyFieldDescriptors(*message, &any_type_url_field,
                                       &any_value_field) &&
      TryConsume("[")) {
    std::string full_type_name, prefix;
    DO(ConsumeAnyTypeUrl(&full_type_name, &prefix));
    std::string prefix_and_full_type_name =
        StrCat(prefix, full_type_name);
    DO(ConsumeBeforeWhitespace("]"));
    TryConsumeWhitespace();
    // ':' is optional between message labels and values.
    if (TryConsumeBeforeWhitespace(":")) {
      TryConsumeWhitespace();
    }
    std::string serialized_value;
    const Descriptor* value_descriptor =
        finder_ ? finder_->FindAnyType(*message, prefix, full_type_name)
                : DefaultFinderFindAnyType(*message, prefix, full_type_name);
    if (value_descriptor == nullptr) {
      ReportError("Could not find type \"" + prefix_and_full_type_name +
                  "\" stored in google.protobuf.Any.");
      return false;
    }
    DO(ConsumeAnyValue(value_descriptor, &serialized_value));
    if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
      // Fail if any_type_url_field has already been specified.
      if ((!any_type_url_field->is_repeated() &&
           reflection->HasField(*message, any_type_url_field)) ||
          (!any_value_field->is_repeated() &&
           reflection->HasField(*message, any_value_field))) {
        ReportError("Non-repeated Any specified multiple times.");
        return false;
      }
    }
    reflection->SetString(message, any_type_url_field,
                          prefix_and_full_type_name);
    reflection->SetString(message, any_value_field, serialized_value);
    // This branch returns directly: no trailing separator is consumed and no
    // location is recorded for the expanded Any.
    return true;
  }
  if (TryConsume("[")) {
    // Extension.
    DO(ConsumeFullTypeName(&field_name));
    DO(ConsumeBeforeWhitespace("]"));
    TryConsumeWhitespace();

    field = finder_ ? finder_->FindExtension(message, field_name)
                    : DefaultFinderFindExtension(message, field_name);

    if (field == nullptr) {
      // Either flag is enough to downgrade an unknown extension to a warning.
      if (!allow_unknown_field_ && !allow_unknown_extension_) {
        ReportError("Extension \"" + field_name +
                    "\" is not defined or "
                    "is not an extension of \"" +
                    descriptor->full_name() + "\".");
        return false;
      } else {
        ReportWarning("Ignoring extension \"" + field_name +
                      "\" which is not defined or is not an extension of \"" +
                      descriptor->full_name() + "\".");
      }
    }
  } else {
    DO(ConsumeIdentifierBeforeWhitespace(&field_name));
    TryConsumeWhitespace();

    int32_t field_number;
    // When field numbers are allowed and the name parses as an integer, look
    // the field up by number instead of by name.
    if (allow_field_number_ && safe_strto32(field_name, &field_number)) {
      if (descriptor->IsExtensionNumber(field_number)) {
        field = finder_
                    ? finder_->FindExtensionByNumber(descriptor, field_number)
                    : DefaultFinderFindExtensionByNumber(descriptor,
                                                         field_number);
      } else if (descriptor->IsReservedNumber(field_number)) {
        reserved_field = true;
      } else {
        field = descriptor->FindFieldByNumber(field_number);
      }
    } else {
      field = descriptor->FindFieldByName(field_name);
      // Group names are expected to be capitalized as they appear in the
      // .proto file, which actually matches their type names, not their
      // field names.
      if (field == nullptr) {
        std::string lower_field_name = field_name;
        LowerString(&lower_field_name);
        field = descriptor->FindFieldByName(lower_field_name);
        // If the lowercased lookup matched but the field is NOT a group,
        // discard the match: only groups may be named this way.
        if (field != nullptr &&
            field->type() != FieldDescriptor::TYPE_GROUP) {
          field = nullptr;
        }
      }
      // Again, special-case group names as described above: a group must be
      // written exactly as its message type name.
      if (field != nullptr && field->type() == FieldDescriptor::TYPE_GROUP &&
          field->message_type()->name() != field_name) {
        field = nullptr;
      }

      if (field == nullptr && allow_case_insensitive_field_) {
        std::string lower_field_name = field_name;
        LowerString(&lower_field_name);
        field = descriptor->FindFieldByLowercaseName(lower_field_name);
      }

      if (field == nullptr) {
        reserved_field = descriptor->IsReservedName(field_name);
      }
    }

    if (field == nullptr && !reserved_field) {
      if (!allow_unknown_field_) {
        ReportError("Message type \"" + descriptor->full_name() +
                    "\" has no field named \"" + field_name + "\".");
        return false;
      } else {
        ReportWarning("Message type \"" + descriptor->full_name() +
                      "\" has no field named \"" + field_name + "\".");
      }
    }
  }

  // Skips unknown or reserved fields.
  if (field == nullptr) {
    // Every path that leaves `field` null without returning above implies
    // one of these conditions.
    GOOGLE_CHECK(allow_unknown_field_ || allow_unknown_extension_ || reserved_field);

    // Try to guess the type of this field.
    // If this field is not a message, there should be a ":" between the
    // field name and the field value and also the field value should not
    // start with "{" or "<" which indicates the beginning of a message body.
    // If there is no ":" or there is a "{" or "<" after ":", this field has
    // to be a message or the input is ill-formed.
    //
    // NOTE(review): unlike the known-field path below, these early returns
    // do not consume an optional trailing ';' or ',' — confirm this is
    // intended.
    if (TryConsumeBeforeWhitespace(":")) {
      TryConsumeWhitespace();
      if (!LookingAt("{") && !LookingAt("<")) {
        return SkipFieldValue();
      }
    }
    return SkipFieldMessage();
  }

  if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
    // Fail if the field is not repeated and it has already been specified.
    if (!field->is_repeated() && reflection->HasField(*message, field)) {
      ReportError("Non-repeated field \"" + field_name +
                  "\" is specified multiple times.");
      return false;
    }
    // Fail if the field is a member of a oneof and another member has already
    // been specified.
    const OneofDescriptor* oneof = field->containing_oneof();
    if (oneof != nullptr && reflection->HasOneof(*message, oneof)) {
      const FieldDescriptor* other_field =
          reflection->GetOneofFieldDescriptor(*message, oneof);
      ReportError("Field \"" + field_name +
                  "\" is specified along with "
                  "field \"" +
                  other_field->name() +
                  "\", another member "
                  "of oneof \"" +
                  oneof->name() + "\".");
      return false;
    }
  }

  // Perform special handling for embedded message types.
  if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
    // ':' is optional here. (The variable name says "semicolon" but the
    // token consumed is a colon.)
    bool consumed_semicolon = TryConsumeBeforeWhitespace(":");
    if (consumed_semicolon) {
      TryConsumeWhitespace();
    }
    if (consumed_semicolon && field->options().weak() &&
        LookingAtType(io::Tokenizer::TYPE_STRING)) {
      // we are getting a bytes string for a weak field.
      std::string tmp;
      DO(ConsumeString(&tmp));
      MessageFactory* factory =
          finder_ ? finder_->FindExtensionFactory(field) : nullptr;
      // NOTE(review): the result of ParseFromString is not checked here, so
      // a malformed payload is not reported — confirm this is deliberate.
      reflection->MutableMessage(message, field, factory)
          ->ParseFromString(tmp);
      // The value is already stored; skip the generic value parsing below.
      goto label_skip_parsing;
    }
  } else {
    // ':' is required here.
    DO(ConsumeBeforeWhitespace(":"));
    TryConsumeWhitespace();
  }

  if (field->is_repeated() && TryConsume("[")) {
    // Short repeated format, e.g.  "foo: [1, 2, 3]".
    if (!TryConsume("]")) {
      // "foo: []" is treated as empty.
      while (true) {
        if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
          // Perform special handling for embedded message types.
          DO(ConsumeFieldMessage(message, reflection, field));
        } else {
          DO(ConsumeFieldValue(message, reflection, field));
        }
        if (TryConsume("]")) {
          break;
        }
        DO(Consume(","));
      }
    }
  } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
    DO(ConsumeFieldMessage(message, reflection, field));
  } else {
    DO(ConsumeFieldValue(message, reflection, field));
  }
label_skip_parsing:
  // For historical reasons, fields may optionally be separated by commas or
  // semicolons.
  TryConsume(";") || TryConsume(",");

  if (field->options().deprecated()) {
    ReportWarning("text format contains deprecated field \"" + field_name +
                  "\"");
  }

  // If a parse info tree exists, add the location for the parsed
  // field. The end position is taken from the last token consumed.
  if (parse_info_tree_ != nullptr) {
    int end_line = tokenizer_.previous().line;
    int end_column = tokenizer_.previous().end_column;

    RecordLocation(parse_info_tree_, field,
                   ParseLocationRange(ParseLocation(start_line, start_column),
                                      ParseLocation(end_line, end_column)));
  }

  return true;
}
656
657 // Skips the next field including the field's name and value.
SkipField()658 bool SkipField() {
659 std::string field_name;
660 if (TryConsume("[")) {
661 // Extension name or type URL.
662 DO(ConsumeTypeUrlOrFullTypeName(&field_name));
663 DO(ConsumeBeforeWhitespace("]"));
664 } else {
665 DO(ConsumeIdentifierBeforeWhitespace(&field_name));
666 }
667 TryConsumeWhitespace();
668
669 // Try to guess the type of this field.
670 // If this field is not a message, there should be a ":" between the
671 // field name and the field value and also the field value should not
672 // start with "{" or "<" which indicates the beginning of a message body.
673 // If there is no ":" or there is a "{" or "<" after ":", this field has
674 // to be a message or the input is ill-formed.
675 if (TryConsumeBeforeWhitespace(":")) {
676 TryConsumeWhitespace();
677 if (!LookingAt("{") && !LookingAt("<")) {
678 DO(SkipFieldValue());
679 } else {
680 DO(SkipFieldMessage());
681 }
682 } else {
683 DO(SkipFieldMessage());
684 }
685 // For historical reasons, fields may optionally be separated by commas or
686 // semicolons.
687 TryConsume(";") || TryConsume(",");
688 return true;
689 }
690
ConsumeFieldMessage(Message * message,const Reflection * reflection,const FieldDescriptor * field)691 bool ConsumeFieldMessage(Message* message, const Reflection* reflection,
692 const FieldDescriptor* field) {
693 if (--recursion_limit_ < 0) {
694 ReportError(
695 StrCat("Message is too deep, the parser exceeded the "
696 "configured recursion limit of ",
697 initial_recursion_limit_, "."));
698 return false;
699 }
700 // If the parse information tree is not nullptr, create a nested one
701 // for the nested message.
702 ParseInfoTree* parent = parse_info_tree_;
703 if (parent != nullptr) {
704 parse_info_tree_ = CreateNested(parent, field);
705 }
706
707 std::string delimiter;
708 DO(ConsumeMessageDelimiter(&delimiter));
709 MessageFactory* factory =
710 finder_ ? finder_->FindExtensionFactory(field) : nullptr;
711 if (field->is_repeated()) {
712 DO(ConsumeMessage(reflection->AddMessage(message, field, factory),
713 delimiter));
714 } else {
715 DO(ConsumeMessage(reflection->MutableMessage(message, field, factory),
716 delimiter));
717 }
718
719 ++recursion_limit_;
720
721 // Reset the parse information tree.
722 parse_info_tree_ = parent;
723 return true;
724 }
725
726 // Skips the whole body of a message including the beginning delimiter and
727 // the ending delimiter.
SkipFieldMessage()728 bool SkipFieldMessage() {
729 if (--recursion_limit_ < 0) {
730 ReportError(
731 StrCat("Message is too deep, the parser exceeded the "
732 "configured recursion limit of ",
733 initial_recursion_limit_, "."));
734 return false;
735 }
736
737 std::string delimiter;
738 DO(ConsumeMessageDelimiter(&delimiter));
739 while (!LookingAt(">") && !LookingAt("}")) {
740 DO(SkipField());
741 }
742 DO(Consume(delimiter));
743
744 ++recursion_limit_;
745 return true;
746 }
747
// Parses one non-message value from the tokenizer according to
// `field->cpp_type()` and stores it in `message` via `reflection` (Add* for
// repeated fields, Set* for singular ones). Returns false on a parse error.
// Must not be called for CPPTYPE_MESSAGE fields; that case is fatal.
bool ConsumeFieldValue(Message* message, const Reflection* reflection,
                       const FieldDescriptor* field) {
// Define an easy to use macro for setting fields. This macro checks
// to see if the field is repeated (in which case we need to use the Add
// methods or not (in which case we need to use the Set methods).
#define SET_FIELD(CPPTYPE, VALUE)                    \
  if (field->is_repeated()) {                        \
    reflection->Add##CPPTYPE(message, field, VALUE); \
  } else {                                           \
    reflection->Set##CPPTYPE(message, field, VALUE); \
  }

  switch (field->cpp_type()) {
    case FieldDescriptor::CPPTYPE_INT32: {
      // Parsed as 64-bit with a 32-bit bound, then narrowed.
      int64_t value;
      DO(ConsumeSignedInteger(&value, kint32max));
      SET_FIELD(Int32, static_cast<int32_t>(value));
      break;
    }

    case FieldDescriptor::CPPTYPE_UINT32: {
      // Parsed as 64-bit with a 32-bit bound, then narrowed.
      uint64_t value;
      DO(ConsumeUnsignedInteger(&value, kuint32max));
      SET_FIELD(UInt32, static_cast<uint32_t>(value));
      break;
    }

    case FieldDescriptor::CPPTYPE_INT64: {
      int64_t value;
      DO(ConsumeSignedInteger(&value, kint64max));
      SET_FIELD(Int64, value);
      break;
    }

    case FieldDescriptor::CPPTYPE_UINT64: {
      uint64_t value;
      DO(ConsumeUnsignedInteger(&value, kuint64max));
      SET_FIELD(UInt64, value);
      break;
    }

    case FieldDescriptor::CPPTYPE_FLOAT: {
      // Floats are parsed as double and converted with SafeDoubleToFloat.
      double value;
      DO(ConsumeDouble(&value));
      SET_FIELD(Float, io::SafeDoubleToFloat(value));
      break;
    }

    case FieldDescriptor::CPPTYPE_DOUBLE: {
      double value;
      DO(ConsumeDouble(&value));
      SET_FIELD(Double, value);
      break;
    }

    case FieldDescriptor::CPPTYPE_STRING: {
      std::string value;
      DO(ConsumeString(&value));
      SET_FIELD(String, value);
      break;
    }

    case FieldDescriptor::CPPTYPE_BOOL: {
      // Booleans may be written as an integer (bounded by 1 here) or as one
      // of the identifiers listed below.
      if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
        uint64_t value;
        DO(ConsumeUnsignedInteger(&value, 1));
        SET_FIELD(Bool, value);
      } else {
        std::string value;
        DO(ConsumeIdentifier(&value));
        if (value == "true" || value == "True" || value == "t") {
          SET_FIELD(Bool, true);
        } else if (value == "false" || value == "False" || value == "f") {
          SET_FIELD(Bool, false);
        } else {
          ReportError("Invalid value for boolean field \"" + field->name() +
                      "\". Value: \"" + value + "\".");
          return false;
        }
      }
      break;
    }

    case FieldDescriptor::CPPTYPE_ENUM: {
      std::string value;
      // kint64max doubles as "no integer was parsed": it stays unchanged
      // when the value was given as an identifier.
      int64_t int_value = kint64max;
      const EnumDescriptor* enum_type = field->enum_type();
      const EnumValueDescriptor* enum_value = nullptr;

      if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
        DO(ConsumeIdentifier(&value));
        // Find the enumeration value.
        enum_value = enum_type->FindValueByName(value);

      } else if (LookingAt("-") ||
                 LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
        DO(ConsumeSignedInteger(&int_value, kint32max));
        value = StrCat(int_value);  // for error reporting
        enum_value = enum_type->FindValueByNumber(int_value);
      } else {
        ReportError("Expected integer or identifier, got: " +
                    tokenizer_.current().text);
        return false;
      }

      if (enum_value == nullptr) {
        // Unknown value. A numeric one is stored as-is when the reflection
        // supports unknown enum values; otherwise it is an error, or merely
        // a warning (with nothing stored) when allow_unknown_enum_ is set.
        if (int_value != kint64max &&
            reflection->SupportsUnknownEnumValues()) {
          SET_FIELD(EnumValue, int_value);
          return true;
        } else if (!allow_unknown_enum_) {
          ReportError("Unknown enumeration value of \"" + value +
                      "\" for "
                      "field \"" +
                      field->name() + "\".");
          return false;
        } else {
          ReportWarning("Unknown enumeration value of \"" + value +
                        "\" for "
                        "field \"" +
                        field->name() + "\".");
          return true;
        }
      }

      SET_FIELD(Enum, enum_value);
      break;
    }

    case FieldDescriptor::CPPTYPE_MESSAGE: {
      // We should never get here. Put here instead of a default
      // so that if new types are added, we get a nice compiler warning.
      GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
      break;
    }
  }
#undef SET_FIELD
  return true;
}
887
SkipFieldValue()888 bool SkipFieldValue() {
889 if (--recursion_limit_ < 0) {
890 ReportError(
891 StrCat("Message is too deep, the parser exceeded the "
892 "configured recursion limit of ",
893 initial_recursion_limit_, "."));
894 return false;
895 }
896
897 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
898 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
899 tokenizer_.Next();
900 }
901 ++recursion_limit_;
902 return true;
903 }
904 if (TryConsume("[")) {
905 while (true) {
906 if (!LookingAt("{") && !LookingAt("<")) {
907 DO(SkipFieldValue());
908 } else {
909 DO(SkipFieldMessage());
910 }
911 if (TryConsume("]")) {
912 break;
913 }
914 DO(Consume(","));
915 }
916 ++recursion_limit_;
917 return true;
918 }
919 // Possible field values other than string:
920 // 12345 => TYPE_INTEGER
921 // -12345 => TYPE_SYMBOL + TYPE_INTEGER
922 // 1.2345 => TYPE_FLOAT
923 // -1.2345 => TYPE_SYMBOL + TYPE_FLOAT
924 // inf => TYPE_IDENTIFIER
925 // -inf => TYPE_SYMBOL + TYPE_IDENTIFIER
926 // TYPE_INTEGER => TYPE_IDENTIFIER
927 // Divides them into two group, one with TYPE_SYMBOL
928 // and the other without:
929 // Group one:
930 // 12345 => TYPE_INTEGER
931 // 1.2345 => TYPE_FLOAT
932 // inf => TYPE_IDENTIFIER
933 // TYPE_INTEGER => TYPE_IDENTIFIER
934 // Group two:
935 // -12345 => TYPE_SYMBOL + TYPE_INTEGER
936 // -1.2345 => TYPE_SYMBOL + TYPE_FLOAT
937 // -inf => TYPE_SYMBOL + TYPE_IDENTIFIER
938 // As we can see, the field value consists of an optional '-' and one of
939 // TYPE_INTEGER, TYPE_FLOAT and TYPE_IDENTIFIER.
940 bool has_minus = TryConsume("-");
941 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
942 !LookingAtType(io::Tokenizer::TYPE_FLOAT) &&
943 !LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
944 std::string text = tokenizer_.current().text;
945 ReportError("Cannot skip field value, unexpected token: " + text);
946 ++recursion_limit_;
947 return false;
948 }
949 // Combination of '-' and TYPE_IDENTIFIER may result in an invalid field
950 // value while other combinations all generate valid values.
951 // We check if the value of this combination is valid here.
952 // TYPE_IDENTIFIER after a '-' should be one of the float values listed
953 // below:
954 // inf, inff, infinity, nan
955 if (has_minus && LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
956 std::string text = tokenizer_.current().text;
957 LowerString(&text);
958 if (text != "inf" &&
959 text != "infinity" && text != "nan") {
960 ReportError("Invalid float number: " + text);
961 ++recursion_limit_;
962 return false;
963 }
964 }
965 tokenizer_.Next();
966 ++recursion_limit_;
967 return true;
968 }
969
970 // Returns true if the current token's text is equal to that specified.
LookingAt(const std::string & text)971 bool LookingAt(const std::string& text) {
972 return tokenizer_.current().text == text;
973 }
974
975 // Returns true if the current token's type is equal to that specified.
LookingAtType(io::Tokenizer::TokenType token_type)976 bool LookingAtType(io::Tokenizer::TokenType token_type) {
977 return tokenizer_.current().type == token_type;
978 }
979
980 // Consumes an identifier and saves its value in the identifier parameter.
981 // Returns false if the token is not of type IDENTIFIER.
ConsumeIdentifier(std::string * identifier)982 bool ConsumeIdentifier(std::string* identifier) {
983 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
984 *identifier = tokenizer_.current().text;
985 tokenizer_.Next();
986 return true;
987 }
988
989 // If allow_field_numer_ or allow_unknown_field_ is true, we should able
990 // to parse integer identifiers.
991 if ((allow_field_number_ || allow_unknown_field_ ||
992 allow_unknown_extension_) &&
993 LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
994 *identifier = tokenizer_.current().text;
995 tokenizer_.Next();
996 return true;
997 }
998
999 ReportError("Expected identifier, got: " + tokenizer_.current().text);
1000 return false;
1001 }
1002
1003 // Similar to `ConsumeIdentifier`, but any following whitespace token may
1004 // be reported.
ConsumeIdentifierBeforeWhitespace(std::string * identifier)1005 bool ConsumeIdentifierBeforeWhitespace(std::string* identifier) {
1006 tokenizer_.set_report_whitespace(true);
1007 bool result = ConsumeIdentifier(identifier);
1008 tokenizer_.set_report_whitespace(false);
1009 return result;
1010 }
1011
1012 // Consume a string of form "<id1>.<id2>....<idN>".
ConsumeFullTypeName(std::string * name)1013 bool ConsumeFullTypeName(std::string* name) {
1014 DO(ConsumeIdentifier(name));
1015 while (TryConsume(".")) {
1016 std::string part;
1017 DO(ConsumeIdentifier(&part));
1018 *name += ".";
1019 *name += part;
1020 }
1021 return true;
1022 }
1023
ConsumeTypeUrlOrFullTypeName(std::string * name)1024 bool ConsumeTypeUrlOrFullTypeName(std::string* name) {
1025 DO(ConsumeIdentifier(name));
1026 while (true) {
1027 std::string connector;
1028 if (TryConsume(".")) {
1029 connector = ".";
1030 } else if (TryConsume("/")) {
1031 connector = "/";
1032 } else {
1033 break;
1034 }
1035 std::string part;
1036 DO(ConsumeIdentifier(&part));
1037 *name += connector;
1038 *name += part;
1039 }
1040 return true;
1041 }
1042
1043 // Consumes a string and saves its value in the text parameter.
1044 // Returns false if the token is not of type STRING.
ConsumeString(std::string * text)1045 bool ConsumeString(std::string* text) {
1046 if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
1047 ReportError("Expected string, got: " + tokenizer_.current().text);
1048 return false;
1049 }
1050
1051 text->clear();
1052 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
1053 io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
1054
1055 tokenizer_.Next();
1056 }
1057
1058 return true;
1059 }
1060
1061 // Consumes a uint64_t and saves its value in the value parameter.
1062 // Returns false if the token is not of type INTEGER.
ConsumeUnsignedInteger(uint64_t * value,uint64_t max_value)1063 bool ConsumeUnsignedInteger(uint64_t* value, uint64_t max_value) {
1064 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
1065 ReportError("Expected integer, got: " + tokenizer_.current().text);
1066 return false;
1067 }
1068
1069 if (!io::Tokenizer::ParseInteger(tokenizer_.current().text, max_value,
1070 value)) {
1071 ReportError("Integer out of range (" + tokenizer_.current().text + ")");
1072 return false;
1073 }
1074
1075 tokenizer_.Next();
1076 return true;
1077 }
1078
1079 // Consumes an int64_t and saves its value in the value parameter.
1080 // Note that since the tokenizer does not support negative numbers,
1081 // we actually may consume an additional token (for the minus sign) in this
1082 // method. Returns false if the token is not an integer
1083 // (signed or otherwise).
ConsumeSignedInteger(int64_t * value,uint64_t max_value)1084 bool ConsumeSignedInteger(int64_t* value, uint64_t max_value) {
1085 bool negative = false;
1086
1087 if (TryConsume("-")) {
1088 negative = true;
1089 // Two's complement always allows one more negative integer than
1090 // positive.
1091 ++max_value;
1092 }
1093
1094 uint64_t unsigned_value;
1095
1096 DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
1097
1098 if (negative) {
1099 if ((static_cast<uint64_t>(kint64max) + 1) == unsigned_value) {
1100 *value = kint64min;
1101 } else {
1102 *value = -static_cast<int64_t>(unsigned_value);
1103 }
1104 } else {
1105 *value = static_cast<int64_t>(unsigned_value);
1106 }
1107
1108 return true;
1109 }
1110
1111 // Consumes a double and saves its value in the value parameter.
1112 // Accepts decimal numbers only, rejects hex or oct numbers.
ConsumeUnsignedDecimalAsDouble(double * value,uint64_t max_value)1113 bool ConsumeUnsignedDecimalAsDouble(double* value, uint64_t max_value) {
1114 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
1115 ReportError("Expected integer, got: " + tokenizer_.current().text);
1116 return false;
1117 }
1118
1119 const std::string& text = tokenizer_.current().text;
1120 if (IsHexNumber(text) || IsOctNumber(text)) {
1121 ReportError("Expect a decimal number, got: " + text);
1122 return false;
1123 }
1124
1125 uint64_t uint64_value;
1126 if (io::Tokenizer::ParseInteger(text, max_value, &uint64_value)) {
1127 *value = static_cast<double>(uint64_value);
1128 } else {
1129 // Uint64 overflow, attempt to parse as a double instead.
1130 *value = io::Tokenizer::ParseFloat(text);
1131 }
1132
1133 tokenizer_.Next();
1134 return true;
1135 }
1136
1137 // Consumes a double and saves its value in the value parameter.
1138 // Note that since the tokenizer does not support negative numbers,
1139 // we actually may consume an additional token (for the minus sign) in this
1140 // method. Returns false if the token is not a double
1141 // (signed or otherwise).
ConsumeDouble(double * value)1142 bool ConsumeDouble(double* value) {
1143 bool negative = false;
1144
1145 if (TryConsume("-")) {
1146 negative = true;
1147 }
1148
1149 // A double can actually be an integer, according to the tokenizer.
1150 // Therefore, we must check both cases here.
1151 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
1152 // We have found an integer value for the double.
1153 DO(ConsumeUnsignedDecimalAsDouble(value, kuint64max));
1154 } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
1155 // We have found a float value for the double.
1156 *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
1157
1158 // Mark the current token as consumed.
1159 tokenizer_.Next();
1160 } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
1161 std::string text = tokenizer_.current().text;
1162 LowerString(&text);
1163 if (text == "inf" ||
1164 text == "infinity") {
1165 *value = std::numeric_limits<double>::infinity();
1166 tokenizer_.Next();
1167 } else if (text == "nan") {
1168 *value = std::numeric_limits<double>::quiet_NaN();
1169 tokenizer_.Next();
1170 } else {
1171 ReportError("Expected double, got: " + text);
1172 return false;
1173 }
1174 } else {
1175 ReportError("Expected double, got: " + tokenizer_.current().text);
1176 return false;
1177 }
1178
1179 if (negative) {
1180 *value = -*value;
1181 }
1182
1183 return true;
1184 }
1185
1186 // Consumes Any::type_url value, of form "type.googleapis.com/full.type.Name"
1187 // or "type.googleprod.com/full.type.Name"
ConsumeAnyTypeUrl(std::string * full_type_name,std::string * prefix)1188 bool ConsumeAnyTypeUrl(std::string* full_type_name, std::string* prefix) {
1189 // TODO(saito) Extend Consume() to consume multiple tokens at once, so that
1190 // this code can be written as just DO(Consume(kGoogleApisTypePrefix)).
1191 DO(ConsumeIdentifier(prefix));
1192 while (TryConsume(".")) {
1193 std::string url;
1194 DO(ConsumeIdentifier(&url));
1195 *prefix += "." + url;
1196 }
1197 DO(Consume("/"));
1198 *prefix += "/";
1199 DO(ConsumeFullTypeName(full_type_name));
1200
1201 return true;
1202 }
1203
1204 // A helper function for reconstructing Any::value. Consumes a text of
1205 // full_type_name, then serializes it into serialized_value.
ConsumeAnyValue(const Descriptor * value_descriptor,std::string * serialized_value)1206 bool ConsumeAnyValue(const Descriptor* value_descriptor,
1207 std::string* serialized_value) {
1208 DynamicMessageFactory factory;
1209 const Message* value_prototype = factory.GetPrototype(value_descriptor);
1210 if (value_prototype == nullptr) {
1211 return false;
1212 }
1213 std::unique_ptr<Message> value(value_prototype->New());
1214 std::string sub_delimiter;
1215 DO(ConsumeMessageDelimiter(&sub_delimiter));
1216 DO(ConsumeMessage(value.get(), sub_delimiter));
1217
1218 if (allow_partial_) {
1219 value->AppendPartialToString(serialized_value);
1220 } else {
1221 if (!value->IsInitialized()) {
1222 ReportError(
1223 "Value of type \"" + value_descriptor->full_name() +
1224 "\" stored in google.protobuf.Any has missing required fields");
1225 return false;
1226 }
1227 value->AppendToString(serialized_value);
1228 }
1229 return true;
1230 }
1231
1232 // Consumes a token and confirms that it matches that specified in the
1233 // value parameter. Returns false if the token found does not match that
1234 // which was specified.
Consume(const std::string & value)1235 bool Consume(const std::string& value) {
1236 const std::string& current_value = tokenizer_.current().text;
1237
1238 if (current_value != value) {
1239 ReportError("Expected \"" + value + "\", found \"" + current_value +
1240 "\".");
1241 return false;
1242 }
1243
1244 tokenizer_.Next();
1245
1246 return true;
1247 }
1248
1249 // Similar to `Consume`, but the following token may be tokenized as
1250 // TYPE_WHITESPACE.
ConsumeBeforeWhitespace(const std::string & value)1251 bool ConsumeBeforeWhitespace(const std::string& value) {
1252 // Report whitespace after this token, but only once.
1253 tokenizer_.set_report_whitespace(true);
1254 bool result = Consume(value);
1255 tokenizer_.set_report_whitespace(false);
1256 return result;
1257 }
1258
1259 // Attempts to consume the supplied value. Returns false if a the
1260 // token found does not match the value specified.
TryConsume(const std::string & value)1261 bool TryConsume(const std::string& value) {
1262 if (tokenizer_.current().text == value) {
1263 tokenizer_.Next();
1264 return true;
1265 } else {
1266 return false;
1267 }
1268 }
1269
1270 // Similar to `TryConsume`, but the following token may be tokenized as
1271 // TYPE_WHITESPACE.
TryConsumeBeforeWhitespace(const std::string & value)1272 bool TryConsumeBeforeWhitespace(const std::string& value) {
1273 // Report whitespace after this token, but only once.
1274 tokenizer_.set_report_whitespace(true);
1275 bool result = TryConsume(value);
1276 tokenizer_.set_report_whitespace(false);
1277 return result;
1278 }
1279
TryConsumeWhitespace()1280 bool TryConsumeWhitespace() {
1281 had_silent_marker_ = false;
1282 if (LookingAtType(io::Tokenizer::TYPE_WHITESPACE)) {
1283 if (tokenizer_.current().text == " " DEBUG_STRING_SILENT_MARKER) {
1284 had_silent_marker_ = true;
1285 }
1286 tokenizer_.Next();
1287 return true;
1288 }
1289 return false;
1290 }
1291
1292 // An internal instance of the Tokenizer's error collector, used to
1293 // collect any base-level parse errors and feed them to the ParserImpl.
1294 class ParserErrorCollector : public io::ErrorCollector {
1295 public:
ParserErrorCollector(TextFormat::Parser::ParserImpl * parser)1296 explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser)
1297 : parser_(parser) {}
1298
~ParserErrorCollector()1299 ~ParserErrorCollector() override {}
1300
AddError(int line,int column,const std::string & message)1301 void AddError(int line, int column, const std::string& message) override {
1302 parser_->ReportError(line, column, message);
1303 }
1304
AddWarning(int line,int column,const std::string & message)1305 void AddWarning(int line, int column, const std::string& message) override {
1306 parser_->ReportWarning(line, column, message);
1307 }
1308
1309 private:
1310 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector);
1311 TextFormat::Parser::ParserImpl* parser_;
1312 };
1313
1314 io::ErrorCollector* error_collector_;
1315 const TextFormat::Finder* finder_;
1316 ParseInfoTree* parse_info_tree_;
1317 ParserErrorCollector tokenizer_error_collector_;
1318 io::Tokenizer tokenizer_;
1319 const Descriptor* root_message_type_;
1320 SingularOverwritePolicy singular_overwrite_policy_;
1321 const bool allow_case_insensitive_field_;
1322 const bool allow_unknown_field_;
1323 const bool allow_unknown_extension_;
1324 const bool allow_unknown_enum_;
1325 const bool allow_field_number_;
1326 const bool allow_partial_;
1327 const int initial_recursion_limit_;
1328 int recursion_limit_;
1329 bool had_silent_marker_;
1330 bool had_errors_;
1331 };
1332
1333 // ===========================================================================
1334 // Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
1335 // from the Printer found in //net/proto2/io/public/printer.h
1336 class TextFormat::Printer::TextGenerator
1337 : public TextFormat::BaseTextGenerator {
1338 public:
TextGenerator(io::ZeroCopyOutputStream * output,int initial_indent_level)1339 explicit TextGenerator(io::ZeroCopyOutputStream* output,
1340 int initial_indent_level)
1341 : output_(output),
1342 buffer_(nullptr),
1343 buffer_size_(0),
1344 at_start_of_line_(true),
1345 failed_(false),
1346 insert_silent_marker_(false),
1347 indent_level_(initial_indent_level),
1348 initial_indent_level_(initial_indent_level) {}
1349
TextGenerator(io::ZeroCopyOutputStream * output,bool insert_silent_marker,int initial_indent_level)1350 explicit TextGenerator(io::ZeroCopyOutputStream* output,
1351 bool insert_silent_marker, int initial_indent_level)
1352 : output_(output),
1353 buffer_(nullptr),
1354 buffer_size_(0),
1355 at_start_of_line_(true),
1356 failed_(false),
1357 insert_silent_marker_(insert_silent_marker),
1358 indent_level_(initial_indent_level),
1359 initial_indent_level_(initial_indent_level) {}
1360
~TextGenerator()1361 ~TextGenerator() override {
1362 // Only BackUp() if we're sure we've successfully called Next() at least
1363 // once.
1364 if (!failed_) {
1365 output_->BackUp(buffer_size_);
1366 }
1367 }
1368
1369 // Indent text by two spaces. After calling Indent(), two spaces will be
1370 // inserted at the beginning of each line of text. Indent() may be called
1371 // multiple times to produce deeper indents.
Indent()1372 void Indent() override { ++indent_level_; }
1373
1374 // Reduces the current indent level by two spaces, or crashes if the indent
1375 // level is zero.
Outdent()1376 void Outdent() override {
1377 if (indent_level_ == 0 || indent_level_ < initial_indent_level_) {
1378 GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
1379 return;
1380 }
1381
1382 --indent_level_;
1383 }
1384
GetCurrentIndentationSize() const1385 size_t GetCurrentIndentationSize() const override {
1386 return 2 * indent_level_;
1387 }
1388
1389 // Print text to the output stream.
Print(const char * text,size_t size)1390 void Print(const char* text, size_t size) override {
1391 if (indent_level_ > 0) {
1392 size_t pos = 0; // The number of bytes we've written so far.
1393 for (size_t i = 0; i < size; i++) {
1394 if (text[i] == '\n') {
1395 // Saw newline. If there is more text, we may need to insert an
1396 // indent here. So, write what we have so far, including the '\n'.
1397 Write(text + pos, i - pos + 1);
1398 pos = i + 1;
1399
1400 // Setting this true will cause the next Write() to insert an indent
1401 // first.
1402 at_start_of_line_ = true;
1403 }
1404 }
1405 // Write the rest.
1406 Write(text + pos, size - pos);
1407 } else {
1408 Write(text, size);
1409 if (size > 0 && text[size - 1] == '\n') {
1410 at_start_of_line_ = true;
1411 }
1412 }
1413 }
1414
1415 // True if any write to the underlying stream failed. (We don't just
1416 // crash in this case because this is an I/O failure, not a programming
1417 // error.)
failed() const1418 bool failed() const { return failed_; }
1419
PrintMaybeWithMarker(StringPiece text)1420 void PrintMaybeWithMarker(StringPiece text) {
1421 Print(text.data(), text.size());
1422 if (ConsumeInsertSilentMarker()) {
1423 PrintLiteral(DEBUG_STRING_SILENT_MARKER);
1424 }
1425 }
1426
PrintMaybeWithMarker(StringPiece text_head,StringPiece text_tail)1427 void PrintMaybeWithMarker(StringPiece text_head,
1428 StringPiece text_tail) {
1429 Print(text_head.data(), text_head.size());
1430 if (ConsumeInsertSilentMarker()) {
1431 PrintLiteral(DEBUG_STRING_SILENT_MARKER);
1432 }
1433 Print(text_tail.data(), text_tail.size());
1434 }
1435
1436 private:
1437 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator);
1438
Write(const char * data,size_t size)1439 void Write(const char* data, size_t size) {
1440 if (failed_) return;
1441 if (size == 0) return;
1442
1443 if (at_start_of_line_) {
1444 // Insert an indent.
1445 at_start_of_line_ = false;
1446 WriteIndent();
1447 if (failed_) return;
1448 }
1449
1450 while (static_cast<int64_t>(size) > buffer_size_) {
1451 // Data exceeds space in the buffer. Copy what we can and request a
1452 // new buffer.
1453 if (buffer_size_ > 0) {
1454 memcpy(buffer_, data, buffer_size_);
1455 data += buffer_size_;
1456 size -= buffer_size_;
1457 }
1458 void* void_buffer = nullptr;
1459 failed_ = !output_->Next(&void_buffer, &buffer_size_);
1460 if (failed_) return;
1461 buffer_ = reinterpret_cast<char*>(void_buffer);
1462 }
1463
1464 // Buffer is big enough to receive the data; copy it.
1465 memcpy(buffer_, data, size);
1466 buffer_ += size;
1467 buffer_size_ -= size;
1468 }
1469
WriteIndent()1470 void WriteIndent() {
1471 if (indent_level_ == 0) {
1472 return;
1473 }
1474 GOOGLE_DCHECK(!failed_);
1475 int size = GetCurrentIndentationSize();
1476
1477 while (size > buffer_size_) {
1478 // Data exceeds space in the buffer. Write what we can and request a new
1479 // buffer.
1480 if (buffer_size_ > 0) {
1481 memset(buffer_, ' ', buffer_size_);
1482 }
1483 size -= buffer_size_;
1484 void* void_buffer;
1485 failed_ = !output_->Next(&void_buffer, &buffer_size_);
1486 if (failed_) return;
1487 buffer_ = reinterpret_cast<char*>(void_buffer);
1488 }
1489
1490 // Buffer is big enough to receive the data; copy it.
1491 memset(buffer_, ' ', size);
1492 buffer_ += size;
1493 buffer_size_ -= size;
1494 }
1495
1496 // Return the current value of insert_silent_marker_. If it is true, set it
1497 // to false as we assume that a silent marker is inserted after a call to this
1498 // function.
ConsumeInsertSilentMarker()1499 bool ConsumeInsertSilentMarker() {
1500 if (insert_silent_marker_) {
1501 insert_silent_marker_ = false;
1502 return true;
1503 }
1504 return false;
1505 }
1506
1507 io::ZeroCopyOutputStream* const output_;
1508 char* buffer_;
1509 int buffer_size_;
1510 bool at_start_of_line_;
1511 bool failed_;
1512 // This flag is false when inserting silent marker is disabled or a silent
1513 // marker has been inserted.
1514 bool insert_silent_marker_;
1515
1516 int indent_level_;
1517 int initial_indent_level_;
1518 };
1519
1520 // ===========================================================================
1521 // An internal field value printer that may insert a silent marker in
1522 // DebugStrings.
1523 class TextFormat::Printer::DebugStringFieldValuePrinter
1524 : public TextFormat::FastFieldValuePrinter {
1525 public:
PrintMessageStart(const Message &,int,int,bool single_line_mode,BaseTextGenerator * generator) const1526 void PrintMessageStart(const Message& /*message*/, int /*field_index*/,
1527 int /*field_count*/, bool single_line_mode,
1528 BaseTextGenerator* generator) const override {
1529 // This is safe as only TextGenerator is used with
1530 // DebugStringFieldValuePrinter.
1531 TextGenerator* text_generator = static_cast<TextGenerator*>(generator);
1532 if (single_line_mode) {
1533 text_generator->PrintMaybeWithMarker(" ", "{ ");
1534 } else {
1535 text_generator->PrintMaybeWithMarker(" ", "{\n");
1536 }
1537 }
1538 };
1539
1540 // ===========================================================================
1541 // An internal field value printer that escape UTF8 strings.
1542 class TextFormat::Printer::FastFieldValuePrinterUtf8Escaping
1543 : public TextFormat::Printer::DebugStringFieldValuePrinter {
1544 public:
PrintString(const std::string & val,TextFormat::BaseTextGenerator * generator) const1545 void PrintString(const std::string& val,
1546 TextFormat::BaseTextGenerator* generator) const override {
1547 generator->PrintLiteral("\"");
1548 generator->PrintString(strings::Utf8SafeCEscape(val));
1549 generator->PrintLiteral("\"");
1550 }
PrintBytes(const std::string & val,TextFormat::BaseTextGenerator * generator) const1551 void PrintBytes(const std::string& val,
1552 TextFormat::BaseTextGenerator* generator) const override {
1553 return FastFieldValuePrinter::PrintString(val, generator);
1554 }
1555 };
1556
1557 // ===========================================================================
1558 // Implementation of the default Finder for extensions.
~Finder()1559 TextFormat::Finder::~Finder() {}
1560
FindExtension(Message * message,const std::string & name) const1561 const FieldDescriptor* TextFormat::Finder::FindExtension(
1562 Message* message, const std::string& name) const {
1563 return DefaultFinderFindExtension(message, name);
1564 }
1565
FindExtensionByNumber(const Descriptor * descriptor,int number) const1566 const FieldDescriptor* TextFormat::Finder::FindExtensionByNumber(
1567 const Descriptor* descriptor, int number) const {
1568 return DefaultFinderFindExtensionByNumber(descriptor, number);
1569 }
1570
FindAnyType(const Message & message,const std::string & prefix,const std::string & name) const1571 const Descriptor* TextFormat::Finder::FindAnyType(
1572 const Message& message, const std::string& prefix,
1573 const std::string& name) const {
1574 return DefaultFinderFindAnyType(message, prefix, name);
1575 }
1576
FindExtensionFactory(const FieldDescriptor *) const1577 MessageFactory* TextFormat::Finder::FindExtensionFactory(
1578 const FieldDescriptor* /*field*/) const {
1579 return nullptr;
1580 }
1581
1582 // ===========================================================================
1583
Parser()1584 TextFormat::Parser::Parser()
1585 : error_collector_(nullptr),
1586 finder_(nullptr),
1587 parse_info_tree_(nullptr),
1588 allow_partial_(false),
1589 allow_case_insensitive_field_(false),
1590 allow_unknown_field_(false),
1591 allow_unknown_extension_(false),
1592 allow_unknown_enum_(false),
1593 allow_field_number_(false),
1594 allow_relaxed_whitespace_(false),
1595 allow_singular_overwrites_(false),
1596 recursion_limit_(std::numeric_limits<int>::max()) {}
1597
~Parser()1598 TextFormat::Parser::~Parser() {}
1599
1600 namespace {
1601
CheckParseInputSize(StringPiece input,io::ErrorCollector * error_collector)1602 bool CheckParseInputSize(StringPiece input,
1603 io::ErrorCollector* error_collector) {
1604 if (input.size() > INT_MAX) {
1605 error_collector->AddError(
1606 -1, 0,
1607 StrCat(
1608 "Input size too large: ", static_cast<int64_t>(input.size()),
1609 " bytes", " > ", INT_MAX, " bytes."));
1610 return false;
1611 }
1612 return true;
1613 }
1614
1615 } // namespace
1616
Parse(io::ZeroCopyInputStream * input,Message * output)1617 bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
1618 Message* output) {
1619 output->Clear();
1620
1621 ParserImpl::SingularOverwritePolicy overwrites_policy =
1622 allow_singular_overwrites_ ? ParserImpl::ALLOW_SINGULAR_OVERWRITES
1623 : ParserImpl::FORBID_SINGULAR_OVERWRITES;
1624
1625 ParserImpl parser(output->GetDescriptor(), input, error_collector_, finder_,
1626 parse_info_tree_, overwrites_policy,
1627 allow_case_insensitive_field_, allow_unknown_field_,
1628 allow_unknown_extension_, allow_unknown_enum_,
1629 allow_field_number_, allow_relaxed_whitespace_,
1630 allow_partial_, recursion_limit_);
1631 return MergeUsingImpl(input, output, &parser);
1632 }
1633
ParseFromString(ConstStringParam input,Message * output)1634 bool TextFormat::Parser::ParseFromString(ConstStringParam input,
1635 Message* output) {
1636 DO(CheckParseInputSize(input, error_collector_));
1637 io::ArrayInputStream input_stream(input.data(), input.size());
1638 return Parse(&input_stream, output);
1639 }
1640
Merge(io::ZeroCopyInputStream * input,Message * output)1641 bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
1642 Message* output) {
1643 ParserImpl parser(output->GetDescriptor(), input, error_collector_, finder_,
1644 parse_info_tree_, ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1645 allow_case_insensitive_field_, allow_unknown_field_,
1646 allow_unknown_extension_, allow_unknown_enum_,
1647 allow_field_number_, allow_relaxed_whitespace_,
1648 allow_partial_, recursion_limit_);
1649 return MergeUsingImpl(input, output, &parser);
1650 }
1651
MergeFromString(ConstStringParam input,Message * output)1652 bool TextFormat::Parser::MergeFromString(ConstStringParam input,
1653 Message* output) {
1654 DO(CheckParseInputSize(input, error_collector_));
1655 io::ArrayInputStream input_stream(input.data(), input.size());
1656 return Merge(&input_stream, output);
1657 }
1658
MergeUsingImpl(io::ZeroCopyInputStream *,Message * output,ParserImpl * parser_impl)1659 bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* /* input */,
1660 Message* output,
1661 ParserImpl* parser_impl) {
1662 if (!parser_impl->Parse(output)) return false;
1663 if (!allow_partial_ && !output->IsInitialized()) {
1664 std::vector<std::string> missing_fields;
1665 output->FindInitializationErrors(&missing_fields);
1666 parser_impl->ReportError(-1, 0,
1667 "Message missing required fields: " +
1668 Join(missing_fields, ", "));
1669 return false;
1670 }
1671 return true;
1672 }
1673
ParseFieldValueFromString(const std::string & input,const FieldDescriptor * field,Message * output)1674 bool TextFormat::Parser::ParseFieldValueFromString(const std::string& input,
1675 const FieldDescriptor* field,
1676 Message* output) {
1677 io::ArrayInputStream input_stream(input.data(), input.size());
1678 ParserImpl parser(
1679 output->GetDescriptor(), &input_stream, error_collector_, finder_,
1680 parse_info_tree_, ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1681 allow_case_insensitive_field_, allow_unknown_field_,
1682 allow_unknown_extension_, allow_unknown_enum_, allow_field_number_,
1683 allow_relaxed_whitespace_, allow_partial_, recursion_limit_);
1684 return parser.ParseField(field, output);
1685 }
1686
Parse(io::ZeroCopyInputStream * input,Message * output)1687 /* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
1688 Message* output) {
1689 return Parser().Parse(input, output);
1690 }
1691
Merge(io::ZeroCopyInputStream * input,Message * output)1692 /* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
1693 Message* output) {
1694 return Parser().Merge(input, output);
1695 }
1696
ParseFromString(ConstStringParam input,Message * output)1697 /* static */ bool TextFormat::ParseFromString(ConstStringParam input,
1698 Message* output) {
1699 return Parser().ParseFromString(input, output);
1700 }
1701
MergeFromString(ConstStringParam input,Message * output)1702 /* static */ bool TextFormat::MergeFromString(ConstStringParam input,
1703 Message* output) {
1704 return Parser().MergeFromString(input, output);
1705 }
1706
1707 #undef DO
1708
1709 // ===========================================================================
1710
~BaseTextGenerator()1711 TextFormat::BaseTextGenerator::~BaseTextGenerator() {}
1712
1713 namespace {
1714
1715 // A BaseTextGenerator that writes to a string.
1716 class StringBaseTextGenerator : public TextFormat::BaseTextGenerator {
1717 public:
Print(const char * text,size_t size)1718 void Print(const char* text, size_t size) override {
1719 output_.append(text, size);
1720 }
1721
1722 // Some compilers do not support ref-qualifiers even in C++11 mode.
1723 // Disable the optimization for now and revisit it later.
1724 #if 0 // LANG_CXX11
1725 std::string Consume() && { return std::move(output_); }
1726 #else // !LANG_CXX11
Get()1727 const std::string& Get() { return output_; }
1728 #endif // LANG_CXX11
1729
1730 private:
1731 std::string output_;
1732 };
1733
1734 } // namespace
1735
1736 // The default implementation for FieldValuePrinter. We just delegate the
1737 // implementation to the default FastFieldValuePrinter to avoid duplicating the
1738 // logic.
FieldValuePrinter()1739 TextFormat::FieldValuePrinter::FieldValuePrinter() {}
~FieldValuePrinter()1740 TextFormat::FieldValuePrinter::~FieldValuePrinter() {}
1741
1742 #if 0 // LANG_CXX11
1743 #define FORWARD_IMPL(fn, ...) \
1744 StringBaseTextGenerator generator; \
1745 delegate_.fn(__VA_ARGS__, &generator); \
1746 return std::move(generator).Consume()
1747 #else // !LANG_CXX11
1748 #define FORWARD_IMPL(fn, ...) \
1749 StringBaseTextGenerator generator; \
1750 delegate_.fn(__VA_ARGS__, &generator); \
1751 return generator.Get()
1752 #endif // LANG_CXX11
1753
PrintBool(bool val) const1754 std::string TextFormat::FieldValuePrinter::PrintBool(bool val) const {
1755 FORWARD_IMPL(PrintBool, val);
1756 }
PrintInt32(int32_t val) const1757 std::string TextFormat::FieldValuePrinter::PrintInt32(int32_t val) const {
1758 FORWARD_IMPL(PrintInt32, val);
1759 }
PrintUInt32(uint32_t val) const1760 std::string TextFormat::FieldValuePrinter::PrintUInt32(uint32_t val) const {
1761 FORWARD_IMPL(PrintUInt32, val);
1762 }
PrintInt64(int64_t val) const1763 std::string TextFormat::FieldValuePrinter::PrintInt64(int64_t val) const {
1764 FORWARD_IMPL(PrintInt64, val);
1765 }
PrintUInt64(uint64_t val) const1766 std::string TextFormat::FieldValuePrinter::PrintUInt64(uint64_t val) const {
1767 FORWARD_IMPL(PrintUInt64, val);
1768 }
PrintFloat(float val) const1769 std::string TextFormat::FieldValuePrinter::PrintFloat(float val) const {
1770 FORWARD_IMPL(PrintFloat, val);
1771 }
PrintDouble(double val) const1772 std::string TextFormat::FieldValuePrinter::PrintDouble(double val) const {
1773 FORWARD_IMPL(PrintDouble, val);
1774 }
PrintString(const std::string & val) const1775 std::string TextFormat::FieldValuePrinter::PrintString(
1776 const std::string& val) const {
1777 FORWARD_IMPL(PrintString, val);
1778 }
PrintBytes(const std::string & val) const1779 std::string TextFormat::FieldValuePrinter::PrintBytes(
1780 const std::string& val) const {
1781 return PrintString(val);
1782 }
PrintEnum(int32_t val,const std::string & name) const1783 std::string TextFormat::FieldValuePrinter::PrintEnum(
1784 int32_t val, const std::string& name) const {
1785 FORWARD_IMPL(PrintEnum, val, name);
1786 }
PrintFieldName(const Message & message,const Reflection * reflection,const FieldDescriptor * field) const1787 std::string TextFormat::FieldValuePrinter::PrintFieldName(
1788 const Message& message, const Reflection* reflection,
1789 const FieldDescriptor* field) const {
1790 FORWARD_IMPL(PrintFieldName, message, reflection, field);
1791 }
PrintMessageStart(const Message & message,int field_index,int field_count,bool single_line_mode) const1792 std::string TextFormat::FieldValuePrinter::PrintMessageStart(
1793 const Message& message, int field_index, int field_count,
1794 bool single_line_mode) const {
1795 FORWARD_IMPL(PrintMessageStart, message, field_index, field_count,
1796 single_line_mode);
1797 }
PrintMessageEnd(const Message & message,int field_index,int field_count,bool single_line_mode) const1798 std::string TextFormat::FieldValuePrinter::PrintMessageEnd(
1799 const Message& message, int field_index, int field_count,
1800 bool single_line_mode) const {
1801 FORWARD_IMPL(PrintMessageEnd, message, field_index, field_count,
1802 single_line_mode);
1803 }
1804 #undef FORWARD_IMPL
1805
FastFieldValuePrinter()1806 TextFormat::FastFieldValuePrinter::FastFieldValuePrinter() {}
~FastFieldValuePrinter()1807 TextFormat::FastFieldValuePrinter::~FastFieldValuePrinter() {}
PrintBool(bool val,BaseTextGenerator * generator) const1808 void TextFormat::FastFieldValuePrinter::PrintBool(
1809 bool val, BaseTextGenerator* generator) const {
1810 if (val) {
1811 generator->PrintLiteral("true");
1812 } else {
1813 generator->PrintLiteral("false");
1814 }
1815 }
PrintInt32(int32_t val,BaseTextGenerator * generator) const1816 void TextFormat::FastFieldValuePrinter::PrintInt32(
1817 int32_t val, BaseTextGenerator* generator) const {
1818 generator->PrintString(StrCat(val));
1819 }
PrintUInt32(uint32_t val,BaseTextGenerator * generator) const1820 void TextFormat::FastFieldValuePrinter::PrintUInt32(
1821 uint32_t val, BaseTextGenerator* generator) const {
1822 generator->PrintString(StrCat(val));
1823 }
PrintInt64(int64_t val,BaseTextGenerator * generator) const1824 void TextFormat::FastFieldValuePrinter::PrintInt64(
1825 int64_t val, BaseTextGenerator* generator) const {
1826 generator->PrintString(StrCat(val));
1827 }
PrintUInt64(uint64_t val,BaseTextGenerator * generator) const1828 void TextFormat::FastFieldValuePrinter::PrintUInt64(
1829 uint64_t val, BaseTextGenerator* generator) const {
1830 generator->PrintString(StrCat(val));
1831 }
PrintFloat(float val,BaseTextGenerator * generator) const1832 void TextFormat::FastFieldValuePrinter::PrintFloat(
1833 float val, BaseTextGenerator* generator) const {
1834 generator->PrintString(!std::isnan(val) ? SimpleFtoa(val) : "nan");
1835 }
PrintDouble(double val,BaseTextGenerator * generator) const1836 void TextFormat::FastFieldValuePrinter::PrintDouble(
1837 double val, BaseTextGenerator* generator) const {
1838 generator->PrintString(!std::isnan(val) ? SimpleDtoa(val) : "nan");
1839 }
PrintEnum(int32_t,const std::string & name,BaseTextGenerator * generator) const1840 void TextFormat::FastFieldValuePrinter::PrintEnum(
1841 int32_t /*val*/, const std::string& name,
1842 BaseTextGenerator* generator) const {
1843 generator->PrintString(name);
1844 }
1845
PrintString(const std::string & val,BaseTextGenerator * generator) const1846 void TextFormat::FastFieldValuePrinter::PrintString(
1847 const std::string& val, BaseTextGenerator* generator) const {
1848 generator->PrintLiteral("\"");
1849 generator->PrintString(CEscape(val));
1850 generator->PrintLiteral("\"");
1851 }
PrintBytes(const std::string & val,BaseTextGenerator * generator) const1852 void TextFormat::FastFieldValuePrinter::PrintBytes(
1853 const std::string& val, BaseTextGenerator* generator) const {
1854 PrintString(val, generator);
1855 }
PrintFieldName(const Message & message,int,int,const Reflection * reflection,const FieldDescriptor * field,BaseTextGenerator * generator) const1856 void TextFormat::FastFieldValuePrinter::PrintFieldName(
1857 const Message& message, int /*field_index*/, int /*field_count*/,
1858 const Reflection* reflection, const FieldDescriptor* field,
1859 BaseTextGenerator* generator) const {
1860 PrintFieldName(message, reflection, field, generator);
1861 }
PrintFieldName(const Message &,const Reflection *,const FieldDescriptor * field,BaseTextGenerator * generator) const1862 void TextFormat::FastFieldValuePrinter::PrintFieldName(
1863 const Message& /*message*/, const Reflection* /*reflection*/,
1864 const FieldDescriptor* field, BaseTextGenerator* generator) const {
1865 if (field->is_extension()) {
1866 generator->PrintLiteral("[");
1867 generator->PrintString(field->PrintableNameForExtension());
1868 generator->PrintLiteral("]");
1869 } else if (field->type() == FieldDescriptor::TYPE_GROUP) {
1870 // Groups must be serialized with their original capitalization.
1871 generator->PrintString(field->message_type()->name());
1872 } else {
1873 generator->PrintString(field->name());
1874 }
1875 }
PrintMessageStart(const Message &,int,int,bool single_line_mode,BaseTextGenerator * generator) const1876 void TextFormat::FastFieldValuePrinter::PrintMessageStart(
1877 const Message& /*message*/, int /*field_index*/, int /*field_count*/,
1878 bool single_line_mode, BaseTextGenerator* generator) const {
1879 if (single_line_mode) {
1880 generator->PrintLiteral(" { ");
1881 } else {
1882 generator->PrintLiteral(" {\n");
1883 }
1884 }
PrintMessageContent(const Message &,int,int,bool,BaseTextGenerator *) const1885 bool TextFormat::FastFieldValuePrinter::PrintMessageContent(
1886 const Message& /*message*/, int /*field_index*/, int /*field_count*/,
1887 bool /*single_line_mode*/, BaseTextGenerator* /*generator*/) const {
1888 return false; // Use the default printing function.
1889 }
PrintMessageEnd(const Message &,int,int,bool single_line_mode,BaseTextGenerator * generator) const1890 void TextFormat::FastFieldValuePrinter::PrintMessageEnd(
1891 const Message& /*message*/, int /*field_index*/, int /*field_count*/,
1892 bool single_line_mode, BaseTextGenerator* generator) const {
1893 if (single_line_mode) {
1894 generator->PrintLiteral("} ");
1895 } else {
1896 generator->PrintLiteral("}\n");
1897 }
1898 }
1899
1900 namespace {
1901
1902 // A legacy compatibility wrapper. Takes ownership of the delegate.
1903 class FieldValuePrinterWrapper : public TextFormat::FastFieldValuePrinter {
1904 public:
FieldValuePrinterWrapper(const TextFormat::FieldValuePrinter * delegate)1905 explicit FieldValuePrinterWrapper(
1906 const TextFormat::FieldValuePrinter* delegate)
1907 : delegate_(delegate) {}
1908
SetDelegate(const TextFormat::FieldValuePrinter * delegate)1909 void SetDelegate(const TextFormat::FieldValuePrinter* delegate) {
1910 delegate_.reset(delegate);
1911 }
1912
PrintBool(bool val,TextFormat::BaseTextGenerator * generator) const1913 void PrintBool(bool val,
1914 TextFormat::BaseTextGenerator* generator) const override {
1915 generator->PrintString(delegate_->PrintBool(val));
1916 }
PrintInt32(int32_t val,TextFormat::BaseTextGenerator * generator) const1917 void PrintInt32(int32_t val,
1918 TextFormat::BaseTextGenerator* generator) const override {
1919 generator->PrintString(delegate_->PrintInt32(val));
1920 }
PrintUInt32(uint32_t val,TextFormat::BaseTextGenerator * generator) const1921 void PrintUInt32(uint32_t val,
1922 TextFormat::BaseTextGenerator* generator) const override {
1923 generator->PrintString(delegate_->PrintUInt32(val));
1924 }
PrintInt64(int64_t val,TextFormat::BaseTextGenerator * generator) const1925 void PrintInt64(int64_t val,
1926 TextFormat::BaseTextGenerator* generator) const override {
1927 generator->PrintString(delegate_->PrintInt64(val));
1928 }
PrintUInt64(uint64_t val,TextFormat::BaseTextGenerator * generator) const1929 void PrintUInt64(uint64_t val,
1930 TextFormat::BaseTextGenerator* generator) const override {
1931 generator->PrintString(delegate_->PrintUInt64(val));
1932 }
PrintFloat(float val,TextFormat::BaseTextGenerator * generator) const1933 void PrintFloat(float val,
1934 TextFormat::BaseTextGenerator* generator) const override {
1935 generator->PrintString(delegate_->PrintFloat(val));
1936 }
PrintDouble(double val,TextFormat::BaseTextGenerator * generator) const1937 void PrintDouble(double val,
1938 TextFormat::BaseTextGenerator* generator) const override {
1939 generator->PrintString(delegate_->PrintDouble(val));
1940 }
PrintString(const std::string & val,TextFormat::BaseTextGenerator * generator) const1941 void PrintString(const std::string& val,
1942 TextFormat::BaseTextGenerator* generator) const override {
1943 generator->PrintString(delegate_->PrintString(val));
1944 }
PrintBytes(const std::string & val,TextFormat::BaseTextGenerator * generator) const1945 void PrintBytes(const std::string& val,
1946 TextFormat::BaseTextGenerator* generator) const override {
1947 generator->PrintString(delegate_->PrintBytes(val));
1948 }
PrintEnum(int32_t val,const std::string & name,TextFormat::BaseTextGenerator * generator) const1949 void PrintEnum(int32_t val, const std::string& name,
1950 TextFormat::BaseTextGenerator* generator) const override {
1951 generator->PrintString(delegate_->PrintEnum(val, name));
1952 }
PrintFieldName(const Message & message,int,int,const Reflection * reflection,const FieldDescriptor * field,TextFormat::BaseTextGenerator * generator) const1953 void PrintFieldName(const Message& message, int /*field_index*/,
1954 int /*field_count*/, const Reflection* reflection,
1955 const FieldDescriptor* field,
1956 TextFormat::BaseTextGenerator* generator) const override {
1957 generator->PrintString(
1958 delegate_->PrintFieldName(message, reflection, field));
1959 }
PrintFieldName(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextFormat::BaseTextGenerator * generator) const1960 void PrintFieldName(const Message& message, const Reflection* reflection,
1961 const FieldDescriptor* field,
1962 TextFormat::BaseTextGenerator* generator) const override {
1963 generator->PrintString(
1964 delegate_->PrintFieldName(message, reflection, field));
1965 }
PrintMessageStart(const Message & message,int field_index,int field_count,bool single_line_mode,TextFormat::BaseTextGenerator * generator) const1966 void PrintMessageStart(
1967 const Message& message, int field_index, int field_count,
1968 bool single_line_mode,
1969 TextFormat::BaseTextGenerator* generator) const override {
1970 generator->PrintString(delegate_->PrintMessageStart(
1971 message, field_index, field_count, single_line_mode));
1972 }
PrintMessageEnd(const Message & message,int field_index,int field_count,bool single_line_mode,TextFormat::BaseTextGenerator * generator) const1973 void PrintMessageEnd(
1974 const Message& message, int field_index, int field_count,
1975 bool single_line_mode,
1976 TextFormat::BaseTextGenerator* generator) const override {
1977 generator->PrintString(delegate_->PrintMessageEnd(
1978 message, field_index, field_count, single_line_mode));
1979 }
1980
1981 private:
1982 std::unique_ptr<const TextFormat::FieldValuePrinter> delegate_;
1983 };
1984
1985 } // namespace
1986
1987 const char* const TextFormat::Printer::kDoNotParse =
1988 "DO NOT PARSE: fields may be stripped and missing.\n";
1989
Printer()1990 TextFormat::Printer::Printer()
1991 : initial_indent_level_(0),
1992 single_line_mode_(false),
1993 use_field_number_(false),
1994 use_short_repeated_primitives_(false),
1995 insert_silent_marker_(false),
1996 hide_unknown_fields_(false),
1997 print_message_fields_in_index_order_(false),
1998 expand_any_(false),
1999 truncate_string_field_longer_than_(0LL),
2000 finder_(nullptr) {
2001 SetUseUtf8StringEscaping(false);
2002 }
2003
SetUseUtf8StringEscaping(bool as_utf8)2004 void TextFormat::Printer::SetUseUtf8StringEscaping(bool as_utf8) {
2005 SetDefaultFieldValuePrinter(as_utf8 ? new FastFieldValuePrinterUtf8Escaping()
2006 : new DebugStringFieldValuePrinter());
2007 }
2008
SetDefaultFieldValuePrinter(const FieldValuePrinter * printer)2009 void TextFormat::Printer::SetDefaultFieldValuePrinter(
2010 const FieldValuePrinter* printer) {
2011 default_field_value_printer_.reset(new FieldValuePrinterWrapper(printer));
2012 }
2013
SetDefaultFieldValuePrinter(const FastFieldValuePrinter * printer)2014 void TextFormat::Printer::SetDefaultFieldValuePrinter(
2015 const FastFieldValuePrinter* printer) {
2016 default_field_value_printer_.reset(printer);
2017 }
2018
RegisterFieldValuePrinter(const FieldDescriptor * field,const FieldValuePrinter * printer)2019 bool TextFormat::Printer::RegisterFieldValuePrinter(
2020 const FieldDescriptor* field, const FieldValuePrinter* printer) {
2021 if (field == nullptr || printer == nullptr) {
2022 return false;
2023 }
2024 std::unique_ptr<FieldValuePrinterWrapper> wrapper(
2025 new FieldValuePrinterWrapper(nullptr));
2026 auto pair = custom_printers_.insert(std::make_pair(field, nullptr));
2027 if (pair.second) {
2028 wrapper->SetDelegate(printer);
2029 pair.first->second = std::move(wrapper);
2030 return true;
2031 } else {
2032 return false;
2033 }
2034 }
2035
RegisterFieldValuePrinter(const FieldDescriptor * field,const FastFieldValuePrinter * printer)2036 bool TextFormat::Printer::RegisterFieldValuePrinter(
2037 const FieldDescriptor* field, const FastFieldValuePrinter* printer) {
2038 if (field == nullptr || printer == nullptr) {
2039 return false;
2040 }
2041 auto pair = custom_printers_.insert(std::make_pair(field, nullptr));
2042 if (pair.second) {
2043 pair.first->second.reset(printer);
2044 return true;
2045 } else {
2046 return false;
2047 }
2048 }
2049
RegisterMessagePrinter(const Descriptor * descriptor,const MessagePrinter * printer)2050 bool TextFormat::Printer::RegisterMessagePrinter(
2051 const Descriptor* descriptor, const MessagePrinter* printer) {
2052 if (descriptor == nullptr || printer == nullptr) {
2053 return false;
2054 }
2055 auto pair =
2056 custom_message_printers_.insert(std::make_pair(descriptor, nullptr));
2057 if (pair.second) {
2058 pair.first->second.reset(printer);
2059 return true;
2060 } else {
2061 return false;
2062 }
2063 }
2064
PrintToString(const Message & message,std::string * output) const2065 bool TextFormat::Printer::PrintToString(const Message& message,
2066 std::string* output) const {
2067 GOOGLE_DCHECK(output) << "output specified is nullptr";
2068
2069 output->clear();
2070 io::StringOutputStream output_stream(output);
2071
2072 return Print(message, &output_stream);
2073 }
2074
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,std::string * output) const2075 bool TextFormat::Printer::PrintUnknownFieldsToString(
2076 const UnknownFieldSet& unknown_fields, std::string* output) const {
2077 GOOGLE_DCHECK(output) << "output specified is nullptr";
2078
2079 output->clear();
2080 io::StringOutputStream output_stream(output);
2081 return PrintUnknownFields(unknown_fields, &output_stream);
2082 }
2083
Print(const Message & message,io::ZeroCopyOutputStream * output) const2084 bool TextFormat::Printer::Print(const Message& message,
2085 io::ZeroCopyOutputStream* output) const {
2086 TextGenerator generator(output, insert_silent_marker_, initial_indent_level_);
2087
2088 Print(message, &generator);
2089
2090 // Output false if the generator failed internally.
2091 return !generator.failed();
2092 }
2093
// Recursion budget handed to PrintUnknownFields(): bounds how deeply
// length-delimited unknown fields are heuristically printed as nested
// messages.
static constexpr int kUnknownFieldRecursionLimit = 10;
2097
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output) const2098 bool TextFormat::Printer::PrintUnknownFields(
2099 const UnknownFieldSet& unknown_fields,
2100 io::ZeroCopyOutputStream* output) const {
2101 TextGenerator generator(output, initial_indent_level_);
2102
2103 PrintUnknownFields(unknown_fields, &generator, kUnknownFieldRecursionLimit);
2104
2105 // Output false if the generator failed internally.
2106 return !generator.failed();
2107 }
2108
2109 namespace {
2110 // Comparison functor for sorting FieldDescriptors by field index.
2111 // Normal fields have higher precedence than extensions.
2112 struct FieldIndexSorter {
operator ()google::protobuf::__anon048cea3c0611::FieldIndexSorter2113 bool operator()(const FieldDescriptor* left,
2114 const FieldDescriptor* right) const {
2115 if (left->is_extension() && right->is_extension()) {
2116 return left->number() < right->number();
2117 } else if (left->is_extension()) {
2118 return false;
2119 } else if (right->is_extension()) {
2120 return true;
2121 } else {
2122 return left->index() < right->index();
2123 }
2124 }
2125 };
2126
2127 } // namespace
2128
PrintAny(const Message & message,TextGenerator * generator) const2129 bool TextFormat::Printer::PrintAny(const Message& message,
2130 TextGenerator* generator) const {
2131 const FieldDescriptor* type_url_field;
2132 const FieldDescriptor* value_field;
2133 if (!internal::GetAnyFieldDescriptors(message, &type_url_field,
2134 &value_field)) {
2135 return false;
2136 }
2137
2138 const Reflection* reflection = message.GetReflection();
2139
2140 // Extract the full type name from the type_url field.
2141 const std::string& type_url = reflection->GetString(message, type_url_field);
2142 std::string url_prefix;
2143 std::string full_type_name;
2144 if (!internal::ParseAnyTypeUrl(type_url, &url_prefix, &full_type_name)) {
2145 return false;
2146 }
2147
2148 // Print the "value" in text.
2149 const Descriptor* value_descriptor =
2150 finder_ ? finder_->FindAnyType(message, url_prefix, full_type_name)
2151 : DefaultFinderFindAnyType(message, url_prefix, full_type_name);
2152 if (value_descriptor == nullptr) {
2153 GOOGLE_LOG(WARNING) << "Can't print proto content: proto type " << type_url
2154 << " not found";
2155 return false;
2156 }
2157 DynamicMessageFactory factory;
2158 std::unique_ptr<Message> value_message(
2159 factory.GetPrototype(value_descriptor)->New());
2160 std::string serialized_value = reflection->GetString(message, value_field);
2161 if (!value_message->ParseFromString(serialized_value)) {
2162 GOOGLE_LOG(WARNING) << type_url << ": failed to parse contents";
2163 return false;
2164 }
2165 generator->PrintLiteral("[");
2166 generator->PrintString(type_url);
2167 generator->PrintLiteral("]");
2168 const FastFieldValuePrinter* printer = GetFieldPrinter(value_field);
2169 printer->PrintMessageStart(message, -1, 0, single_line_mode_, generator);
2170 generator->Indent();
2171 Print(*value_message, generator);
2172 generator->Outdent();
2173 printer->PrintMessageEnd(message, -1, 0, single_line_mode_, generator);
2174 return true;
2175 }
2176
Print(const Message & message,TextGenerator * generator) const2177 void TextFormat::Printer::Print(const Message& message,
2178 TextGenerator* generator) const {
2179 const Reflection* reflection = message.GetReflection();
2180 if (!reflection) {
2181 // This message does not provide any way to describe its structure.
2182 // Parse it again in an UnknownFieldSet, and display this instead.
2183 UnknownFieldSet unknown_fields;
2184 {
2185 std::string serialized = message.SerializeAsString();
2186 io::ArrayInputStream input(serialized.data(), serialized.size());
2187 unknown_fields.ParseFromZeroCopyStream(&input);
2188 }
2189 PrintUnknownFields(unknown_fields, generator, kUnknownFieldRecursionLimit);
2190 return;
2191 }
2192 const Descriptor* descriptor = message.GetDescriptor();
2193 auto itr = custom_message_printers_.find(descriptor);
2194 if (itr != custom_message_printers_.end()) {
2195 itr->second->Print(message, single_line_mode_, generator);
2196 return;
2197 }
2198 if (descriptor->full_name() == internal::kAnyFullTypeName && expand_any_ &&
2199 PrintAny(message, generator)) {
2200 return;
2201 }
2202 std::vector<const FieldDescriptor*> fields;
2203 if (descriptor->options().map_entry()) {
2204 fields.push_back(descriptor->field(0));
2205 fields.push_back(descriptor->field(1));
2206 } else {
2207 reflection->ListFieldsOmitStripped(message, &fields);
2208 if (reflection->IsMessageStripped(message.GetDescriptor())) {
2209 generator->Print(kDoNotParse, std::strlen(kDoNotParse));
2210 }
2211 }
2212
2213 if (print_message_fields_in_index_order_) {
2214 std::sort(fields.begin(), fields.end(), FieldIndexSorter());
2215 }
2216 for (const FieldDescriptor* field : fields) {
2217 PrintField(message, reflection, field, generator);
2218 }
2219 if (!hide_unknown_fields_) {
2220 PrintUnknownFields(reflection->GetUnknownFields(message), generator,
2221 kUnknownFieldRecursionLimit);
2222 }
2223 }
2224
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,std::string * output) const2225 void TextFormat::Printer::PrintFieldValueToString(const Message& message,
2226 const FieldDescriptor* field,
2227 int index,
2228 std::string* output) const {
2229 GOOGLE_DCHECK(output) << "output specified is nullptr";
2230
2231 output->clear();
2232 io::StringOutputStream output_stream(output);
2233 TextGenerator generator(&output_stream, initial_indent_level_);
2234
2235 PrintFieldValue(message, message.GetReflection(), field, index, &generator);
2236 }
2237
2238 class MapEntryMessageComparator {
2239 public:
MapEntryMessageComparator(const Descriptor * descriptor)2240 explicit MapEntryMessageComparator(const Descriptor* descriptor)
2241 : field_(descriptor->field(0)) {}
2242
operator ()(const Message * a,const Message * b)2243 bool operator()(const Message* a, const Message* b) {
2244 const Reflection* reflection = a->GetReflection();
2245 switch (field_->cpp_type()) {
2246 case FieldDescriptor::CPPTYPE_BOOL: {
2247 bool first = reflection->GetBool(*a, field_);
2248 bool second = reflection->GetBool(*b, field_);
2249 return first < second;
2250 }
2251 case FieldDescriptor::CPPTYPE_INT32: {
2252 int32_t first = reflection->GetInt32(*a, field_);
2253 int32_t second = reflection->GetInt32(*b, field_);
2254 return first < second;
2255 }
2256 case FieldDescriptor::CPPTYPE_INT64: {
2257 int64_t first = reflection->GetInt64(*a, field_);
2258 int64_t second = reflection->GetInt64(*b, field_);
2259 return first < second;
2260 }
2261 case FieldDescriptor::CPPTYPE_UINT32: {
2262 uint32_t first = reflection->GetUInt32(*a, field_);
2263 uint32_t second = reflection->GetUInt32(*b, field_);
2264 return first < second;
2265 }
2266 case FieldDescriptor::CPPTYPE_UINT64: {
2267 uint64_t first = reflection->GetUInt64(*a, field_);
2268 uint64_t second = reflection->GetUInt64(*b, field_);
2269 return first < second;
2270 }
2271 case FieldDescriptor::CPPTYPE_STRING: {
2272 std::string first = reflection->GetString(*a, field_);
2273 std::string second = reflection->GetString(*b, field_);
2274 return first < second;
2275 }
2276 default:
2277 GOOGLE_LOG(DFATAL) << "Invalid key for map field.";
2278 return true;
2279 }
2280 }
2281
2282 private:
2283 const FieldDescriptor* field_;
2284 };
2285
2286 namespace internal {
2287 class MapFieldPrinterHelper {
2288 public:
2289 // DynamicMapSorter::Sort cannot be used because it enforces syncing with
2290 // repeated field.
2291 static bool SortMap(const Message& message, const Reflection* reflection,
2292 const FieldDescriptor* field,
2293 std::vector<const Message*>* sorted_map_field);
2294 static void CopyKey(const MapKey& key, Message* message,
2295 const FieldDescriptor* field_desc);
2296 static void CopyValue(const MapValueRef& value, Message* message,
2297 const FieldDescriptor* field_desc);
2298 };
2299
2300 // Returns true if elements contained in sorted_map_field need to be released.
SortMap(const Message & message,const Reflection * reflection,const FieldDescriptor * field,std::vector<const Message * > * sorted_map_field)2301 bool MapFieldPrinterHelper::SortMap(
2302 const Message& message, const Reflection* reflection,
2303 const FieldDescriptor* field,
2304 std::vector<const Message*>* sorted_map_field) {
2305 bool need_release = false;
2306 const MapFieldBase& base = *reflection->GetMapData(message, field);
2307
2308 if (base.IsRepeatedFieldValid()) {
2309 const RepeatedPtrField<Message>& map_field =
2310 reflection->GetRepeatedPtrFieldInternal<Message>(message, field);
2311 for (int i = 0; i < map_field.size(); ++i) {
2312 sorted_map_field->push_back(
2313 const_cast<RepeatedPtrField<Message>*>(&map_field)->Mutable(i));
2314 }
2315 } else {
2316 // TODO(teboring): For performance, instead of creating map entry message
2317 // for each element, just store map keys and sort them.
2318 const Descriptor* map_entry_desc = field->message_type();
2319 const Message* prototype =
2320 reflection->GetMessageFactory()->GetPrototype(map_entry_desc);
2321 for (MapIterator iter =
2322 reflection->MapBegin(const_cast<Message*>(&message), field);
2323 iter != reflection->MapEnd(const_cast<Message*>(&message), field);
2324 ++iter) {
2325 Message* map_entry_message = prototype->New();
2326 CopyKey(iter.GetKey(), map_entry_message, map_entry_desc->field(0));
2327 CopyValue(iter.GetValueRef(), map_entry_message,
2328 map_entry_desc->field(1));
2329 sorted_map_field->push_back(map_entry_message);
2330 }
2331 need_release = true;
2332 }
2333
2334 MapEntryMessageComparator comparator(field->message_type());
2335 std::stable_sort(sorted_map_field->begin(), sorted_map_field->end(),
2336 comparator);
2337 return need_release;
2338 }
2339
CopyKey(const MapKey & key,Message * message,const FieldDescriptor * field_desc)2340 void MapFieldPrinterHelper::CopyKey(const MapKey& key, Message* message,
2341 const FieldDescriptor* field_desc) {
2342 const Reflection* reflection = message->GetReflection();
2343 switch (field_desc->cpp_type()) {
2344 case FieldDescriptor::CPPTYPE_DOUBLE:
2345 case FieldDescriptor::CPPTYPE_FLOAT:
2346 case FieldDescriptor::CPPTYPE_ENUM:
2347 case FieldDescriptor::CPPTYPE_MESSAGE:
2348 GOOGLE_LOG(ERROR) << "Not supported.";
2349 break;
2350 case FieldDescriptor::CPPTYPE_STRING:
2351 reflection->SetString(message, field_desc, key.GetStringValue());
2352 return;
2353 case FieldDescriptor::CPPTYPE_INT64:
2354 reflection->SetInt64(message, field_desc, key.GetInt64Value());
2355 return;
2356 case FieldDescriptor::CPPTYPE_INT32:
2357 reflection->SetInt32(message, field_desc, key.GetInt32Value());
2358 return;
2359 case FieldDescriptor::CPPTYPE_UINT64:
2360 reflection->SetUInt64(message, field_desc, key.GetUInt64Value());
2361 return;
2362 case FieldDescriptor::CPPTYPE_UINT32:
2363 reflection->SetUInt32(message, field_desc, key.GetUInt32Value());
2364 return;
2365 case FieldDescriptor::CPPTYPE_BOOL:
2366 reflection->SetBool(message, field_desc, key.GetBoolValue());
2367 return;
2368 }
2369 }
2370
CopyValue(const MapValueRef & value,Message * message,const FieldDescriptor * field_desc)2371 void MapFieldPrinterHelper::CopyValue(const MapValueRef& value,
2372 Message* message,
2373 const FieldDescriptor* field_desc) {
2374 const Reflection* reflection = message->GetReflection();
2375 switch (field_desc->cpp_type()) {
2376 case FieldDescriptor::CPPTYPE_DOUBLE:
2377 reflection->SetDouble(message, field_desc, value.GetDoubleValue());
2378 return;
2379 case FieldDescriptor::CPPTYPE_FLOAT:
2380 reflection->SetFloat(message, field_desc, value.GetFloatValue());
2381 return;
2382 case FieldDescriptor::CPPTYPE_ENUM:
2383 reflection->SetEnumValue(message, field_desc, value.GetEnumValue());
2384 return;
2385 case FieldDescriptor::CPPTYPE_MESSAGE: {
2386 Message* sub_message = value.GetMessageValue().New();
2387 sub_message->CopyFrom(value.GetMessageValue());
2388 reflection->SetAllocatedMessage(message, sub_message, field_desc);
2389 return;
2390 }
2391 case FieldDescriptor::CPPTYPE_STRING:
2392 reflection->SetString(message, field_desc, value.GetStringValue());
2393 return;
2394 case FieldDescriptor::CPPTYPE_INT64:
2395 reflection->SetInt64(message, field_desc, value.GetInt64Value());
2396 return;
2397 case FieldDescriptor::CPPTYPE_INT32:
2398 reflection->SetInt32(message, field_desc, value.GetInt32Value());
2399 return;
2400 case FieldDescriptor::CPPTYPE_UINT64:
2401 reflection->SetUInt64(message, field_desc, value.GetUInt64Value());
2402 return;
2403 case FieldDescriptor::CPPTYPE_UINT32:
2404 reflection->SetUInt32(message, field_desc, value.GetUInt32Value());
2405 return;
2406 case FieldDescriptor::CPPTYPE_BOOL:
2407 reflection->SetBool(message, field_desc, value.GetBoolValue());
2408 return;
2409 }
2410 }
2411 } // namespace internal
2412
PrintField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator * generator) const2413 void TextFormat::Printer::PrintField(const Message& message,
2414 const Reflection* reflection,
2415 const FieldDescriptor* field,
2416 TextGenerator* generator) const {
2417 if (use_short_repeated_primitives_ && field->is_repeated() &&
2418 field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
2419 field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
2420 PrintShortRepeatedField(message, reflection, field, generator);
2421 return;
2422 }
2423
2424 int count = 0;
2425
2426 if (field->is_repeated()) {
2427 count = reflection->FieldSize(message, field);
2428 } else if (reflection->HasField(message, field) ||
2429 field->containing_type()->options().map_entry()) {
2430 count = 1;
2431 }
2432
2433 std::vector<const Message*> sorted_map_field;
2434 bool need_release = false;
2435 bool is_map = field->is_map();
2436 if (is_map) {
2437 need_release = internal::MapFieldPrinterHelper::SortMap(
2438 message, reflection, field, &sorted_map_field);
2439 }
2440
2441 for (int j = 0; j < count; ++j) {
2442 const int field_index = field->is_repeated() ? j : -1;
2443
2444 PrintFieldName(message, field_index, count, reflection, field, generator);
2445
2446 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
2447 const FastFieldValuePrinter* printer = GetFieldPrinter(field);
2448 const Message& sub_message =
2449 field->is_repeated()
2450 ? (is_map ? *sorted_map_field[j]
2451 : reflection->GetRepeatedMessage(message, field, j))
2452 : reflection->GetMessage(message, field);
2453 printer->PrintMessageStart(sub_message, field_index, count,
2454 single_line_mode_, generator);
2455 generator->Indent();
2456 if (!printer->PrintMessageContent(sub_message, field_index, count,
2457 single_line_mode_, generator)) {
2458 Print(sub_message, generator);
2459 }
2460 generator->Outdent();
2461 printer->PrintMessageEnd(sub_message, field_index, count,
2462 single_line_mode_, generator);
2463 } else {
2464 generator->PrintMaybeWithMarker(": ");
2465 // Write the field value.
2466 PrintFieldValue(message, reflection, field, field_index, generator);
2467 if (single_line_mode_) {
2468 generator->PrintLiteral(" ");
2469 } else {
2470 generator->PrintLiteral("\n");
2471 }
2472 }
2473 }
2474
2475 if (need_release) {
2476 for (const Message* message_to_delete : sorted_map_field) {
2477 delete message_to_delete;
2478 }
2479 }
2480 }
2481
PrintShortRepeatedField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator * generator) const2482 void TextFormat::Printer::PrintShortRepeatedField(
2483 const Message& message, const Reflection* reflection,
2484 const FieldDescriptor* field, TextGenerator* generator) const {
2485 // Print primitive repeated field in short form.
2486 int size = reflection->FieldSize(message, field);
2487 PrintFieldName(message, /*field_index=*/-1, /*field_count=*/size, reflection,
2488 field, generator);
2489 generator->PrintMaybeWithMarker(": ", "[");
2490 for (int i = 0; i < size; i++) {
2491 if (i > 0) generator->PrintLiteral(", ");
2492 PrintFieldValue(message, reflection, field, i, generator);
2493 }
2494 if (single_line_mode_) {
2495 generator->PrintLiteral("] ");
2496 } else {
2497 generator->PrintLiteral("]\n");
2498 }
2499 }
2500
PrintFieldName(const Message & message,int field_index,int field_count,const Reflection * reflection,const FieldDescriptor * field,TextGenerator * generator) const2501 void TextFormat::Printer::PrintFieldName(const Message& message,
2502 int field_index, int field_count,
2503 const Reflection* reflection,
2504 const FieldDescriptor* field,
2505 TextGenerator* generator) const {
2506 // if use_field_number_ is true, prints field number instead
2507 // of field name.
2508 if (use_field_number_) {
2509 generator->PrintString(StrCat(field->number()));
2510 return;
2511 }
2512
2513 const FastFieldValuePrinter* printer = GetFieldPrinter(field);
2514 printer->PrintFieldName(message, field_index, field_count, reflection, field,
2515 generator);
2516 }
2517
PrintFieldValue(const Message & message,const Reflection * reflection,const FieldDescriptor * field,int index,TextGenerator * generator) const2518 void TextFormat::Printer::PrintFieldValue(const Message& message,
2519 const Reflection* reflection,
2520 const FieldDescriptor* field,
2521 int index,
2522 TextGenerator* generator) const {
2523 GOOGLE_DCHECK(field->is_repeated() || (index == -1))
2524 << "Index must be -1 for non-repeated fields";
2525
2526 const FastFieldValuePrinter* printer = GetFieldPrinter(field);
2527
2528 switch (field->cpp_type()) {
2529 #define OUTPUT_FIELD(CPPTYPE, METHOD) \
2530 case FieldDescriptor::CPPTYPE_##CPPTYPE: \
2531 printer->Print##METHOD( \
2532 field->is_repeated() \
2533 ? reflection->GetRepeated##METHOD(message, field, index) \
2534 : reflection->Get##METHOD(message, field), \
2535 generator); \
2536 break
2537
2538 OUTPUT_FIELD(INT32, Int32);
2539 OUTPUT_FIELD(INT64, Int64);
2540 OUTPUT_FIELD(UINT32, UInt32);
2541 OUTPUT_FIELD(UINT64, UInt64);
2542 OUTPUT_FIELD(FLOAT, Float);
2543 OUTPUT_FIELD(DOUBLE, Double);
2544 OUTPUT_FIELD(BOOL, Bool);
2545 #undef OUTPUT_FIELD
2546
2547 case FieldDescriptor::CPPTYPE_STRING: {
2548 std::string scratch;
2549 const std::string& value =
2550 field->is_repeated()
2551 ? reflection->GetRepeatedStringReference(message, field, index,
2552 &scratch)
2553 : reflection->GetStringReference(message, field, &scratch);
2554 const std::string* value_to_print = &value;
2555 std::string truncated_value;
2556 if (truncate_string_field_longer_than_ > 0 &&
2557 static_cast<size_t>(truncate_string_field_longer_than_) <
2558 value.size()) {
2559 truncated_value = value.substr(0, truncate_string_field_longer_than_) +
2560 "...<truncated>...";
2561 value_to_print = &truncated_value;
2562 }
2563 if (field->type() == FieldDescriptor::TYPE_STRING) {
2564 printer->PrintString(*value_to_print, generator);
2565 } else {
2566 GOOGLE_DCHECK_EQ(field->type(), FieldDescriptor::TYPE_BYTES);
2567 printer->PrintBytes(*value_to_print, generator);
2568 }
2569 break;
2570 }
2571
2572 case FieldDescriptor::CPPTYPE_ENUM: {
2573 int enum_value =
2574 field->is_repeated()
2575 ? reflection->GetRepeatedEnumValue(message, field, index)
2576 : reflection->GetEnumValue(message, field);
2577 const EnumValueDescriptor* enum_desc =
2578 field->enum_type()->FindValueByNumber(enum_value);
2579 if (enum_desc != nullptr) {
2580 printer->PrintEnum(enum_value, enum_desc->name(), generator);
2581 } else {
2582 // Ordinarily, enum_desc should not be null, because proto2 has the
2583 // invariant that set enum field values must be in-range, but with the
2584 // new integer-based API for enums (or the RepeatedField<int> loophole),
2585 // it is possible for the user to force an unknown integer value. So we
2586 // simply use the integer value itself as the enum value name in this
2587 // case.
2588 printer->PrintEnum(enum_value, StrCat(enum_value), generator);
2589 }
2590 break;
2591 }
2592
2593 case FieldDescriptor::CPPTYPE_MESSAGE:
2594 Print(field->is_repeated()
2595 ? reflection->GetRepeatedMessage(message, field, index)
2596 : reflection->GetMessage(message, field),
2597 generator);
2598 break;
2599 }
2600 }
2601
Print(const Message & message,io::ZeroCopyOutputStream * output)2602 /* static */ bool TextFormat::Print(const Message& message,
2603 io::ZeroCopyOutputStream* output) {
2604 return Printer().Print(message, output);
2605 }
2606
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output)2607 /* static */ bool TextFormat::PrintUnknownFields(
2608 const UnknownFieldSet& unknown_fields, io::ZeroCopyOutputStream* output) {
2609 return Printer().PrintUnknownFields(unknown_fields, output);
2610 }
2611
PrintToString(const Message & message,std::string * output)2612 /* static */ bool TextFormat::PrintToString(const Message& message,
2613 std::string* output) {
2614 return Printer().PrintToString(message, output);
2615 }
2616
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,std::string * output)2617 /* static */ bool TextFormat::PrintUnknownFieldsToString(
2618 const UnknownFieldSet& unknown_fields, std::string* output) {
2619 return Printer().PrintUnknownFieldsToString(unknown_fields, output);
2620 }
2621
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,std::string * output)2622 /* static */ void TextFormat::PrintFieldValueToString(
2623 const Message& message, const FieldDescriptor* field, int index,
2624 std::string* output) {
2625 return Printer().PrintFieldValueToString(message, field, index, output);
2626 }
2627
ParseFieldValueFromString(const std::string & input,const FieldDescriptor * field,Message * message)2628 /* static */ bool TextFormat::ParseFieldValueFromString(
2629 const std::string& input, const FieldDescriptor* field, Message* message) {
2630 return Parser().ParseFieldValueFromString(input, field, message);
2631 }
2632
PrintUnknownFields(const UnknownFieldSet & unknown_fields,TextGenerator * generator,int recursion_budget) const2633 void TextFormat::Printer::PrintUnknownFields(
2634 const UnknownFieldSet& unknown_fields, TextGenerator* generator,
2635 int recursion_budget) const {
2636 for (int i = 0; i < unknown_fields.field_count(); i++) {
2637 const UnknownField& field = unknown_fields.field(i);
2638 std::string field_number = StrCat(field.number());
2639
2640 switch (field.type()) {
2641 case UnknownField::TYPE_VARINT:
2642 generator->PrintString(field_number);
2643 generator->PrintMaybeWithMarker(": ");
2644 generator->PrintString(StrCat(field.varint()));
2645 if (single_line_mode_) {
2646 generator->PrintLiteral(" ");
2647 } else {
2648 generator->PrintLiteral("\n");
2649 }
2650 break;
2651 case UnknownField::TYPE_FIXED32: {
2652 generator->PrintString(field_number);
2653 generator->PrintMaybeWithMarker(": ", "0x");
2654 generator->PrintString(
2655 StrCat(strings::Hex(field.fixed32(), strings::ZERO_PAD_8)));
2656 if (single_line_mode_) {
2657 generator->PrintLiteral(" ");
2658 } else {
2659 generator->PrintLiteral("\n");
2660 }
2661 break;
2662 }
2663 case UnknownField::TYPE_FIXED64: {
2664 generator->PrintString(field_number);
2665 generator->PrintMaybeWithMarker(": ", "0x");
2666 generator->PrintString(
2667 StrCat(strings::Hex(field.fixed64(), strings::ZERO_PAD_16)));
2668 if (single_line_mode_) {
2669 generator->PrintLiteral(" ");
2670 } else {
2671 generator->PrintLiteral("\n");
2672 }
2673 break;
2674 }
2675 case UnknownField::TYPE_LENGTH_DELIMITED: {
2676 generator->PrintString(field_number);
2677 const std::string& value = field.length_delimited();
2678 // We create a CodedInputStream so that we can adhere to our recursion
2679 // budget when we attempt to parse the data. UnknownFieldSet parsing is
2680 // recursive because of groups.
2681 io::CodedInputStream input_stream(
2682 reinterpret_cast<const uint8_t*>(value.data()), value.size());
2683 input_stream.SetRecursionLimit(recursion_budget);
2684 UnknownFieldSet embedded_unknown_fields;
2685 if (!value.empty() && recursion_budget > 0 &&
2686 embedded_unknown_fields.ParseFromCodedStream(&input_stream)) {
2687 // This field is parseable as a Message.
2688 // So it is probably an embedded message.
2689 if (single_line_mode_) {
2690 generator->PrintMaybeWithMarker(" ", "{ ");
2691 } else {
2692 generator->PrintMaybeWithMarker(" ", "{\n");
2693 generator->Indent();
2694 }
2695 PrintUnknownFields(embedded_unknown_fields, generator,
2696 recursion_budget - 1);
2697 if (single_line_mode_) {
2698 generator->PrintLiteral("} ");
2699 } else {
2700 generator->Outdent();
2701 generator->PrintLiteral("}\n");
2702 }
2703 } else {
2704 // This field is not parseable as a Message (or we ran out of
2705 // recursion budget). So it is probably just a plain string.
2706 generator->PrintMaybeWithMarker(": ", "\"");
2707 generator->PrintString(CEscape(value));
2708 if (single_line_mode_) {
2709 generator->PrintLiteral("\" ");
2710 } else {
2711 generator->PrintLiteral("\"\n");
2712 }
2713 }
2714 break;
2715 }
2716 case UnknownField::TYPE_GROUP:
2717 generator->PrintString(field_number);
2718 if (single_line_mode_) {
2719 generator->PrintMaybeWithMarker(" ", "{ ");
2720 } else {
2721 generator->PrintMaybeWithMarker(" ", "{\n");
2722 generator->Indent();
2723 }
2724 // For groups, we recurse without checking the budget. This is OK,
2725 // because if the groups were too deeply nested then we would have
2726 // already rejected the message when we originally parsed it.
2727 PrintUnknownFields(field.group(), generator, recursion_budget - 1);
2728 if (single_line_mode_) {
2729 generator->PrintLiteral("} ");
2730 } else {
2731 generator->Outdent();
2732 generator->PrintLiteral("}\n");
2733 }
2734 break;
2735 }
2736 }
2737 }
2738
2739 } // namespace protobuf
2740 } // namespace google
2741
2742 #include <google/protobuf/port_undef.inc>
2743