1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // http://code.google.com/p/protobuf/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: jschorr@google.com (Joseph Schorr)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #include <float.h>
36 #include <math.h>
37 #include <stdio.h>
38 #include <stack>
39 #include <limits>
40
41 #include <google/protobuf/text_format.h>
42
43 #include <google/protobuf/descriptor.h>
44 #include <google/protobuf/io/coded_stream.h>
45 #include <google/protobuf/io/zero_copy_stream.h>
46 #include <google/protobuf/io/zero_copy_stream_impl.h>
47 #include <google/protobuf/unknown_field_set.h>
48 #include <google/protobuf/descriptor.pb.h>
49 #include <google/protobuf/io/tokenizer.h>
50 #include <google/protobuf/stubs/strutil.h>
51
52 namespace google {
53 namespace protobuf {
54
DebugString() const55 string Message::DebugString() const {
56 string debug_string;
57
58 TextFormat::PrintToString(*this, &debug_string);
59
60 return debug_string;
61 }
62
ShortDebugString() const63 string Message::ShortDebugString() const {
64 string debug_string;
65
66 TextFormat::Printer printer;
67 printer.SetSingleLineMode(true);
68
69 printer.PrintToString(*this, &debug_string);
70 // Single line mode currently might have an extra space at the end.
71 if (debug_string.size() > 0 &&
72 debug_string[debug_string.size() - 1] == ' ') {
73 debug_string.resize(debug_string.size() - 1);
74 }
75
76 return debug_string;
77 }
78
Utf8DebugString() const79 string Message::Utf8DebugString() const {
80 string debug_string;
81
82 TextFormat::Printer printer;
83 printer.SetUseUtf8StringEscaping(true);
84
85 printer.PrintToString(*this, &debug_string);
86
87 return debug_string;
88 }
89
PrintDebugString() const90 void Message::PrintDebugString() const {
91 printf("%s", DebugString().c_str());
92 }
93
94
95 // ===========================================================================
96 // Internal class for parsing an ASCII representation of a Protocol Message.
97 // This class makes use of the Protocol Message compiler's tokenizer found
98 // in //google/protobuf/io/tokenizer.h. Note that class's Parse
99 // method is *not* thread-safe and should only be used in a single thread at
100 // a time.
101
102 // Makes code slightly more readable. The meaning of "DO(foo)" is
103 // "Execute foo and fail if it fails.", where failure is indicated by
104 // returning false. Borrowed from parser.cc (Thanks Kenton!).
105 #define DO(STATEMENT) if (STATEMENT) {} else return false
106
107 class TextFormat::Parser::ParserImpl {
108 public:
109
110 // Determines if repeated values for a non-repeated field are
111 // permitted, e.g., the string "foo: 1 foo: 2" for a
112 // required/optional field named "foo".
113 enum SingularOverwritePolicy {
114 ALLOW_SINGULAR_OVERWRITES = 0, // the last value is retained
115 FORBID_SINGULAR_OVERWRITES = 1, // an error is issued
116 };
117
ParserImpl(const Descriptor * root_message_type,io::ZeroCopyInputStream * input_stream,io::ErrorCollector * error_collector,SingularOverwritePolicy singular_overwrite_policy)118 ParserImpl(const Descriptor* root_message_type,
119 io::ZeroCopyInputStream* input_stream,
120 io::ErrorCollector* error_collector,
121 SingularOverwritePolicy singular_overwrite_policy)
122 : error_collector_(error_collector),
123 tokenizer_error_collector_(this),
124 tokenizer_(input_stream, &tokenizer_error_collector_),
125 root_message_type_(root_message_type),
126 singular_overwrite_policy_(singular_overwrite_policy),
127 had_errors_(false) {
128 // For backwards-compatibility with proto1, we need to allow the 'f' suffix
129 // for floats.
130 tokenizer_.set_allow_f_after_float(true);
131
132 // '#' starts a comment.
133 tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);
134
135 // Consume the starting token.
136 tokenizer_.Next();
137 }
~ParserImpl()138 ~ParserImpl() { }
139
140 // Parses the ASCII representation specified in input and saves the
141 // information into the output pointer (a Message). Returns
142 // false if an error occurs (an error will also be logged to
143 // GOOGLE_LOG(ERROR)).
Parse(Message * output)144 bool Parse(Message* output) {
145 // Consume fields until we cannot do so anymore.
146 while(true) {
147 if (LookingAtType(io::Tokenizer::TYPE_END)) {
148 return !had_errors_;
149 }
150
151 DO(ConsumeField(output));
152 }
153 }
154
ParseField(const FieldDescriptor * field,Message * output)155 bool ParseField(const FieldDescriptor* field, Message* output) {
156 bool suc;
157 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
158 suc = ConsumeFieldMessage(output, output->GetReflection(), field);
159 } else {
160 suc = ConsumeFieldValue(output, output->GetReflection(), field);
161 }
162 return suc && LookingAtType(io::Tokenizer::TYPE_END);
163 }
164
ReportError(int line,int col,const string & message)165 void ReportError(int line, int col, const string& message) {
166 had_errors_ = true;
167 if (error_collector_ == NULL) {
168 if (line >= 0) {
169 GOOGLE_LOG(ERROR) << "Error parsing text-format "
170 << root_message_type_->full_name()
171 << ": " << (line + 1) << ":"
172 << (col + 1) << ": " << message;
173 } else {
174 GOOGLE_LOG(ERROR) << "Error parsing text-format "
175 << root_message_type_->full_name()
176 << ": " << message;
177 }
178 } else {
179 error_collector_->AddError(line, col, message);
180 }
181 }
182
ReportWarning(int line,int col,const string & message)183 void ReportWarning(int line, int col, const string& message) {
184 if (error_collector_ == NULL) {
185 if (line >= 0) {
186 GOOGLE_LOG(WARNING) << "Warning parsing text-format "
187 << root_message_type_->full_name()
188 << ": " << (line + 1) << ":"
189 << (col + 1) << ": " << message;
190 } else {
191 GOOGLE_LOG(WARNING) << "Warning parsing text-format "
192 << root_message_type_->full_name()
193 << ": " << message;
194 }
195 } else {
196 error_collector_->AddWarning(line, col, message);
197 }
198 }
199
200 private:
201 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl);
202
203 // Reports an error with the given message with information indicating
204 // the position (as derived from the current token).
ReportError(const string & message)205 void ReportError(const string& message) {
206 ReportError(tokenizer_.current().line, tokenizer_.current().column,
207 message);
208 }
209
210 // Reports a warning with the given message with information indicating
211 // the position (as derived from the current token).
ReportWarning(const string & message)212 void ReportWarning(const string& message) {
213 ReportWarning(tokenizer_.current().line, tokenizer_.current().column,
214 message);
215 }
216
217 // Consumes the specified message with the given starting delimeter.
218 // This method checks to see that the end delimeter at the conclusion of
219 // the consumption matches the starting delimeter passed in here.
ConsumeMessage(Message * message,const string delimeter)220 bool ConsumeMessage(Message* message, const string delimeter) {
221 while (!LookingAt(">") && !LookingAt("}")) {
222 DO(ConsumeField(message));
223 }
224
225 // Confirm that we have a valid ending delimeter.
226 DO(Consume(delimeter));
227
228 return true;
229 }
230
231 // Consumes the current field (as returned by the tokenizer) on the
232 // passed in message.
ConsumeField(Message * message)233 bool ConsumeField(Message* message) {
234 const Reflection* reflection = message->GetReflection();
235 const Descriptor* descriptor = message->GetDescriptor();
236
237 string field_name;
238
239 const FieldDescriptor* field = NULL;
240
241 if (TryConsume("[")) {
242 // Extension.
243 DO(ConsumeIdentifier(&field_name));
244 while (TryConsume(".")) {
245 string part;
246 DO(ConsumeIdentifier(&part));
247 field_name += ".";
248 field_name += part;
249 }
250 DO(Consume("]"));
251
252 field = reflection->FindKnownExtensionByName(field_name);
253
254 if (field == NULL) {
255 ReportError("Extension \"" + field_name + "\" is not defined or "
256 "is not an extension of \"" +
257 descriptor->full_name() + "\".");
258 return false;
259 }
260 } else {
261 DO(ConsumeIdentifier(&field_name));
262
263 field = descriptor->FindFieldByName(field_name);
264 // Group names are expected to be capitalized as they appear in the
265 // .proto file, which actually matches their type names, not their field
266 // names.
267 if (field == NULL) {
268 string lower_field_name = field_name;
269 LowerString(&lower_field_name);
270 field = descriptor->FindFieldByName(lower_field_name);
271 // If the case-insensitive match worked but the field is NOT a group,
272 if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
273 field = NULL;
274 }
275 }
276 // Again, special-case group names as described above.
277 if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
278 && field->message_type()->name() != field_name) {
279 field = NULL;
280 }
281
282 if (field == NULL) {
283 ReportError("Message type \"" + descriptor->full_name() +
284 "\" has no field named \"" + field_name + "\".");
285 return false;
286 }
287 }
288
289 // Fail if the field is not repeated and it has already been specified.
290 if ((singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) &&
291 !field->is_repeated() && reflection->HasField(*message, field)) {
292 ReportError("Non-repeated field \"" + field_name +
293 "\" is specified multiple times.");
294 return false;
295 }
296
297 // Perform special handling for embedded message types.
298 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
299 // ':' is optional here.
300 TryConsume(":");
301 DO(ConsumeFieldMessage(message, reflection, field));
302 } else {
303 DO(Consume(":"));
304 DO(ConsumeFieldValue(message, reflection, field));
305 }
306
307 if (field->options().deprecated()) {
308 ReportWarning("text format contains deprecated field \""
309 + field_name + "\"");
310 }
311
312 return true;
313 }
314
ConsumeFieldMessage(Message * message,const Reflection * reflection,const FieldDescriptor * field)315 bool ConsumeFieldMessage(Message* message,
316 const Reflection* reflection,
317 const FieldDescriptor* field) {
318 string delimeter;
319 if (TryConsume("<")) {
320 delimeter = ">";
321 } else {
322 DO(Consume("{"));
323 delimeter = "}";
324 }
325
326 if (field->is_repeated()) {
327 DO(ConsumeMessage(reflection->AddMessage(message, field), delimeter));
328 } else {
329 DO(ConsumeMessage(reflection->MutableMessage(message, field),
330 delimeter));
331 }
332 return true;
333 }
334
ConsumeFieldValue(Message * message,const Reflection * reflection,const FieldDescriptor * field)335 bool ConsumeFieldValue(Message* message,
336 const Reflection* reflection,
337 const FieldDescriptor* field) {
338
339 // Define an easy to use macro for setting fields. This macro checks
340 // to see if the field is repeated (in which case we need to use the Add
341 // methods or not (in which case we need to use the Set methods).
342 #define SET_FIELD(CPPTYPE, VALUE) \
343 if (field->is_repeated()) { \
344 reflection->Add##CPPTYPE(message, field, VALUE); \
345 } else { \
346 reflection->Set##CPPTYPE(message, field, VALUE); \
347 } \
348
349 switch(field->cpp_type()) {
350 case FieldDescriptor::CPPTYPE_INT32: {
351 int64 value;
352 DO(ConsumeSignedInteger(&value, kint32max));
353 SET_FIELD(Int32, static_cast<int32>(value));
354 break;
355 }
356
357 case FieldDescriptor::CPPTYPE_UINT32: {
358 uint64 value;
359 DO(ConsumeUnsignedInteger(&value, kuint32max));
360 SET_FIELD(UInt32, static_cast<uint32>(value));
361 break;
362 }
363
364 case FieldDescriptor::CPPTYPE_INT64: {
365 int64 value;
366 DO(ConsumeSignedInteger(&value, kint64max));
367 SET_FIELD(Int64, value);
368 break;
369 }
370
371 case FieldDescriptor::CPPTYPE_UINT64: {
372 uint64 value;
373 DO(ConsumeUnsignedInteger(&value, kuint64max));
374 SET_FIELD(UInt64, value);
375 break;
376 }
377
378 case FieldDescriptor::CPPTYPE_FLOAT: {
379 double value;
380 DO(ConsumeDouble(&value));
381 SET_FIELD(Float, static_cast<float>(value));
382 break;
383 }
384
385 case FieldDescriptor::CPPTYPE_DOUBLE: {
386 double value;
387 DO(ConsumeDouble(&value));
388 SET_FIELD(Double, value);
389 break;
390 }
391
392 case FieldDescriptor::CPPTYPE_STRING: {
393 string value;
394 DO(ConsumeString(&value));
395 SET_FIELD(String, value);
396 break;
397 }
398
399 case FieldDescriptor::CPPTYPE_BOOL: {
400 string value;
401 DO(ConsumeIdentifier(&value));
402
403 if (value == "true") {
404 SET_FIELD(Bool, true);
405 } else if (value == "false") {
406 SET_FIELD(Bool, false);
407 } else {
408 ReportError("Invalid value for boolean field \"" + field->name()
409 + "\". Value: \"" + value + "\".");
410 return false;
411 }
412 break;
413 }
414
415 case FieldDescriptor::CPPTYPE_ENUM: {
416 string value;
417 DO(ConsumeIdentifier(&value));
418
419 // Find the enumeration value.
420 const EnumDescriptor* enum_type = field->enum_type();
421 const EnumValueDescriptor* enum_value
422 = enum_type->FindValueByName(value);
423
424 if (enum_value == NULL) {
425 ReportError("Unknown enumeration value of \"" + value + "\" for "
426 "field \"" + field->name() + "\".");
427 return false;
428 }
429
430 SET_FIELD(Enum, enum_value);
431 break;
432 }
433
434 case FieldDescriptor::CPPTYPE_MESSAGE: {
435 // We should never get here. Put here instead of a default
436 // so that if new types are added, we get a nice compiler warning.
437 GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
438 break;
439 }
440 }
441 #undef SET_FIELD
442 return true;
443 }
444
445 // Returns true if the current token's text is equal to that specified.
LookingAt(const string & text)446 bool LookingAt(const string& text) {
447 return tokenizer_.current().text == text;
448 }
449
450 // Returns true if the current token's type is equal to that specified.
LookingAtType(io::Tokenizer::TokenType token_type)451 bool LookingAtType(io::Tokenizer::TokenType token_type) {
452 return tokenizer_.current().type == token_type;
453 }
454
455 // Consumes an identifier and saves its value in the identifier parameter.
456 // Returns false if the token is not of type IDENTFIER.
ConsumeIdentifier(string * identifier)457 bool ConsumeIdentifier(string* identifier) {
458 if (!LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
459 ReportError("Expected identifier.");
460 return false;
461 }
462
463 *identifier = tokenizer_.current().text;
464
465 tokenizer_.Next();
466 return true;
467 }
468
469 // Consumes a string and saves its value in the text parameter.
470 // Returns false if the token is not of type STRING.
ConsumeString(string * text)471 bool ConsumeString(string* text) {
472 if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
473 ReportError("Expected string.");
474 return false;
475 }
476
477 text->clear();
478 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
479 io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
480
481 tokenizer_.Next();
482 }
483
484 return true;
485 }
486
487 // Consumes a uint64 and saves its value in the value parameter.
488 // Returns false if the token is not of type INTEGER.
ConsumeUnsignedInteger(uint64 * value,uint64 max_value)489 bool ConsumeUnsignedInteger(uint64* value, uint64 max_value) {
490 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
491 ReportError("Expected integer.");
492 return false;
493 }
494
495 if (!io::Tokenizer::ParseInteger(tokenizer_.current().text,
496 max_value, value)) {
497 ReportError("Integer out of range.");
498 return false;
499 }
500
501 tokenizer_.Next();
502 return true;
503 }
504
505 // Consumes an int64 and saves its value in the value parameter.
506 // Note that since the tokenizer does not support negative numbers,
507 // we actually may consume an additional token (for the minus sign) in this
508 // method. Returns false if the token is not an integer
509 // (signed or otherwise).
ConsumeSignedInteger(int64 * value,uint64 max_value)510 bool ConsumeSignedInteger(int64* value, uint64 max_value) {
511 bool negative = false;
512
513 if (TryConsume("-")) {
514 negative = true;
515 // Two's complement always allows one more negative integer than
516 // positive.
517 ++max_value;
518 }
519
520 uint64 unsigned_value;
521
522 DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
523
524 *value = static_cast<int64>(unsigned_value);
525
526 if (negative) {
527 *value = -*value;
528 }
529
530 return true;
531 }
532
533 // Consumes a double and saves its value in the value parameter.
534 // Note that since the tokenizer does not support negative numbers,
535 // we actually may consume an additional token (for the minus sign) in this
536 // method. Returns false if the token is not a double
537 // (signed or otherwise).
ConsumeDouble(double * value)538 bool ConsumeDouble(double* value) {
539 bool negative = false;
540
541 if (TryConsume("-")) {
542 negative = true;
543 }
544
545 // A double can actually be an integer, according to the tokenizer.
546 // Therefore, we must check both cases here.
547 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
548 // We have found an integer value for the double.
549 uint64 integer_value;
550 DO(ConsumeUnsignedInteger(&integer_value, kuint64max));
551
552 *value = static_cast<double>(integer_value);
553 } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
554 // We have found a float value for the double.
555 *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
556
557 // Mark the current token as consumed.
558 tokenizer_.Next();
559 } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
560 string text = tokenizer_.current().text;
561 LowerString(&text);
562 if (text == "inf" || text == "infinity") {
563 *value = std::numeric_limits<double>::infinity();
564 tokenizer_.Next();
565 } else if (text == "nan") {
566 *value = std::numeric_limits<double>::quiet_NaN();
567 tokenizer_.Next();
568 } else {
569 ReportError("Expected double.");
570 return false;
571 }
572 } else {
573 ReportError("Expected double.");
574 return false;
575 }
576
577 if (negative) {
578 *value = -*value;
579 }
580
581 return true;
582 }
583
584 // Consumes a token and confirms that it matches that specified in the
585 // value parameter. Returns false if the token found does not match that
586 // which was specified.
Consume(const string & value)587 bool Consume(const string& value) {
588 const string& current_value = tokenizer_.current().text;
589
590 if (current_value != value) {
591 ReportError("Expected \"" + value + "\", found \"" + current_value
592 + "\".");
593 return false;
594 }
595
596 tokenizer_.Next();
597
598 return true;
599 }
600
601 // Attempts to consume the supplied value. Returns false if a the
602 // token found does not match the value specified.
TryConsume(const string & value)603 bool TryConsume(const string& value) {
604 if (tokenizer_.current().text == value) {
605 tokenizer_.Next();
606 return true;
607 } else {
608 return false;
609 }
610 }
611
612 // An internal instance of the Tokenizer's error collector, used to
613 // collect any base-level parse errors and feed them to the ParserImpl.
614 class ParserErrorCollector : public io::ErrorCollector {
615 public:
ParserErrorCollector(TextFormat::Parser::ParserImpl * parser)616 explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) :
617 parser_(parser) { }
618
~ParserErrorCollector()619 virtual ~ParserErrorCollector() { };
620
AddError(int line,int column,const string & message)621 virtual void AddError(int line, int column, const string& message) {
622 parser_->ReportError(line, column, message);
623 }
624
AddWarning(int line,int column,const string & message)625 virtual void AddWarning(int line, int column, const string& message) {
626 parser_->ReportWarning(line, column, message);
627 }
628
629 private:
630 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector);
631 TextFormat::Parser::ParserImpl* parser_;
632 };
633
634 io::ErrorCollector* error_collector_;
635 ParserErrorCollector tokenizer_error_collector_;
636 io::Tokenizer tokenizer_;
637 const Descriptor* root_message_type_;
638 SingularOverwritePolicy singular_overwrite_policy_;
639 bool had_errors_;
640 };
641
642 #undef DO
643
644 // ===========================================================================
645 // Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
646 // from the Printer found in //google/protobuf/io/printer.h
647 class TextFormat::Printer::TextGenerator {
648 public:
TextGenerator(io::ZeroCopyOutputStream * output,int initial_indent_level)649 explicit TextGenerator(io::ZeroCopyOutputStream* output,
650 int initial_indent_level)
651 : output_(output),
652 buffer_(NULL),
653 buffer_size_(0),
654 at_start_of_line_(true),
655 failed_(false),
656 indent_(""),
657 initial_indent_level_(initial_indent_level) {
658 indent_.resize(initial_indent_level_ * 2, ' ');
659 }
660
~TextGenerator()661 ~TextGenerator() {
662 // Only BackUp() if we're sure we've successfully called Next() at least
663 // once.
664 if (buffer_size_ > 0) {
665 output_->BackUp(buffer_size_);
666 }
667 }
668
669 // Indent text by two spaces. After calling Indent(), two spaces will be
670 // inserted at the beginning of each line of text. Indent() may be called
671 // multiple times to produce deeper indents.
Indent()672 void Indent() {
673 indent_ += " ";
674 }
675
676 // Reduces the current indent level by two spaces, or crashes if the indent
677 // level is zero.
Outdent()678 void Outdent() {
679 if (indent_.empty() ||
680 indent_.size() < initial_indent_level_ * 2) {
681 GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
682 return;
683 }
684
685 indent_.resize(indent_.size() - 2);
686 }
687
688 // Print text to the output stream.
Print(const string & str)689 void Print(const string& str) {
690 Print(str.data(), str.size());
691 }
692
693 // Print text to the output stream.
Print(const char * text)694 void Print(const char* text) {
695 Print(text, strlen(text));
696 }
697
698 // Print text to the output stream.
Print(const char * text,int size)699 void Print(const char* text, int size) {
700 int pos = 0; // The number of bytes we've written so far.
701
702 for (int i = 0; i < size; i++) {
703 if (text[i] == '\n') {
704 // Saw newline. If there is more text, we may need to insert an indent
705 // here. So, write what we have so far, including the '\n'.
706 Write(text + pos, i - pos + 1);
707 pos = i + 1;
708
709 // Setting this true will cause the next Write() to insert an indent
710 // first.
711 at_start_of_line_ = true;
712 }
713 }
714
715 // Write the rest.
716 Write(text + pos, size - pos);
717 }
718
719 // True if any write to the underlying stream failed. (We don't just
720 // crash in this case because this is an I/O failure, not a programming
721 // error.)
failed() const722 bool failed() const { return failed_; }
723
724 private:
725 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator);
726
Write(const char * data,int size)727 void Write(const char* data, int size) {
728 if (failed_) return;
729 if (size == 0) return;
730
731 if (at_start_of_line_) {
732 // Insert an indent.
733 at_start_of_line_ = false;
734 Write(indent_.data(), indent_.size());
735 if (failed_) return;
736 }
737
738 while (size > buffer_size_) {
739 // Data exceeds space in the buffer. Copy what we can and request a
740 // new buffer.
741 memcpy(buffer_, data, buffer_size_);
742 data += buffer_size_;
743 size -= buffer_size_;
744 void* void_buffer;
745 failed_ = !output_->Next(&void_buffer, &buffer_size_);
746 if (failed_) return;
747 buffer_ = reinterpret_cast<char*>(void_buffer);
748 }
749
750 // Buffer is big enough to receive the data; copy it.
751 memcpy(buffer_, data, size);
752 buffer_ += size;
753 buffer_size_ -= size;
754 }
755
756 io::ZeroCopyOutputStream* const output_;
757 char* buffer_;
758 int buffer_size_;
759 bool at_start_of_line_;
760 bool failed_;
761
762 string indent_;
763 int initial_indent_level_;
764 };
765
766 // ===========================================================================
767
Parser()768 TextFormat::Parser::Parser()
769 : error_collector_(NULL),
770 allow_partial_(false) {}
771
~Parser()772 TextFormat::Parser::~Parser() {}
773
Parse(io::ZeroCopyInputStream * input,Message * output)774 bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
775 Message* output) {
776 output->Clear();
777 ParserImpl parser(output->GetDescriptor(), input, error_collector_,
778 ParserImpl::FORBID_SINGULAR_OVERWRITES);
779 return MergeUsingImpl(input, output, &parser);
780 }
781
ParseFromString(const string & input,Message * output)782 bool TextFormat::Parser::ParseFromString(const string& input,
783 Message* output) {
784 io::ArrayInputStream input_stream(input.data(), input.size());
785 return Parse(&input_stream, output);
786 }
787
Merge(io::ZeroCopyInputStream * input,Message * output)788 bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
789 Message* output) {
790 ParserImpl parser(output->GetDescriptor(), input, error_collector_,
791 ParserImpl::ALLOW_SINGULAR_OVERWRITES);
792 return MergeUsingImpl(input, output, &parser);
793 }
794
MergeFromString(const string & input,Message * output)795 bool TextFormat::Parser::MergeFromString(const string& input,
796 Message* output) {
797 io::ArrayInputStream input_stream(input.data(), input.size());
798 return Merge(&input_stream, output);
799 }
800
MergeUsingImpl(io::ZeroCopyInputStream * input,Message * output,ParserImpl * parser_impl)801 bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* input,
802 Message* output,
803 ParserImpl* parser_impl) {
804 if (!parser_impl->Parse(output)) return false;
805 if (!allow_partial_ && !output->IsInitialized()) {
806 vector<string> missing_fields;
807 output->FindInitializationErrors(&missing_fields);
808 parser_impl->ReportError(-1, 0, "Message missing required fields: " +
809 JoinStrings(missing_fields, ", "));
810 return false;
811 }
812 return true;
813 }
814
ParseFieldValueFromString(const string & input,const FieldDescriptor * field,Message * output)815 bool TextFormat::Parser::ParseFieldValueFromString(
816 const string& input,
817 const FieldDescriptor* field,
818 Message* output) {
819 io::ArrayInputStream input_stream(input.data(), input.size());
820 ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
821 ParserImpl::ALLOW_SINGULAR_OVERWRITES);
822 return parser.ParseField(field, output);
823 }
824
Parse(io::ZeroCopyInputStream * input,Message * output)825 /* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
826 Message* output) {
827 return Parser().Parse(input, output);
828 }
829
Merge(io::ZeroCopyInputStream * input,Message * output)830 /* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
831 Message* output) {
832 return Parser().Merge(input, output);
833 }
834
ParseFromString(const string & input,Message * output)835 /* static */ bool TextFormat::ParseFromString(const string& input,
836 Message* output) {
837 return Parser().ParseFromString(input, output);
838 }
839
MergeFromString(const string & input,Message * output)840 /* static */ bool TextFormat::MergeFromString(const string& input,
841 Message* output) {
842 return Parser().MergeFromString(input, output);
843 }
844
845 // ===========================================================================
846
Printer()847 TextFormat::Printer::Printer()
848 : initial_indent_level_(0),
849 single_line_mode_(false),
850 use_short_repeated_primitives_(false),
851 utf8_string_escaping_(false) {}
852
~Printer()853 TextFormat::Printer::~Printer() {}
854
PrintToString(const Message & message,string * output)855 bool TextFormat::Printer::PrintToString(const Message& message,
856 string* output) {
857 GOOGLE_DCHECK(output) << "output specified is NULL";
858
859 output->clear();
860 io::StringOutputStream output_stream(output);
861
862 bool result = Print(message, &output_stream);
863
864 return result;
865 }
866
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,string * output)867 bool TextFormat::Printer::PrintUnknownFieldsToString(
868 const UnknownFieldSet& unknown_fields,
869 string* output) {
870 GOOGLE_DCHECK(output) << "output specified is NULL";
871
872 output->clear();
873 io::StringOutputStream output_stream(output);
874 return PrintUnknownFields(unknown_fields, &output_stream);
875 }
876
Print(const Message & message,io::ZeroCopyOutputStream * output)877 bool TextFormat::Printer::Print(const Message& message,
878 io::ZeroCopyOutputStream* output) {
879 TextGenerator generator(output, initial_indent_level_);
880
881 Print(message, generator);
882
883 // Output false if the generator failed internally.
884 return !generator.failed();
885 }
886
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output)887 bool TextFormat::Printer::PrintUnknownFields(
888 const UnknownFieldSet& unknown_fields,
889 io::ZeroCopyOutputStream* output) {
890 TextGenerator generator(output, initial_indent_level_);
891
892 PrintUnknownFields(unknown_fields, generator);
893
894 // Output false if the generator failed internally.
895 return !generator.failed();
896 }
897
Print(const Message & message,TextGenerator & generator)898 void TextFormat::Printer::Print(const Message& message,
899 TextGenerator& generator) {
900 const Reflection* reflection = message.GetReflection();
901 vector<const FieldDescriptor*> fields;
902 reflection->ListFields(message, &fields);
903 for (int i = 0; i < fields.size(); i++) {
904 PrintField(message, reflection, fields[i], generator);
905 }
906 PrintUnknownFields(reflection->GetUnknownFields(message), generator);
907 }
908
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,string * output)909 void TextFormat::Printer::PrintFieldValueToString(
910 const Message& message,
911 const FieldDescriptor* field,
912 int index,
913 string* output) {
914
915 GOOGLE_DCHECK(output) << "output specified is NULL";
916
917 output->clear();
918 io::StringOutputStream output_stream(output);
919 TextGenerator generator(&output_stream, initial_indent_level_);
920
921 PrintFieldValue(message, message.GetReflection(), field, index, generator);
922 }
923
PrintField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator & generator)924 void TextFormat::Printer::PrintField(const Message& message,
925 const Reflection* reflection,
926 const FieldDescriptor* field,
927 TextGenerator& generator) {
928 if (use_short_repeated_primitives_ &&
929 field->is_repeated() &&
930 field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
931 field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
932 PrintShortRepeatedField(message, reflection, field, generator);
933 return;
934 }
935
936 int count = 0;
937
938 if (field->is_repeated()) {
939 count = reflection->FieldSize(message, field);
940 } else if (reflection->HasField(message, field)) {
941 count = 1;
942 }
943
944 for (int j = 0; j < count; ++j) {
945 PrintFieldName(message, reflection, field, generator);
946
947 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
948 if (single_line_mode_) {
949 generator.Print(" { ");
950 } else {
951 generator.Print(" {\n");
952 generator.Indent();
953 }
954 } else {
955 generator.Print(": ");
956 }
957
958 // Write the field value.
959 int field_index = j;
960 if (!field->is_repeated()) {
961 field_index = -1;
962 }
963
964 PrintFieldValue(message, reflection, field, field_index, generator);
965
966 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
967 if (single_line_mode_) {
968 generator.Print("} ");
969 } else {
970 generator.Outdent();
971 generator.Print("}\n");
972 }
973 } else {
974 if (single_line_mode_) {
975 generator.Print(" ");
976 } else {
977 generator.Print("\n");
978 }
979 }
980 }
981 }
982
PrintShortRepeatedField(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator & generator)983 void TextFormat::Printer::PrintShortRepeatedField(const Message& message,
984 const Reflection* reflection,
985 const FieldDescriptor* field,
986 TextGenerator& generator) {
987 // Print primitive repeated field in short form.
988 PrintFieldName(message, reflection, field, generator);
989
990 int size = reflection->FieldSize(message, field);
991 generator.Print(": [");
992 for (int i = 0; i < size; i++) {
993 if (i > 0) generator.Print(", ");
994 PrintFieldValue(message, reflection, field, i, generator);
995 }
996 if (single_line_mode_) {
997 generator.Print("] ");
998 } else {
999 generator.Print("]\n");
1000 }
1001 }
1002
PrintFieldName(const Message & message,const Reflection * reflection,const FieldDescriptor * field,TextGenerator & generator)1003 void TextFormat::Printer::PrintFieldName(const Message& message,
1004 const Reflection* reflection,
1005 const FieldDescriptor* field,
1006 TextGenerator& generator) {
1007 if (field->is_extension()) {
1008 generator.Print("[");
1009 // We special-case MessageSet elements for compatibility with proto1.
1010 if (field->containing_type()->options().message_set_wire_format()
1011 && field->type() == FieldDescriptor::TYPE_MESSAGE
1012 && field->is_optional()
1013 && field->extension_scope() == field->message_type()) {
1014 generator.Print(field->message_type()->full_name());
1015 } else {
1016 generator.Print(field->full_name());
1017 }
1018 generator.Print("]");
1019 } else {
1020 if (field->type() == FieldDescriptor::TYPE_GROUP) {
1021 // Groups must be serialized with their original capitalization.
1022 generator.Print(field->message_type()->name());
1023 } else {
1024 generator.Print(field->name());
1025 }
1026 }
1027 }
1028
PrintFieldValue(const Message & message,const Reflection * reflection,const FieldDescriptor * field,int index,TextGenerator & generator)1029 void TextFormat::Printer::PrintFieldValue(
1030 const Message& message,
1031 const Reflection* reflection,
1032 const FieldDescriptor* field,
1033 int index,
1034 TextGenerator& generator) {
1035 GOOGLE_DCHECK(field->is_repeated() || (index == -1))
1036 << "Index must be -1 for non-repeated fields";
1037
1038 switch (field->cpp_type()) {
1039 #define OUTPUT_FIELD(CPPTYPE, METHOD, TO_STRING) \
1040 case FieldDescriptor::CPPTYPE_##CPPTYPE: \
1041 generator.Print(TO_STRING(field->is_repeated() ? \
1042 reflection->GetRepeated##METHOD(message, field, index) : \
1043 reflection->Get##METHOD(message, field))); \
1044 break; \
1045
1046 OUTPUT_FIELD( INT32, Int32, SimpleItoa);
1047 OUTPUT_FIELD( INT64, Int64, SimpleItoa);
1048 OUTPUT_FIELD(UINT32, UInt32, SimpleItoa);
1049 OUTPUT_FIELD(UINT64, UInt64, SimpleItoa);
1050 OUTPUT_FIELD( FLOAT, Float, SimpleFtoa);
1051 OUTPUT_FIELD(DOUBLE, Double, SimpleDtoa);
1052 #undef OUTPUT_FIELD
1053
1054 case FieldDescriptor::CPPTYPE_STRING: {
1055 string scratch;
1056 const string& value = field->is_repeated() ?
1057 reflection->GetRepeatedStringReference(
1058 message, field, index, &scratch) :
1059 reflection->GetStringReference(message, field, &scratch);
1060
1061 generator.Print("\"");
1062 if (utf8_string_escaping_) {
1063 generator.Print(strings::Utf8SafeCEscape(value));
1064 } else {
1065 generator.Print(CEscape(value));
1066 }
1067 generator.Print("\"");
1068
1069 break;
1070 }
1071
1072 case FieldDescriptor::CPPTYPE_BOOL:
1073 if (field->is_repeated()) {
1074 generator.Print(reflection->GetRepeatedBool(message, field, index)
1075 ? "true" : "false");
1076 } else {
1077 generator.Print(reflection->GetBool(message, field)
1078 ? "true" : "false");
1079 }
1080 break;
1081
1082 case FieldDescriptor::CPPTYPE_ENUM:
1083 generator.Print(field->is_repeated() ?
1084 reflection->GetRepeatedEnum(message, field, index)->name() :
1085 reflection->GetEnum(message, field)->name());
1086 break;
1087
1088 case FieldDescriptor::CPPTYPE_MESSAGE:
1089 Print(field->is_repeated() ?
1090 reflection->GetRepeatedMessage(message, field, index) :
1091 reflection->GetMessage(message, field),
1092 generator);
1093 break;
1094 }
1095 }
1096
Print(const Message & message,io::ZeroCopyOutputStream * output)1097 /* static */ bool TextFormat::Print(const Message& message,
1098 io::ZeroCopyOutputStream* output) {
1099 return Printer().Print(message, output);
1100 }
1101
PrintUnknownFields(const UnknownFieldSet & unknown_fields,io::ZeroCopyOutputStream * output)1102 /* static */ bool TextFormat::PrintUnknownFields(
1103 const UnknownFieldSet& unknown_fields,
1104 io::ZeroCopyOutputStream* output) {
1105 return Printer().PrintUnknownFields(unknown_fields, output);
1106 }
1107
PrintToString(const Message & message,string * output)1108 /* static */ bool TextFormat::PrintToString(
1109 const Message& message, string* output) {
1110 return Printer().PrintToString(message, output);
1111 }
1112
PrintUnknownFieldsToString(const UnknownFieldSet & unknown_fields,string * output)1113 /* static */ bool TextFormat::PrintUnknownFieldsToString(
1114 const UnknownFieldSet& unknown_fields, string* output) {
1115 return Printer().PrintUnknownFieldsToString(unknown_fields, output);
1116 }
1117
PrintFieldValueToString(const Message & message,const FieldDescriptor * field,int index,string * output)1118 /* static */ void TextFormat::PrintFieldValueToString(
1119 const Message& message,
1120 const FieldDescriptor* field,
1121 int index,
1122 string* output) {
1123 return Printer().PrintFieldValueToString(message, field, index, output);
1124 }
1125
ParseFieldValueFromString(const string & input,const FieldDescriptor * field,Message * message)1126 /* static */ bool TextFormat::ParseFieldValueFromString(
1127 const string& input,
1128 const FieldDescriptor* field,
1129 Message* message) {
1130 return Parser().ParseFieldValueFromString(input, field, message);
1131 }
1132
1133 // Prints an integer as hex with a fixed number of digits dependent on the
1134 // integer type.
1135 template<typename IntType>
PaddedHex(IntType value)1136 static string PaddedHex(IntType value) {
1137 string result;
1138 result.reserve(sizeof(value) * 2);
1139 for (int i = sizeof(value) * 2 - 1; i >= 0; i--) {
1140 result.push_back(int_to_hex_digit(value >> (i*4) & 0x0F));
1141 }
1142 return result;
1143 }
1144
PrintUnknownFields(const UnknownFieldSet & unknown_fields,TextGenerator & generator)1145 void TextFormat::Printer::PrintUnknownFields(
1146 const UnknownFieldSet& unknown_fields, TextGenerator& generator) {
1147 for (int i = 0; i < unknown_fields.field_count(); i++) {
1148 const UnknownField& field = unknown_fields.field(i);
1149 string field_number = SimpleItoa(field.number());
1150
1151 switch (field.type()) {
1152 case UnknownField::TYPE_VARINT:
1153 generator.Print(field_number);
1154 generator.Print(": ");
1155 generator.Print(SimpleItoa(field.varint()));
1156 if (single_line_mode_) {
1157 generator.Print(" ");
1158 } else {
1159 generator.Print("\n");
1160 }
1161 break;
1162 case UnknownField::TYPE_FIXED32: {
1163 generator.Print(field_number);
1164 generator.Print(": 0x");
1165 char buffer[kFastToBufferSize];
1166 generator.Print(FastHex32ToBuffer(field.fixed32(), buffer));
1167 if (single_line_mode_) {
1168 generator.Print(" ");
1169 } else {
1170 generator.Print("\n");
1171 }
1172 break;
1173 }
1174 case UnknownField::TYPE_FIXED64: {
1175 generator.Print(field_number);
1176 generator.Print(": 0x");
1177 char buffer[kFastToBufferSize];
1178 generator.Print(FastHex64ToBuffer(field.fixed64(), buffer));
1179 if (single_line_mode_) {
1180 generator.Print(" ");
1181 } else {
1182 generator.Print("\n");
1183 }
1184 break;
1185 }
1186 case UnknownField::TYPE_LENGTH_DELIMITED: {
1187 generator.Print(field_number);
1188 const string& value = field.length_delimited();
1189 UnknownFieldSet embedded_unknown_fields;
1190 if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) {
1191 // This field is parseable as a Message.
1192 // So it is probably an embedded message.
1193 if (single_line_mode_) {
1194 generator.Print(" { ");
1195 } else {
1196 generator.Print(" {\n");
1197 generator.Indent();
1198 }
1199 PrintUnknownFields(embedded_unknown_fields, generator);
1200 if (single_line_mode_) {
1201 generator.Print("} ");
1202 } else {
1203 generator.Outdent();
1204 generator.Print("}\n");
1205 }
1206 } else {
1207 // This field is not parseable as a Message.
1208 // So it is probably just a plain string.
1209 generator.Print(": \"");
1210 generator.Print(CEscape(value));
1211 generator.Print("\"");
1212 if (single_line_mode_) {
1213 generator.Print(" ");
1214 } else {
1215 generator.Print("\n");
1216 }
1217 }
1218 break;
1219 }
1220 case UnknownField::TYPE_GROUP:
1221 generator.Print(field_number);
1222 if (single_line_mode_) {
1223 generator.Print(" { ");
1224 } else {
1225 generator.Print(" {\n");
1226 generator.Indent();
1227 }
1228 PrintUnknownFields(field.group(), generator);
1229 if (single_line_mode_) {
1230 generator.Print("} ");
1231 } else {
1232 generator.Outdent();
1233 generator.Print("}\n");
1234 }
1235 break;
1236 }
1237 }
1238 }
1239
1240 } // namespace protobuf
1241 } // namespace google
1242