1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: jschorr@google.com (Joseph Schorr)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // Utilities for printing and parsing protocol messages in a human-readable,
36 // text-based format.
37
38 #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
39 #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
40
41 #include <map>
42 #include <memory>
43 #ifndef _SHARED_PTR_H
44 #include <google/protobuf/stubs/shared_ptr.h>
45 #endif
46 #include <string>
47 #include <vector>
48
49 #include <google/protobuf/stubs/common.h>
50 #include <google/protobuf/descriptor.h>
51 #include <google/protobuf/message.h>
52
53 namespace google {
54 namespace protobuf {
55
// Forward declarations of the io types that this header refers to only
// through pointers. ZeroCopyInputStream and ZeroCopyOutputStream appear in
// the Print/Parse signatures below, so they are declared here rather than
// relying on another included header to have declared them first.
namespace io {
class ErrorCollector;        // tokenizer.h
class ZeroCopyInputStream;   // NOTE(review): presumably zero_copy_stream.h -- confirm
class ZeroCopyOutputStream;  // NOTE(review): presumably zero_copy_stream.h -- confirm
}  // namespace io
59
60 // This class implements protocol buffer text format. Printing and parsing
61 // protocol messages in text format is useful for debugging and human editing
62 // of messages.
63 //
64 // This class is really a namespace that contains only static methods.
65 class LIBPROTOBUF_EXPORT TextFormat {
66 public:
67 // Outputs a textual representation of the given message to the given
68 // output stream.
69 static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
70
71 // Print the fields in an UnknownFieldSet. They are printed by tag number
72 // only. Embedded messages are heuristically identified by attempting to
73 // parse them.
74 static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
75 io::ZeroCopyOutputStream* output);
76
77 // Like Print(), but outputs directly to a string.
78 static bool PrintToString(const Message& message, string* output);
79
80 // Like PrintUnknownFields(), but outputs directly to a string.
81 static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
82 string* output);
83
84 // Outputs a textual representation of the value of the field supplied on
85 // the message supplied. For non-repeated fields, an index of -1 must
86 // be supplied. Note that this method will print the default value for a
87 // field if it is not set.
88 static void PrintFieldValueToString(const Message& message,
89 const FieldDescriptor* field,
90 int index,
91 string* output);
92
93 // The default printer that converts scalar values from fields into
94 // their string representation.
95 // You can derive from this FieldValuePrinter if you want to have
96 // fields to be printed in a different way and register it at the
97 // Printer.
98 class LIBPROTOBUF_EXPORT FieldValuePrinter {
99 public:
100 FieldValuePrinter();
101 virtual ~FieldValuePrinter();
102 virtual string PrintBool(bool val) const;
103 virtual string PrintInt32(int32 val) const;
104 virtual string PrintUInt32(uint32 val) const;
105 virtual string PrintInt64(int64 val) const;
106 virtual string PrintUInt64(uint64 val) const;
107 virtual string PrintFloat(float val) const;
108 virtual string PrintDouble(double val) const;
109 virtual string PrintString(const string& val) const;
110 virtual string PrintBytes(const string& val) const;
111 virtual string PrintEnum(int32 val, const string& name) const;
112 virtual string PrintFieldName(const Message& message,
113 const Reflection* reflection,
114 const FieldDescriptor* field) const;
115 virtual string PrintMessageStart(const Message& message,
116 int field_index,
117 int field_count,
118 bool single_line_mode) const;
119 virtual string PrintMessageEnd(const Message& message,
120 int field_index,
121 int field_count,
122 bool single_line_mode) const;
123
124 private:
125 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldValuePrinter);
126 };
127
128 // Class for those users which require more fine-grained control over how
129 // a protobuffer message is printed out.
130 class LIBPROTOBUF_EXPORT Printer {
131 public:
132 Printer();
133 ~Printer();
134
135 // Like TextFormat::Print
136 bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
137 // Like TextFormat::PrintUnknownFields
138 bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
139 io::ZeroCopyOutputStream* output) const;
140 // Like TextFormat::PrintToString
141 bool PrintToString(const Message& message, string* output) const;
142 // Like TextFormat::PrintUnknownFieldsToString
143 bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
144 string* output) const;
145 // Like TextFormat::PrintFieldValueToString
146 void PrintFieldValueToString(const Message& message,
147 const FieldDescriptor* field,
148 int index,
149 string* output) const;
150
151 // Adjust the initial indent level of all output. Each indent level is
152 // equal to two spaces.
SetInitialIndentLevel(int indent_level)153 void SetInitialIndentLevel(int indent_level) {
154 initial_indent_level_ = indent_level;
155 }
156
157 // If printing in single line mode, then the entire message will be output
158 // on a single line with no line breaks.
SetSingleLineMode(bool single_line_mode)159 void SetSingleLineMode(bool single_line_mode) {
160 single_line_mode_ = single_line_mode;
161 }
162
IsInSingleLineMode()163 bool IsInSingleLineMode() {
164 return single_line_mode_;
165 }
166
167 // If use_field_number is true, uses field number instead of field name.
SetUseFieldNumber(bool use_field_number)168 void SetUseFieldNumber(bool use_field_number) {
169 use_field_number_ = use_field_number;
170 }
171
172 // Set true to print repeated primitives in a format like:
173 // field_name: [1, 2, 3, 4]
174 // instead of printing each value on its own line. Short format applies
175 // only to primitive values -- i.e. everything except strings and
176 // sub-messages/groups.
SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives)177 void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
178 use_short_repeated_primitives_ = use_short_repeated_primitives;
179 }
180
181 // Set true to output UTF-8 instead of ASCII. The only difference
182 // is that bytes >= 0x80 in string fields will not be escaped,
183 // because they are assumed to be part of UTF-8 multi-byte
184 // sequences. This will change the default FieldValuePrinter.
185 void SetUseUtf8StringEscaping(bool as_utf8);
186
187 // Set the default FieldValuePrinter that is used for all fields that
188 // don't have a field-specific printer registered.
189 // Takes ownership of the printer.
190 void SetDefaultFieldValuePrinter(const FieldValuePrinter* printer);
191
192 // Sets whether we want to hide unknown fields or not.
193 // Usually unknown fields are printed in a generic way that includes the
194 // tag number of the field instead of field name. However, sometimes it
195 // is useful to be able to print the message without unknown fields (e.g.
196 // for the python protobuf version to maintain consistency between its pure
197 // python and c++ implementations).
SetHideUnknownFields(bool hide)198 void SetHideUnknownFields(bool hide) {
199 hide_unknown_fields_ = hide;
200 }
201
202 // If print_message_fields_in_index_order is true, print fields of a proto
203 // message using the order defined in source code instead of the field
204 // number. By default, use the field number order.
SetPrintMessageFieldsInIndexOrder(bool print_message_fields_in_index_order)205 void SetPrintMessageFieldsInIndexOrder(
206 bool print_message_fields_in_index_order) {
207 print_message_fields_in_index_order_ =
208 print_message_fields_in_index_order;
209 }
210
211 // If expand==true, expand google.protobuf.Any payloads. The output
212 // will be of form
213 // [type_url] { <value_printed_in_text> }
214 //
215 // If expand==false, print Any using the default printer. The output will
216 // look like
217 // type_url: "<type_url>" value: "serialized_content"
SetExpandAny(bool expand)218 void SetExpandAny(bool expand) {
219 expand_any_ = expand;
220 }
221
222 // If non-zero, we truncate all string fields that are longer than this
223 // threshold. This is useful when the proto message has very long strings,
224 // e.g., dump of encoded image file.
225 //
226 // NOTE(hfgong): Setting a non-zero value breaks round-trip safe
227 // property of TextFormat::Printer. That is, from the printed message, we
228 // cannot fully recover the original string field any more.
SetTruncateStringFieldLongerThan(const int64 truncate_string_field_longer_than)229 void SetTruncateStringFieldLongerThan(
230 const int64 truncate_string_field_longer_than) {
231 truncate_string_field_longer_than_ = truncate_string_field_longer_than;
232 }
233
234 // Register a custom field-specific FieldValuePrinter for fields
235 // with a particular FieldDescriptor.
236 // Returns "true" if the registration succeeded, or "false", if there is
237 // already a printer for that FieldDescriptor.
238 // Takes ownership of the printer on successful registration.
239 bool RegisterFieldValuePrinter(const FieldDescriptor* field,
240 const FieldValuePrinter* printer);
241
242 private:
243 // Forward declaration of an internal class used to print the text
244 // output to the OutputStream (see text_format.cc for implementation).
245 class TextGenerator;
246
247 // Internal Print method, used for writing to the OutputStream via
248 // the TextGenerator class.
249 void Print(const Message& message,
250 TextGenerator& generator) const;
251
252 // Print a single field.
253 void PrintField(const Message& message,
254 const Reflection* reflection,
255 const FieldDescriptor* field,
256 TextGenerator& generator) const;
257
258 // Print a repeated primitive field in short form.
259 void PrintShortRepeatedField(const Message& message,
260 const Reflection* reflection,
261 const FieldDescriptor* field,
262 TextGenerator& generator) const;
263
264 // Print the name of a field -- i.e. everything that comes before the
265 // ':' for a single name/value pair.
266 void PrintFieldName(const Message& message,
267 const Reflection* reflection,
268 const FieldDescriptor* field,
269 TextGenerator& generator) const;
270
271 // Outputs a textual representation of the value of the field supplied on
272 // the message supplied or the default value if not set.
273 void PrintFieldValue(const Message& message,
274 const Reflection* reflection,
275 const FieldDescriptor* field,
276 int index,
277 TextGenerator& generator) const;
278
279 // Print the fields in an UnknownFieldSet. They are printed by tag number
280 // only. Embedded messages are heuristically identified by attempting to
281 // parse them.
282 void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
283 TextGenerator& generator) const;
284
285 bool PrintAny(const Message& message, TextGenerator& generator) const;
286
287 int initial_indent_level_;
288
289 bool single_line_mode_;
290
291 bool use_field_number_;
292
293 bool use_short_repeated_primitives_;
294
295 bool hide_unknown_fields_;
296
297 bool print_message_fields_in_index_order_;
298
299 bool expand_any_;
300
301 int64 truncate_string_field_longer_than_;
302
303 google::protobuf::scoped_ptr<const FieldValuePrinter> default_field_value_printer_;
304 typedef map<const FieldDescriptor*,
305 const FieldValuePrinter*> CustomPrinterMap;
306 CustomPrinterMap custom_printers_;
307 };
308
309 // Parses a text-format protocol message from the given input stream to
310 // the given message object. This function parses the human-readable format
311 // written by Print(). Returns true on success. The message is cleared first,
312 // even if the function fails -- See Merge() to avoid this behavior.
313 //
314 // Example input: "user {\n id: 123 extra { gender: MALE language: 'en' }\n}"
315 //
316 // One use for this function is parsing handwritten strings in test code.
317 // Another use is to parse the output from google::protobuf::Message::DebugString()
318 // (or ShortDebugString()), because these functions output using
319 // google::protobuf::TextFormat::Print().
320 //
321 // If you would like to read a protocol buffer serialized in the
322 // (non-human-readable) binary wire format, see
323 // google::protobuf::MessageLite::ParseFromString().
324 static bool Parse(io::ZeroCopyInputStream* input, Message* output);
325 // Like Parse(), but reads directly from a string.
326 static bool ParseFromString(const string& input, Message* output);
327
328 // Like Parse(), but the data is merged into the given message, as if
329 // using Message::MergeFrom().
330 static bool Merge(io::ZeroCopyInputStream* input, Message* output);
331 // Like Merge(), but reads directly from a string.
332 static bool MergeFromString(const string& input, Message* output);
333
334 // Parse the given text as a single field value and store it into the
335 // given field of the given message. If the field is a repeated field,
336 // the new value will be added to the end
337 static bool ParseFieldValueFromString(const string& input,
338 const FieldDescriptor* field,
339 Message* message);
340
341 // Interface that TextFormat::Parser can use to find extensions.
342 // This class may be extended in the future to find more information
343 // like fields, etc.
344 class LIBPROTOBUF_EXPORT Finder {
345 public:
346 virtual ~Finder();
347
348 // Try to find an extension of *message by fully-qualified field
349 // name. Returns NULL if no extension is known for this name or number.
350 virtual const FieldDescriptor* FindExtension(
351 Message* message,
352 const string& name) const = 0;
353 };
354
// A (line, column) position in the parsed text.
struct ParseLocation {
  int line;
  int column;

  // Builds a location from an explicit line and column.
  ParseLocation(int line_param, int column_param)
      : line(line_param),
        column(column_param) {}

  // Builds the default location, whose line and column are both -1.
  ParseLocation()
      : line(-1),
        column(-1) {}
};
364
365 // Data structure which is populated with the locations of each field
366 // value parsed from the text.
367 class LIBPROTOBUF_EXPORT ParseInfoTree {
368 public:
369 ParseInfoTree();
370 ~ParseInfoTree();
371
372 // Returns the parse location for index-th value of the field in the parsed
373 // text. If none exists, returns a location with line = -1. Index should be
374 // -1 for not-repeated fields.
375 ParseLocation GetLocation(const FieldDescriptor* field, int index) const;
376
377 // Returns the parse info tree for the given field, which must be a message
378 // type. The nested information tree is owned by the root tree and will be
379 // deleted when it is deleted.
380 ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
381 int index) const;
382
383 private:
384 // Allow the text format parser to record information into the tree.
385 friend class TextFormat;
386
387 // Records the starting location of a single value for a field.
388 void RecordLocation(const FieldDescriptor* field, ParseLocation location);
389
390 // Create and records a nested tree for a nested message field.
391 ParseInfoTree* CreateNested(const FieldDescriptor* field);
392
393 // Defines the map from the index-th field descriptor to its parse location.
394 typedef map<const FieldDescriptor*, vector<ParseLocation> > LocationMap;
395
396 // Defines the map from the index-th field descriptor to the nested parse
397 // info tree.
398 typedef map<const FieldDescriptor*, vector<ParseInfoTree*> > NestedMap;
399
400 LocationMap locations_;
401 NestedMap nested_;
402
403 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParseInfoTree);
404 };
405
406 // For more control over parsing, use this class.
407 class LIBPROTOBUF_EXPORT Parser {
408 public:
409 Parser();
410 ~Parser();
411
412 // Like TextFormat::Parse().
413 bool Parse(io::ZeroCopyInputStream* input, Message* output);
414 // Like TextFormat::ParseFromString().
415 bool ParseFromString(const string& input, Message* output);
416 // Like TextFormat::Merge().
417 bool Merge(io::ZeroCopyInputStream* input, Message* output);
418 // Like TextFormat::MergeFromString().
419 bool MergeFromString(const string& input, Message* output);
420
421 // Set where to report parse errors. If NULL (the default), errors will
422 // be printed to stderr.
RecordErrorsTo(io::ErrorCollector * error_collector)423 void RecordErrorsTo(io::ErrorCollector* error_collector) {
424 error_collector_ = error_collector;
425 }
426
427 // Set how parser finds extensions. If NULL (the default), the
428 // parser will use the standard Reflection object associated with
429 // the message being parsed.
SetFinder(Finder * finder)430 void SetFinder(Finder* finder) {
431 finder_ = finder;
432 }
433
434 // Sets where location information about the parse will be written. If NULL
435 // (the default), then no location will be written.
WriteLocationsTo(ParseInfoTree * tree)436 void WriteLocationsTo(ParseInfoTree* tree) {
437 parse_info_tree_ = tree;
438 }
439
440 // Normally parsing fails if, after parsing, output->IsInitialized()
441 // returns false. Call AllowPartialMessage(true) to skip this check.
AllowPartialMessage(bool allow)442 void AllowPartialMessage(bool allow) {
443 allow_partial_ = allow;
444 }
445
446 // Allow field names to be matched case-insensitively.
447 // This is not advisable if there are fields that only differ in case, or
448 // if you want to enforce writing in the canonical form.
449 // This is 'false' by default.
AllowCaseInsensitiveField(bool allow)450 void AllowCaseInsensitiveField(bool allow) {
451 allow_case_insensitive_field_ = allow;
452 }
453
454 // Like TextFormat::ParseFieldValueFromString
455 bool ParseFieldValueFromString(const string& input,
456 const FieldDescriptor* field,
457 Message* output);
458
459
AllowFieldNumber(bool allow)460 void AllowFieldNumber(bool allow) {
461 allow_field_number_ = allow;
462 }
463
464 private:
465 // Forward declaration of an internal class used to parse text
466 // representations (see text_format.cc for implementation).
467 class ParserImpl;
468
469 // Like TextFormat::Merge(). The provided implementation is used
470 // to do the parsing.
471 bool MergeUsingImpl(io::ZeroCopyInputStream* input,
472 Message* output,
473 ParserImpl* parser_impl);
474
475 io::ErrorCollector* error_collector_;
476 Finder* finder_;
477 ParseInfoTree* parse_info_tree_;
478 bool allow_partial_;
479 bool allow_case_insensitive_field_;
480 bool allow_unknown_field_;
481 bool allow_unknown_enum_;
482 bool allow_field_number_;
483 bool allow_relaxed_whitespace_;
484 bool allow_singular_overwrites_;
485 };
486
487
488 private:
489 // Hack: ParseInfoTree declares TextFormat as a friend which should extend
490 // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
491 // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
492 // helpers for ParserImpl to call methods of ParseInfoTree.
493 static inline void RecordLocation(ParseInfoTree* info_tree,
494 const FieldDescriptor* field,
495 ParseLocation location);
496 static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
497 const FieldDescriptor* field);
498
499 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextFormat);
500 };
501
RecordLocation(ParseInfoTree * info_tree,const FieldDescriptor * field,ParseLocation location)502 inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
503 const FieldDescriptor* field,
504 ParseLocation location) {
505 info_tree->RecordLocation(field, location);
506 }
507
508
CreateNested(ParseInfoTree * info_tree,const FieldDescriptor * field)509 inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
510 ParseInfoTree* info_tree, const FieldDescriptor* field) {
511 return info_tree->CreateNested(field);
512 }
513
514 } // namespace protobuf
515
516 } // namespace google
517 #endif // GOOGLE_PROTOBUF_TEXT_FORMAT_H__
518