1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google LLC. All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7
8 // Author: jschorr@google.com (Joseph Schorr)
9 // Based on original Protocol Buffers design by
10 // Sanjay Ghemawat, Jeff Dean, and others.
11 //
12 // Utilities for printing and parsing protocol messages in a human-readable,
13 // text-based format.
14
15 #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
16 #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
17
18 #include <atomic>
19 #include <memory>
20 #include <string>
21 #include <vector>
22
23 #include "absl/container/flat_hash_map.h"
24 #include "absl/container/flat_hash_set.h"
25 #include "absl/strings/cord.h"
26 #include "absl/strings/string_view.h"
27 #include "google/protobuf/descriptor.h"
28 #include "google/protobuf/message.h"
29 #include "google/protobuf/message_lite.h"
30 #include "google/protobuf/port.h"
31
32
33 // Must be included last.
34 #include "google/protobuf/port_def.inc"
35
36 #ifdef SWIG
37 #error "You cannot SWIG proto headers"
38 #endif
39
40 namespace google {
41 namespace protobuf {
42
43 namespace internal {
44 PROTOBUF_EXPORT extern const char kDebugStringSilentMarker[1];
45 PROTOBUF_EXPORT extern const char kDebugStringSilentMarkerForDetection[3];
46
47 PROTOBUF_EXPORT extern std::atomic<bool> enable_debug_string_safe_format;
48 PROTOBUF_EXPORT int64_t GetRedactedFieldCount();
49 PROTOBUF_EXPORT bool ShouldRedactField(const FieldDescriptor* field);
50
// This enum contains all the APIs that convert protos to human-readable
// formats. A higher-level API must correspond to a greater number than any
// lower-level APIs it calls under the hood (e.g. kDebugString >
// kMemberPrintToString > kPrintWithStream).
//
// No export macro is applied: an enumeration produces no symbol to export,
// and an attribute written before `enum class` attaches to nothing, so
// compilers ignore it (usually with a warning).
//
// The numeric values are listed explicitly; 9 through 11 are not assigned
// here.
enum class FieldReporterLevel {
  kNoReport = 0,
  kPrintMessage = 1,
  kPrintWithGenerator = 2,
  kPrintWithStream = 3,
  kMemberPrintToString = 4,
  kStaticPrintToString = 5,
  kAbslStringify = 6,
  kShortFormat = 7,
  kUtf8Format = 8,
  kDebugString = 12,
  kShortDebugString = 13,
  kUtf8DebugString = 14,
  kUnredactedDebugFormatForTest = 15,
  kUnredactedShortDebugFormatForTest = 16,
  kUnredactedUtf8DebugFormatForTest = 17
};
72
73 } // namespace internal
74
75 namespace io {
76 class ErrorCollector; // tokenizer.h
77 }
78
79 namespace python {
80 namespace cmessage {
81 class PythonFieldValuePrinter;
82 }
83 } // namespace python
84
85 namespace internal {
86 // Enum used to set printing options for StringifyMessage.
87 PROTOBUF_EXPORT enum class Option;
88
89 // Converts a protobuf message to a string. If enable_safe_format is true,
90 // sensitive fields are redacted, and a per-process randomized prefix is
91 // inserted.
92 PROTOBUF_EXPORT std::string StringifyMessage(const Message& message,
93 Option option,
94 FieldReporterLevel reporter_level,
95 bool enable_safe_format);
96
97 class UnsetFieldsMetadataTextFormatTestUtil;
98 class UnsetFieldsMetadataMessageDifferencerTestUtil;
99 } // namespace internal
100
101 // This class implements protocol buffer text format, colloquially known as text
102 // proto. Printing and parsing protocol messages in text format is useful for
103 // debugging and human editing of messages.
104 //
105 // This class is really a namespace that contains only static methods.
106 class PROTOBUF_EXPORT TextFormat {
107 public:
108 TextFormat(const TextFormat&) = delete;
109 TextFormat& operator=(const TextFormat&) = delete;
110
111 // Outputs a textual representation of the given message to the given
112 // output stream. Returns false if printing fails.
113 static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
114
115 // Print the fields in an UnknownFieldSet. They are printed by tag number
116 // only. Embedded messages are heuristically identified by attempting to
117 // parse them. Returns false if printing fails.
118 static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
119 io::ZeroCopyOutputStream* output);
120
121 // Like Print(), but outputs directly to a string.
122 // Note: output will be cleared prior to printing, and will be left empty
123 // even if printing fails. Returns false if printing fails.
124 static bool PrintToString(const Message& message, std::string* output);
125
126 // Like PrintUnknownFields(), but outputs directly to a string. Returns
127 // false if printing fails.
128 static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
129 std::string* output);
130
131 // Outputs a textual representation of the value of the field supplied on
132 // the message supplied. For non-repeated fields, an index of -1 must
133 // be supplied. Note that this method will print the default value for a
134 // field if it is not set.
135 static void PrintFieldValueToString(const Message& message,
136 const FieldDescriptor* field, int index,
137 std::string* output);
138
139 // Forward declare `Printer` for `BaseTextGenerator::MarkerToken` which
140 // restricts some methods of `BaseTextGenerator` to the class `Printer`.
141 class Printer;
142
143 class PROTOBUF_EXPORT BaseTextGenerator {
144 private:
145 // Passkey (go/totw/134#what-about-stdshared-ptr) that allows `Printer`
146 // (but not derived classes) to call `PrintMaybeWithMarker` and its
147 // `Printer::TextGenerator` to overload it.
148 // This prevents users from bypassing the marker generation.
149 class MarkerToken {
150 private:
151 explicit MarkerToken() = default; // 'explicit' prevents aggregate init.
152 friend class Printer;
153 };
154
155 public:
156 virtual ~BaseTextGenerator();
157
Indent()158 virtual void Indent() {}
Outdent()159 virtual void Outdent() {}
160 // Returns the current indentation size in characters.
GetCurrentIndentationSize()161 virtual size_t GetCurrentIndentationSize() const { return 0; }
162
163 // Print text to the output stream.
164 virtual void Print(const char* text, size_t size) = 0;
165
PrintString(absl::string_view str)166 void PrintString(absl::string_view str) { Print(str.data(), str.size()); }
167
168 template <size_t n>
PrintLiteral(const char (& text)[n])169 void PrintLiteral(const char (&text)[n]) {
170 Print(text, n - 1); // n includes the terminating zero character.
171 }
172
173 // Internal to Printer, access regulated by `MarkerToken`.
PrintMaybeWithMarker(MarkerToken,absl::string_view text)174 virtual void PrintMaybeWithMarker(MarkerToken, absl::string_view text) {
175 Print(text.data(), text.size());
176 }
177
178 // Internal to Printer, access regulated by `MarkerToken`.
PrintMaybeWithMarker(MarkerToken,absl::string_view text_head,absl::string_view text_tail)179 virtual void PrintMaybeWithMarker(MarkerToken, absl::string_view text_head,
180 absl::string_view text_tail) {
181 Print(text_head.data(), text_head.size());
182 Print(text_tail.data(), text_tail.size());
183 }
184
185 friend class Printer;
186 };
187
188 // The default printer that converts scalar values from fields into their
189 // string representation.
190 // You can derive from this FastFieldValuePrinter if you want to have fields
191 // to be printed in a different way and register it at the Printer.
192 class PROTOBUF_EXPORT FastFieldValuePrinter {
193 public:
194 FastFieldValuePrinter();
195 FastFieldValuePrinter(const FastFieldValuePrinter&) = delete;
196 FastFieldValuePrinter& operator=(const FastFieldValuePrinter&) = delete;
197 virtual ~FastFieldValuePrinter();
198 virtual void PrintBool(bool val, BaseTextGenerator* generator) const;
199 virtual void PrintInt32(int32_t val, BaseTextGenerator* generator) const;
200 virtual void PrintUInt32(uint32_t val, BaseTextGenerator* generator) const;
201 virtual void PrintInt64(int64_t val, BaseTextGenerator* generator) const;
202 virtual void PrintUInt64(uint64_t val, BaseTextGenerator* generator) const;
203 virtual void PrintFloat(float val, BaseTextGenerator* generator) const;
204 virtual void PrintDouble(double val, BaseTextGenerator* generator) const;
205 virtual void PrintString(const std::string& val,
206 BaseTextGenerator* generator) const;
207 virtual void PrintBytes(const std::string& val,
208 BaseTextGenerator* generator) const;
209 virtual void PrintEnum(int32_t val, const std::string& name,
210 BaseTextGenerator* generator) const;
211 virtual void PrintFieldName(const Message& message, int field_index,
212 int field_count, const Reflection* reflection,
213 const FieldDescriptor* field,
214 BaseTextGenerator* generator) const;
215 virtual void PrintFieldName(const Message& message,
216 const Reflection* reflection,
217 const FieldDescriptor* field,
218 BaseTextGenerator* generator) const;
219 virtual void PrintMessageStart(const Message& message, int field_index,
220 int field_count, bool single_line_mode,
221 BaseTextGenerator* generator) const;
222 // Allows to override the logic on how to print the content of a message.
223 // Return false to use the default printing logic. Note that it is legal for
224 // this function to print something and then return false to use the default
225 // content printing (although at that point it would behave similarly to
226 // PrintMessageStart).
227 virtual bool PrintMessageContent(const Message& message, int field_index,
228 int field_count, bool single_line_mode,
229 BaseTextGenerator* generator) const;
230 virtual void PrintMessageEnd(const Message& message, int field_index,
231 int field_count, bool single_line_mode,
232 BaseTextGenerator* generator) const;
233 };
234
235 // Deprecated: please use FastFieldValuePrinter instead.
236 class PROTOBUF_EXPORT FieldValuePrinter {
237 public:
238 FieldValuePrinter();
239 FieldValuePrinter(const FieldValuePrinter&) = delete;
240 FieldValuePrinter& operator=(const FieldValuePrinter&) = delete;
241 virtual ~FieldValuePrinter();
242 virtual std::string PrintBool(bool val) const;
243 virtual std::string PrintInt32(int32_t val) const;
244 virtual std::string PrintUInt32(uint32_t val) const;
245 virtual std::string PrintInt64(int64_t val) const;
246 virtual std::string PrintUInt64(uint64_t val) const;
247 virtual std::string PrintFloat(float val) const;
248 virtual std::string PrintDouble(double val) const;
249 virtual std::string PrintString(const std::string& val) const;
250 virtual std::string PrintBytes(const std::string& val) const;
251 virtual std::string PrintEnum(int32_t val, const std::string& name) const;
252 virtual std::string PrintFieldName(const Message& message,
253 const Reflection* reflection,
254 const FieldDescriptor* field) const;
255 virtual std::string PrintMessageStart(const Message& message,
256 int field_index, int field_count,
257 bool single_line_mode) const;
258 virtual std::string PrintMessageEnd(const Message& message, int field_index,
259 int field_count,
260 bool single_line_mode) const;
261
262 private:
263 FastFieldValuePrinter delegate_;
264 };
265
266 class PROTOBUF_EXPORT MessagePrinter {
267 public:
MessagePrinter()268 MessagePrinter() {}
269 MessagePrinter(const MessagePrinter&) = delete;
270 MessagePrinter& operator=(const MessagePrinter&) = delete;
~MessagePrinter()271 virtual ~MessagePrinter() {}
272 virtual void Print(const Message& message, bool single_line_mode,
273 BaseTextGenerator* generator) const = 0;
274 };
275
276 // Interface that Printers or Parsers can use to find extensions, or types
277 // referenced in Any messages.
278 class PROTOBUF_EXPORT Finder {
279 public:
280 virtual ~Finder();
281
282 // Try to find an extension of *message by fully-qualified field
283 // name. Returns nullptr if no extension is known for this name or number.
284 // The base implementation uses the extensions already known by the message.
285 virtual const FieldDescriptor* FindExtension(Message* message,
286 const std::string& name) const;
287
288 // Similar to FindExtension, but uses a Descriptor and the extension number
289 // instead of using a Message and the name when doing the look up.
290 virtual const FieldDescriptor* FindExtensionByNumber(
291 const Descriptor* descriptor, int number) const;
292
293 // Find the message type for an Any proto.
294 // Returns nullptr if no message is known for this name.
295 // The base implementation only accepts prefixes of type.googleprod.com/ or
296 // type.googleapis.com/, and searches the DescriptorPool of the parent
297 // message.
298 virtual const Descriptor* FindAnyType(const Message& message,
299 const std::string& prefix,
300 const std::string& name) const;
301
302 // Find the message factory for the given extension field. This can be used
303 // to generalize the Parser to add extension fields to a message in the same
304 // way as the "input" message for the Parser.
305 virtual MessageFactory* FindExtensionFactory(
306 const FieldDescriptor* field) const;
307 };
308
309 // Class for those users which require more fine-grained control over how
310 // a protobuffer message is printed out.
311 class PROTOBUF_EXPORT Printer {
312 public:
313 Printer();
314
315 // Like TextFormat::Print
316 bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
317 // Like TextFormat::Printer::Print but takes an additional
318 // internal::FieldReporterLevel
319 bool Print(const Message& message, io::ZeroCopyOutputStream* output,
320 internal::FieldReporterLevel reporter) const;
321 // Like TextFormat::PrintUnknownFields
322 bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
323 io::ZeroCopyOutputStream* output) const;
324 // Like TextFormat::PrintToString
325 bool PrintToString(const Message& message, std::string* output) const;
326 // Like TextFormat::PrintUnknownFieldsToString
327 bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
328 std::string* output) const;
329 // Like TextFormat::PrintFieldValueToString
330 void PrintFieldValueToString(const Message& message,
331 const FieldDescriptor* field, int index,
332 std::string* output) const;
333
334 // Adjust the initial indent level of all output. Each indent level is
335 // equal to two spaces.
SetInitialIndentLevel(int indent_level)336 void SetInitialIndentLevel(int indent_level) {
337 initial_indent_level_ = indent_level;
338 }
339
340 // If printing in single line mode, then the entire message will be output
341 // on a single line with no line breaks.
SetSingleLineMode(bool single_line_mode)342 void SetSingleLineMode(bool single_line_mode) {
343 single_line_mode_ = single_line_mode;
344 }
345
IsInSingleLineMode()346 bool IsInSingleLineMode() const { return single_line_mode_; }
347
348 // If use_field_number is true, uses field number instead of field name.
SetUseFieldNumber(bool use_field_number)349 void SetUseFieldNumber(bool use_field_number) {
350 use_field_number_ = use_field_number;
351 }
352
353 // Set true to print repeated primitives in a format like:
354 // field_name: [1, 2, 3, 4]
355 // instead of printing each value on its own line. Short format applies
356 // only to primitive values -- i.e. everything except strings and
357 // sub-messages/groups.
SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives)358 void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
359 use_short_repeated_primitives_ = use_short_repeated_primitives;
360 }
361
362 // Set true to output UTF-8 instead of ASCII. The only difference
363 // is that bytes >= 0x80 in string fields will not be escaped,
364 // because they are assumed to be part of UTF-8 multi-byte
365 // sequences. This will change the default FastFieldValuePrinter.
366 void SetUseUtf8StringEscaping(bool as_utf8);
367
368 // Set the default FastFieldValuePrinter that is used for all fields that
369 // don't have a field-specific printer registered.
370 // Takes ownership of the printer.
371 void SetDefaultFieldValuePrinter(const FastFieldValuePrinter* printer);
372
373 [[deprecated("Please use FastFieldValuePrinter")]] void
374 SetDefaultFieldValuePrinter(const FieldValuePrinter* printer);
375
376 // Sets whether we want to hide unknown fields or not.
377 // Usually unknown fields are printed in a generic way that includes the
378 // tag number of the field instead of field name. However, sometimes it
379 // is useful to be able to print the message without unknown fields (e.g.
380 // for the python protobuf version to maintain consistency between its pure
381 // python and c++ implementations).
SetHideUnknownFields(bool hide)382 void SetHideUnknownFields(bool hide) { hide_unknown_fields_ = hide; }
383
384 // If print_message_fields_in_index_order is true, fields of a proto message
385 // will be printed using the order defined in source code instead of the
386 // field number, extensions will be printed at the end of the message
387 // and their relative order is determined by the extension number.
388 // By default, use the field number order.
SetPrintMessageFieldsInIndexOrder(bool print_message_fields_in_index_order)389 void SetPrintMessageFieldsInIndexOrder(
390 bool print_message_fields_in_index_order) {
391 print_message_fields_in_index_order_ =
392 print_message_fields_in_index_order;
393 }
394
395 // If expand==true, expand google.protobuf.Any payloads. The output
396 // will be of form
397 // [type_url] { <value_printed_in_text> }
398 //
399 // If expand==false, print Any using the default printer. The output will
400 // look like
401 // type_url: "<type_url>" value: "serialized_content"
SetExpandAny(bool expand)402 void SetExpandAny(bool expand) { expand_any_ = expand; }
403
404 // Set how parser finds message for Any payloads.
SetFinder(const Finder * finder)405 void SetFinder(const Finder* finder) { finder_ = finder; }
406
407 // If non-zero, we truncate all string fields that are longer than
408 // this threshold. This is useful when the proto message has very long
409 // strings, e.g., dump of encoded image file.
410 //
411 // NOTE: Setting a non-zero value breaks round-trip safe
412 // property of TextFormat::Printer. That is, from the printed message, we
413 // cannot fully recover the original string field any more.
SetTruncateStringFieldLongerThan(const int64_t truncate_string_field_longer_than)414 void SetTruncateStringFieldLongerThan(
415 const int64_t truncate_string_field_longer_than) {
416 truncate_string_field_longer_than_ = truncate_string_field_longer_than;
417 }
418
419 // Sets whether sensitive fields found in the message will be reported or
420 // not.
SetReportSensitiveFields(internal::FieldReporterLevel reporter)421 void SetReportSensitiveFields(internal::FieldReporterLevel reporter) {
422 if (report_sensitive_fields_ < reporter) {
423 report_sensitive_fields_ = reporter;
424 }
425 }
426
427 // Register a custom field-specific FastFieldValuePrinter for fields
428 // with a particular FieldDescriptor.
429 // Returns "true" if the registration succeeded, or "false", if there is
430 // already a printer for that FieldDescriptor.
431 // Takes ownership of the printer on successful registration.
432 bool RegisterFieldValuePrinter(const FieldDescriptor* field,
433 const FastFieldValuePrinter* printer);
434
435 [[deprecated("Please use FastFieldValuePrinter")]] bool
436 RegisterFieldValuePrinter(const FieldDescriptor* field,
437 const FieldValuePrinter* printer);
438
439 // Register a custom message-specific MessagePrinter for messages with a
440 // particular Descriptor.
441 // Returns "true" if the registration succeeded, or "false" if there is
442 // already a printer for that Descriptor.
443 // Takes ownership of the printer on successful registration.
444 bool RegisterMessagePrinter(const Descriptor* descriptor,
445 const MessagePrinter* printer);
446
447 // Default printing for messages, which allows registered message printers
448 // to fall back to default printing without losing the ability to control
449 // sub-messages or fields.
450 // NOTE: If the passed in `text_generaor` is not actually the current
451 // `TextGenerator`, then no output will be produced.
452 void PrintMessage(const Message& message,
453 BaseTextGenerator* generator) const;
454
455 private:
456 friend std::string Message::DebugString() const;
457 friend std::string Message::ShortDebugString() const;
458 friend std::string Message::Utf8DebugString() const;
459 friend std::string internal::StringifyMessage(
460 const Message& message, internal::Option option,
461 internal::FieldReporterLevel reporter_level, bool enable_safe_format);
462
463 // Sets whether silent markers will be inserted.
SetInsertSilentMarker(bool v)464 void SetInsertSilentMarker(bool v) { insert_silent_marker_ = v; }
465
466 // Sets whether strings will be redacted and thus unparsable.
SetRedactDebugString(bool redact)467 void SetRedactDebugString(bool redact) { redact_debug_string_ = redact; }
468
469 // Sets whether the output string should be made non-deterministic.
470 // This discourages equality checks based on serialized string comparisons.
SetRandomizeDebugString(bool randomize)471 void SetRandomizeDebugString(bool randomize) {
472 randomize_debug_string_ = randomize;
473 }
474
475 // Forward declaration of an internal class used to print the text
476 // output to the OutputStream (see text_format.cc for implementation).
477 class TextGenerator;
478 using MarkerToken = BaseTextGenerator::MarkerToken;
479
480 // Forward declaration of an internal class used to print field values for
481 // DebugString APIs (see text_format.cc for implementation).
482 class DebugStringFieldValuePrinter;
483
484 // Forward declaration of an internal class used to print UTF-8 escaped
485 // strings (see text_format.cc for implementation).
486 class FastFieldValuePrinterUtf8Escaping;
487
488 // Internal Print method, used for writing to the OutputStream via
489 // the TextGenerator class.
490 void Print(const Message& message, BaseTextGenerator* generator) const;
491
492 // Print a single field.
493 void PrintField(const Message& message, const Reflection* reflection,
494 const FieldDescriptor* field,
495 BaseTextGenerator* generator) const;
496
497 // Print a repeated primitive field in short form.
498 void PrintShortRepeatedField(const Message& message,
499 const Reflection* reflection,
500 const FieldDescriptor* field,
501 BaseTextGenerator* generator) const;
502
503 // Print the name of a field -- i.e. everything that comes before the
504 // ':' for a single name/value pair.
505 void PrintFieldName(const Message& message, int field_index,
506 int field_count, const Reflection* reflection,
507 const FieldDescriptor* field,
508 BaseTextGenerator* generator) const;
509
510 // Outputs a textual representation of the value of the field supplied on
511 // the message supplied or the default value if not set.
512 void PrintFieldValue(const Message& message, const Reflection* reflection,
513 const FieldDescriptor* field, int index,
514 BaseTextGenerator* generator) const;
515
516 // Print the fields in an UnknownFieldSet. They are printed by tag number
517 // only. Embedded messages are heuristically identified by attempting to
518 // parse them (subject to the recursion budget).
519 void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
520 BaseTextGenerator* generator,
521 int recursion_budget) const;
522
523 bool PrintAny(const Message& message, BaseTextGenerator* generator) const;
524
525 // Try to redact a field value based on the annotations associated with
526 // the field. This function returns true if it redacts the field value.
527 bool TryRedactFieldValue(const Message& message,
528 const FieldDescriptor* field,
529 BaseTextGenerator* generator,
530 bool insert_value_separator) const;
531
GetFieldPrinter(const FieldDescriptor * field)532 const FastFieldValuePrinter* GetFieldPrinter(
533 const FieldDescriptor* field) const {
534 auto it = custom_printers_.find(field);
535 return it == custom_printers_.end() ? default_field_value_printer_.get()
536 : it->second.get();
537 }
538
539 friend class google::protobuf::python::cmessage::PythonFieldValuePrinter;
540 static void HardenedPrintString(absl::string_view src,
541 TextFormat::BaseTextGenerator* generator);
542
543 int initial_indent_level_;
544 bool single_line_mode_;
545 bool use_field_number_;
546 bool use_short_repeated_primitives_;
547 bool insert_silent_marker_;
548 bool redact_debug_string_;
549 bool randomize_debug_string_;
550 internal::FieldReporterLevel report_sensitive_fields_;
551 bool hide_unknown_fields_;
552 bool print_message_fields_in_index_order_;
553 bool expand_any_;
554 int64_t truncate_string_field_longer_than_;
555
556 std::unique_ptr<const FastFieldValuePrinter> default_field_value_printer_;
557 absl::flat_hash_map<const FieldDescriptor*,
558 std::unique_ptr<const FastFieldValuePrinter>>
559 custom_printers_;
560
561 absl::flat_hash_map<const Descriptor*,
562 std::unique_ptr<const MessagePrinter>>
563 custom_message_printers_;
564
565 const Finder* finder_;
566 };
567
568 // Parses a text-format protocol message from the given input stream to
569 // the given message object. This function parses the human-readable
570 // serialization format written by Print(). Returns true on success. The
571 // message is cleared first, even if the function fails -- See Merge() to
572 // avoid this behavior.
573 //
574 // Example input: "user {\n id: 123 extra { gender: MALE language: 'en' }\n}"
575 //
576 // One common use for this function is parsing handwritten strings in test
577 // code.
578 //
579 // If you would like to read a protocol buffer serialized in the
580 // (non-human-readable) binary wire format, see
581 // google::protobuf::MessageLite::ParseFromString().
582 static bool Parse(io::ZeroCopyInputStream* input, Message* output);
583 // Like Parse(), but reads directly from a string.
584 static bool ParseFromString(absl::string_view input, Message* output);
585 // Like Parse(), but reads directly from a Cord.
586 static bool ParseFromCord(const absl::Cord& input, Message* output);
587
588 // Like Parse(), but the data is merged into the given message, as if
589 // using Message::MergeFrom().
590 static bool Merge(io::ZeroCopyInputStream* input, Message* output);
591 // Like Merge(), but reads directly from a string.
592 static bool MergeFromString(absl::string_view input, Message* output);
593
594 // Parse the given text as a single field value and store it into the
595 // given field of the given message. If the field is a repeated field,
596 // the new value will be added to the end
597 static bool ParseFieldValueFromString(absl::string_view input,
598 const FieldDescriptor* field,
599 Message* message);
600
// A location in the parsed text, as a line/column pair.
struct ParseLocation {
  int line;
  int column;

  // A default-constructed location has both coordinates set to -1.
  ParseLocation() : ParseLocation(-1, -1) {}
  ParseLocation(int line_param, int column_param)
      : line(line_param), column(column_param) {}
};
610
611 // A range of locations in the parsed text, including `start` and excluding
612 // `end`.
613 struct ParseLocationRange {
614 ParseLocation start;
615 ParseLocation end;
ParseLocationRangeParseLocationRange616 ParseLocationRange() : start(), end() {}
ParseLocationRangeParseLocationRange617 ParseLocationRange(ParseLocation start_param, ParseLocation end_param)
618 : start(start_param), end(end_param) {}
619 };
620
621 // Data structure which is populated with the locations of each field
622 // value parsed from the text.
623 class PROTOBUF_EXPORT ParseInfoTree {
624 public:
625 ParseInfoTree() = default;
626 ParseInfoTree(const ParseInfoTree&) = delete;
627 ParseInfoTree& operator=(const ParseInfoTree&) = delete;
628
629 // Returns the parse location range for index-th value of the field in
630 // the parsed text. If none exists, returns a location with start and end
631 // line -1. Index should be -1 for not-repeated fields.
632 ParseLocationRange GetLocationRange(const FieldDescriptor* field,
633 int index) const;
634
635 // Returns the starting parse location for index-th value of the field in
636 // the parsed text. If none exists, returns a location with line = -1. Index
637 // should be -1 for not-repeated fields.
GetLocation(const FieldDescriptor * field,int index)638 ParseLocation GetLocation(const FieldDescriptor* field, int index) const {
639 return GetLocationRange(field, index).start;
640 }
641
642 // Returns the parse info tree for the given field, which must be a message
643 // type. The nested information tree is owned by the root tree and will be
644 // deleted when it is deleted.
645 ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
646 int index) const;
647
648 private:
649 // Allow the text format parser to record information into the tree.
650 friend class TextFormat;
651
652 // Records the starting and ending locations of a single value for a field.
653 void RecordLocation(const FieldDescriptor* field, ParseLocationRange range);
654
655 // Create and records a nested tree for a nested message field.
656 ParseInfoTree* CreateNested(const FieldDescriptor* field);
657
658 // Defines the map from the index-th field descriptor to its parse location.
659 absl::flat_hash_map<const FieldDescriptor*, std::vector<ParseLocationRange>>
660 locations_;
661 // Defines the map from the index-th field descriptor to the nested parse
662 // info tree.
663 absl::flat_hash_map<const FieldDescriptor*,
664 std::vector<std::unique_ptr<ParseInfoTree>>>
665 nested_;
666 };
667
668 // For more control over parsing, use this class.
669 class PROTOBUF_EXPORT Parser {
670 public:
671 Parser();
672 ~Parser();
673
674 // Like TextFormat::Parse().
675 bool Parse(io::ZeroCopyInputStream* input, Message* output);
676 // Like TextFormat::ParseFromString().
677 bool ParseFromString(absl::string_view input, Message* output);
678 // Like TextFormat::ParseFromCord().
679 bool ParseFromCord(const absl::Cord& input, Message* output);
680 // Like TextFormat::Merge().
681 bool Merge(io::ZeroCopyInputStream* input, Message* output);
682 // Like TextFormat::MergeFromString().
683 bool MergeFromString(absl::string_view input, Message* output);
684
685 // Set where to report parse errors. If nullptr (the default), errors will
686 // be printed to stderr.
RecordErrorsTo(io::ErrorCollector * error_collector)687 void RecordErrorsTo(io::ErrorCollector* error_collector) {
688 error_collector_ = error_collector;
689 }
690
691 // Set how parser finds extensions. If nullptr (the default), the
692 // parser will use the standard Reflection object associated with
693 // the message being parsed.
SetFinder(const Finder * finder)694 void SetFinder(const Finder* finder) { finder_ = finder; }
695
// Sets where location information about the parse will be written. If
// nullptr (the default), then no location will be written.
WriteLocationsTo(ParseInfoTree * tree)699 void WriteLocationsTo(ParseInfoTree* tree) { parse_info_tree_ = tree; }
700
701 // Normally parsing fails if, after parsing, output->IsInitialized()
702 // returns false. Call AllowPartialMessage(true) to skip this check.
AllowPartialMessage(bool allow)703 void AllowPartialMessage(bool allow) { allow_partial_ = allow; }
704
705 // Allow field names to be matched case-insensitively.
706 // This is not advisable if there are fields that only differ in case, or
707 // if you want to enforce writing in the canonical form.
708 // This is 'false' by default.
AllowCaseInsensitiveField(bool allow)709 void AllowCaseInsensitiveField(bool allow) {
710 allow_case_insensitive_field_ = allow;
711 }
712
713 // Like TextFormat::ParseFieldValueFromString
714 bool ParseFieldValueFromString(absl::string_view input,
715 const FieldDescriptor* field,
716 Message* output);
717
718 // When an unknown extension is met, parsing will fail if this option is
719 // set to false (the default). If true, unknown extensions will be ignored
720 // and a warning message will be generated.
721 // Beware! Setting this option true may hide some errors (e.g. spelling
722 // error on extension name). This allows data loss; unlike binary format,
723 // text format cannot preserve unknown extensions. Avoid using this option
724 // if possible.
AllowUnknownExtension(bool allow)725 void AllowUnknownExtension(bool allow) { allow_unknown_extension_ = allow; }
726
727 // When an unknown field is met, parsing will fail if this option is set
728 // to false (the default). If true, unknown fields will be ignored and
729 // a warning message will be generated.
730 // Beware! Setting this option true may hide some errors (e.g. spelling
731 // error on field name). This allows data loss; unlike binary format, text
732 // format cannot preserve unknown fields. Avoid using this option
733 // if possible.
AllowUnknownField(bool allow)734 void AllowUnknownField(bool allow) { allow_unknown_field_ = allow; }
735
736
AllowFieldNumber(bool allow)737 void AllowFieldNumber(bool allow) { allow_field_number_ = allow; }
738
739 // Sets maximum recursion depth which parser can use. This is effectively
740 // the maximum allowed nesting of proto messages.
SetRecursionLimit(int limit)741 void SetRecursionLimit(int limit) { recursion_limit_ = limit; }
742
743 // Metadata representing all the fields that were explicitly unset in
744 // textproto. Example:
745 // "some_int_field: 0"
746 // where some_int_field has implicit presence.
747 //
748 // This class should only be used to pass data between TextFormat and the
749 // MessageDifferencer.
750 class UnsetFieldsMetadata {
751 public:
752 UnsetFieldsMetadata() = default;
753
754 private:
755 using Id = std::pair<const Message*, const FieldDescriptor*>;
756 // Return an id representing the unset field in the given message.
757 static Id GetUnsetFieldId(const Message& message,
758 const FieldDescriptor& fd);
759
760 // List of ids of explicitly unset proto fields.
761 absl::flat_hash_set<Id> ids_;
762
763 friend class ::google::protobuf::internal::
764 UnsetFieldsMetadataMessageDifferencerTestUtil;
765 friend class ::google::protobuf::internal::UnsetFieldsMetadataTextFormatTestUtil;
766 friend class ::google::protobuf::util::MessageDifferencer;
767 friend class ::google::protobuf::TextFormat::Parser;
768 };
769
770 // If called, the parser will report the parsed fields that had no
771 // effect on the resulting proto (for example, fields with no presence that
772 // were set to their default value). These can be passed to the Partially()
773 // matcher as an indicator to explicitly check these fields are missing
774 // in the actual.
OutputNoOpFields(UnsetFieldsMetadata * no_op_fields)775 void OutputNoOpFields(UnsetFieldsMetadata* no_op_fields) {
776 no_op_fields_ = no_op_fields;
777 }
778
private:
// Forward declaration of an internal class used to parse text
// representations (see text_format.cc for implementation).
class ParserImpl;

// Like TextFormat::Merge(). The provided implementation is used
// to do the parsing.
bool MergeUsingImpl(io::ZeroCopyInputStream* input, Message* output,
                    ParserImpl* parser_impl);

// Parser options. Each comment names the setter in this class that assigns
// the member. Only no_op_fields_ has an in-class initializer.
// NOTE(review): the remaining members are presumably initialized by the
// constructor, which is outside this section -- confirm.
io::ErrorCollector* error_collector_;  // Assigned by RecordErrorsTo().
const Finder* finder_;                 // Assigned by SetFinder().
ParseInfoTree* parse_info_tree_;       // Assigned by WriteLocationsTo().
bool allow_partial_;                   // Assigned by AllowPartialMessage().
// Assigned by AllowCaseInsensitiveField().
bool allow_case_insensitive_field_;
bool allow_unknown_field_;      // Assigned by AllowUnknownField().
bool allow_unknown_extension_;  // Assigned by AllowUnknownExtension().
// NOTE(review): no setter for this member is visible in this section.
bool allow_unknown_enum_;
bool allow_field_number_;  // Assigned by AllowFieldNumber().
// NOTE(review): no setters for the next two members are visible in this
// section.
bool allow_relaxed_whitespace_;
bool allow_singular_overwrites_;
int recursion_limit_;  // Assigned by SetRecursionLimit().
// Assigned by OutputNoOpFields(); nullptr until then.
UnsetFieldsMetadata* no_op_fields_ = nullptr;
802 };
803
804
private:
// Hack: ParseInfoTree declares TextFormat as a friend which should extend
// the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
// old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
// helpers for ParserImpl to call methods of ParseInfoTree.

// Helper through which ParserImpl calls ParseInfoTree::RecordLocation().
static inline void RecordLocation(ParseInfoTree* info_tree,
                                  const FieldDescriptor* field,
                                  ParseLocationRange location);
// Helper through which ParserImpl calls ParseInfoTree::CreateNested().
static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
                                          const FieldDescriptor* field);
// To reduce stack frame bloat we use an out-of-line function to print
// strings. This avoids local std::string temporaries.
template <typename... T>
static void OutOfLinePrintString(BaseTextGenerator* generator,
                                 const T&... values);
820 };
821
822
RecordLocation(ParseInfoTree * info_tree,const FieldDescriptor * field,ParseLocationRange location)823 inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
824 const FieldDescriptor* field,
825 ParseLocationRange location) {
826 info_tree->RecordLocation(field, location);
827 }
828
CreateNested(ParseInfoTree * info_tree,const FieldDescriptor * field)829 inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
830 ParseInfoTree* info_tree, const FieldDescriptor* field) {
831 return info_tree->CreateNested(field);
832 }
833
834 } // namespace protobuf
835 } // namespace google
836
837 #include "google/protobuf/port_undef.inc"
838
839 #endif // GOOGLE_PROTOBUF_TEXT_FORMAT_H__
840