• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google LLC.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // Author: jschorr@google.com (Joseph Schorr)
9 //  Based on original Protocol Buffers design by
10 //  Sanjay Ghemawat, Jeff Dean, and others.
11 //
12 // Utilities for printing and parsing protocol messages in a human-readable,
13 // text-based format.
14 
15 #ifndef GOOGLE_PROTOBUF_TEXT_FORMAT_H__
16 #define GOOGLE_PROTOBUF_TEXT_FORMAT_H__
17 
18 #include <atomic>
19 #include <memory>
20 #include <string>
21 #include <vector>
22 
23 #include "absl/container/flat_hash_map.h"
24 #include "absl/container/flat_hash_set.h"
25 #include "absl/strings/cord.h"
26 #include "absl/strings/string_view.h"
27 #include "google/protobuf/descriptor.h"
28 #include "google/protobuf/message.h"
29 #include "google/protobuf/message_lite.h"
30 #include "google/protobuf/port.h"
31 
32 
33 // Must be included last.
34 #include "google/protobuf/port_def.inc"
35 
36 #ifdef SWIG
37 #error "You cannot SWIG proto headers"
38 #endif
39 
40 namespace google {
41 namespace protobuf {
42 
43 namespace internal {
44 PROTOBUF_EXPORT extern const char kDebugStringSilentMarker[1];
45 PROTOBUF_EXPORT extern const char kDebugStringSilentMarkerForDetection[3];
46 
47 PROTOBUF_EXPORT extern std::atomic<bool> enable_debug_string_safe_format;
48 PROTOBUF_EXPORT int64_t GetRedactedFieldCount();
49 PROTOBUF_EXPORT bool ShouldRedactField(const FieldDescriptor* field);
50 
51 // This enum contains all the APIs that convert protos to human-readable
52 // formats. A higher-level API must correspond to a greater number than any
53 // lower-level APIs it calls under the hood (e.g. kDebugString >
54 // kMemberPrintToString > kPrintWithStream).
   //
   // The numeric order is relied upon within this header:
   // TextFormat::Printer::SetReportSensitiveFields() compares levels with `<`
   // and only ever raises the stored level. Values 9 through 11 are currently
   // unassigned.
55 PROTOBUF_EXPORT enum class FieldReporterLevel {
56   kNoReport = 0,
57   kPrintMessage = 1,
58   kPrintWithGenerator = 2,
59   kPrintWithStream = 3,
60   kMemberPrintToString = 4,
61   kStaticPrintToString = 5,
62   kAbslStringify = 6,
63   kShortFormat = 7,
64   kUtf8Format = 8,
65   kDebugString = 12,
66   kShortDebugString = 13,
67   kUtf8DebugString = 14,
68   kUnredactedDebugFormatForTest = 15,
69   kUnredactedShortDebugFormatForTest = 16,
70   kUnredactedUtf8DebugFormatForTest = 17
71 };
72 
73 }  // namespace internal
74 
75 namespace io {
76 class ErrorCollector;  // tokenizer.h
77 }
78 
79 namespace python {
80 namespace cmessage {
81 class PythonFieldValuePrinter;
82 }
83 }  // namespace python
84 
85 namespace internal {
86 // Enum used to set printing options for StringifyMessage.
87 PROTOBUF_EXPORT enum class Option;
88 
89 // Converts a protobuf message to a string. If enable_safe_format is true,
90 // sensitive fields are redacted, and a per-process randomized prefix is
91 // inserted.
92 PROTOBUF_EXPORT std::string StringifyMessage(const Message& message,
93                                              Option option,
94                                              FieldReporterLevel reporter_level,
95                                              bool enable_safe_format);
96 
97 class UnsetFieldsMetadataTextFormatTestUtil;
98 class UnsetFieldsMetadataMessageDifferencerTestUtil;
99 }  // namespace internal
100 
101 // This class implements protocol buffer text format, colloquially known as text
102 // proto.  Printing and parsing protocol messages in text format is useful for
103 // debugging and human editing of messages.
104 //
105 // This class is really a namespace that contains only static methods.
106 class PROTOBUF_EXPORT TextFormat {
107  public:
108   TextFormat(const TextFormat&) = delete;
109   TextFormat& operator=(const TextFormat&) = delete;
110 
111   // Outputs a textual representation of the given message to the given
112   // output stream. Returns false if printing fails.
113   static bool Print(const Message& message, io::ZeroCopyOutputStream* output);
114 
115   // Print the fields in an UnknownFieldSet.  They are printed by tag number
116   // only.  Embedded messages are heuristically identified by attempting to
117   // parse them. Returns false if printing fails.
118   static bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
119                                  io::ZeroCopyOutputStream* output);
120 
121   // Like Print(), but outputs directly to a string.
122   // Note: output will be cleared prior to printing, and will be left empty
123   // even if printing fails. Returns false if printing fails.
124   static bool PrintToString(const Message& message, std::string* output);
125 
126   // Like PrintUnknownFields(), but outputs directly to a string. Returns
127   // false if printing fails.
128   static bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
129                                          std::string* output);
130 
131   // Outputs a textual representation of the value of the field supplied on
132   // the message supplied. For non-repeated fields, an index of -1 must
133   // be supplied. Note that this method will print the default value for a
134   // field if it is not set.
135   static void PrintFieldValueToString(const Message& message,
136                                       const FieldDescriptor* field, int index,
137                                       std::string* output);
138 
139   // Forward declare `Printer` for `BaseTextGenerator::MarkerToken` which
140   // restricts some methods of `BaseTextGenerator` to the class `Printer`.
141   class Printer;
142 
143   class PROTOBUF_EXPORT BaseTextGenerator {
144    private:
145     // Passkey (go/totw/134#what-about-stdshared-ptr) that allows `Printer`
146     // (but not derived classes) to call `PrintMaybeWithMarker` and its
147     // `Printer::TextGenerator` to overload it.
148     // This prevents users from bypassing the marker generation.
149     class MarkerToken {
150      private:
151       explicit MarkerToken() = default;  // 'explicit' prevents aggregate init.
152       friend class Printer;
153     };
154 
155    public:
156     virtual ~BaseTextGenerator();
157 
Indent()158     virtual void Indent() {}
Outdent()159     virtual void Outdent() {}
160     // Returns the current indentation size in characters.
GetCurrentIndentationSize()161     virtual size_t GetCurrentIndentationSize() const { return 0; }
162 
163     // Print text to the output stream.
164     virtual void Print(const char* text, size_t size) = 0;
165 
PrintString(absl::string_view str)166     void PrintString(absl::string_view str) { Print(str.data(), str.size()); }
167 
168     template <size_t n>
PrintLiteral(const char (& text)[n])169     void PrintLiteral(const char (&text)[n]) {
170       Print(text, n - 1);  // n includes the terminating zero character.
171     }
172 
173     // Internal to Printer, access regulated by `MarkerToken`.
PrintMaybeWithMarker(MarkerToken,absl::string_view text)174     virtual void PrintMaybeWithMarker(MarkerToken, absl::string_view text) {
175       Print(text.data(), text.size());
176     }
177 
178     // Internal to Printer, access regulated by `MarkerToken`.
PrintMaybeWithMarker(MarkerToken,absl::string_view text_head,absl::string_view text_tail)179     virtual void PrintMaybeWithMarker(MarkerToken, absl::string_view text_head,
180                                       absl::string_view text_tail) {
181       Print(text_head.data(), text_head.size());
182       Print(text_tail.data(), text_tail.size());
183     }
184 
185     friend class Printer;
186   };
187 
188   // The default printer that converts scalar values from fields into their
189   // string representation.
190   // You can derive from this FastFieldValuePrinter if you want to have fields
191   // to be printed in a different way and register it at the Printer.
192   class PROTOBUF_EXPORT FastFieldValuePrinter {
193    public:
194     FastFieldValuePrinter();
        // Not copyable or assignable.
195     FastFieldValuePrinter(const FastFieldValuePrinter&) = delete;
196     FastFieldValuePrinter& operator=(const FastFieldValuePrinter&) = delete;
197     virtual ~FastFieldValuePrinter();
        // One overridable, const hook per scalar kind. Each receives the value
        // and the BaseTextGenerator supplied by the caller.
        // NOTE(review): presumably each hook writes the value's text to
        // `generator`; the implementations are not in this header -- confirm.
198     virtual void PrintBool(bool val, BaseTextGenerator* generator) const;
199     virtual void PrintInt32(int32_t val, BaseTextGenerator* generator) const;
200     virtual void PrintUInt32(uint32_t val, BaseTextGenerator* generator) const;
201     virtual void PrintInt64(int64_t val, BaseTextGenerator* generator) const;
202     virtual void PrintUInt64(uint64_t val, BaseTextGenerator* generator) const;
203     virtual void PrintFloat(float val, BaseTextGenerator* generator) const;
204     virtual void PrintDouble(double val, BaseTextGenerator* generator) const;
205     virtual void PrintString(const std::string& val,
206                              BaseTextGenerator* generator) const;
207     virtual void PrintBytes(const std::string& val,
208                             BaseTextGenerator* generator) const;
        // Receives both the numeric value and the name of the enum value.
209     virtual void PrintEnum(int32_t val, const std::string& name,
210                            BaseTextGenerator* generator) const;
        // Two overloads: one additionally receives field_index and field_count,
        // the other does not.
        // NOTE(review): which overload the Printer invokes, and how the two
        // relate, is not visible in this header -- confirm in text_format.cc.
211     virtual void PrintFieldName(const Message& message, int field_index,
212                                 int field_count, const Reflection* reflection,
213                                 const FieldDescriptor* field,
214                                 BaseTextGenerator* generator) const;
215     virtual void PrintFieldName(const Message& message,
216                                 const Reflection* reflection,
217                                 const FieldDescriptor* field,
218                                 BaseTextGenerator* generator) const;
        // Hook for the start of a message-typed value; receives the
        // single_line_mode flag along with the field index and count.
219     virtual void PrintMessageStart(const Message& message, int field_index,
220                                    int field_count, bool single_line_mode,
221                                    BaseTextGenerator* generator) const;
222     // Allows to override the logic on how to print the content of a message.
223     // Return false to use the default printing logic. Note that it is legal for
224     // this function to print something and then return false to use the default
225     // content printing (although at that point it would behave similarly to
226     // PrintMessageStart).
227     virtual bool PrintMessageContent(const Message& message, int field_index,
228                                      int field_count, bool single_line_mode,
229                                      BaseTextGenerator* generator) const;
        // Counterpart of PrintMessageStart, with the same parameters.
230     virtual void PrintMessageEnd(const Message& message, int field_index,
231                                  int field_count, bool single_line_mode,
232                                  BaseTextGenerator* generator) const;
233   };
234 
235   // Deprecated: please use FastFieldValuePrinter instead.
236   class PROTOBUF_EXPORT FieldValuePrinter {
237    public:
238     FieldValuePrinter();
        // Not copyable or assignable.
239     FieldValuePrinter(const FieldValuePrinter&) = delete;
240     FieldValuePrinter& operator=(const FieldValuePrinter&) = delete;
241     virtual ~FieldValuePrinter();
        // Unlike FastFieldValuePrinter, every hook here returns the text as a
        // std::string by value instead of taking a BaseTextGenerator.
242     virtual std::string PrintBool(bool val) const;
243     virtual std::string PrintInt32(int32_t val) const;
244     virtual std::string PrintUInt32(uint32_t val) const;
245     virtual std::string PrintInt64(int64_t val) const;
246     virtual std::string PrintUInt64(uint64_t val) const;
247     virtual std::string PrintFloat(float val) const;
248     virtual std::string PrintDouble(double val) const;
249     virtual std::string PrintString(const std::string& val) const;
250     virtual std::string PrintBytes(const std::string& val) const;
251     virtual std::string PrintEnum(int32_t val, const std::string& name) const;
252     virtual std::string PrintFieldName(const Message& message,
253                                        const Reflection* reflection,
254                                        const FieldDescriptor* field) const;
255     virtual std::string PrintMessageStart(const Message& message,
256                                           int field_index, int field_count,
257                                           bool single_line_mode) const;
258     virtual std::string PrintMessageEnd(const Message& message, int field_index,
259                                         int field_count,
260                                         bool single_line_mode) const;
261 
262    private:
        // NOTE(review): presumably the default implementations above forward to
        // this FastFieldValuePrinter; the definitions are not in this header --
        // confirm in text_format.cc.
263     FastFieldValuePrinter delegate_;
264   };
265 
266   class PROTOBUF_EXPORT MessagePrinter {
267    public:
MessagePrinter()268     MessagePrinter() {}
269     MessagePrinter(const MessagePrinter&) = delete;
270     MessagePrinter& operator=(const MessagePrinter&) = delete;
~MessagePrinter()271     virtual ~MessagePrinter() {}
272     virtual void Print(const Message& message, bool single_line_mode,
273                        BaseTextGenerator* generator) const = 0;
274   };
275 
276   // Interface that Printers or Parsers can use to find extensions, or types
277   // referenced in Any messages.
278   class PROTOBUF_EXPORT Finder {
279    public:
        // Virtual so implementations can be destroyed through a Finder pointer;
        // defined out of line.
280     virtual ~Finder();
281 
        // Every lookup below is a const virtual method. The comments describe
        // the base implementation, which subclasses may override.
        //
282     // Try to find an extension of *message by fully-qualified field
283     // name.  Returns nullptr if no extension is known for this name or number.
284     // The base implementation uses the extensions already known by the message.
285     virtual const FieldDescriptor* FindExtension(Message* message,
286                                                  const std::string& name) const;
287 
288     // Similar to FindExtension, but uses a Descriptor and the extension number
289     // instead of using a Message and the name when doing the look up.
290     virtual const FieldDescriptor* FindExtensionByNumber(
291         const Descriptor* descriptor, int number) const;
292 
293     // Find the message type for an Any proto.
294     // Returns nullptr if no message is known for this name.
295     // The base implementation only accepts prefixes of type.googleprod.com/ or
296     // type.googleapis.com/, and searches the DescriptorPool of the parent
297     // message.
298     virtual const Descriptor* FindAnyType(const Message& message,
299                                           const std::string& prefix,
300                                           const std::string& name) const;
301 
302     // Find the message factory for the given extension field. This can be used
303     // to generalize the Parser to add extension fields to a message in the same
304     // way as the "input" message for the Parser.
305     virtual MessageFactory* FindExtensionFactory(
306         const FieldDescriptor* field) const;
307   };
308 
309   // Class for those users which require more fine-grained control over how
310   // a protobuffer message is printed out.
311   class PROTOBUF_EXPORT Printer {
312    public:
313     Printer();
314 
315     // Like TextFormat::Print
316     bool Print(const Message& message, io::ZeroCopyOutputStream* output) const;
317     // Like TextFormat::Printer::Print but takes an additional
318     // internal::FieldReporterLevel
319     bool Print(const Message& message, io::ZeroCopyOutputStream* output,
320                internal::FieldReporterLevel reporter) const;
321     // Like TextFormat::PrintUnknownFields
322     bool PrintUnknownFields(const UnknownFieldSet& unknown_fields,
323                             io::ZeroCopyOutputStream* output) const;
324     // Like TextFormat::PrintToString
325     bool PrintToString(const Message& message, std::string* output) const;
326     // Like TextFormat::PrintUnknownFieldsToString
327     bool PrintUnknownFieldsToString(const UnknownFieldSet& unknown_fields,
328                                     std::string* output) const;
329     // Like TextFormat::PrintFieldValueToString
330     void PrintFieldValueToString(const Message& message,
331                                  const FieldDescriptor* field, int index,
332                                  std::string* output) const;
333 
334     // Adjust the initial indent level of all output.  Each indent level is
335     // equal to two spaces.
SetInitialIndentLevel(int indent_level)336     void SetInitialIndentLevel(int indent_level) {
337       initial_indent_level_ = indent_level;
338     }
339 
340     // If printing in single line mode, then the entire message will be output
341     // on a single line with no line breaks.
SetSingleLineMode(bool single_line_mode)342     void SetSingleLineMode(bool single_line_mode) {
343       single_line_mode_ = single_line_mode;
344     }
345 
IsInSingleLineMode()346     bool IsInSingleLineMode() const { return single_line_mode_; }
347 
348     // If use_field_number is true, uses field number instead of field name.
SetUseFieldNumber(bool use_field_number)349     void SetUseFieldNumber(bool use_field_number) {
350       use_field_number_ = use_field_number;
351     }
352 
353     // Set true to print repeated primitives in a format like:
354     //   field_name: [1, 2, 3, 4]
355     // instead of printing each value on its own line.  Short format applies
356     // only to primitive values -- i.e. everything except strings and
357     // sub-messages/groups.
SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives)358     void SetUseShortRepeatedPrimitives(bool use_short_repeated_primitives) {
359       use_short_repeated_primitives_ = use_short_repeated_primitives;
360     }
361 
362     // Set true to output UTF-8 instead of ASCII.  The only difference
363     // is that bytes >= 0x80 in string fields will not be escaped,
364     // because they are assumed to be part of UTF-8 multi-byte
365     // sequences. This will change the default FastFieldValuePrinter.
366     void SetUseUtf8StringEscaping(bool as_utf8);
367 
368     // Set the default FastFieldValuePrinter that is used for all fields that
369     // don't have a field-specific printer registered.
370     // Takes ownership of the printer.
371     void SetDefaultFieldValuePrinter(const FastFieldValuePrinter* printer);
372 
373     [[deprecated("Please use FastFieldValuePrinter")]] void
374     SetDefaultFieldValuePrinter(const FieldValuePrinter* printer);
375 
376     // Sets whether we want to hide unknown fields or not.
377     // Usually unknown fields are printed in a generic way that includes the
378     // tag number of the field instead of field name. However, sometimes it
379     // is useful to be able to print the message without unknown fields (e.g.
380     // for the python protobuf version to maintain consistency between its pure
381     // python and c++ implementations).
SetHideUnknownFields(bool hide)382     void SetHideUnknownFields(bool hide) { hide_unknown_fields_ = hide; }
383 
384     // If print_message_fields_in_index_order is true, fields of a proto message
385     // will be printed using the order defined in source code instead of the
386     // field number, extensions will be printed at the end of the message
387     // and their relative order is determined by the extension number.
388     // By default, use the field number order.
SetPrintMessageFieldsInIndexOrder(bool print_message_fields_in_index_order)389     void SetPrintMessageFieldsInIndexOrder(
390         bool print_message_fields_in_index_order) {
391       print_message_fields_in_index_order_ =
392           print_message_fields_in_index_order;
393     }
394 
395     // If expand==true, expand google.protobuf.Any payloads. The output
396     // will be of form
397     //    [type_url] { <value_printed_in_text> }
398     //
399     // If expand==false, print Any using the default printer. The output will
400     // look like
401     //    type_url: "<type_url>"  value: "serialized_content"
SetExpandAny(bool expand)402     void SetExpandAny(bool expand) { expand_any_ = expand; }
403 
404     // Set how parser finds message for Any payloads.
SetFinder(const Finder * finder)405     void SetFinder(const Finder* finder) { finder_ = finder; }
406 
407     // If non-zero, we truncate all string fields that are  longer than
408     // this threshold.  This is useful when the proto message has very long
409     // strings, e.g., dump of encoded image file.
410     //
411     // NOTE:  Setting a non-zero value breaks round-trip safe
412     // property of TextFormat::Printer.  That is, from the printed message, we
413     // cannot fully recover the original string field any more.
SetTruncateStringFieldLongerThan(const int64_t truncate_string_field_longer_than)414     void SetTruncateStringFieldLongerThan(
415         const int64_t truncate_string_field_longer_than) {
416       truncate_string_field_longer_than_ = truncate_string_field_longer_than;
417     }
418 
419     // Sets whether sensitive fields found in the message will be reported or
420     // not.
SetReportSensitiveFields(internal::FieldReporterLevel reporter)421     void SetReportSensitiveFields(internal::FieldReporterLevel reporter) {
422       if (report_sensitive_fields_ < reporter) {
423         report_sensitive_fields_ = reporter;
424       }
425     }
426 
427     // Register a custom field-specific FastFieldValuePrinter for fields
428     // with a particular FieldDescriptor.
429     // Returns "true" if the registration succeeded, or "false", if there is
430     // already a printer for that FieldDescriptor.
431     // Takes ownership of the printer on successful registration.
432     bool RegisterFieldValuePrinter(const FieldDescriptor* field,
433                                    const FastFieldValuePrinter* printer);
434 
435     [[deprecated("Please use FastFieldValuePrinter")]] bool
436     RegisterFieldValuePrinter(const FieldDescriptor* field,
437                               const FieldValuePrinter* printer);
438 
439     // Register a custom message-specific MessagePrinter for messages with a
440     // particular Descriptor.
441     // Returns "true" if the registration succeeded, or "false" if there is
442     // already a printer for that Descriptor.
443     // Takes ownership of the printer on successful registration.
444     bool RegisterMessagePrinter(const Descriptor* descriptor,
445                                 const MessagePrinter* printer);
446 
447     // Default printing for messages, which allows registered message printers
448     // to fall back to default printing without losing the ability to control
449     // sub-messages or fields.
450     // NOTE: If the passed in `text_generaor` is not actually the current
451     // `TextGenerator`, then no output will be produced.
452     void PrintMessage(const Message& message,
453                       BaseTextGenerator* generator) const;
454 
455    private:
456     friend std::string Message::DebugString() const;
457     friend std::string Message::ShortDebugString() const;
458     friend std::string Message::Utf8DebugString() const;
459     friend std::string internal::StringifyMessage(
460         const Message& message, internal::Option option,
461         internal::FieldReporterLevel reporter_level, bool enable_safe_format);
462 
463     // Sets whether silent markers will be inserted.
SetInsertSilentMarker(bool v)464     void SetInsertSilentMarker(bool v) { insert_silent_marker_ = v; }
465 
466     // Sets whether strings will be redacted and thus unparsable.
SetRedactDebugString(bool redact)467     void SetRedactDebugString(bool redact) { redact_debug_string_ = redact; }
468 
469     // Sets whether the output string should be made non-deterministic.
470     // This discourages equality checks based on serialized string comparisons.
SetRandomizeDebugString(bool randomize)471     void SetRandomizeDebugString(bool randomize) {
472       randomize_debug_string_ = randomize;
473     }
474 
475     // Forward declaration of an internal class used to print the text
476     // output to the OutputStream (see text_format.cc for implementation).
477     class TextGenerator;
478     using MarkerToken = BaseTextGenerator::MarkerToken;
479 
480     // Forward declaration of an internal class used to print field values for
481     // DebugString APIs (see text_format.cc for implementation).
482     class DebugStringFieldValuePrinter;
483 
484     // Forward declaration of an internal class used to print UTF-8 escaped
485     // strings (see text_format.cc for implementation).
486     class FastFieldValuePrinterUtf8Escaping;
487 
488     // Internal Print method, used for writing to the OutputStream via
489     // the TextGenerator class.
490     void Print(const Message& message, BaseTextGenerator* generator) const;
491 
492     // Print a single field.
493     void PrintField(const Message& message, const Reflection* reflection,
494                     const FieldDescriptor* field,
495                     BaseTextGenerator* generator) const;
496 
497     // Print a repeated primitive field in short form.
498     void PrintShortRepeatedField(const Message& message,
499                                  const Reflection* reflection,
500                                  const FieldDescriptor* field,
501                                  BaseTextGenerator* generator) const;
502 
503     // Print the name of a field -- i.e. everything that comes before the
504     // ':' for a single name/value pair.
505     void PrintFieldName(const Message& message, int field_index,
506                         int field_count, const Reflection* reflection,
507                         const FieldDescriptor* field,
508                         BaseTextGenerator* generator) const;
509 
510     // Outputs a textual representation of the value of the field supplied on
511     // the message supplied or the default value if not set.
512     void PrintFieldValue(const Message& message, const Reflection* reflection,
513                          const FieldDescriptor* field, int index,
514                          BaseTextGenerator* generator) const;
515 
516     // Print the fields in an UnknownFieldSet.  They are printed by tag number
517     // only.  Embedded messages are heuristically identified by attempting to
518     // parse them (subject to the recursion budget).
519     void PrintUnknownFields(const UnknownFieldSet& unknown_fields,
520                             BaseTextGenerator* generator,
521                             int recursion_budget) const;
522 
523     bool PrintAny(const Message& message, BaseTextGenerator* generator) const;
524 
525     // Try to redact a field value based on the annotations associated with
526     // the field. This function returns true if it redacts the field value.
527     bool TryRedactFieldValue(const Message& message,
528                              const FieldDescriptor* field,
529                              BaseTextGenerator* generator,
530                              bool insert_value_separator) const;
531 
GetFieldPrinter(const FieldDescriptor * field)532     const FastFieldValuePrinter* GetFieldPrinter(
533         const FieldDescriptor* field) const {
534       auto it = custom_printers_.find(field);
535       return it == custom_printers_.end() ? default_field_value_printer_.get()
536                                           : it->second.get();
537     }
538 
539     friend class google::protobuf::python::cmessage::PythonFieldValuePrinter;
540     static void HardenedPrintString(absl::string_view src,
541                                     TextFormat::BaseTextGenerator* generator);
542 
543     int initial_indent_level_;
544     bool single_line_mode_;
545     bool use_field_number_;
546     bool use_short_repeated_primitives_;
547     bool insert_silent_marker_;
548     bool redact_debug_string_;
549     bool randomize_debug_string_;
550     internal::FieldReporterLevel report_sensitive_fields_;
551     bool hide_unknown_fields_;
552     bool print_message_fields_in_index_order_;
553     bool expand_any_;
554     int64_t truncate_string_field_longer_than_;
555 
556     std::unique_ptr<const FastFieldValuePrinter> default_field_value_printer_;
557     absl::flat_hash_map<const FieldDescriptor*,
558                         std::unique_ptr<const FastFieldValuePrinter>>
559         custom_printers_;
560 
561     absl::flat_hash_map<const Descriptor*,
562                         std::unique_ptr<const MessagePrinter>>
563         custom_message_printers_;
564 
565     const Finder* finder_;
566   };
567 
568   // Parses a text-format protocol message from the given input stream to
569   // the given message object. This function parses the human-readable
570   // serialization format written by Print(). Returns true on success. The
571   // message is cleared first, even if the function fails -- See Merge() to
572   // avoid this behavior.
573   //
574   // Example input: "user {\n id: 123 extra { gender: MALE language: 'en' }\n}"
575   //
576   // One common use for this function is parsing handwritten strings in test
577   // code.
578   //
579   // If you would like to read a protocol buffer serialized in the
580   // (non-human-readable) binary wire format, see
581   // google::protobuf::MessageLite::ParseFromString().
582   static bool Parse(io::ZeroCopyInputStream* input, Message* output);
583   // Like Parse(), but reads directly from a string.
584   static bool ParseFromString(absl::string_view input, Message* output);
585   // Like Parse(), but reads directly from a Cord.
586   static bool ParseFromCord(const absl::Cord& input, Message* output);
587 
588   // Like Parse(), but the data is merged into the given message, as if
589   // using Message::MergeFrom().
590   static bool Merge(io::ZeroCopyInputStream* input, Message* output);
591   // Like Merge(), but reads directly from a string.
592   static bool MergeFromString(absl::string_view input, Message* output);
593 
594   // Parse the given text as a single field value and store it into the
595   // given field of the given message. If the field is a repeated field,
596   // the new value will be added to the end.
597   static bool ParseFieldValueFromString(absl::string_view input,
598                                         const FieldDescriptor* field,
599                                         Message* message);
600 
601   // A location in the parsed text.
// A (line, column) position in the parsed text. A default-constructed
// location has both members set to -1.
struct ParseLocation {
  int line = -1;
  int column = -1;

  // Leaves both members at their -1 defaults.
  ParseLocation() = default;
  // Builds a location from an explicit line and column.
  ParseLocation(int line_param, int column_param)
      : line(line_param), column(column_param) {}
};
610 
611   // A range of locations in the parsed text, including `start` and excluding
612   // `end`.
613   struct ParseLocationRange {
614     ParseLocation start;
615     ParseLocation end;
ParseLocationRangeParseLocationRange616     ParseLocationRange() : start(), end() {}
ParseLocationRangeParseLocationRange617     ParseLocationRange(ParseLocation start_param, ParseLocation end_param)
618         : start(start_param), end(end_param) {}
619   };
620 
621   // Data structure which is populated with the locations of each field
622   // value parsed from the text.
623   class PROTOBUF_EXPORT ParseInfoTree {
624    public:
625     ParseInfoTree() = default;
626     ParseInfoTree(const ParseInfoTree&) = delete;
627     ParseInfoTree& operator=(const ParseInfoTree&) = delete;
628 
629     // Returns the parse location range for index-th value of the field in
630     // the parsed text. If none exists, returns a location with start and end
631     // line -1. Index should be -1 for not-repeated fields.
632     ParseLocationRange GetLocationRange(const FieldDescriptor* field,
633                                         int index) const;
634 
635     // Returns the starting parse location for index-th value of the field in
636     // the parsed text. If none exists, returns a location with line = -1. Index
637     // should be -1 for not-repeated fields.
GetLocation(const FieldDescriptor * field,int index)638     ParseLocation GetLocation(const FieldDescriptor* field, int index) const {
639       return GetLocationRange(field, index).start;
640     }
641 
642     // Returns the parse info tree for the given field, which must be a message
643     // type. The nested information tree is owned by the root tree and will be
644     // deleted when it is deleted.
645     ParseInfoTree* GetTreeForNested(const FieldDescriptor* field,
646                                     int index) const;
647 
648    private:
649     // Allow the text format parser to record information into the tree.
650     friend class TextFormat;
651 
652     // Records the starting and ending locations of a single value for a field.
653     void RecordLocation(const FieldDescriptor* field, ParseLocationRange range);
654 
655     // Create and records a nested tree for a nested message field.
656     ParseInfoTree* CreateNested(const FieldDescriptor* field);
657 
658     // Defines the map from the index-th field descriptor to its parse location.
659     absl::flat_hash_map<const FieldDescriptor*, std::vector<ParseLocationRange>>
660         locations_;
661     // Defines the map from the index-th field descriptor to the nested parse
662     // info tree.
663     absl::flat_hash_map<const FieldDescriptor*,
664                         std::vector<std::unique_ptr<ParseInfoTree>>>
665         nested_;
666   };
667 
668   // For more control over parsing, use this class.
669   class PROTOBUF_EXPORT Parser {
670    public:
671     Parser();
672     ~Parser();
673 
674     // Like TextFormat::Parse().
675     bool Parse(io::ZeroCopyInputStream* input, Message* output);
676     // Like TextFormat::ParseFromString().
677     bool ParseFromString(absl::string_view input, Message* output);
678     // Like TextFormat::ParseFromCord().
679     bool ParseFromCord(const absl::Cord& input, Message* output);
680     // Like TextFormat::Merge().
681     bool Merge(io::ZeroCopyInputStream* input, Message* output);
682     // Like TextFormat::MergeFromString().
683     bool MergeFromString(absl::string_view input, Message* output);
684 
685     // Set where to report parse errors.  If nullptr (the default), errors will
686     // be printed to stderr.
RecordErrorsTo(io::ErrorCollector * error_collector)687     void RecordErrorsTo(io::ErrorCollector* error_collector) {
688       error_collector_ = error_collector;
689     }
690 
691     // Set how parser finds extensions.  If nullptr (the default), the
692     // parser will use the standard Reflection object associated with
693     // the message being parsed.
SetFinder(const Finder * finder)694     void SetFinder(const Finder* finder) { finder_ = finder; }
695 
696     // Sets where location information about the parse will be written. If
697     // nullptr
698     // (the default), then no location will be written.
WriteLocationsTo(ParseInfoTree * tree)699     void WriteLocationsTo(ParseInfoTree* tree) { parse_info_tree_ = tree; }
700 
701     // Normally parsing fails if, after parsing, output->IsInitialized()
702     // returns false.  Call AllowPartialMessage(true) to skip this check.
AllowPartialMessage(bool allow)703     void AllowPartialMessage(bool allow) { allow_partial_ = allow; }
704 
705     // Allow field names to be matched case-insensitively.
706     // This is not advisable if there are fields that only differ in case, or
707     // if you want to enforce writing in the canonical form.
708     // This is 'false' by default.
AllowCaseInsensitiveField(bool allow)709     void AllowCaseInsensitiveField(bool allow) {
710       allow_case_insensitive_field_ = allow;
711     }
712 
713     // Like TextFormat::ParseFieldValueFromString
714     bool ParseFieldValueFromString(absl::string_view input,
715                                    const FieldDescriptor* field,
716                                    Message* output);
717 
718     // When an unknown extension is met, parsing will fail if this option is
719     // set to false (the default). If true, unknown extensions will be ignored
720     // and a warning message will be generated.
721     // Beware! Setting this option true may hide some errors (e.g. spelling
722     // error on extension name).  This allows data loss; unlike binary format,
723     // text format cannot preserve unknown extensions.  Avoid using this option
724     // if possible.
AllowUnknownExtension(bool allow)725     void AllowUnknownExtension(bool allow) { allow_unknown_extension_ = allow; }
726 
727     // When an unknown field is met, parsing will fail if this option is set
728     // to false (the default). If true, unknown fields will be ignored and
729     // a warning message will be generated.
730     // Beware! Setting this option true may hide some errors (e.g. spelling
731     // error on field name). This allows data loss; unlike binary format, text
732     // format cannot preserve unknown fields.  Avoid using this option
733     // if possible.
AllowUnknownField(bool allow)734     void AllowUnknownField(bool allow) { allow_unknown_field_ = allow; }
735 
736 
AllowFieldNumber(bool allow)737     void AllowFieldNumber(bool allow) { allow_field_number_ = allow; }
738 
739     // Sets maximum recursion depth which parser can use. This is effectively
740     // the maximum allowed nesting of proto messages.
SetRecursionLimit(int limit)741     void SetRecursionLimit(int limit) { recursion_limit_ = limit; }
742 
743     // Metadata representing all the fields that were explicitly unset in
744     // textproto. Example:
745     // "some_int_field: 0"
746     // where some_int_field has implicit presence.
747     //
748     // This class should only be used to pass data between TextFormat and the
749     // MessageDifferencer.
750     class UnsetFieldsMetadata {
751      public:
752       UnsetFieldsMetadata() = default;
753 
754      private:
755       using Id = std::pair<const Message*, const FieldDescriptor*>;
756       // Return an id representing the unset field in the given message.
757       static Id GetUnsetFieldId(const Message& message,
758                                 const FieldDescriptor& fd);
759 
760       // List of ids of explicitly unset proto fields.
761       absl::flat_hash_set<Id> ids_;
762 
763       friend class ::google::protobuf::internal::
764           UnsetFieldsMetadataMessageDifferencerTestUtil;
765       friend class ::google::protobuf::internal::UnsetFieldsMetadataTextFormatTestUtil;
766       friend class ::google::protobuf::util::MessageDifferencer;
767       friend class ::google::protobuf::TextFormat::Parser;
768     };
769 
770     // If called, the parser will report the parsed fields that had no
771     // effect on the resulting proto (for example, fields with no presence that
772     // were set to their default value). These can be passed to the Partially()
773     // matcher as an indicator to explicitly check these fields are missing
774     // in the actual.
OutputNoOpFields(UnsetFieldsMetadata * no_op_fields)775     void OutputNoOpFields(UnsetFieldsMetadata* no_op_fields) {
776       no_op_fields_ = no_op_fields;
777     }
778 
779    private:
780     // Forward declaration of an internal class used to parse text
781     // representations (see text_format.cc for implementation).
782     class ParserImpl;
783 
784     // Like TextFormat::Merge().  The provided implementation is used
785     // to do the parsing.
786     bool MergeUsingImpl(io::ZeroCopyInputStream* input, Message* output,
787                         ParserImpl* parser_impl);
788 
789     io::ErrorCollector* error_collector_;
790     const Finder* finder_;
791     ParseInfoTree* parse_info_tree_;
792     bool allow_partial_;
793     bool allow_case_insensitive_field_;
794     bool allow_unknown_field_;
795     bool allow_unknown_extension_;
796     bool allow_unknown_enum_;
797     bool allow_field_number_;
798     bool allow_relaxed_whitespace_;
799     bool allow_singular_overwrites_;
800     int recursion_limit_;
801     UnsetFieldsMetadata* no_op_fields_ = nullptr;
802   };
803 
804 
805  private:
806   // Hack: ParseInfoTree declares TextFormat as a friend which should extend
807   // the friendship to TextFormat::Parser::ParserImpl, but unfortunately some
808   // old compilers (e.g. GCC 3.4.6) don't implement this correctly. We provide
809   // helpers for ParserImpl to call methods of ParseInfoTree.
810   static inline void RecordLocation(ParseInfoTree* info_tree,
811                                     const FieldDescriptor* field,
812                                     ParseLocationRange location);
813   static inline ParseInfoTree* CreateNested(ParseInfoTree* info_tree,
814                                             const FieldDescriptor* field);
815   // To reduce stack frame bloat we use an out-of-line function to print
816   // strings. This avoid local std::string temporaries.
817   template <typename... T>
818   static void OutOfLinePrintString(BaseTextGenerator* generator,
819                                    const T&... values);
820 };
821 
822 
RecordLocation(ParseInfoTree * info_tree,const FieldDescriptor * field,ParseLocationRange location)823 inline void TextFormat::RecordLocation(ParseInfoTree* info_tree,
824                                        const FieldDescriptor* field,
825                                        ParseLocationRange location) {
826   info_tree->RecordLocation(field, location);
827 }
828 
CreateNested(ParseInfoTree * info_tree,const FieldDescriptor * field)829 inline TextFormat::ParseInfoTree* TextFormat::CreateNested(
830     ParseInfoTree* info_tree, const FieldDescriptor* field) {
831   return info_tree->CreateNested(field);
832 }
833 
834 }  // namespace protobuf
835 }  // namespace google
836 
837 #include "google/protobuf/port_undef.inc"
838 
839 #endif  // GOOGLE_PROTOBUF_TEXT_FORMAT_H__
840