• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: jschorr@google.com (Joseph Schorr)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // This file defines static methods and classes for comparing Protocol
36 // Messages.
37 //
38 // Aug. 2008: Added Unknown Fields Comparison for messages.
39 // Aug. 2009: Added different options to compare repeated fields.
40 // Apr. 2010: Moved field comparison to FieldComparator
41 // Sep. 2020: Added option to output map keys in path
42 
43 #ifndef GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
44 #define GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
45 
46 
47 #include <functional>
48 #include <map>
49 #include <memory>
50 #include <set>
51 #include <string>
52 #include <vector>
53 
54 #include <google/protobuf/descriptor.h>  // FieldDescriptor
55 #include <google/protobuf/message.h>     // Message
56 #include <google/protobuf/unknown_field_set.h>
57 #include <google/protobuf/util/field_comparator.h>
58 
59 // Always include as last one, otherwise it can break compilation
60 #include <google/protobuf/port_def.inc>
61 
62 namespace google {
63 namespace protobuf {
64 
65 class DynamicMessageFactory;
66 class FieldDescriptor;
67 
68 namespace io {
69 class ZeroCopyOutputStream;
70 class Printer;
71 }  // namespace io
72 
73 namespace util {
74 
75 class DefaultFieldComparator;
76 class FieldContext;  // declared below MessageDifferencer
77 
78 // Defines a collection of field descriptors.
79 // In case of internal google codebase we are using absl::FixedArray instead
80 // of vector. It significantly speeds up proto comparison (by ~30%) by
81 // reducing the number of malloc/free operations
82 typedef std::vector<const FieldDescriptor*> FieldDescriptorArray;
83 
84 // A basic differencer that can be used to determine
85 // the differences between two specified Protocol Messages. If any differences
86 // are found, the Compare method will return false, and any differencer reporter
87 // specified via ReportDifferencesTo will have its reporting methods called (see
88 // below for implementation of the report). Based off of the original
89 // ProtocolDifferencer implementation in //net/proto/protocol-differencer.h
90 // (Thanks Todd!).
91 //
92 // MessageDifferencer REQUIRES that compared messages be the same type, defined
93 // as messages that share the same descriptor.  If not, the behavior of this
94 // class is undefined.
95 //
96 // People disagree on what MessageDifferencer should do when asked to compare
97 // messages with different descriptors.  Some people think it should always
98 // return false.  Others expect it to try to look for similar fields and
99 // compare them anyway -- especially if the descriptors happen to be identical.
100 // If we chose either of these behaviors, some set of people would find it
101 // surprising, and could end up writing code expecting the other behavior
102 // without realizing their error.  Therefore, we forbid that usage.
103 //
104 // This class is implemented based on the proto2 reflection. The performance
105 // should be good enough for normal usages. However, for places where the
106 // performance is extremely sensitive, there are several alternatives:
107 // - Comparing serialized string
108 // Downside: false negatives (there are messages that are the same but their
109 // serialized strings are different).
110 // - Equals code generator by compiler plugin (net/proto2/contrib/equals_plugin)
111 // Downside: more generated code; maintenance overhead for the additional rule
112 // (must be in sync with the original proto_library).
113 //
114 // Note on handling of google.protobuf.Any: MessageDifferencer automatically
115 // unpacks Any::value into a Message and compares its individual fields.
116 // Messages encoded in a repeated Any cannot be compared using TreatAsMap.
117 //
118 // Note on thread-safety: MessageDifferencer is *not* thread-safe. You need to
119 // guard it with a lock to use the same MessageDifferencer instance from
120 // multiple threads. Note that it's fine to call static comparison methods
121 // (like MessageDifferencer::Equals) concurrently, but it's not recommended for
122 // performance critical code as it leads to extra allocations.
123 class PROTOBUF_EXPORT MessageDifferencer {
124  public:
125   // Determines whether the supplied messages are equal. Equality is defined as
126   // all fields within the two messages being set to the same value. Primitive
127   // fields and strings are compared by value while embedded messages/groups
128   // are compared as if via a recursive call. Use Compare() with IgnoreField()
129   // if some fields should be ignored in the comparison. Use Compare() with
130   // TreatAsSet() if there are repeated fields where ordering does not matter.
131   //
132   // This method REQUIRES that the two messages have the same
133   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
134   static bool Equals(const Message& message1, const Message& message2);
135 
136   // Determines whether the supplied messages are equivalent. Equivalency is
137   // defined as all fields within the two messages having the same value. This
138   // differs from the Equals method above in that fields with default values
139   // are considered set to said value automatically. For details on how default
140   // values are defined for each field type, see:
141   // https://developers.google.com/protocol-buffers/docs/proto?csw=1#optional.
142   // Also, Equivalent() ignores unknown fields. Use IgnoreField() and Compare()
143   // if some fields should be ignored in the comparison.
144   //
145   // This method REQUIRES that the two messages have the same
146   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
147   static bool Equivalent(const Message& message1, const Message& message2);
148 
149   // Determines whether the supplied messages are approximately equal.
150   // Approximate equality is defined as all fields within the two messages
151   // being approximately equal.  Primitive (non-float) fields and strings are
152   // compared by value, floats are compared using MathUtil::AlmostEquals() and
153   // embedded messages/groups are compared as if via a recursive call. Use
154   // IgnoreField() and Compare() if some fields should be ignored in the
155   // comparison.
156   //
157   // This method REQUIRES that the two messages have the same
158   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
159   static bool ApproximatelyEquals(const Message& message1,
160                                   const Message& message2);
161 
162   // Determines whether the supplied messages are approximately equivalent.
163   // Approximate equivalency is defined as all fields within the two messages
164   // being approximately equivalent. As in
165   // MessageDifferencer::ApproximatelyEquals, primitive (non-float) fields and
166   // strings are compared by value, floats are compared using
167   // MathUtil::AlmostEquals() and embedded messages/groups are compared as if
168   // via a recursive call. However, fields with default values are considered
169   // set to said value, as per MessageDiffencer::Equivalent. Use IgnoreField()
170   // and Compare() if some fields should be ignored in the comparison.
171   //
172   // This method REQUIRES that the two messages have the same
173   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
174   static bool ApproximatelyEquivalent(const Message& message1,
175                                       const Message& message2);
176 
177   // Identifies an individual field in a message instance.  Used for field_path,
178   // below.
179   struct SpecificField {
180     // For known fields, "field" is filled in and "unknown_field_number" is -1.
181     // For unknown fields, "field" is NULL, "unknown_field_number" is the field
182     // number, and "unknown_field_type" is its type.
183     const FieldDescriptor* field = nullptr;
184     int unknown_field_number = -1;
185     UnknownField::Type unknown_field_type = UnknownField::Type::TYPE_VARINT;
186 
187     // If this a repeated field, "index" is the index within it.  For unknown
188     // fields, this is the index of the field among all unknown fields of the
189     // same field number and type.
190     int index = -1;
191 
192     // If "field" is a repeated field which is being treated as a map or
193     // a set (see TreatAsMap() and TreatAsSet(), below), new_index indicates
194     // the index the position to which the element has moved.  If the element
195     // has not moved, "new_index" will have the same value as "index".
196     int new_index = -1;
197 
198     // If "field" is a map field, point to the map entry.
199     const Message* map_entry1 = nullptr;
200     const Message* map_entry2 = nullptr;
201 
202     // For unknown fields, these are the pointers to the UnknownFieldSet
203     // containing the unknown fields. In certain cases (e.g. proto1's
204     // MessageSet, or nested groups of unknown fields), these may differ from
205     // the messages' internal UnknownFieldSets.
206     const UnknownFieldSet* unknown_field_set1 = nullptr;
207     const UnknownFieldSet* unknown_field_set2 = nullptr;
208 
209     // For unknown fields, these are the index of the field within the
210     // UnknownFieldSets. One or the other will be -1 when
211     // reporting an addition or deletion.
212     int unknown_field_index1 = -1;
213     int unknown_field_index2 = -1;
214   };
215 
216   // Abstract base class from which all MessageDifferencer
217   // reporters derive. The five Report* methods below will be called when
218   // a field has been added, deleted, modified, moved, or matched. The third
219   // argument is a vector of FieldDescriptor pointers which describes the chain
220   // of fields that was taken to find the current field. For example, for a
221   // field found in an embedded message, the vector will contain two
222   // FieldDescriptors. The first will be the field of the embedded message
223   // itself and the second will be the actual field in the embedded message
224   // that was added/deleted/modified.
225   // Fields will be reported in PostTraversalOrder.
226   // For example, given following proto, if both baz and mooo are changed.
227   // foo {
228   //   bar {
229   //     baz: 1
230   //     mooo: 2
231   //   }
232   // }
233   // ReportModified will be invoked with following order:
234   // 1. foo.bar.baz or foo.bar.mooo
235   // 2. foo.bar.mooo or foo.bar.baz
236   // 2. foo.bar
237   // 3. foo
238   class PROTOBUF_EXPORT Reporter {
239    public:
240     Reporter();
241     virtual ~Reporter();
242 
243     // Reports that a field has been added into Message2.
244     virtual void ReportAdded(const Message& message1, const Message& message2,
245                              const std::vector<SpecificField>& field_path) = 0;
246 
247     // Reports that a field has been deleted from Message1.
248     virtual void ReportDeleted(
249         const Message& message1, const Message& message2,
250         const std::vector<SpecificField>& field_path) = 0;
251 
252     // Reports that the value of a field has been modified.
253     virtual void ReportModified(
254         const Message& message1, const Message& message2,
255         const std::vector<SpecificField>& field_path) = 0;
256 
257     // Reports that a repeated field has been moved to another location.  This
258     // only applies when using TreatAsSet or TreatAsMap()  -- see below. Also
259     // note that for any given field, ReportModified and ReportMoved are
260     // mutually exclusive. If a field has been both moved and modified, then
261     // only ReportModified will be called.
ReportMoved(const Message &,const Message &,const std::vector<SpecificField> &)262     virtual void ReportMoved(
263         const Message& /* message1 */, const Message& /* message2 */,
264         const std::vector<SpecificField>& /* field_path */) {}
265 
266     // Reports that two fields match. Useful for doing side-by-side diffs.
267     // This function is mutually exclusive with ReportModified and ReportMoved.
268     // Note that you must call set_report_matches(true) before calling Compare
269     // to make use of this function.
ReportMatched(const Message &,const Message &,const std::vector<SpecificField> &)270     virtual void ReportMatched(
271         const Message& /* message1 */, const Message& /* message2 */,
272         const std::vector<SpecificField>& /* field_path */) {}
273 
274     // Reports that two fields would have been compared, but the
275     // comparison has been skipped because the field was marked as
276     // 'ignored' using IgnoreField().  This function is mutually
277     // exclusive with all the other Report() functions.
278     //
279     // The contract of ReportIgnored is slightly different than the
280     // other Report() functions, in that |field_path.back().index| is
281     // always equal to -1, even if the last field is repeated. This is
282     // because while the other Report() functions indicate where in a
283     // repeated field the action (Addition, Deletion, etc...)
284     // happened, when a repeated field is 'ignored', the differencer
285     // simply calls ReportIgnored on the repeated field as a whole and
286     // moves on without looking at its individual elements.
287     //
288     // Furthermore, ReportIgnored() does not indicate whether the
289     // fields were in fact equal or not, as Compare() does not inspect
290     // these fields at all. It is up to the Reporter to decide whether
291     // the fields are equal or not (perhaps with a second call to
292     // Compare()), if it cares.
ReportIgnored(const Message &,const Message &,const std::vector<SpecificField> &)293     virtual void ReportIgnored(
294         const Message& /* message1 */, const Message& /* message2 */,
295         const std::vector<SpecificField>& /* field_path */) {}
296 
297     // Report that an unknown field is ignored. (see comment above).
298     // Note this is a different function since the last SpecificField in field
299     // path has a null field.  This could break existing Reporter.
ReportUnknownFieldIgnored(const Message &,const Message &,const std::vector<SpecificField> &)300     virtual void ReportUnknownFieldIgnored(
301         const Message& /* message1 */, const Message& /* message2 */,
302         const std::vector<SpecificField>& /* field_path */) {}
303 
304    private:
305     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Reporter);
306   };
307 
308   // MapKeyComparator is used to determine if two elements have the same key
309   // when comparing elements of a repeated field as a map.
310   class PROTOBUF_EXPORT MapKeyComparator {
311    public:
312     MapKeyComparator();
313     virtual ~MapKeyComparator();
314 
IsMatch(const Message &,const Message &,const std::vector<SpecificField> &)315     virtual bool IsMatch(
316         const Message& /* message1 */, const Message& /* message2 */,
317         const std::vector<SpecificField>& /* parent_fields */) const {
318       GOOGLE_CHECK(false) << "IsMatch() is not implemented.";
319       return false;
320     }
321 
322    private:
323     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MapKeyComparator);
324   };
325 
326   // Abstract base class from which all IgnoreCriteria derive.
327   // By adding IgnoreCriteria more complex ignore logic can be implemented.
328   // IgnoreCriteria are registered with AddIgnoreCriteria. For each compared
329   // field IsIgnored is called on each added IgnoreCriteria until one returns
330   // true or all return false.
331   // IsIgnored is called for fields where at least one side has a value.
332   class PROTOBUF_EXPORT IgnoreCriteria {
333    public:
334     IgnoreCriteria();
335     virtual ~IgnoreCriteria();
336 
337     // Returns true if the field should be ignored.
338     virtual bool IsIgnored(
339         const Message& /* message1 */, const Message& /* message2 */,
340         const FieldDescriptor* /* field */,
341         const std::vector<SpecificField>& /* parent_fields */) = 0;
342 
343     // Returns true if the unknown field should be ignored.
344     // Note: This will be called for unknown fields as well in which case
345     //       field.field will be null.
IsUnknownFieldIgnored(const Message &,const Message &,const SpecificField &,const std::vector<SpecificField> &)346     virtual bool IsUnknownFieldIgnored(
347         const Message& /* message1 */, const Message& /* message2 */,
348         const SpecificField& /* field */,
349         const std::vector<SpecificField>& /* parent_fields */) {
350       return false;
351     }
352   };
353 
354   // To add a Reporter, construct default here, then use ReportDifferencesTo or
355   // ReportDifferencesToString.
356   explicit MessageDifferencer();
357 
358   ~MessageDifferencer();
359 
360   enum MessageFieldComparison {
361     EQUAL,       // Fields must be present in both messages
362                  // for the messages to be considered the same.
363     EQUIVALENT,  // Fields with default values are considered set
364                  // for comparison purposes even if not explicitly
365                  // set in the messages themselves.  Unknown fields
366                  // are ignored.
367   };
368 
369   enum Scope {
370     FULL,    // All fields of both messages are considered in the comparison.
371     PARTIAL  // Only fields present in the first message are considered; fields
372              // set only in the second message will be skipped during
373              // comparison.
374   };
375 
376   // DEPRECATED. Use FieldComparator::FloatComparison instead.
377   enum FloatComparison {
378     EXACT,       // Floats and doubles are compared exactly.
379     APPROXIMATE  // Floats and doubles are compared using the
380                  // MathUtil::AlmostEquals method.
381   };
382 
383   enum RepeatedFieldComparison {
384     AS_LIST,  // Repeated fields are compared in order.  Differing values at
385               // the same index are reported using ReportModified().  If the
386               // repeated fields have different numbers of elements, the
387               // unpaired elements are reported using ReportAdded() or
388               // ReportDeleted().
389     AS_SET,   // Treat all the repeated fields as sets.
390               // See TreatAsSet(), as below.
391     AS_SMART_LIST,  // Similar to AS_SET, but preserve the order and find the
392                     // longest matching sequence from the first matching
393                     // element. To use an optimal solution, call
394                     // SetMatchIndicesForSmartListCallback() to pass it in.
395     AS_SMART_SET,   // Similar to AS_SET, but match elements with fewest diffs.
396   };
397 
398   // The elements of the given repeated field will be treated as a set for
399   // diffing purposes, so different orderings of the same elements will be
400   // considered equal.  Elements which are present on both sides of the
401   // comparison but which have changed position will be reported with
402   // ReportMoved().  Elements which only exist on one side or the other are
403   // reported with ReportAdded() and ReportDeleted() regardless of their
404   // positions.  ReportModified() is never used for this repeated field.  If
405   // the only differences between the compared messages is that some fields
406   // have been moved, then the comparison returns true.
407   //
408   // Note that despite the name of this method, this is really
409   // comparison as multisets: if one side of the comparison has a duplicate
410   // in the repeated field but the other side doesn't, this will count as
411   // a mismatch.
412   //
413   // If the scope of comparison is set to PARTIAL, then in addition to what's
414   // above, extra values added to repeated fields of the second message will
415   // not cause the comparison to fail.
416   //
417   // Note that set comparison is currently O(k * n^2) (where n is the total
418   // number of elements, and k is the average size of each element). In theory
419   // it could be made O(n * k) with a more complex hashing implementation. Feel
420   // free to contribute one if the current implementation is too slow for you.
421   // If partial matching is also enabled, the time complexity will be O(k * n^2
422   // + n^3) in which n^3 is the time complexity of the maximum matching
423   // algorithm.
424   //
425   // REQUIRES: field->is_repeated() and field not registered with TreatAsMap*
426   void TreatAsSet(const FieldDescriptor* field);
427   void TreatAsSmartSet(const FieldDescriptor* field);
428 
429   // The elements of the given repeated field will be treated as a list for
430   // diffing purposes, so different orderings of the same elements will NOT be
431   // considered equal.
432   //
433   // REQUIRES: field->is_repeated() and field not registered with TreatAsMap*
434   void TreatAsList(const FieldDescriptor* field);
435   // Note that the complexity is similar to treating as SET.
436   void TreatAsSmartList(const FieldDescriptor* field);
437 
438   // The elements of the given repeated field will be treated as a map for
439   // diffing purposes, with |key| being the map key.  Thus, elements with the
440   // same key will be compared even if they do not appear at the same index.
441   // Differences are reported similarly to TreatAsSet(), except that
442   // ReportModified() is used to report elements with the same key but
443   // different values.  Note that if an element is both moved and modified,
444   // only ReportModified() will be called.  As with TreatAsSet, if the only
445   // differences between the compared messages is that some fields have been
446   // moved, then the comparison returns true. See TreatAsSet for notes on
447   // performance.
448   //
449   // REQUIRES:  field->is_repeated()
450   // REQUIRES:  field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE
451   // REQUIRES:  key->containing_type() == field->message_type()
452   void TreatAsMap(const FieldDescriptor* field, const FieldDescriptor* key);
453   // Same as TreatAsMap except that this method will use multiple fields as
454   // the key in comparison. All specified fields in 'key_fields' should be
455   // present in the compared elements. Two elements will be treated as having
456   // the same key iff they have the same value for every specified field. There
457   // are two steps in the comparison process. The first one is key matching.
458   // Every element from one message will be compared to every element from
459   // the other message. Only fields in 'key_fields' are compared in this step
460   // to decide if two elements have the same key. The second step is value
461   // comparison. Those pairs of elements with the same key (with equal value
462   // for every field in 'key_fields') will be compared in this step.
463   // Time complexity of the first step is O(s * m * n ^ 2) where s is the
464   // average size of the fields specified in 'key_fields', m is the number of
465   // fields in 'key_fields' and n is the number of elements. If partial
466   // matching is enabled, an extra O(n^3) will be incured by the maximum
467   // matching algorithm. The second step is O(k * n) where k is the average
468   // size of each element.
469   void TreatAsMapWithMultipleFieldsAsKey(
470       const FieldDescriptor* field,
471       const std::vector<const FieldDescriptor*>& key_fields);
472   // Same as TreatAsMapWithMultipleFieldsAsKey, except that each of the field
473   // do not necessarily need to be a direct subfield. Each element in
474   // key_field_paths indicate a path from the message being compared, listing
475   // successive subfield to reach the key field.
476   //
477   // REQUIRES:
478   //   for key_field_path in key_field_paths:
479   //     key_field_path[0]->containing_type() == field->message_type()
480   //     for i in [0, key_field_path.size() - 1):
481   //       key_field_path[i+1]->containing_type() ==
482   //           key_field_path[i]->message_type()
483   //       key_field_path[i]->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE
484   //       !key_field_path[i]->is_repeated()
485   void TreatAsMapWithMultipleFieldPathsAsKey(
486       const FieldDescriptor* field,
487       const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths);
488 
489   // Uses a custom MapKeyComparator to determine if two elements have the same
490   // key when comparing a repeated field as a map.
491   // The caller is responsible to delete the key_comparator.
492   // This method varies from TreatAsMapWithMultipleFieldsAsKey only in the
493   // first key matching step. Rather than comparing some specified fields, it
494   // will invoke the IsMatch method of the given 'key_comparator' to decide if
495   // two elements have the same key.
496   void TreatAsMapUsingKeyComparator(const FieldDescriptor* field,
497                                     const MapKeyComparator* key_comparator);
498 
499   // Initiates and returns a new instance of MultipleFieldsMapKeyComparator.
500   MapKeyComparator* CreateMultipleFieldsMapKeyComparator(
501       const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths);
502 
503   // Add a custom ignore criteria that is evaluated in addition to the
504   // ignored fields added with IgnoreField.
505   // Takes ownership of ignore_criteria.
506   void AddIgnoreCriteria(IgnoreCriteria* ignore_criteria);
507 
508   // Indicates that any field with the given descriptor should be
509   // ignored for the purposes of comparing two messages. This applies
510   // to fields nested in the message structure as well as top level
511   // ones. When the MessageDifferencer encounters an ignored field,
512   // ReportIgnored is called on the reporter, if one is specified.
513   //
514   // The only place where the field's 'ignored' status is not applied is when
515   // it is being used as a key in a field passed to TreatAsMap or is one of
516   // the fields passed to TreatAsMapWithMultipleFieldsAsKey.
517   // In this case it is compared in key matching but after that it's ignored
518   // in value comparison.
519   void IgnoreField(const FieldDescriptor* field);
520 
521   // Sets the field comparator used to determine differences between protocol
522   // buffer fields. By default it's set to a DefaultFieldComparator instance.
523   // MessageDifferencer doesn't take ownership over the passed object.
524   // Note that this method must be called before Compare for the comparator to
525   // be used.
526   void set_field_comparator(FieldComparator* comparator);
527 #ifdef PROTOBUF_FUTURE_BREAKING_CHANGES
528   void set_field_comparator(DefaultFieldComparator* comparator);
529 #endif  // PROTOBUF_FUTURE_BREAKING_CHANGES
530 
531   // DEPRECATED. Pass a DefaultFieldComparator instance instead.
532   // Sets the fraction and margin for the float comparison of a given field.
533   // Uses MathUtil::WithinFractionOrMargin to compare the values.
534   // NOTE: this method does nothing if differencer's field comparator has been
535   //       set to a custom object.
536   //
537   // REQUIRES: field->cpp_type == FieldDescriptor::CPPTYPE_DOUBLE or
538   //           field->cpp_type == FieldDescriptor::CPPTYPE_FLOAT
539   // REQUIRES: float_comparison_ == APPROXIMATE
540   void SetFractionAndMargin(const FieldDescriptor* field, double fraction,
541                             double margin);
542 
543   // Sets the type of comparison (as defined in the MessageFieldComparison
544   // enumeration above) that is used by this differencer when determining how
545   // to compare fields in messages.
546   void set_message_field_comparison(MessageFieldComparison comparison);
547 
548   // Returns the current message field comparison used in this differencer.
549   MessageFieldComparison message_field_comparison() const;
550 
551   // Tells the differencer whether or not to report matches. This method must
552   // be called before Compare. The default for a new differencer is false.
set_report_matches(bool report_matches)553   void set_report_matches(bool report_matches) {
554     report_matches_ = report_matches;
555   }
556 
557   // Tells the differencer whether or not to report moves (in a set or map
558   // repeated field). This method must be called before Compare. The default for
559   // a new differencer is true.
set_report_moves(bool report_moves)560   void set_report_moves(bool report_moves) { report_moves_ = report_moves; }
561 
562   // Tells the differencer whether or not to report ignored values. This method
563   // must be called before Compare. The default for a new differencer is true.
set_report_ignores(bool report_ignores)564   void set_report_ignores(bool report_ignores) {
565     report_ignores_ = report_ignores;
566   }
567 
568   // Sets the scope of the comparison (as defined in the Scope enumeration
569   // above) that is used by this differencer when determining which fields to
570   // compare between the messages.
571   void set_scope(Scope scope);
572 
573   // Returns the current scope used by this differencer.
574   Scope scope() const;
575 
576   // DEPRECATED. Pass a DefaultFieldComparator instance instead.
577   // Sets the type of comparison (as defined in the FloatComparison enumeration
578   // above) that is used by this differencer when comparing float (and double)
579   // fields in messages.
580   // NOTE: this method does nothing if differencer's field comparator has been
581   //       set to a custom object.
582   void set_float_comparison(FloatComparison comparison);
583 
584   // Sets the type of comparison for repeated field (as defined in the
585   // RepeatedFieldComparison enumeration above) that is used by this
586   // differencer when compare repeated fields in messages.
587   void set_repeated_field_comparison(RepeatedFieldComparison comparison);
588 
589   // Returns the current repeated field comparison used by this differencer.
590   RepeatedFieldComparison repeated_field_comparison() const;
591 
592   // Compares the two specified messages, returning true if they are the same,
593   // false otherwise. If this method returns false, any changes between the
594   // two messages will be reported if a Reporter was specified via
595   // ReportDifferencesTo (see also ReportDifferencesToString).
596   //
597   // This method REQUIRES that the two messages have the same
598   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
599   bool Compare(const Message& message1, const Message& message2);
600 
601   // Same as above, except comparing only the list of fields specified by the
602   // two vectors of FieldDescriptors.
603   bool CompareWithFields(
604       const Message& message1, const Message& message2,
605       const std::vector<const FieldDescriptor*>& message1_fields,
606       const std::vector<const FieldDescriptor*>& message2_fields);
607 
608   // Automatically creates a reporter that will output the differences
609   // found (if any) to the specified output string pointer. Note that this
610   // method must be called before Compare.
611   void ReportDifferencesToString(std::string* output);
612 
613   // Tells the MessageDifferencer to report differences via the specified
614   // reporter. Note that this method must be called before Compare for
615   // the reporter to be used. It is the responsibility of the caller to delete
616   // this object.
617   // If the provided pointer equals NULL, the MessageDifferencer stops reporting
618   // differences to any previously set reporters or output strings.
619   void ReportDifferencesTo(Reporter* reporter);
620 
621  private:
622   // Class for processing Any deserialization.  This logic is used by both the
623   // MessageDifferencer and StreamReporter classes.
624   class UnpackAnyField {
625    private:
626     std::unique_ptr<DynamicMessageFactory> dynamic_message_factory_;
627 
628    public:
629     UnpackAnyField() = default;
630     ~UnpackAnyField() = default;
631     // If "any" is of type google.protobuf.Any, extract its payload using
632     // DynamicMessageFactory and store in "data".
633     bool UnpackAny(const Message& any, std::unique_ptr<Message>* data);
634   };
635 
636  public:
637   // An implementation of the MessageDifferencer Reporter that outputs
638   // any differences found in human-readable form to the supplied
639   // ZeroCopyOutputStream or Printer. If a printer is used, the delimiter
640   // *must* be '$'.
641   //
642   // WARNING: this reporter does not necessarily flush its output until it is
643   // destroyed. As a result, it is not safe to assume the output is valid or
644   // complete until after you destroy the reporter. For example, if you use a
645   // StreamReporter to write to a StringOutputStream, the target string may
646   // contain uninitialized data until the reporter is destroyed.
647   class PROTOBUF_EXPORT StreamReporter : public Reporter {
648    public:
649     explicit StreamReporter(io::ZeroCopyOutputStream* output);
650     explicit StreamReporter(io::Printer* printer);  // delimiter '$'
651     ~StreamReporter() override;
652 
653     // When set to true, the stream reporter will also output aggregates nodes
654     // (i.e. messages and groups) whose subfields have been modified. When
655     // false, will only report the individual subfields. Defaults to false.
set_report_modified_aggregates(bool report)656     void set_report_modified_aggregates(bool report) {
657       report_modified_aggregates_ = report;
658     }
659 
660     // The following are implementations of the methods described above.
661 
662     void ReportAdded(const Message& message1, const Message& message2,
663                      const std::vector<SpecificField>& field_path) override;
664 
665     void ReportDeleted(const Message& message1, const Message& message2,
666                        const std::vector<SpecificField>& field_path) override;
667 
668     void ReportModified(const Message& message1, const Message& message2,
669                         const std::vector<SpecificField>& field_path) override;
670 
671     void ReportMoved(const Message& message1, const Message& message2,
672                      const std::vector<SpecificField>& field_path) override;
673 
674     void ReportMatched(const Message& message1, const Message& message2,
675                        const std::vector<SpecificField>& field_path) override;
676 
677     void ReportIgnored(const Message& message1, const Message& message2,
678                        const std::vector<SpecificField>& field_path) override;
679 
680     void ReportUnknownFieldIgnored(
681         const Message& message1, const Message& message2,
682         const std::vector<SpecificField>& field_path) override;
683 
684     // Messages that are being compared must be provided to StreamReporter prior
685     // to processing
686     void SetMessages(const Message& message1, const Message& message2);
687 
688    protected:
689     // Prints the specified path of fields to the buffer.
690     virtual void PrintPath(const std::vector<SpecificField>& field_path,
691                            bool left_side);
692 
693     // Prints the value of fields to the buffer.  left_side is true if the
694     // given message is from the left side of the comparison, false if it
695     // was the right.  This is relevant only to decide whether to follow
696     // unknown_field_index1 or unknown_field_index2 when an unknown field
697     // is encountered in field_path.
698     virtual void PrintValue(const Message& message,
699                             const std::vector<SpecificField>& field_path,
700                             bool left_side);
701 
702     // Prints the specified path of unknown fields to the buffer.
703     virtual void PrintUnknownFieldValue(const UnknownField* unknown_field);
704 
705     // Just print a string
706     void Print(const std::string& str);
707 
708    private:
709     // helper function for PrintPath that contains logic for printing maps
710     void PrintMapKey(bool left_side, const SpecificField& specific_field);
711 
712     io::Printer* printer_;
713     bool delete_printer_;
714     bool report_modified_aggregates_;
715     const Message* message1_;
716     const Message* message2_;
717     MessageDifferencer::UnpackAnyField unpack_any_field_;
718     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StreamReporter);
719   };
720 
721  private:
722   friend class SimpleFieldComparator;
723 
724   // A MapKeyComparator to be used in TreatAsMapUsingKeyComparator.
725   // Implementation of this class needs to do field value comparison which
726   // relies on some private methods of MessageDifferencer. That's why this
727   // class is declared as a nested class of MessageDifferencer.
728   class MultipleFieldsMapKeyComparator;
729 
730   // A MapKeyComparator for use with map_entries.
731   class PROTOBUF_EXPORT MapEntryKeyComparator : public MapKeyComparator {
732    public:
733     explicit MapEntryKeyComparator(MessageDifferencer* message_differencer);
734     bool IsMatch(
735         const Message& message1, const Message& message2,
736         const std::vector<SpecificField>& parent_fields) const override;
737 
738    private:
739     MessageDifferencer* message_differencer_;
740   };
741 
742   // Returns true if field1's number() is less than field2's.
743   static bool FieldBefore(const FieldDescriptor* field1,
744                           const FieldDescriptor* field2);
745 
746   // Retrieve all the set fields, including extensions.
747   FieldDescriptorArray RetrieveFields(const Message& message,
748                                       bool base_message);
749 
750   // Combine the two lists of fields into the combined_fields output vector.
751   // All fields present in both lists will always be included in the combined
752   // list.  Fields only present in one of the lists will only appear in the
753   // combined list if the corresponding fields_scope option is set to FULL.
754   FieldDescriptorArray CombineFields(const FieldDescriptorArray& fields1,
755                                      Scope fields1_scope,
756                                      const FieldDescriptorArray& fields2,
757                                      Scope fields2_scope);
758 
759   // Internal version of the Compare method which performs the actual
760   // comparison. The parent_fields vector is a vector containing field
761   // descriptors of all fields accessed to get to this comparison operation
762   // (i.e. if the current message is an embedded message, the parent_fields
763   // vector will contain the field that has this embedded message).
764   bool Compare(const Message& message1, const Message& message2,
765                std::vector<SpecificField>* parent_fields);
766 
767   // Compares all the unknown fields in two messages.
768   bool CompareUnknownFields(const Message& message1, const Message& message2,
769                             const UnknownFieldSet&, const UnknownFieldSet&,
770                             std::vector<SpecificField>* parent_fields);
771 
772   // Compares the specified messages for the requested field lists. The field
773   // lists are modified depending on comparison settings, and then passed to
774   // CompareWithFieldsInternal.
775   bool CompareRequestedFieldsUsingSettings(
776       const Message& message1, const Message& message2,
777       const FieldDescriptorArray& message1_fields,
778       const FieldDescriptorArray& message2_fields,
779       std::vector<SpecificField>* parent_fields);
780 
781   // Compares the specified messages with the specified field lists.
782   bool CompareWithFieldsInternal(const Message& message1,
783                                  const Message& message2,
784                                  const FieldDescriptorArray& message1_fields,
785                                  const FieldDescriptorArray& message2_fields,
786                                  std::vector<SpecificField>* parent_fields);
787 
788   // Compares the repeated fields, and report the error.
789   bool CompareRepeatedField(const Message& message1, const Message& message2,
790                             const FieldDescriptor* field,
791                             std::vector<SpecificField>* parent_fields);
792 
793   // Compares map fields, and report the error.
794   bool CompareMapField(const Message& message1, const Message& message2,
795                        const FieldDescriptor* field,
796                        std::vector<SpecificField>* parent_fields);
797 
798   // Helper for CompareRepeatedField and CompareMapField: compares and reports
799   // differences element-wise. This is the implementation for non-map fields,
800   // and can also compare map fields by using the underlying representation.
801   bool CompareRepeatedRep(const Message& message1, const Message& message2,
802                           const FieldDescriptor* field,
803                           std::vector<SpecificField>* parent_fields);
804 
805   // Helper for CompareMapField: compare the map fields using map reflection
806   // instead of sync to repeated.
807   bool CompareMapFieldByMapReflection(const Message& message1,
808                                       const Message& message2,
809                                       const FieldDescriptor* field,
810                                       std::vector<SpecificField>* parent_fields,
811                                       DefaultFieldComparator* comparator);
812 
813   // Shorthand for CompareFieldValueUsingParentFields with NULL parent_fields.
814   bool CompareFieldValue(const Message& message1, const Message& message2,
815                          const FieldDescriptor* field, int index1, int index2);
816 
817   // Compares the specified field on the two messages, returning
818   // true if they are the same, false otherwise. For repeated fields,
819   // this method only compares the value in the specified index. This method
820   // uses Compare functions to recurse into submessages.
821   // The parent_fields vector is used in calls to a Reporter instance calls.
822   // It can be NULL, in which case the MessageDifferencer will create new
823   // list of parent messages if it needs to recursively compare the given field.
824   // To avoid confusing users you should not set it to NULL unless you modified
825   // Reporter to handle the change of parent_fields correctly.
826   bool CompareFieldValueUsingParentFields(
827       const Message& message1, const Message& message2,
828       const FieldDescriptor* field, int index1, int index2,
829       std::vector<SpecificField>* parent_fields);
830 
831   // Compares the specified field on the two messages, returning comparison
832   // result, as returned by appropriate FieldComparator.
833   FieldComparator::ComparisonResult GetFieldComparisonResult(
834       const Message& message1, const Message& message2,
835       const FieldDescriptor* field, int index1, int index2,
836       const FieldContext* field_context);
837 
838   // Check if the two elements in the repeated field are match to each other.
839   // if the key_comprator is NULL, this function returns true when the two
840   // elements are equal.
841   bool IsMatch(const FieldDescriptor* repeated_field,
842                const MapKeyComparator* key_comparator, const Message* message1,
843                const Message* message2,
844                const std::vector<SpecificField>& parent_fields,
845                Reporter* reporter, int index1, int index2);
846 
847   // Returns true when this repeated field has been configured to be treated
848   // as a Set / SmartSet / SmartList.
849   bool IsTreatedAsSet(const FieldDescriptor* field);
850   bool IsTreatedAsSmartSet(const FieldDescriptor* field);
851 
852   bool IsTreatedAsSmartList(const FieldDescriptor* field);
853   // When treating as SMART_LIST, it uses MatchIndicesPostProcessorForSmartList
854   // by default to find the longest matching sequence from the first matching
855   // element. The callback takes two vectors showing the matching indices from
856   // the other vector, where -1 means an unmatch.
857   void SetMatchIndicesForSmartListCallback(
858       std::function<void(std::vector<int>*, std::vector<int>*)> callback);
859 
860   // Returns true when this repeated field is to be compared as a subset, ie.
861   // has been configured to be treated as a set or map and scope is set to
862   // PARTIAL.
863   bool IsTreatedAsSubset(const FieldDescriptor* field);
864 
865   // Returns true if this field is to be ignored when this
866   // MessageDifferencer compares messages.
867   bool IsIgnored(const Message& message1, const Message& message2,
868                  const FieldDescriptor* field,
869                  const std::vector<SpecificField>& parent_fields);
870 
871   // Returns true if this unknown field is to be ignored when this
872   // MessageDifferencer compares messages.
873   bool IsUnknownFieldIgnored(const Message& message1, const Message& message2,
874                              const SpecificField& field,
875                              const std::vector<SpecificField>& parent_fields);
876 
877   // Returns MapKeyComparator* when this field has been configured to be treated
878   // as a map or its is_map() return true.  If not, returns NULL.
879   const MapKeyComparator* GetMapKeyComparator(
880       const FieldDescriptor* field) const;
881 
882   // Attempts to match indices of a repeated field, so that the contained values
883   // match. Clears output vectors and sets their values to indices of paired
884   // messages, ie. if message1[0] matches message2[1], then match_list1[0] == 1
885   // and match_list2[1] == 0. The unmatched indices are indicated by -1.
886   // Assumes the repeated field is not treated as a simple list.
887   // This method returns false if the match failed. However, it doesn't mean
888   // that the comparison succeeds when this method returns true (you need to
889   // double-check in this case).
890   bool MatchRepeatedFieldIndices(
891       const Message& message1, const Message& message2,
892       const FieldDescriptor* repeated_field,
893       const MapKeyComparator* key_comparator,
894       const std::vector<SpecificField>& parent_fields,
895       std::vector<int>* match_list1, std::vector<int>* match_list2);
896 
897   // Checks if index is equal to new_index in all the specific fields.
898   static bool CheckPathChanged(const std::vector<SpecificField>& parent_fields);
899 
900   // CHECKs that the given repeated field can be compared according to
901   // new_comparison.
902   void CheckRepeatedFieldComparisons(
903       const FieldDescriptor* field,
904       const RepeatedFieldComparison& new_comparison);
905 
906   // Defines a map between field descriptors and their MapKeyComparators.
907   // Used for repeated fields when they are configured as TreatAsMap.
908   typedef std::map<const FieldDescriptor*, const MapKeyComparator*>
909       FieldKeyComparatorMap;
910 
911   // Defines a set to store field descriptors.  Used for repeated fields when
912   // they are configured as TreatAsSet.
913   typedef std::set<const FieldDescriptor*> FieldSet;
914   typedef std::map<const FieldDescriptor*, RepeatedFieldComparison> FieldMap;
915 
916   Reporter* reporter_;
917   DefaultFieldComparator default_field_comparator_;
918   MessageFieldComparison message_field_comparison_;
919   Scope scope_;
920   RepeatedFieldComparison repeated_field_comparison_;
921 
922   FieldMap repeated_field_comparisons_;
923   // Keeps track of MapKeyComparators that are created within
924   // MessageDifferencer. These MapKeyComparators should be deleted
925   // before MessageDifferencer is destroyed.
926   // When TreatAsMap or TreatAsMapWithMultipleFieldsAsKey is called, we don't
927   // store the supplied FieldDescriptors directly. Instead, a new
928   // MapKeyComparator is created for comparison purpose.
929   std::vector<MapKeyComparator*> owned_key_comparators_;
930   FieldKeyComparatorMap map_field_key_comparator_;
931   MapEntryKeyComparator map_entry_key_comparator_;
932   std::vector<IgnoreCriteria*> ignore_criteria_;
933   // Reused multiple times in RetrieveFields to avoid extra allocations
934   std::vector<const FieldDescriptor*> tmp_message_fields_;
935 
936   FieldSet ignored_fields_;
937 
938   union {
939     DefaultFieldComparator* default_impl;
940     FieldComparator* base;
941   } field_comparator_ = {&default_field_comparator_};
942   enum { kFCDefault, kFCBase } field_comparator_kind_ = kFCDefault;
943 
944   bool report_matches_;
945   bool report_moves_;
946   bool report_ignores_;
947 
948   std::string* output_string_;
949 
950   // Callback to post-process the matched indices to support SMART_LIST.
951   std::function<void(std::vector<int>*, std::vector<int>*)>
952       match_indices_for_smart_list_callback_;
953 
954   MessageDifferencer::UnpackAnyField unpack_any_field_;
955   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MessageDifferencer);
956 };
957 
958 // This class provides extra information to the FieldComparator::Compare
959 // function.
960 class PROTOBUF_EXPORT FieldContext {
961  public:
FieldContext(std::vector<MessageDifferencer::SpecificField> * parent_fields)962   explicit FieldContext(
963       std::vector<MessageDifferencer::SpecificField>* parent_fields)
964       : parent_fields_(parent_fields) {}
965 
parent_fields()966   std::vector<MessageDifferencer::SpecificField>* parent_fields() const {
967     return parent_fields_;
968   }
969 
970  private:
971   std::vector<MessageDifferencer::SpecificField>* parent_fields_;
972 };
973 
974 }  // namespace util
975 }  // namespace protobuf
976 }  // namespace google
977 
978 #include <google/protobuf/port_undef.inc>
979 
980 #endif  // GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
981