• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 //
4 // Use of this source code is governed by a BSD-style
5 // license that can be found in the LICENSE file or at
6 // https://developers.google.com/open-source/licenses/bsd
7 
8 // Author: jschorr@google.com (Joseph Schorr)
9 //  Based on original Protocol Buffers design by
10 //  Sanjay Ghemawat, Jeff Dean, and others.
11 //
12 // This file defines static methods and classes for comparing Protocol
13 // Messages.
14 //
15 // Aug. 2008: Added Unknown Fields Comparison for messages.
16 // Aug. 2009: Added different options to compare repeated fields.
17 // Apr. 2010: Moved field comparison to FieldComparator
18 // Sep. 2020: Added option to output map keys in path
19 
20 #ifndef GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
21 #define GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
22 
23 #include <functional>
24 #include <memory>
25 #include <string>
26 #include <vector>
27 
28 #include "absl/base/macros.h"
29 #include "absl/container/fixed_array.h"
30 #include "absl/container/flat_hash_map.h"
31 #include "absl/container/flat_hash_set.h"
32 #include "absl/log/absl_check.h"
33 #include "google/protobuf/descriptor.h"  // FieldDescriptor
34 #include "google/protobuf/message.h"     // Message
35 #include "google/protobuf/text_format.h"
36 #include "google/protobuf/unknown_field_set.h"
37 #include "google/protobuf/util/field_comparator.h"
38 
39 // Always include as last one, otherwise it can break compilation
40 #include "google/protobuf/port_def.inc"
41 
42 namespace google {
43 namespace protobuf {
44 
45 class DynamicMessageFactory;
46 class FieldDescriptor;
47 
48 namespace io {
49 class ZeroCopyOutputStream;
50 class Printer;
51 }  // namespace io
52 
53 namespace util {
54 
55 class DefaultFieldComparator;
56 class FieldContext;  // declared below MessageDifferencer
57 
58 // A basic differencer that can be used to determine
59 // the differences between two specified Protocol Messages. If any differences
60 // are found, the Compare method will return false, and any differencer reporter
61 // specified via ReportDifferencesTo will have its reporting methods called (see
62 // below for implementation of the report). Based off of the original
63 // ProtocolDifferencer implementation in //net/proto/protocol-differencer.h
64 // (Thanks Todd!).
65 //
66 // MessageDifferencer REQUIRES that compared messages be the same type, defined
67 // as messages that share the same descriptor.  If not, the behavior of this
68 // class is undefined.
69 //
70 // People disagree on what MessageDifferencer should do when asked to compare
71 // messages with different descriptors.  Some people think it should always
72 // return false.  Others expect it to try to look for similar fields and
73 // compare them anyway -- especially if the descriptors happen to be identical.
74 // If we chose either of these behaviors, some set of people would find it
75 // surprising, and could end up writing code expecting the other behavior
76 // without realizing their error.  Therefore, we forbid that usage.
77 //
78 // This class is implemented based on the proto2 reflection. The performance
79 // should be good enough for normal usages. However, for places where the
80 // performance is extremely sensitive, there are several alternatives:
81 // - Comparing serialized string
82 // Downside: false negatives (there are messages that are the same but their
83 // serialized strings are different).
84 // - Equals code generator by compiler plugin (net/proto2/contrib/equals_plugin)
85 // Downside: more generated code; maintenance overhead for the additional rule
86 // (must be in sync with the original proto_library).
87 //
88 // Note on handling of google.protobuf.Any: MessageDifferencer automatically
89 // unpacks Any::value into a Message and compares its individual fields.
90 // Messages encoded in a repeated Any cannot be compared using TreatAsMap.
91 //
92 // Note on thread-safety: MessageDifferencer is *not* thread-safe. You need to
93 // guard it with a lock to use the same MessageDifferencer instance from
94 // multiple threads. Note that it's fine to call static comparison methods
95 // (like MessageDifferencer::Equals) concurrently, but it's not recommended for
96 // performance critical code as it leads to extra allocations.
97 class PROTOBUF_EXPORT MessageDifferencer {
98  public:
99   // Determines whether the supplied messages are equal. Equality is defined as
100   // all fields within the two messages being set to the same value. Primitive
101   // fields and strings are compared by value while embedded messages/groups
102   // are compared as if via a recursive call. Use Compare() with IgnoreField()
103   // if some fields should be ignored in the comparison. Use Compare() with
104   // TreatAsSet() if there are repeated fields where ordering does not matter.
105   //
106   // This method REQUIRES that the two messages have the same
107   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
108   static bool Equals(const Message& message1, const Message& message2);
109 
110   // Determines whether the supplied messages are equivalent. Equivalency is
111   // defined as all fields within the two messages having the same value. This
112   // differs from the Equals method above in that fields with default values
113   // are considered set to said value automatically. For details on how default
114   // values are defined for each field type, see:
115   // https://developers.google.com/protocol-buffers/docs/proto?csw=1#optional.
116   // Also, Equivalent() ignores unknown fields. Use IgnoreField() and Compare()
117   // if some fields should be ignored in the comparison.
118   //
119   // This method REQUIRES that the two messages have the same
120   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
121   static bool Equivalent(const Message& message1, const Message& message2);
122 
123   // Determines whether the supplied messages are approximately equal.
124   // Approximate equality is defined as all fields within the two messages
125   // being approximately equal.  Primitive (non-float) fields and strings are
126   // compared by value, floats are compared using MathUtil::AlmostEquals() and
127   // embedded messages/groups are compared as if via a recursive call. Use
128   // IgnoreField() and Compare() if some fields should be ignored in the
129   // comparison.
130   //
131   // This method REQUIRES that the two messages have the same
132   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
133   static bool ApproximatelyEquals(const Message& message1,
134                                   const Message& message2);
135 
136   // Determines whether the supplied messages are approximately equivalent.
137   // Approximate equivalency is defined as all fields within the two messages
138   // being approximately equivalent. As in
139   // MessageDifferencer::ApproximatelyEquals, primitive (non-float) fields and
140   // strings are compared by value, floats are compared using
141   // MathUtil::AlmostEquals() and embedded messages/groups are compared as if
142   // via a recursive call. However, fields with default values are considered
143   // set to said value, as per MessageDifferencer::Equivalent. Use IgnoreField()
144   // and Compare() if some fields should be ignored in the comparison.
145   //
146   // This method REQUIRES that the two messages have the same
147   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
148   static bool ApproximatelyEquivalent(const Message& message1,
149                                       const Message& message2);
150 
151   // Identifies an individual field in a message instance.  Used for field_path,
152   // below.
153   struct SpecificField {
154     // The messages that contain this field. They are always set. They are valid
155     // only during a call to Reporter::Report* methods.
156     //
157     // If the original messages are of type google.protobuf.Any, these fields
158     // will store the unpacked payloads, and unpacked_any will become > 0.  More
159     // precisely, unpacked_any defines the nesting level of Any.  For example,
160     // if the original message packs another Any, then unpacked_any=2, assuming
161     // the differencer unpacked both of them.
162     //
163     // When an Any object packs a non-Any proto object whose field includes
164     // Any, then unpacked_any=1. Thus, in most practical applications,
165     // unpacked_any will be 0 or 1.
166     const Message* message1 = nullptr;
167     const Message* message2 = nullptr;
168     int unpacked_any = 0;
169 
170     // For known fields, "field" is filled in and "unknown_field_number" is -1.
171     // For unknown fields, "field" is null, "unknown_field_number" is the field
172     // number, and "unknown_field_type" is its type.
173     const FieldDescriptor* field = nullptr;
174     int unknown_field_number = -1;
175     UnknownField::Type unknown_field_type = UnknownField::Type::TYPE_VARINT;
176 
177     // If this is a repeated field, "index" is the index within it.  For unknown
178     // fields, this is the index of the field among all unknown fields of the
179     // same field number and type.
180     int index = -1;
181 
182     // If "field" is a repeated field which is being treated as a map or
183     // a set (see TreatAsMap() and TreatAsSet(), below), "new_index" indicates
184     // the position to which the element has moved.  If the element
185     // has not moved, "new_index" will have the same value as "index".
186     int new_index = -1;
187 
188     // If "field" is a map field, these point to the map entries.
189     const Message* map_entry1 = nullptr;
190     const Message* map_entry2 = nullptr;
191 
192     // For unknown fields, these are the pointers to the UnknownFieldSets
193     // containing the unknown fields. In certain cases (e.g. proto1's
194     // MessageSet, or nested groups of unknown fields), these may differ from
195     // the messages' internal UnknownFieldSets.
196     const UnknownFieldSet* unknown_field_set1 = nullptr;
197     const UnknownFieldSet* unknown_field_set2 = nullptr;
198 
199     // For unknown fields, these are the indices of the field within the
200     // UnknownFieldSets. One or the other will be -1 when
201     // reporting an addition or deletion.
202     int unknown_field_index1 = -1;
203     int unknown_field_index2 = -1;
204 
205     // True if this field was added to the diff because
206     // set_force_compare_no_presence was called on the MessageDifferencer object.
207     bool forced_compare_no_presence_ = false;
208   };
209 
210   // Abstract base class from which all MessageDifferencer
211   // reporters derive. The five Report* methods below will be called when
212   // a field has been added, deleted, modified, moved, or matched. The third
213   // argument is a vector of SpecificField entries which describes the chain
214   // of fields that was taken to find the current field. For example, for a
215   // field found in an embedded message, the vector will contain two
216   // SpecificFields. The first will be the field of the embedded message
217   // itself and the second will be the actual field in the embedded message
218   // that was added/deleted/modified.
219   // Fields will be reported in PostTraversalOrder.
220   // For example, given the following proto, if both baz and mooo are changed:
221   // foo {
222   //   bar {
223   //     baz: 1
224   //     mooo: 2
225   //   }
226   // }
227   // ReportModified will be invoked in the following order:
228   // 1. foo.bar.baz or foo.bar.mooo
229   // 2. foo.bar.mooo or foo.bar.baz
230   // 3. foo.bar
231   // 4. foo
232   class PROTOBUF_EXPORT Reporter {
233    public:
234     Reporter();
235     Reporter(const Reporter&) = delete;
236     Reporter& operator=(const Reporter&) = delete;
237     virtual ~Reporter();
238 
239     // Reports that a field has been added into message2. No-op by default.
ReportAdded(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)240     virtual void ReportAdded(const Message& message1, const Message& message2,
241                              const std::vector<SpecificField>& field_path) {}
242 
243     // Reports that a field has been deleted from message1. No-op by default.
ReportDeleted(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)244     virtual void ReportDeleted(const Message& message1, const Message& message2,
245                                const std::vector<SpecificField>& field_path) {}
246 
247     // Reports that the value of a field has been modified. No-op by default.
ReportModified(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)248     virtual void ReportModified(const Message& message1,
249                                 const Message& message2,
250                                 const std::vector<SpecificField>& field_path) {}
251 
252     // Reports that a repeated field has been moved to another location.  This
253     // only applies when using TreatAsSet() or TreatAsMap() -- see below. Also
254     // note that for any given field, ReportModified and ReportMoved are
255     // mutually exclusive. If a field has been both moved and modified, then
256     // only ReportModified will be called.
ReportMoved(const Message &,const Message &,const std::vector<SpecificField> &)257     virtual void ReportMoved(
258         const Message& /* message1 */, const Message& /* message2 */,
259         const std::vector<SpecificField>& /* field_path */) {}
260 
261     // Reports that two fields match. Useful for doing side-by-side diffs.
262     // This function is mutually exclusive with ReportModified and ReportMoved.
263     // Note that you must call set_report_matches(true) before calling Compare
264     // to make use of this function.
ReportMatched(const Message &,const Message &,const std::vector<SpecificField> &)265     virtual void ReportMatched(
266         const Message& /* message1 */, const Message& /* message2 */,
267         const std::vector<SpecificField>& /* field_path */) {}
268 
269     // Reports that two fields would have been compared, but the
270     // comparison has been skipped because the field was marked as
271     // 'ignored' using IgnoreField().  This function is mutually
272     // exclusive with all the other Report() functions.
273     //
274     // The contract of ReportIgnored is slightly different than the
275     // other Report() functions, in that |field_path.back().index| is
276     // always equal to -1, even if the last field is repeated. This is
277     // because while the other Report() functions indicate where in a
278     // repeated field the action (Addition, Deletion, etc...)
279     // happened, when a repeated field is 'ignored', the differencer
280     // simply calls ReportIgnored on the repeated field as a whole and
281     // moves on without looking at its individual elements.
282     //
283     // Furthermore, ReportIgnored() does not indicate whether the
284     // fields were in fact equal or not, as Compare() does not inspect
285     // these fields at all. It is up to the Reporter to decide whether
286     // the fields are equal or not (perhaps with a second call to
287     // Compare()), if it cares.
ReportIgnored(const Message &,const Message &,const std::vector<SpecificField> &)288     virtual void ReportIgnored(
289         const Message& /* message1 */, const Message& /* message2 */,
290         const std::vector<SpecificField>& /* field_path */) {}
291 
292     // Reports that an unknown field is ignored (see comment above).
293     // Note this is a separate function since the last SpecificField in the
294     // field path has a null field, which could break existing Reporters.
ReportUnknownFieldIgnored(const Message &,const Message &,const std::vector<SpecificField> &)295     virtual void ReportUnknownFieldIgnored(
296         const Message& /* message1 */, const Message& /* message2 */,
297         const std::vector<SpecificField>& /* field_path */) {}
298   };
299 
300   // MapKeyComparator is used to determine if two elements have the same key
301   // when comparing elements of a repeated field as a map.
302   class PROTOBUF_EXPORT MapKeyComparator {
303    public:
304     MapKeyComparator();
305     MapKeyComparator(const MapKeyComparator&) = delete;
306     MapKeyComparator& operator=(const MapKeyComparator&) = delete;
307     virtual ~MapKeyComparator();
308 
309     // This method should be overridden by every implementation; the default
310     // fails an ABSL_CHECK.  The arg unmapped_any is nonzero if the original
311     // messages provided by the user are of type google.protobuf.Any.
312     //
313     // More precisely, unmapped_any defines the nesting level of Any.  For
314     // example, if Any packs another Any then unmapped_any=2, assuming the
315     // differencer unpacked both.  Note that when an Any object packs a non-Any
316     // proto object whose field includes Any, then unmapped_any=1. Thus, in most
317     // practical applications, unmapped_any will be 0 or 1.
IsMatch(const Message & message1,const Message & message2,int,const std::vector<SpecificField> & fields)318     virtual bool IsMatch(const Message& message1, const Message& message2,
319                          int /* unmapped_any */,
320                          const std::vector<SpecificField>& fields) const {
321       ABSL_CHECK(false) << "IsMatch() is not implemented.";
322       return false;
323     }
324   };
325 
326   // Abstract base class from which all IgnoreCriteria derive.
327   // By adding IgnoreCriteria more complex ignore logic can be implemented.
328   // IgnoreCriteria are registered with AddIgnoreCriteria. For each compared
329   // field IsIgnored is called on each added IgnoreCriteria until one returns
330   // true or all return false.
331   // IsIgnored is called for fields where at least one side has a value.
332   class PROTOBUF_EXPORT IgnoreCriteria {
333    public:
334     IgnoreCriteria();
335     virtual ~IgnoreCriteria();
336 
337     // Returns true if the field should be ignored. Pure virtual.
338     virtual bool IsIgnored(
339         const Message& /* message1 */, const Message& /* message2 */,
340         const FieldDescriptor* /* field */,
341         const std::vector<SpecificField>& /* parent_fields */) = 0;
342 
343     // Returns true if the unknown field should be ignored; the default
344     // implementation returns false.  Note: This is called for unknown fields,
345     //       in which case field.field will be null.
IsUnknownFieldIgnored(const Message &,const Message &,const SpecificField &,const std::vector<SpecificField> &)346     virtual bool IsUnknownFieldIgnored(
347         const Message& /* message1 */, const Message& /* message2 */,
348         const SpecificField& /* field */,
349         const std::vector<SpecificField>& /* parent_fields */) {
350       return false;
351     }
352   };
353 
354   // To add a Reporter, construct default here, then use ReportDifferencesTo or
355   // ReportDifferencesToString.
356   explicit MessageDifferencer();
357   MessageDifferencer(const MessageDifferencer&) = delete;
358   MessageDifferencer& operator=(const MessageDifferencer&) = delete;
359 
360   ~MessageDifferencer();
361 
362   enum MessageFieldComparison {
363     EQUAL,       // Fields must be present in both messages
364                  // for the messages to be considered the same.
365     EQUIVALENT,  // Fields with default values are considered set
366                  // for comparison purposes even if not explicitly
367                  // set in the messages themselves.  Unknown fields
368                  // are ignored.  See set_message_field_comparison().
369   };
370 
371   enum Scope {
372     FULL,    // All fields of both messages are considered in the comparison.
373     PARTIAL  // Only fields present in the first message are considered; fields
374              // set only in the second message will be skipped during
375              // comparison.  See set_scope().
376   };
377 
378   // DEPRECATED. Use FieldComparator::FloatComparison instead.
379   enum FloatComparison {
380     EXACT,       // Floats and doubles are compared exactly.
381     APPROXIMATE  // Floats and doubles are compared using the
382                  // MathUtil::AlmostEquals method.  See set_float_comparison().
383   };
384 
385   enum RepeatedFieldComparison {
386     AS_LIST,  // Repeated fields are compared in order.  Differing values at
387               // the same index are reported using ReportModified().  If the
388               // repeated fields have different numbers of elements, the
389               // unpaired elements are reported using ReportAdded() or
390               // ReportDeleted().
391     AS_SET,   // Treat all the repeated fields as sets.
392               // See TreatAsSet(), below.
393     AS_SMART_LIST,  // Similar to AS_SET, but preserves the order and finds the
394                     // longest matching sequence from the first matching
395                     // element. To use an optimal solution, call
396                     // SetMatchIndicesForSmartListCallback() to pass it in.
397     AS_SMART_SET,   // Similar to AS_SET, but matches elements with fewest diffs.
398   };
399 
400   // The elements of the given repeated field will be treated as a set for
401   // diffing purposes, so different orderings of the same elements will be
402   // considered equal.  Elements which are present on both sides of the
403   // comparison but which have changed position will be reported with
404   // ReportMoved().  Elements which only exist on one side or the other are
405   // reported with ReportAdded() and ReportDeleted() regardless of their
406   // positions.  ReportModified() is never used for this repeated field.  If
407   // the only differences between the compared messages is that some fields
408   // have been moved, then the comparison returns true.
409   //
410   // Note that despite the name of this method, this is really
411   // comparison as multisets: if one side of the comparison has a duplicate
412   // in the repeated field but the other side doesn't, this will count as
413   // a mismatch.
414   //
415   // If the scope of comparison is set to PARTIAL, then in addition to what's
416   // above, extra values added to repeated fields of the second message will
417   // not cause the comparison to fail.
418   //
419   // Note that set comparison is currently O(k * n^2) (where n is the total
420   // number of elements, and k is the average size of each element). In theory
421   // it could be made O(n * k) with a more complex hashing implementation. Feel
422   // free to contribute one if the current implementation is too slow for you.
423   // If partial matching is also enabled, the time complexity will be O(k * n^2
424   // + n^3) in which n^3 is the time complexity of the maximum matching
425   // algorithm.
426   //
427   // REQUIRES: field->is_repeated() and field not registered with TreatAsMap*
428   void TreatAsSet(const FieldDescriptor* field);
429   void TreatAsSmartSet(const FieldDescriptor* field);
430 
431   // The elements of the given repeated field will be treated as a list for
432   // diffing purposes, so different orderings of the same elements will NOT be
433   // considered equal.
434   //
435   // REQUIRES: field->is_repeated() and field not registered with TreatAsMap*
436   void TreatAsList(const FieldDescriptor* field);
437   // Note that the complexity is similar to treating as SET.
438   void TreatAsSmartList(const FieldDescriptor* field);
439 
440   // The elements of the given repeated field will be treated as a map for
441   // diffing purposes, with |key| being the map key.  Thus, elements with the
442   // same key will be compared even if they do not appear at the same index.
443   // Differences are reported similarly to TreatAsSet(), except that
444   // ReportModified() is used to report elements with the same key but
445   // different values.  Note that if an element is both moved and modified,
446   // only ReportModified() will be called.  As with TreatAsSet, if the only
447   // differences between the compared messages is that some fields have been
448   // moved, then the comparison returns true. See TreatAsSet for notes on
449   // performance.
450   //
451   // REQUIRES:  field->is_repeated()
452   // REQUIRES:  field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE
453   // REQUIRES:  key->containing_type() == field->message_type()
454   void TreatAsMap(const FieldDescriptor* field, const FieldDescriptor* key);
455   // Same as TreatAsMap except that this method will use multiple fields as
456   // the key in comparison. All specified fields in 'key_fields' should be
457   // present in the compared elements. Two elements will be treated as having
458   // the same key iff they have the same value for every specified field. There
459   // are two steps in the comparison process. The first one is key matching.
460   // Every element from one message will be compared to every element from
461   // the other message. Only fields in 'key_fields' are compared in this step
462   // to decide if two elements have the same key. The second step is value
463   // comparison. Those pairs of elements with the same key (with equal value
464   // for every field in 'key_fields') will be compared in this step.
465   // Time complexity of the first step is O(s * m * n ^ 2) where s is the
466   // average size of the fields specified in 'key_fields', m is the number of
467   // fields in 'key_fields' and n is the number of elements. If partial
468   // matching is enabled, an extra O(n^3) will be incurred by the maximum
469   // matching algorithm. The second step is O(k * n) where k is the average
470   // size of each element.
471   void TreatAsMapWithMultipleFieldsAsKey(
472       const FieldDescriptor* field,
473       const std::vector<const FieldDescriptor*>& key_fields);
474   // Same as TreatAsMapWithMultipleFieldsAsKey, except that the key fields
475   // do not necessarily need to be direct subfields. Each element in
476   // key_field_paths indicates a path from the message being compared, listing
477   // the successive subfields to reach the key field.
478   //
479   // REQUIRES:
480   //   for key_field_path in key_field_paths:
481   //     key_field_path[0]->containing_type() == field->message_type()
482   //     for i in [0, key_field_path.size() - 1):
483   //       key_field_path[i+1]->containing_type() ==
484   //           key_field_path[i]->message_type()
485   //       key_field_path[i]->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE
486   //       !key_field_path[i]->is_repeated()
487   void TreatAsMapWithMultipleFieldPathsAsKey(
488       const FieldDescriptor* field,
489       const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths);
490 
491   // Uses a custom MapKeyComparator to determine if two elements have the same
492   // key when comparing a repeated field as a map.
493   // The caller is responsible for deleting the key_comparator.
494   // This method varies from TreatAsMapWithMultipleFieldsAsKey only in the
495   // first key matching step. Rather than comparing some specified fields, it
496   // will invoke the IsMatch method of the given 'key_comparator' to decide if
497   // two elements have the same key.
498   void TreatAsMapUsingKeyComparator(const FieldDescriptor* field,
499                                     const MapKeyComparator* key_comparator);
500 
501   // Initiates and returns a new instance of MultipleFieldsMapKeyComparator.
502   MapKeyComparator* CreateMultipleFieldsMapKeyComparator(
503       const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths);
504 
505   // Adds a custom ignore criteria that is evaluated in addition to the
506   // ignored fields added with IgnoreField. Takes ownership of ignore_criteria:
507   // wraps it with absl::WrapUnique and forwards to the unique_ptr overload.
AddIgnoreCriteria(IgnoreCriteria * ignore_criteria)508   void AddIgnoreCriteria(IgnoreCriteria* ignore_criteria) {
509     AddIgnoreCriteria(absl::WrapUnique(ignore_criteria));
510   }
511   void AddIgnoreCriteria(std::unique_ptr<IgnoreCriteria> ignore_criteria);
512 
513   // Indicates that any field with the given descriptor should be
514   // ignored for the purposes of comparing two messages. This applies
515   // to fields nested in the message structure as well as top level
516   // ones. When the MessageDifferencer encounters an ignored field,
517   // ReportIgnored is called on the reporter, if one is specified.
518   //
519   // The only place where the field's 'ignored' status is not applied is when
520   // it is being used as a key in a field passed to TreatAsMap or is one of
521   // the fields passed to TreatAsMapWithMultipleFieldsAsKey.
522   // In this case it is compared in key matching but after that it's ignored
523   // in value comparison.
524   void IgnoreField(const FieldDescriptor* field);
525 
526   // Sets the field comparator used to determine differences between protocol
527   // buffer fields. By default it's set to a DefaultFieldComparator instance.
528   // MessageDifferencer doesn't take ownership over the passed object.
529   // Note that this method must be called before Compare for the comparator to
530   // be used.
531   void set_field_comparator(FieldComparator* comparator);
532   void set_field_comparator(DefaultFieldComparator* comparator);
533 
534   // DEPRECATED. Pass a DefaultFieldComparator instance instead.
535   // Sets the fraction and margin for the float comparison of a given field.
536   // Uses MathUtil::WithinFractionOrMargin to compare the values.
537   // NOTE: this method does nothing if differencer's field comparator has been
538   //       set to a custom object.
539   //
540   // REQUIRES: field->cpp_type == FieldDescriptor::CPPTYPE_DOUBLE or
541   //           field->cpp_type == FieldDescriptor::CPPTYPE_FLOAT
542   // REQUIRES: float_comparison_ == APPROXIMATE
543   void SetFractionAndMargin(const FieldDescriptor* field, double fraction,
544                             double margin);
545 
546   // Sets the type of comparison (as defined in the MessageFieldComparison
547   // enumeration above) that is used by this differencer when determining how
548   // to compare fields in messages.
549   void set_message_field_comparison(MessageFieldComparison comparison);
550 
551   // Returns the current message field comparison used in this differencer.
552   MessageFieldComparison message_field_comparison() const;
553 
554   // Tells the differencer whether or not to report matches (required for
555   // Reporter::ReportMatched). This method must be called before Compare. The
      // default for a new differencer is false.
set_report_matches(bool report_matches)556   void set_report_matches(bool report_matches) {
557     report_matches_ = report_matches;
558   }
559 
560   // Tells the differencer whether or not to report moves (in a set or map
561   // repeated field; see Reporter::ReportMoved). This method must be called
562   // before Compare. The default for a new differencer is true.
set_report_moves(bool report_moves)563   void set_report_moves(bool report_moves) { report_moves_ = report_moves; }
564 
565   // Tells the differencer whether or not to report ignored values (see
566   // Reporter::ReportIgnored). Must be called before Compare. Default: true.
set_report_ignores(bool report_ignores)567   void set_report_ignores(bool report_ignores) {
568     report_ignores_ = report_ignores;
569   }
570 
571   // Sets the scope of the comparison (one of the Scope values declared
572   // above) that this differencer uses when determining which fields to
573   // compare between the messages.
574   void set_scope(Scope scope);
575 
576   // Returns the scope currently used by this differencer.
577   Scope scope() const;
578 
579   // Only affects PARTIAL diffing. When set, every non-repeated no-presence
580   // field that holds its default value (i.e. is unset) in message1 but a
581   // non-default value in message2 is compared as well. Fields that then make
582   // Compare fail are listed by NoPresenceFieldsCausingFailure().
583   void set_force_compare_no_presence(bool value);
584 
585   // If set, the fields in message1 that equal the fields passed here will be
586   // treated as required for comparison, even if they are absent.
set_require_no_presence_fields(const google::protobuf::TextFormat::Parser::UnsetFieldsMetadata & fields)587   void set_require_no_presence_fields(
588       const google::protobuf::TextFormat::Parser::UnsetFieldsMetadata& fields) {
589     require_no_presence_fields_ = fields;
590   }
591 
592   // DEPRECATED. Pass a DefaultFieldComparator instance instead.
593   // Sets the type of comparison (as defined in the FloatComparison enumeration
594   // above) that is used by this differencer when comparing float (and double)
595   // fields in messages.
596   // NOTE: this method does nothing if the differencer's field comparator has
597   //       been set to a custom object.
598   void set_float_comparison(FloatComparison comparison);
599 
600   // Sets the type of comparison for repeated fields (as defined in the
601   // RepeatedFieldComparison enumeration above) that is used by this
602   // differencer when comparing repeated fields in messages.
603   void set_repeated_field_comparison(RepeatedFieldComparison comparison);
604 
605   // Returns the repeated field comparison currently used by this differencer.
606   RepeatedFieldComparison repeated_field_comparison() const;
607 
608   // Compares the two specified messages, returning true if they are the same,
609   // false otherwise. If this method returns false, any changes between the
610   // two messages will be reported if a Reporter was specified via
611   // ReportDifferencesTo (see also ReportDifferencesToString).
612   //
613   // This method REQUIRES that the two messages have the same
614   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
615   bool Compare(const Message& message1, const Message& message2);
616 
617   // Same as Compare above, except that only the fields listed in the two
618   // vectors of FieldDescriptors are compared.
619   bool CompareWithFields(
620       const Message& message1, const Message& message2,
621       const std::vector<const FieldDescriptor*>& message1_fields,
622       const std::vector<const FieldDescriptor*>& message2_fields);
623 
624   // Automatically creates a reporter that will output the differences
625   // found (if any) to the specified output string pointer. Note that this
626   // method must be called before Compare.
627   void ReportDifferencesToString(std::string* output);
628 
629   // Tells the MessageDifferencer to report differences via the specified
630   // reporter. Note that this method must be called before Compare for
631   // the reporter to be used. The caller remains responsible for deleting
632   // the reporter object.
633   // If the provided pointer is NULL, the MessageDifferencer stops reporting
634   // differences to any previously set reporters or output strings.
635   void ReportDifferencesTo(Reporter* reporter);
636 
637   // Returns the list of fields which was automatically added to the list of
638   // compared fields by calling set_force_compare_no_presence and caused the
639   // last call to Compare to fail.
NoPresenceFieldsCausingFailure()640   const absl::flat_hash_set<std::string>& NoPresenceFieldsCausingFailure() {
641     return force_compare_failure_triggering_fields_;
642   }
643 
644  private:
645   // Class for processing Any deserialization.  This logic is used by both the
646   // MessageDifferencer and StreamReporter classes.
647   class UnpackAnyField {
648    private:
649     std::unique_ptr<DynamicMessageFactory> dynamic_message_factory_;
650 
651    public:
652     UnpackAnyField() = default;
653     ~UnpackAnyField() = default;
654     // If "any" is of type google.protobuf.Any, extract its payload using
655     // DynamicMessageFactory and store in "data".
656     bool UnpackAny(const Message& any, std::unique_ptr<Message>* data);
657   };
658 
659  public:
660   // An implementation of the MessageDifferencer Reporter that outputs
661   // any differences found in human-readable form to the supplied
662   // ZeroCopyOutputStream or Printer. If a printer is used, the delimiter
663   // *must* be '$'.
664   //
665   // WARNING: this reporter does not necessarily flush its output until it is
666   // destroyed. As a result, it is not safe to assume the output is valid or
667   // complete until after you destroy the reporter. For example, if you use a
668   // StreamReporter to write to a StringOutputStream, the target string may
669   // contain uninitialized data until the reporter is destroyed.
670   class PROTOBUF_EXPORT StreamReporter : public Reporter {
671    public:
672     explicit StreamReporter(io::ZeroCopyOutputStream* output);
673     explicit StreamReporter(io::Printer* printer);  // delimiter '$'
674     StreamReporter(const StreamReporter&) = delete;
675     StreamReporter& operator=(const StreamReporter&) = delete;
676     ~StreamReporter() override;
677 
678     // When set to true, the stream reporter will also output aggregates nodes
679     // (i.e. messages and groups) whose subfields have been modified. When
680     // false, will only report the individual subfields. Defaults to false.
set_report_modified_aggregates(bool report)681     void set_report_modified_aggregates(bool report) {
682       report_modified_aggregates_ = report;
683     }
684 
685     // The following are implementations of the methods described above.
686 
687     void ReportAdded(const Message& message1, const Message& message2,
688                      const std::vector<SpecificField>& field_path) override;
689 
690     void ReportDeleted(const Message& message1, const Message& message2,
691                        const std::vector<SpecificField>& field_path) override;
692 
693     void ReportModified(const Message& message1, const Message& message2,
694                         const std::vector<SpecificField>& field_path) override;
695 
696     void ReportMoved(const Message& message1, const Message& message2,
697                      const std::vector<SpecificField>& field_path) override;
698 
699     void ReportMatched(const Message& message1, const Message& message2,
700                        const std::vector<SpecificField>& field_path) override;
701 
702     void ReportIgnored(const Message& message1, const Message& message2,
703                        const std::vector<SpecificField>& field_path) override;
704 
705     void ReportUnknownFieldIgnored(
706         const Message& message1, const Message& message2,
707         const std::vector<SpecificField>& field_path) override;
708 
709     // Messages that are being compared must be provided to StreamReporter prior
710     // to processing
711     void SetMessages(const Message& message1, const Message& message2);
712 
713    protected:
714     // Prints the specified path of fields to the buffer.
715     virtual void PrintPath(const std::vector<SpecificField>& field_path,
716                            bool left_side);
717 
718     // Prints the value of fields to the buffer.  left_side is true if the
719     // given message is from the left side of the comparison, false if it
720     // was the right.  This is relevant only to decide whether to follow
721     // unknown_field_index1 or unknown_field_index2 when an unknown field
722     // is encountered in field_path.
723     virtual void PrintValue(const Message& message,
724                             const std::vector<SpecificField>& field_path,
725                             bool left_side);
726 
727     // Prints the specified path of unknown fields to the buffer.
728     virtual void PrintUnknownFieldValue(const UnknownField* unknown_field);
729 
730     // Just print a string
731     void Print(const std::string& str);
732 
733    private:
734     // helper function for PrintPath that contains logic for printing maps
735     void PrintMapKey(bool left_side, const SpecificField& specific_field);
736 
737     io::Printer* printer_;
738     bool delete_printer_;
739     bool report_modified_aggregates_;
740     const Message* message1_;
741     const Message* message2_;
742     MessageDifferencer::UnpackAnyField unpack_any_field_;
743   };
744 
745  private:
746   friend class SimpleFieldComparator;
747 
748   // A MapKeyComparator to be used in TreatAsMapUsingKeyComparator.
749   // Its implementation needs to compare field values, which relies on some
750   // private methods of MessageDifferencer. That is why this class is declared
751   // as a nested class of MessageDifferencer (only forward-declared here).
752   class MultipleFieldsMapKeyComparator;
753 
754   // The MapKeyComparator used for map entries (see map_entry_key_comparator_).
755   class PROTOBUF_EXPORT MapEntryKeyComparator : public MapKeyComparator {
756    public:
757     explicit MapEntryKeyComparator(MessageDifferencer* message_differencer);
758     bool IsMatch(
759         const Message& message1, const Message& message2, int unpacked_any,
760         const std::vector<SpecificField>& parent_fields) const override;
761 
762    private:
763     MessageDifferencer* message_differencer_;  // presumably the ctor argument
764   };
765 
766   // Returns true if field1's number() is less than field2's.
767   static bool FieldBefore(const FieldDescriptor* field1,
768                           const FieldDescriptor* field2);
769 
770   // Retrieves all the set fields of the message, including extensions.
771   std::vector<const FieldDescriptor*> RetrieveFields(const Message& message,
772                                                      bool base_message);
773 
774   // Combines the two lists of fields and returns the combined list.
775   // All fields present in both lists will always be included in the combined
776   // list.  Fields only present in one of the lists will only appear in the
777   // combined list if the corresponding fields_scope option is set to FULL.
778   std::vector<const FieldDescriptor*> CombineFields(
779       const Message& message1,
780       const std::vector<const FieldDescriptor*>& fields1, Scope fields1_scope,
781       const std::vector<const FieldDescriptor*>& fields2, Scope fields2_scope);
782 
783   // Internal version of the Compare method which performs the actual
784   // comparison. The parent_fields vector is a vector containing field
785   // descriptors of all fields accessed to get to this comparison operation
786   // (e.g. if the current message is an embedded message, the parent_fields
787   // vector will contain the field that has this embedded message).
788   bool Compare(const Message& message1, const Message& message2,
789                int unpacked_any, std::vector<SpecificField>* parent_fields);
790 
791   // Compares all the unknown fields in the two messages.
792   bool CompareUnknownFields(const Message& message1, const Message& message2,
793                             const UnknownFieldSet&, const UnknownFieldSet&,
794                             std::vector<SpecificField>* parent_fields);
795 
796   // Compares the specified messages for the requested field lists. The field
797   // lists are modified depending on comparison settings, and then passed to
798   // CompareWithFieldsInternal.
799   bool CompareRequestedFieldsUsingSettings(
800       const Message& message1, const Message& message2, int unpacked_any,
801       const std::vector<const FieldDescriptor*>& message1_fields,
802       const std::vector<const FieldDescriptor*>& message2_fields,
803       std::vector<SpecificField>* parent_fields);
804 
805   // Compares the specified messages using the specified field lists.
806   bool CompareWithFieldsInternal(
807       const Message& message1, const Message& message2, int unpacked_any,
808       const std::vector<const FieldDescriptor*>& message1_fields,
809       const std::vector<const FieldDescriptor*>& message2_fields,
810       std::vector<SpecificField>* parent_fields);
811 
812   // Compares the repeated fields, and reports the error.
813   bool CompareRepeatedField(const Message& message1, const Message& message2,
814                             int unpacked_any, const FieldDescriptor* field,
815                             std::vector<SpecificField>* parent_fields);
816 
817   // Compares map fields, and reports the error.
818   bool CompareMapField(const Message& message1, const Message& message2,
819                        int unpacked_any, const FieldDescriptor* field,
820                        std::vector<SpecificField>* parent_fields);
821 
822   // Helper for CompareRepeatedField and CompareMapField: compares and reports
823   // differences element-wise. This is the implementation for non-map fields,
824   // and can also compare map fields by using the underlying representation.
825   bool CompareRepeatedRep(const Message& message1, const Message& message2,
826                           int unpacked_any, const FieldDescriptor* field,
827                           std::vector<SpecificField>* parent_fields);
828 
829   // Helper for CompareMapField: compares the map fields using map reflection
830   // instead of syncing to the repeated representation.
831   bool CompareMapFieldByMapReflection(const Message& message1,
832                                       const Message& message2, int unpacked_any,
833                                       const FieldDescriptor* field,
834                                       std::vector<SpecificField>* parent_fields,
835                                       DefaultFieldComparator* comparator);
836 
837   // Shorthand for CompareFieldValueUsingParentFields with NULL parent_fields.
838   bool CompareFieldValue(const Message& message1, const Message& message2,
839                          int unpacked_any, const FieldDescriptor* field,
840                          int index1, int index2);
841 
842   // Compares the specified field on the two messages, returning
843   // true if they are the same, false otherwise. For repeated fields,
844   // this method only compares the value in the specified index. This method
845   // uses Compare functions to recurse into submessages.
846   // The parent_fields vector is used in calls to a Reporter instance.
847   // It can be NULL, in which case the MessageDifferencer will create a new
848   // list of parent messages if it needs to recursively compare the given field.
849   // To avoid confusing users you should not set it to NULL unless you modified
850   // Reporter to handle the change of parent_fields correctly.
851   bool CompareFieldValueUsingParentFields(
852       const Message& message1, const Message& message2, int unpacked_any,
853       const FieldDescriptor* field, int index1, int index2,
854       std::vector<SpecificField>* parent_fields);
855 
856   // Compares the specified field on the two messages, returning the comparison
857   // result, as returned by the appropriate FieldComparator.
858   FieldComparator::ComparisonResult GetFieldComparisonResult(
859       const Message& message1, const Message& message2,
860       const FieldDescriptor* field, int index1, int index2,
861       const FieldContext* field_context);
862 
863   // Checks whether the two elements in the repeated field match each other.
864   // If key_comparator is NULL, this function returns true when the two
865   // elements are equal.
866   bool IsMatch(const FieldDescriptor* repeated_field,
867                const MapKeyComparator* key_comparator, const Message* message1,
868                const Message* message2, int unpacked_any,
869                const std::vector<SpecificField>& parent_fields,
870                Reporter* reporter, int index1, int index2);
871 
872   // Returns true when this repeated field has been configured to be treated
873   // as a Set / SmartSet / SmartList.
874   bool IsTreatedAsSet(const FieldDescriptor* field);
875   bool IsTreatedAsSmartSet(const FieldDescriptor* field);
876 
877   bool IsTreatedAsSmartList(const FieldDescriptor* field);
878   // Sets the callback that post-processes matched indices for SMART_LIST.
879   // By default MatchIndicesPostProcessorForSmartList is used, which finds the
880   // longest matching sequence from the first matching element. The callback
881   // takes two vectors of indices matched in the other vector (-1: no match).
882   void SetMatchIndicesForSmartListCallback(
883       std::function<void(std::vector<int>*, std::vector<int>*)> callback);
884 
885   // Returns true when this repeated field is to be compared as a subset, i.e.
886   // has been configured to be treated as a set or map and scope is set to
887   // PARTIAL.
888   bool IsTreatedAsSubset(const FieldDescriptor* field);
889 
890   // Returns true if this field is to be ignored when this
891   // MessageDifferencer compares messages.
892   bool IsIgnored(const Message& message1, const Message& message2,
893                  const FieldDescriptor* field,
894                  const std::vector<SpecificField>& parent_fields);
895 
896   // Returns true if this unknown field is to be ignored when this
897   // MessageDifferencer compares messages.
898   bool IsUnknownFieldIgnored(const Message& message1, const Message& message2,
899                              const SpecificField& field,
900                              const std::vector<SpecificField>& parent_fields);
901 
902   // Returns MapKeyComparator* when this field has been configured to be treated
903   // as a map or its is_map() returns true.  If not, returns NULL.
904   const MapKeyComparator* GetMapKeyComparator(
905       const FieldDescriptor* field) const;
906 
907   // Attempts to match indices of a repeated field, so that the contained values
908   // match. Clears output vectors and sets their values to indices of paired
909   // messages, i.e. if message1[0] matches message2[1], then match_list1[0] == 1
910   // and match_list2[1] == 0. The unmatched indices are indicated by -1.
911   // Assumes the repeated field is not treated as a simple list.
912   // This method returns false if the match failed. However, a true return
913   // value does not by itself mean that the comparison succeeds (you need to
914   // double-check in this case).
915   bool MatchRepeatedFieldIndices(
916       const Message& message1, const Message& message2, int unpacked_any,
917       const FieldDescriptor* repeated_field,
918       const MapKeyComparator* key_comparator,
919       const std::vector<SpecificField>& parent_fields,
920       std::vector<int>* match_list1, std::vector<int>* match_list2);
921 
922   // Checks if index is equal to new_index in all the specific fields.
923   static bool CheckPathChanged(const std::vector<SpecificField>& parent_fields);
924 
925   // ABSL_CHECKs that the given repeated field can be compared according to
926   // new_comparison.
927   void CheckRepeatedFieldComparisons(
928       const FieldDescriptor* field,
929       const RepeatedFieldComparison& new_comparison);
930 
931   // Whether the field should still be compared despite its absence in message1.
932   bool ShouldCompareNoPresence(const Message& message1,
933                                const Reflection& reflection1,
934                                const FieldDescriptor* field2) const;
935 
936   Reporter* reporter_;
937   DefaultFieldComparator default_field_comparator_;
938   MessageFieldComparison message_field_comparison_;
939   Scope scope_;
940   absl::flat_hash_set<const FieldDescriptor*> force_compare_no_presence_fields_;
941   google::protobuf::TextFormat::Parser::UnsetFieldsMetadata require_no_presence_fields_;
942   absl::flat_hash_set<std::string> force_compare_failure_triggering_fields_;
943   RepeatedFieldComparison repeated_field_comparison_;
944 
945   absl::flat_hash_map<const FieldDescriptor*, RepeatedFieldComparison>
946       repeated_field_comparisons_;
947   // Keeps track of MapKeyComparators that are created within
948   // MessageDifferencer. These MapKeyComparators should be deleted
949   // before MessageDifferencer is destroyed.
950   // When TreatAsMap or TreatAsMapWithMultipleFieldsAsKey is called, we don't
951   // store the supplied FieldDescriptors directly. Instead, a new
952   // MapKeyComparator is created for comparison purposes.
953   std::vector<MapKeyComparator*> owned_key_comparators_;
954   absl::flat_hash_map<const FieldDescriptor*, const MapKeyComparator*>
955       map_field_key_comparator_;
956   MapEntryKeyComparator map_entry_key_comparator_;
957   std::vector<std::unique_ptr<IgnoreCriteria>> ignore_criteria_;
958   // Reused multiple times in RetrieveFields to avoid extra allocations.
959   std::vector<const FieldDescriptor*> tmp_message_fields_;
960 
961   absl::flat_hash_set<const FieldDescriptor*> ignored_fields_;
962 
963   union {  // Active member presumably tracked by field_comparator_kind_.
964     DefaultFieldComparator* default_impl;
965     FieldComparator* base;
966   } field_comparator_ = {&default_field_comparator_};
967   enum { kFCDefault, kFCBase } field_comparator_kind_ = kFCDefault;
968 
969   bool report_matches_;  // Written by set_report_matches.
970   bool report_moves_;    // Written by set_report_moves.
971   bool report_ignores_;  // Written by set_report_ignores.
972   bool force_compare_no_presence_ = false;
973 
974   std::string* output_string_;
975 
976   // Callback to post-process the matched indices to support SMART_LIST.
977   std::function<void(std::vector<int>*, std::vector<int>*)>
978       match_indices_for_smart_list_callback_;
979 
980   MessageDifferencer::UnpackAnyField unpack_any_field_;
981 };
982 
983 // This class provides extra information to the FieldComparator::Compare
984 // function.
985 class PROTOBUF_EXPORT FieldContext {
986  public:
FieldContext(std::vector<MessageDifferencer::SpecificField> * parent_fields)987   explicit FieldContext(
988       std::vector<MessageDifferencer::SpecificField>* parent_fields)
989       : parent_fields_(parent_fields) {}
990 
parent_fields()991   std::vector<MessageDifferencer::SpecificField>* parent_fields() const {
992     return parent_fields_;
993   }
994 
995  private:
996   std::vector<MessageDifferencer::SpecificField>* parent_fields_;
997 };
998 
999 }  // namespace util
1000 }  // namespace protobuf
1001 }  // namespace google
1002 
1003 #include "google/protobuf/port_undef.inc"
1004 
1005 #endif  // GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
1006