• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: jschorr@google.com (Joseph Schorr)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 //
35 // This file defines static methods and classes for comparing Protocol
36 // Messages.
37 //
38 // Aug. 2008: Added Unknown Fields Comparison for messages.
39 // Aug. 2009: Added different options to compare repeated fields.
40 // Apr. 2010: Moved field comparison to FieldComparator.
41 
42 #ifndef GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
43 #define GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
44 
45 #include <functional>
46 #include <map>
47 #include <set>
48 #include <string>
49 #include <vector>
50 
51 #include <google/protobuf/descriptor.h>  // FieldDescriptor
52 #include <google/protobuf/message.h>     // Message
53 #include <google/protobuf/unknown_field_set.h>
54 #include <google/protobuf/util/field_comparator.h>
55 
56 // Always include as last one, otherwise it can break compilation
57 #include <google/protobuf/port_def.inc>
58 
59 namespace google {
60 namespace protobuf {
61 
62 class DynamicMessageFactory;
63 class FieldDescriptor;
64 
65 namespace io {
66 class ZeroCopyOutputStream;
67 class Printer;
68 }  // namespace io
69 
70 namespace util {
71 
72 class DefaultFieldComparator;
73 class FieldContext;  // declared below MessageDifferencer
74 
75 // Defines a collection of field descriptors.
76 // In case of internal google codebase we are using absl::FixedArray instead
77 // of vector. It significantly speeds up proto comparison (by ~30%) by
78 // reducing the number of malloc/free operations
79 typedef std::vector<const FieldDescriptor*> FieldDescriptorArray;
80 
81 // A basic differencer that can be used to determine
82 // the differences between two specified Protocol Messages. If any differences
83 // are found, the Compare method will return false, and any differencer reporter
84 // specified via ReportDifferencesTo will have its reporting methods called (see
85 // below for implementation of the report). Based off of the original
86 // ProtocolDifferencer implementation in //net/proto/protocol-differencer.h
87 // (Thanks Todd!).
88 //
89 // MessageDifferencer REQUIRES that compared messages be the same type, defined
90 // as messages that share the same descriptor.  If not, the behavior of this
91 // class is undefined.
92 //
93 // People disagree on what MessageDifferencer should do when asked to compare
94 // messages with different descriptors.  Some people think it should always
95 // return false.  Others expect it to try to look for similar fields and
96 // compare them anyway -- especially if the descriptors happen to be identical.
97 // If we chose either of these behaviors, some set of people would find it
98 // surprising, and could end up writing code expecting the other behavior
99 // without realizing their error.  Therefore, we forbid that usage.
100 //
101 // This class is implemented based on the proto2 reflection. The performance
102 // should be good enough for normal usages. However, for places where the
103 // performance is extremely sensitive, there are several alternatives:
104 // - Comparing serialized string
105 // Downside: false negatives (there are messages that are the same but their
106 // serialized strings are different).
107 // - Equals code generator by compiler plugin (net/proto2/contrib/equals_plugin)
108 // Downside: more generated code; maintenance overhead for the additional rule
109 // (must be in sync with the original proto_library).
110 //
111 // Note on handling of google.protobuf.Any: MessageDifferencer automatically
112 // unpacks Any::value into a Message and compares its individual fields.
113 // Messages encoded in a repeated Any cannot be compared using TreatAsMap.
114 //
115 // Note on thread-safety: MessageDifferencer is *not* thread-safe. You need to
116 // guard it with a lock to use the same MessageDifferencer instance from
117 // multiple threads. Note that it's fine to call static comparison methods
118 // (like MessageDifferencer::Equals) concurrently, but it's not recommended for
119 // performance critical code as it leads to extra allocations.
120 class PROTOBUF_EXPORT MessageDifferencer {
121  public:
122   // Determines whether the supplied messages are equal. Equality is defined as
123   // all fields within the two messages being set to the same value. Primitive
124   // fields and strings are compared by value while embedded messages/groups
125   // are compared as if via a recursive call. Use Compare() with IgnoreField()
126   // if some fields should be ignored in the comparison. Use Compare() with
127   // TreatAsSet() if there are repeated fields where ordering does not matter.
128   //
129   // This method REQUIRES that the two messages have the same
130   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
131   static bool Equals(const Message& message1, const Message& message2);
132 
133   // Determines whether the supplied messages are equivalent. Equivalency is
134   // defined as all fields within the two messages having the same value. This
135   // differs from the Equals method above in that fields with default values
136   // are considered set to said value automatically. For details on how default
137   // values are defined for each field type, see:
138   // https://developers.google.com/protocol-buffers/docs/proto?csw=1#optional.
139   // Also, Equivalent() ignores unknown fields. Use IgnoreField() and Compare()
140   // if some fields should be ignored in the comparison.
141   //
142   // This method REQUIRES that the two messages have the same
143   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
144   static bool Equivalent(const Message& message1, const Message& message2);
145 
146   // Determines whether the supplied messages are approximately equal.
147   // Approximate equality is defined as all fields within the two messages
148   // being approximately equal.  Primitive (non-float) fields and strings are
149   // compared by value, floats are compared using MathUtil::AlmostEquals() and
150   // embedded messages/groups are compared as if via a recursive call. Use
151   // IgnoreField() and Compare() if some fields should be ignored in the
152   // comparison.
153   //
154   // This method REQUIRES that the two messages have the same
155   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
156   static bool ApproximatelyEquals(const Message& message1,
157                                   const Message& message2);
158 
159   // Determines whether the supplied messages are approximately equivalent.
160   // Approximate equivalency is defined as all fields within the two messages
161   // being approximately equivalent. As in
162   // MessageDifferencer::ApproximatelyEquals, primitive (non-float) fields and
163   // strings are compared by value, floats are compared using
164   // MathUtil::AlmostEquals() and embedded messages/groups are compared as if
165   // via a recursive call. However, fields with default values are considered
166   // set to said value, as per MessageDiffencer::Equivalent. Use IgnoreField()
167   // and Compare() if some fields should be ignored in the comparison.
168   //
169   // This method REQUIRES that the two messages have the same
170   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
171   static bool ApproximatelyEquivalent(const Message& message1,
172                                       const Message& message2);
173 
174   // Identifies an individual field in a message instance.  Used for field_path,
175   // below.
176   struct SpecificField {
177     // For known fields, "field" is filled in and "unknown_field_number" is -1.
178     // For unknown fields, "field" is NULL, "unknown_field_number" is the field
179     // number, and "unknown_field_type" is its type.
180     const FieldDescriptor* field = nullptr;
181     int unknown_field_number = -1;
182     UnknownField::Type unknown_field_type = UnknownField::Type::TYPE_VARINT;
183 
184     // If this a repeated field, "index" is the index within it.  For unknown
185     // fields, this is the index of the field among all unknown fields of the
186     // same field number and type.
187     int index = -1;
188 
189     // If "field" is a repeated field which is being treated as a map or
190     // a set (see TreatAsMap() and TreatAsSet(), below), new_index indicates
191     // the index the position to which the element has moved.  If the element
192     // has not moved, "new_index" will have the same value as "index".
193     int new_index = -1;
194 
195     // For unknown fields, these are the pointers to the UnknownFieldSet
196     // containing the unknown fields. In certain cases (e.g. proto1's
197     // MessageSet, or nested groups of unknown fields), these may differ from
198     // the messages' internal UnknownFieldSets.
199     const UnknownFieldSet* unknown_field_set1 = nullptr;
200     const UnknownFieldSet* unknown_field_set2 = nullptr;
201 
202     // For unknown fields, these are the index of the field within the
203     // UnknownFieldSets. One or the other will be -1 when
204     // reporting an addition or deletion.
205     int unknown_field_index1 = -1;
206     int unknown_field_index2 = -1;
207   };
208 
209   // Abstract base class from which all MessageDifferencer
210   // reporters derive. The five Report* methods below will be called when
211   // a field has been added, deleted, modified, moved, or matched. The third
212   // argument is a vector of FieldDescriptor pointers which describes the chain
213   // of fields that was taken to find the current field. For example, for a
214   // field found in an embedded message, the vector will contain two
215   // FieldDescriptors. The first will be the field of the embedded message
216   // itself and the second will be the actual field in the embedded message
217   // that was added/deleted/modified.
218   // Fields will be reported in PostTraversalOrder.
219   // For example, given following proto, if both baz and quux are changed.
220   // foo {
221   //   bar {
222   //     baz: 1
223   //     quux: 2
224   //   }
225   // }
226   // ReportModified will be invoked with following order:
227   // 1. foo.bar.baz or foo.bar.quux
228   // 2. foo.bar.quux or foo.bar.baz
229   // 2. foo.bar
230   // 3. foo
231   class PROTOBUF_EXPORT Reporter {
232    public:
233     Reporter();
234     virtual ~Reporter();
235 
236     // Reports that a field has been added into Message2.
237     virtual void ReportAdded(const Message& message1, const Message& message2,
238                              const std::vector<SpecificField>& field_path) = 0;
239 
240     // Reports that a field has been deleted from Message1.
241     virtual void ReportDeleted(
242         const Message& message1, const Message& message2,
243         const std::vector<SpecificField>& field_path) = 0;
244 
245     // Reports that the value of a field has been modified.
246     virtual void ReportModified(
247         const Message& message1, const Message& message2,
248         const std::vector<SpecificField>& field_path) = 0;
249 
250     // Reports that a repeated field has been moved to another location.  This
251     // only applies when using TreatAsSet or TreatAsMap()  -- see below. Also
252     // note that for any given field, ReportModified and ReportMoved are
253     // mutually exclusive. If a field has been both moved and modified, then
254     // only ReportModified will be called.
ReportMoved(const Message &,const Message &,const std::vector<SpecificField> &)255     virtual void ReportMoved(
256         const Message& /* message1 */, const Message& /* message2 */,
257         const std::vector<SpecificField>& /* field_path */) {}
258 
259     // Reports that two fields match. Useful for doing side-by-side diffs.
260     // This function is mutually exclusive with ReportModified and ReportMoved.
261     // Note that you must call set_report_matches(true) before calling Compare
262     // to make use of this function.
ReportMatched(const Message &,const Message &,const std::vector<SpecificField> &)263     virtual void ReportMatched(
264         const Message& /* message1 */, const Message& /* message2 */,
265         const std::vector<SpecificField>& /* field_path */) {}
266 
267     // Reports that two fields would have been compared, but the
268     // comparison has been skipped because the field was marked as
269     // 'ignored' using IgnoreField().  This function is mutually
270     // exclusive with all the other Report() functions.
271     //
272     // The contract of ReportIgnored is slightly different than the
273     // other Report() functions, in that |field_path.back().index| is
274     // always equal to -1, even if the last field is repeated. This is
275     // because while the other Report() functions indicate where in a
276     // repeated field the action (Addition, Deletion, etc...)
277     // happened, when a repeated field is 'ignored', the differencer
278     // simply calls ReportIgnored on the repeated field as a whole and
279     // moves on without looking at its individual elements.
280     //
281     // Furthermore, ReportIgnored() does not indicate whether the
282     // fields were in fact equal or not, as Compare() does not inspect
283     // these fields at all. It is up to the Reporter to decide whether
284     // the fields are equal or not (perhaps with a second call to
285     // Compare()), if it cares.
ReportIgnored(const Message &,const Message &,const std::vector<SpecificField> &)286     virtual void ReportIgnored(
287         const Message& /* message1 */, const Message& /* message2 */,
288         const std::vector<SpecificField>& /* field_path */) {}
289 
290     // Report that an unknown field is ignored. (see comment above).
291     // Note this is a different function since the last SpecificField in field
292     // path has a null field.  This could break existing Reporter.
ReportUnknownFieldIgnored(const Message &,const Message &,const std::vector<SpecificField> &)293     virtual void ReportUnknownFieldIgnored(
294         const Message& /* message1 */, const Message& /* message2 */,
295         const std::vector<SpecificField>& /* field_path */) {}
296 
297    private:
298     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Reporter);
299   };
300 
301   // MapKeyComparator is used to determine if two elements have the same key
302   // when comparing elements of a repeated field as a map.
303   class PROTOBUF_EXPORT MapKeyComparator {
304    public:
305     MapKeyComparator();
306     virtual ~MapKeyComparator();
307 
IsMatch(const Message &,const Message &,const std::vector<SpecificField> &)308     virtual bool IsMatch(
309         const Message& /* message1 */, const Message& /* message2 */,
310         const std::vector<SpecificField>& /* parent_fields */) const {
311       GOOGLE_CHECK(false) << "IsMatch() is not implemented.";
312       return false;
313     }
314 
315    private:
316     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MapKeyComparator);
317   };
318 
319   // Abstract base class from which all IgnoreCriteria derive.
320   // By adding IgnoreCriteria more complex ignore logic can be implemented.
321   // IgnoreCriteria are registered with AddIgnoreCriteria. For each compared
322   // field IsIgnored is called on each added IgnoreCriteria until one returns
323   // true or all return false.
324   // IsIgnored is called for fields where at least one side has a value.
325   class PROTOBUF_EXPORT IgnoreCriteria {
326    public:
327     IgnoreCriteria();
328     virtual ~IgnoreCriteria();
329 
330     // Returns true if the field should be ignored.
331     virtual bool IsIgnored(
332         const Message& /* message1 */, const Message& /* message2 */,
333         const FieldDescriptor* /* field */,
334         const std::vector<SpecificField>& /* parent_fields */) = 0;
335 
336     // Returns true if the unknown field should be ignored.
337     // Note: This will be called for unknown fields as well in which case
338     //       field.field will be null.
IsUnknownFieldIgnored(const Message &,const Message &,const SpecificField &,const std::vector<SpecificField> &)339     virtual bool IsUnknownFieldIgnored(
340         const Message& /* message1 */, const Message& /* message2 */,
341         const SpecificField& /* field */,
342         const std::vector<SpecificField>& /* parent_fields */) {
343       return false;
344     }
345   };
346 
347   // To add a Reporter, construct default here, then use ReportDifferencesTo or
348   // ReportDifferencesToString.
349   explicit MessageDifferencer();
350 
351   ~MessageDifferencer();
352 
353   enum MessageFieldComparison {
354     EQUAL,       // Fields must be present in both messages
355                  // for the messages to be considered the same.
356     EQUIVALENT,  // Fields with default values are considered set
357                  // for comparison purposes even if not explicitly
358                  // set in the messages themselves.  Unknown fields
359                  // are ignored.
360   };
361 
362   enum Scope {
363     FULL,    // All fields of both messages are considered in the comparison.
364     PARTIAL  // Only fields present in the first message are considered; fields
365              // set only in the second message will be skipped during
366              // comparison.
367   };
368 
369   // DEPRECATED. Use FieldComparator::FloatComparison instead.
370   enum FloatComparison {
371     EXACT,       // Floats and doubles are compared exactly.
372     APPROXIMATE  // Floats and doubles are compared using the
373                  // MathUtil::AlmostEquals method.
374   };
375 
376   enum RepeatedFieldComparison {
377     AS_LIST,  // Repeated fields are compared in order.  Differing values at
378               // the same index are reported using ReportModified().  If the
379               // repeated fields have different numbers of elements, the
380               // unpaired elements are reported using ReportAdded() or
381               // ReportDeleted().
382     AS_SET,   // Treat all the repeated fields as sets.
383               // See TreatAsSet(), as below.
384     AS_SMART_LIST,  // Similar to AS_SET, but preserve the order and find the
385                     // longest matching sequence from the first matching
386                     // element. To use an optimal solution, call
387                     // SetMatchIndicesForSmartListCallback() to pass it in.
388     AS_SMART_SET,   // Similar to AS_SET, but match elements with fewest diffs.
389   };
390 
391   // The elements of the given repeated field will be treated as a set for
392   // diffing purposes, so different orderings of the same elements will be
393   // considered equal.  Elements which are present on both sides of the
394   // comparison but which have changed position will be reported with
395   // ReportMoved().  Elements which only exist on one side or the other are
396   // reported with ReportAdded() and ReportDeleted() regardless of their
397   // positions.  ReportModified() is never used for this repeated field.  If
398   // the only differences between the compared messages is that some fields
399   // have been moved, then the comparison returns true.
400   //
401   // Note that despite the name of this method, this is really
402   // comparison as multisets: if one side of the comparison has a duplicate
403   // in the repeated field but the other side doesn't, this will count as
404   // a mismatch.
405   //
406   // If the scope of comparison is set to PARTIAL, then in addition to what's
407   // above, extra values added to repeated fields of the second message will
408   // not cause the comparison to fail.
409   //
410   // Note that set comparison is currently O(k * n^2) (where n is the total
411   // number of elements, and k is the average size of each element). In theory
412   // it could be made O(n * k) with a more complex hashing implementation. Feel
413   // free to contribute one if the current implementation is too slow for you.
414   // If partial matching is also enabled, the time complexity will be O(k * n^2
415   // + n^3) in which n^3 is the time complexity of the maximum matching
416   // algorithm.
417   //
418   // REQUIRES:  field->is_repeated() and field not registered with TreatAsList
419   void TreatAsSet(const FieldDescriptor* field);
420   void TreatAsSmartSet(const FieldDescriptor* field);
421 
422   // The elements of the given repeated field will be treated as a list for
423   // diffing purposes, so different orderings of the same elements will NOT be
424   // considered equal.
425   //
426   // REQUIRED: field->is_repeated() and field not registered with TreatAsSet
427   void TreatAsList(const FieldDescriptor* field);
428   // Note that the complexity is similar to treating as SET.
429   void TreatAsSmartList(const FieldDescriptor* field);
430 
431   // The elements of the given repeated field will be treated as a map for
432   // diffing purposes, with |key| being the map key.  Thus, elements with the
433   // same key will be compared even if they do not appear at the same index.
434   // Differences are reported similarly to TreatAsSet(), except that
435   // ReportModified() is used to report elements with the same key but
436   // different values.  Note that if an element is both moved and modified,
437   // only ReportModified() will be called.  As with TreatAsSet, if the only
438   // differences between the compared messages is that some fields have been
439   // moved, then the comparison returns true. See TreatAsSet for notes on
440   // performance.
441   //
442   // REQUIRES:  field->is_repeated()
443   // REQUIRES:  field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE
444   // REQUIRES:  key->containing_type() == field->message_type()
445   void TreatAsMap(const FieldDescriptor* field, const FieldDescriptor* key);
446   // Same as TreatAsMap except that this method will use multiple fields as
447   // the key in comparison. All specified fields in 'key_fields' should be
448   // present in the compared elements. Two elements will be treated as having
449   // the same key iff they have the same value for every specified field. There
450   // are two steps in the comparison process. The first one is key matching.
451   // Every element from one message will be compared to every element from
452   // the other message. Only fields in 'key_fields' are compared in this step
453   // to decide if two elements have the same key. The second step is value
454   // comparison. Those pairs of elements with the same key (with equal value
455   // for every field in 'key_fields') will be compared in this step.
456   // Time complexity of the first step is O(s * m * n ^ 2) where s is the
457   // average size of the fields specified in 'key_fields', m is the number of
458   // fields in 'key_fields' and n is the number of elements. If partial
459   // matching is enabled, an extra O(n^3) will be incured by the maximum
460   // matching algorithm. The second step is O(k * n) where k is the average
461   // size of each element.
462   void TreatAsMapWithMultipleFieldsAsKey(
463       const FieldDescriptor* field,
464       const std::vector<const FieldDescriptor*>& key_fields);
465   // Same as TreatAsMapWithMultipleFieldsAsKey, except that each of the field
466   // do not necessarily need to be a direct subfield. Each element in
467   // key_field_paths indicate a path from the message being compared, listing
468   // successive subfield to reach the key field.
469   //
470   // REQUIRES:
471   //   for key_field_path in key_field_paths:
472   //     key_field_path[0]->containing_type() == field->message_type()
473   //     for i in [0, key_field_path.size() - 1):
474   //       key_field_path[i+1]->containing_type() ==
475   //           key_field_path[i]->message_type()
476   //       key_field_path[i]->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE
477   //       !key_field_path[i]->is_repeated()
478   void TreatAsMapWithMultipleFieldPathsAsKey(
479       const FieldDescriptor* field,
480       const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths);
481 
482   // Uses a custom MapKeyComparator to determine if two elements have the same
483   // key when comparing a repeated field as a map.
484   // The caller is responsible to delete the key_comparator.
485   // This method varies from TreatAsMapWithMultipleFieldsAsKey only in the
486   // first key matching step. Rather than comparing some specified fields, it
487   // will invoke the IsMatch method of the given 'key_comparator' to decide if
488   // two elements have the same key.
489   void TreatAsMapUsingKeyComparator(const FieldDescriptor* field,
490                                     const MapKeyComparator* key_comparator);
491 
492   // Initiates and returns a new instance of MultipleFieldsMapKeyComparator.
493   MapKeyComparator* CreateMultipleFieldsMapKeyComparator(
494       const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths);
495 
496   // Add a custom ignore criteria that is evaluated in addition to the
497   // ignored fields added with IgnoreField.
498   // Takes ownership of ignore_criteria.
499   void AddIgnoreCriteria(IgnoreCriteria* ignore_criteria);
500 
501   // Indicates that any field with the given descriptor should be
502   // ignored for the purposes of comparing two messages. This applies
503   // to fields nested in the message structure as well as top level
504   // ones. When the MessageDifferencer encounters an ignored field,
505   // ReportIgnored is called on the reporter, if one is specified.
506   //
507   // The only place where the field's 'ignored' status is not applied is when
508   // it is being used as a key in a field passed to TreatAsMap or is one of
509   // the fields passed to TreatAsMapWithMultipleFieldsAsKey.
510   // In this case it is compared in key matching but after that it's ignored
511   // in value comparison.
512   void IgnoreField(const FieldDescriptor* field);
513 
514   // Sets the field comparator used to determine differences between protocol
515   // buffer fields. By default it's set to a DefaultFieldComparator instance.
516   // MessageDifferencer doesn't take ownership over the passed object.
517   // Note that this method must be called before Compare for the comparator to
518   // be used.
519   void set_field_comparator(FieldComparator* comparator);
520 
521   // DEPRECATED. Pass a DefaultFieldComparator instance instead.
522   // Sets the fraction and margin for the float comparison of a given field.
523   // Uses MathUtil::WithinFractionOrMargin to compare the values.
524   // NOTE: this method does nothing if differencer's field comparator has been
525   //       set to a custom object.
526   //
527   // REQUIRES: field->cpp_type == FieldDescriptor::CPPTYPE_DOUBLE or
528   //           field->cpp_type == FieldDescriptor::CPPTYPE_FLOAT
529   // REQUIRES: float_comparison_ == APPROXIMATE
530   void SetFractionAndMargin(const FieldDescriptor* field, double fraction,
531                             double margin);
532 
533   // Sets the type of comparison (as defined in the MessageFieldComparison
534   // enumeration above) that is used by this differencer when determining how
535   // to compare fields in messages.
536   void set_message_field_comparison(MessageFieldComparison comparison);
537 
538   // Tells the differencer whether or not to report matches. This method must
539   // be called before Compare. The default for a new differencer is false.
set_report_matches(bool report_matches)540   void set_report_matches(bool report_matches) {
541     report_matches_ = report_matches;
542   }
543 
544   // Tells the differencer whether or not to report moves (in a set or map
545   // repeated field). This method must be called before Compare. The default for
546   // a new differencer is true.
set_report_moves(bool report_moves)547   void set_report_moves(bool report_moves) { report_moves_ = report_moves; }
548 
549   // Tells the differencer whether or not to report ignored values. This method
550   // must be called before Compare. The default for a new differencer is true.
set_report_ignores(bool report_ignores)551   void set_report_ignores(bool report_ignores) {
552     report_ignores_ = report_ignores;
553   }
554 
555   // Sets the scope of the comparison (as defined in the Scope enumeration
556   // above) that is used by this differencer when determining which fields to
557   // compare between the messages.
558   void set_scope(Scope scope);
559 
560   // Returns the current scope used by this differencer.
561   Scope scope();
562 
563   // DEPRECATED. Pass a DefaultFieldComparator instance instead.
564   // Sets the type of comparison (as defined in the FloatComparison enumeration
565   // above) that is used by this differencer when comparing float (and double)
566   // fields in messages.
567   // NOTE: this method does nothing if differencer's field comparator has been
568   //       set to a custom object.
569   void set_float_comparison(FloatComparison comparison);
570 
571   // Sets the type of comparison for repeated field (as defined in the
572   // RepeatedFieldComparison enumeration above) that is used by this
573   // differencer when compare repeated fields in messages.
574   void set_repeated_field_comparison(RepeatedFieldComparison comparison);
575 
576   // Returns the current repeated field comparison used by this differencer.
577   RepeatedFieldComparison repeated_field_comparison();
578 
579   // Compares the two specified messages, returning true if they are the same,
580   // false otherwise. If this method returns false, any changes between the
581   // two messages will be reported if a Reporter was specified via
582   // ReportDifferencesTo (see also ReportDifferencesToString).
583   //
584   // This method REQUIRES that the two messages have the same
585   // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()).
586   bool Compare(const Message& message1, const Message& message2);
587 
588   // Same as above, except comparing only the list of fields specified by the
589   // two vectors of FieldDescriptors.
590   bool CompareWithFields(
591       const Message& message1, const Message& message2,
592       const std::vector<const FieldDescriptor*>& message1_fields,
593       const std::vector<const FieldDescriptor*>& message2_fields);
594 
595   // Automatically creates a reporter that will output the differences
596   // found (if any) to the specified output string pointer. Note that this
597   // method must be called before Compare.
598   void ReportDifferencesToString(std::string* output);
599 
600   // Tells the MessageDifferencer to report differences via the specified
601   // reporter. Note that this method must be called before Compare for
602   // the reporter to be used. It is the responsibility of the caller to delete
603   // this object.
604   // If the provided pointer equals NULL, the MessageDifferencer stops reporting
605   // differences to any previously set reporters or output strings.
606   void ReportDifferencesTo(Reporter* reporter);
607 
608   // An implementation of the MessageDifferencer Reporter that outputs
609   // any differences found in human-readable form to the supplied
610   // ZeroCopyOutputStream or Printer. If a printer is used, the delimiter
611   // *must* be '$'.
612   //
613   // WARNING: this reporter does not necessarily flush its output until it is
614   // destroyed. As a result, it is not safe to assume the output is valid or
615   // complete until after you destroy the reporter. For example, if you use a
616   // StreamReporter to write to a StringOutputStream, the target string may
617   // contain uninitialized data until the reporter is destroyed.
618   class PROTOBUF_EXPORT StreamReporter : public Reporter {
619    public:
620     explicit StreamReporter(io::ZeroCopyOutputStream* output);
621     explicit StreamReporter(io::Printer* printer);  // delimiter '$'
622     ~StreamReporter() override;
623 
624     // When set to true, the stream reporter will also output aggregates nodes
625     // (i.e. messages and groups) whose subfields have been modified. When
626     // false, will only report the individual subfields. Defaults to false.
set_report_modified_aggregates(bool report)627     void set_report_modified_aggregates(bool report) {
628       report_modified_aggregates_ = report;
629     }
630 
631     // The following are implementations of the methods described above.
632 
633     void ReportAdded(const Message& message1, const Message& message2,
634                      const std::vector<SpecificField>& field_path) override;
635 
636     void ReportDeleted(const Message& message1, const Message& message2,
637                        const std::vector<SpecificField>& field_path) override;
638 
639     void ReportModified(const Message& message1, const Message& message2,
640                         const std::vector<SpecificField>& field_path) override;
641 
642     void ReportMoved(const Message& message1, const Message& message2,
643                      const std::vector<SpecificField>& field_path) override;
644 
645     void ReportMatched(const Message& message1, const Message& message2,
646                        const std::vector<SpecificField>& field_path) override;
647 
648     void ReportIgnored(const Message& message1, const Message& message2,
649                        const std::vector<SpecificField>& field_path) override;
650 
651     void ReportUnknownFieldIgnored(
652         const Message& message1, const Message& message2,
653         const std::vector<SpecificField>& field_path) override;
654 
655    protected:
656     // Prints the specified path of fields to the buffer.  message is used to
657     // print map keys.
658     virtual void PrintPath(const std::vector<SpecificField>& field_path,
659                            bool left_side, const Message& message);
660 
661     // Prints the specified path of fields to the buffer.
662     virtual void PrintPath(const std::vector<SpecificField>& field_path,
663                            bool left_side);
664 
665     // Prints the value of fields to the buffer.  left_side is true if the
666     // given message is from the left side of the comparison, false if it
667     // was the right.  This is relevant only to decide whether to follow
668     // unknown_field_index1 or unknown_field_index2 when an unknown field
669     // is encountered in field_path.
670     virtual void PrintValue(const Message& message,
671                             const std::vector<SpecificField>& field_path,
672                             bool left_side);
673 
674     // Prints the specified path of unknown fields to the buffer.
675     virtual void PrintUnknownFieldValue(const UnknownField* unknown_field);
676 
677     // Just print a string
678     void Print(const std::string& str);
679 
680    private:
681     io::Printer* printer_;
682     bool delete_printer_;
683     bool report_modified_aggregates_;
684 
685     GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StreamReporter);
686   };
687 
688  private:
689   friend class DefaultFieldComparator;
690 
691   // A MapKeyComparator to be used in TreatAsMapUsingKeyComparator.
692   // Implementation of this class needs to do field value comparison which
693   // relies on some private methods of MessageDifferencer. That's why this
694   // class is declared as a nested class of MessageDifferencer.
695   class MultipleFieldsMapKeyComparator;
696 
697   // A MapKeyComparator for use with map_entries.
698   class PROTOBUF_EXPORT MapEntryKeyComparator : public MapKeyComparator {
699    public:
700     explicit MapEntryKeyComparator(MessageDifferencer* message_differencer);
701     bool IsMatch(
702         const Message& message1, const Message& message2,
703         const std::vector<SpecificField>& parent_fields) const override;
704 
705    private:
706     MessageDifferencer* message_differencer_;
707   };
708 
709   // Returns true if field1's number() is less than field2's.
710   static bool FieldBefore(const FieldDescriptor* field1,
711                           const FieldDescriptor* field2);
712 
713   // Retrieve all the set fields, including extensions.
714   FieldDescriptorArray RetrieveFields(const Message& message,
715                                       bool base_message);
716 
717   // Combine the two lists of fields into the combined_fields output vector.
718   // All fields present in both lists will always be included in the combined
719   // list.  Fields only present in one of the lists will only appear in the
720   // combined list if the corresponding fields_scope option is set to FULL.
721   FieldDescriptorArray CombineFields(const FieldDescriptorArray& fields1,
722                                      Scope fields1_scope,
723                                      const FieldDescriptorArray& fields2,
724                                      Scope fields2_scope);
725 
726   // Internal version of the Compare method which performs the actual
727   // comparison. The parent_fields vector is a vector containing field
728   // descriptors of all fields accessed to get to this comparison operation
729   // (i.e. if the current message is an embedded message, the parent_fields
730   // vector will contain the field that has this embedded message).
731   bool Compare(const Message& message1, const Message& message2,
732                std::vector<SpecificField>* parent_fields);
733 
734   // Compares all the unknown fields in two messages.
735   bool CompareUnknownFields(const Message& message1, const Message& message2,
736                             const UnknownFieldSet&, const UnknownFieldSet&,
737                             std::vector<SpecificField>* parent_fields);
738 
739   // Compares the specified messages for the requested field lists. The field
740   // lists are modified depending on comparison settings, and then passed to
741   // CompareWithFieldsInternal.
742   bool CompareRequestedFieldsUsingSettings(
743       const Message& message1, const Message& message2,
744       const FieldDescriptorArray& message1_fields,
745       const FieldDescriptorArray& message2_fields,
746       std::vector<SpecificField>* parent_fields);
747 
748   // Compares the specified messages with the specified field lists.
749   bool CompareWithFieldsInternal(const Message& message1,
750                                  const Message& message2,
751                                  const FieldDescriptorArray& message1_fields,
752                                  const FieldDescriptorArray& message2_fields,
753                                  std::vector<SpecificField>* parent_fields);
754 
755   // Compares the repeated fields, and report the error.
756   bool CompareRepeatedField(const Message& message1, const Message& message2,
757                             const FieldDescriptor* field,
758                             std::vector<SpecificField>* parent_fields);
759 
760   // Compare the map fields using map reflection instead of sync to repeated.
761   bool CompareMapFieldByMapReflection(const Message& message1,
762                                       const Message& message2,
763                                       const FieldDescriptor* field,
764                                       std::vector<SpecificField>* parent_fields,
765                                       DefaultFieldComparator* comparator);
766 
767   // Shorthand for CompareFieldValueUsingParentFields with NULL parent_fields.
768   bool CompareFieldValue(const Message& message1, const Message& message2,
769                          const FieldDescriptor* field, int index1, int index2);
770 
771   // Compares the specified field on the two messages, returning
772   // true if they are the same, false otherwise. For repeated fields,
773   // this method only compares the value in the specified index. This method
774   // uses Compare functions to recurse into submessages.
775   // The parent_fields vector is passed along in calls to a Reporter instance.
776   // It can be NULL, in which case the MessageDifferencer will create a new
777   // list of parent messages if it needs to recursively compare the given field.
778   // To avoid confusing users you should not set it to NULL unless you modified
779   // Reporter to handle the change of parent_fields correctly.
780   bool CompareFieldValueUsingParentFields(
781       const Message& message1, const Message& message2,
782       const FieldDescriptor* field, int index1, int index2,
783       std::vector<SpecificField>* parent_fields);
784 
785   // Compares the specified field on the two messages, returning comparison
786   // result, as returned by appropriate FieldComparator.
787   FieldComparator::ComparisonResult GetFieldComparisonResult(
788       const Message& message1, const Message& message2,
789       const FieldDescriptor* field, int index1, int index2,
790       const FieldContext* field_context);
791 
792   // Checks whether the two elements of the repeated field match each other.
793   // If key_comparator is NULL, this function returns true when the two
794   // elements are equal.
795   bool IsMatch(const FieldDescriptor* repeated_field,
796                const MapKeyComparator* key_comparator, const Message* message1,
797                const Message* message2,
798                const std::vector<SpecificField>& parent_fields,
799                Reporter* reporter, int index1, int index2);
800 
801   // Returns true when this repeated field has been configured to be treated
802   // as a Set / SmartSet / SmartList.
803   bool IsTreatedAsSet(const FieldDescriptor* field);
804   bool IsTreatedAsSmartSet(const FieldDescriptor* field);
805 
806   bool IsTreatedAsSmartList(const FieldDescriptor* field);
807   // When treating as SMART_LIST, it uses MatchIndicesPostProcessorForSmartList
808   // by default to find the longest matching sequence from the first matching
809   // element. The callback takes two vectors showing the matching indices from
810   // the other vector, where -1 means an unmatch.
811   void SetMatchIndicesForSmartListCallback(
812       std::function<void(std::vector<int>*, std::vector<int>*)> callback);
813 
814   // Returns true when this repeated field is to be compared as a subset, ie.
815   // has been configured to be treated as a set or map and scope is set to
816   // PARTIAL.
817   bool IsTreatedAsSubset(const FieldDescriptor* field);
818 
819   // Returns true if this field is to be ignored when this
820   // MessageDifferencer compares messages.
821   bool IsIgnored(const Message& message1, const Message& message2,
822                  const FieldDescriptor* field,
823                  const std::vector<SpecificField>& parent_fields);
824 
825   // Returns true if this unknown field is to be ignored when this
826   // MessageDifferencer compares messages.
827   bool IsUnknownFieldIgnored(const Message& message1, const Message& message2,
828                              const SpecificField& field,
829                              const std::vector<SpecificField>& parent_fields);
830 
831   // Returns MapKeyComparator* when this field has been configured to be treated
832   // as a map or its is_map() returns true.  If not, returns NULL.
833   const MapKeyComparator* GetMapKeyComparator(
834       const FieldDescriptor* field) const;
835 
836   // Attempts to match indices of a repeated field, so that the contained values
837   // match. Clears output vectors and sets their values to indices of paired
838   // messages, ie. if message1[0] matches message2[1], then match_list1[0] == 1
839   // and match_list2[1] == 0. The unmatched indices are indicated by -1.
840   // Assumes the repeated field is not treated as a simple list.
841   // This method returns false if the match failed. However, it doesn't mean
842   // that the comparison succeeds when this method returns true (you need to
843   // double-check in this case).
844   bool MatchRepeatedFieldIndices(
845       const Message& message1, const Message& message2,
846       const FieldDescriptor* repeated_field,
847       const MapKeyComparator* key_comparator,
848       const std::vector<SpecificField>& parent_fields,
849       std::vector<int>* match_list1, std::vector<int>* match_list2);
850 
851   // If "any" is of type google.protobuf.Any, extract its payload using
852   // DynamicMessageFactory and store in "data".
853   bool UnpackAny(const Message& any, std::unique_ptr<Message>* data);
854 
855   // Checks if index is equal to new_index in all the specific fields.
856   static bool CheckPathChanged(const std::vector<SpecificField>& parent_fields);
857 
858   // CHECKs that the given repeated field can be compared according to
859   // new_comparison.
860   void CheckRepeatedFieldComparisons(
861       const FieldDescriptor* field,
862       const RepeatedFieldComparison& new_comparison);
863 
864   // Defines a map between field descriptors and their MapKeyComparators.
865   // Used for repeated fields when they are configured as TreatAsMap.
866   typedef std::map<const FieldDescriptor*, const MapKeyComparator*>
867       FieldKeyComparatorMap;
868 
869   // Defines a set to store field descriptors.  Used for repeated fields when
870   // they are configured as TreatAsSet.
871   typedef std::set<const FieldDescriptor*> FieldSet;
872   typedef std::map<const FieldDescriptor*, RepeatedFieldComparison> FieldMap;
873 
874   Reporter* reporter_;
875   DefaultFieldComparator default_field_comparator_;
876   FieldComparator* field_comparator_;
877   MessageFieldComparison message_field_comparison_;
878   Scope scope_;
879   RepeatedFieldComparison repeated_field_comparison_;
880 
881   FieldMap repeated_field_comparisons_;
882   // Keeps track of MapKeyComparators that are created within
883   // MessageDifferencer. These MapKeyComparators should be deleted
884   // before MessageDifferencer is destroyed.
885   // When TreatAsMap or TreatAsMapWithMultipleFieldsAsKey is called, we don't
886   // store the supplied FieldDescriptors directly. Instead, a new
887   // MapKeyComparator is created for comparison purpose.
888   std::vector<MapKeyComparator*> owned_key_comparators_;
889   FieldKeyComparatorMap map_field_key_comparator_;
890   MapEntryKeyComparator map_entry_key_comparator_;
891   std::vector<IgnoreCriteria*> ignore_criteria_;
892   // Reused multiple times in RetrieveFields to avoid extra allocations
893   std::vector<const FieldDescriptor*> tmp_message_fields_;
894 
895   FieldSet ignored_fields_;
896 
897   bool report_matches_;
898   bool report_moves_;
899   bool report_ignores_;
900 
901   std::string* output_string_;
902 
903   // Callback to post-process the matched indices to support SMART_LIST.
904   std::function<void(std::vector<int>*, std::vector<int>*)>
905       match_indices_for_smart_list_callback_;
906 
907   std::unique_ptr<DynamicMessageFactory> dynamic_message_factory_;
908   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MessageDifferencer);
909 };
910 
911 // This class provides extra information to the FieldComparator::Compare
912 // function.
913 class PROTOBUF_EXPORT FieldContext {
914  public:
FieldContext(std::vector<MessageDifferencer::SpecificField> * parent_fields)915   explicit FieldContext(
916       std::vector<MessageDifferencer::SpecificField>* parent_fields)
917       : parent_fields_(parent_fields) {}
918 
parent_fields()919   std::vector<MessageDifferencer::SpecificField>* parent_fields() const {
920     return parent_fields_;
921   }
922 
923  private:
924   std::vector<MessageDifferencer::SpecificField>* parent_fields_;
925 };
926 
927 }  // namespace util
928 }  // namespace protobuf
929 }  // namespace google
930 
931 #include <google/protobuf/port_undef.inc>
932 
933 #endif  // GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__
934