• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: jschorr@google.com (Joseph Schorr)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/util/message_differencer.h>
36 
37 #include <algorithm>
38 #include <functional>
39 #include <memory>
40 #include <utility>
41 
42 #include <google/protobuf/stubs/logging.h>
43 #include <google/protobuf/stubs/common.h>
44 #include <google/protobuf/stubs/stringprintf.h>
45 #include <google/protobuf/io/printer.h>
46 #include <google/protobuf/io/zero_copy_stream.h>
47 #include <google/protobuf/io/zero_copy_stream_impl.h>
48 #include <google/protobuf/descriptor.pb.h>
49 #include <google/protobuf/dynamic_message.h>
50 #include <google/protobuf/map_field.h>
51 #include <google/protobuf/text_format.h>
52 #include <google/protobuf/util/field_comparator.h>
53 #include <google/protobuf/stubs/strutil.h>
54 
55 // Always include as last one, otherwise it can break compilation
56 #include <google/protobuf/port_def.inc>
57 
58 namespace google {
59 namespace protobuf {
60 
61 namespace util {
62 
63 // A reporter to report the total number of diffs.
64 // TODO(ykzhu): we can improve this to take into account the value differencers.
65 class NumDiffsReporter : public google::protobuf::util::MessageDifferencer::Reporter {
66  public:
NumDiffsReporter()67   NumDiffsReporter() : num_diffs_(0) {}
68 
69   // Returns the total number of diffs.
GetNumDiffs() const70   int32 GetNumDiffs() const { return num_diffs_; }
Reset()71   void Reset() { num_diffs_ = 0; }
72 
73   // Report that a field has been added into Message2.
ReportAdded(const google::protobuf::Message & message1,const google::protobuf::Message & message2,const std::vector<google::protobuf::util::MessageDifferencer::SpecificField> & field_path)74   void ReportAdded(
75       const google::protobuf::Message& message1, const google::protobuf::Message& message2,
76       const std::vector<google::protobuf::util::MessageDifferencer::SpecificField>&
77           field_path) override {
78     ++num_diffs_;
79   }
80 
81   // Report that a field has been deleted from Message1.
ReportDeleted(const google::protobuf::Message & message1,const google::protobuf::Message & message2,const std::vector<google::protobuf::util::MessageDifferencer::SpecificField> & field_path)82   void ReportDeleted(
83       const google::protobuf::Message& message1, const google::protobuf::Message& message2,
84       const std::vector<google::protobuf::util::MessageDifferencer::SpecificField>&
85           field_path) override {
86     ++num_diffs_;
87   }
88 
89   // Report that the value of a field has been modified.
ReportModified(const google::protobuf::Message & message1,const google::protobuf::Message & message2,const std::vector<google::protobuf::util::MessageDifferencer::SpecificField> & field_path)90   void ReportModified(
91       const google::protobuf::Message& message1, const google::protobuf::Message& message2,
92       const std::vector<google::protobuf::util::MessageDifferencer::SpecificField>&
93           field_path) override {
94     ++num_diffs_;
95   }
96 
97  private:
98   int32 num_diffs_;
99 };
100 
101 // When comparing a repeated field as map, MultipleFieldMapKeyComparator can
102 // be used to specify multiple fields as key for key comparison.
103 // Two elements of a repeated field will be regarded as having the same key
104 // iff they have the same value for every specified key field.
105 // Note that you can also specify only one field as key.
106 class MessageDifferencer::MultipleFieldsMapKeyComparator
107     : public MessageDifferencer::MapKeyComparator {
108  public:
MultipleFieldsMapKeyComparator(MessageDifferencer * message_differencer,const std::vector<std::vector<const FieldDescriptor * >> & key_field_paths)109   MultipleFieldsMapKeyComparator(
110       MessageDifferencer* message_differencer,
111       const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths)
112       : message_differencer_(message_differencer),
113         key_field_paths_(key_field_paths) {
114     GOOGLE_CHECK(!key_field_paths_.empty());
115     for (const auto& path : key_field_paths_) {
116       GOOGLE_CHECK(!path.empty());
117     }
118   }
MultipleFieldsMapKeyComparator(MessageDifferencer * message_differencer,const FieldDescriptor * key)119   MultipleFieldsMapKeyComparator(MessageDifferencer* message_differencer,
120                                  const FieldDescriptor* key)
121       : message_differencer_(message_differencer) {
122     std::vector<const FieldDescriptor*> key_field_path;
123     key_field_path.push_back(key);
124     key_field_paths_.push_back(key_field_path);
125   }
IsMatch(const Message & message1,const Message & message2,const std::vector<SpecificField> & parent_fields) const126   bool IsMatch(const Message& message1, const Message& message2,
127                const std::vector<SpecificField>& parent_fields) const override {
128     for (const auto& path : key_field_paths_) {
129       if (!IsMatchInternal(message1, message2, parent_fields, path, 0)) {
130         return false;
131       }
132     }
133     return true;
134   }
135 
136  private:
IsMatchInternal(const Message & message1,const Message & message2,const std::vector<SpecificField> & parent_fields,const std::vector<const FieldDescriptor * > & key_field_path,int path_index) const137   bool IsMatchInternal(
138       const Message& message1, const Message& message2,
139       const std::vector<SpecificField>& parent_fields,
140       const std::vector<const FieldDescriptor*>& key_field_path,
141       int path_index) const {
142     const FieldDescriptor* field = key_field_path[path_index];
143     std::vector<SpecificField> current_parent_fields(parent_fields);
144     if (path_index == key_field_path.size() - 1) {
145       if (field->is_repeated()) {
146         if (!message_differencer_->CompareRepeatedField(
147                 message1, message2, field, &current_parent_fields)) {
148           return false;
149         }
150       } else {
151         if (!message_differencer_->CompareFieldValueUsingParentFields(
152                 message1, message2, field, -1, -1, &current_parent_fields)) {
153           return false;
154         }
155       }
156       return true;
157     } else {
158       const Reflection* reflection1 = message1.GetReflection();
159       const Reflection* reflection2 = message2.GetReflection();
160       bool has_field1 = reflection1->HasField(message1, field);
161       bool has_field2 = reflection2->HasField(message2, field);
162       if (!has_field1 && !has_field2) {
163         return true;
164       }
165       if (has_field1 != has_field2) {
166         return false;
167       }
168       SpecificField specific_field;
169       specific_field.field = field;
170       current_parent_fields.push_back(specific_field);
171       return IsMatchInternal(reflection1->GetMessage(message1, field),
172                              reflection2->GetMessage(message2, field),
173                              current_parent_fields, key_field_path,
174                              path_index + 1);
175     }
176   }
177   MessageDifferencer* message_differencer_;
178   std::vector<std::vector<const FieldDescriptor*> > key_field_paths_;
179   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MultipleFieldsMapKeyComparator);
180 };
181 
182 // Preserve the order when treating repeated field as SMART_LIST. The current
183 // implementation is to find the longest matching sequence from the first
184 // element. The optimal solution requires to use //util/diff/lcs.h, which is
185 // not open sourced yet. Overwrite this method if you want to have that.
186 // TODO(ykzhu): change to use LCS once it is open sourced.
MatchIndicesPostProcessorForSmartList(std::vector<int> * match_list1,std::vector<int> * match_list2)187 void MatchIndicesPostProcessorForSmartList(
188     std::vector<int>* match_list1, std::vector<int>* match_list2) {
189   int last_matched_index = -1;
190   for (int i = 0; i < match_list1->size(); ++i) {
191     if (match_list1->at(i) < 0) {
192       continue;
193     }
194     if (last_matched_index < 0 || match_list1->at(i) > last_matched_index) {
195       last_matched_index = match_list1->at(i);
196     } else {
197       match_list2->at(match_list1->at(i)) = -1;
198       match_list1->at(i) = -1;
199     }
200   }
201 }
202 
MapEntryKeyComparator(MessageDifferencer * message_differencer)203 MessageDifferencer::MapEntryKeyComparator::MapEntryKeyComparator(
204     MessageDifferencer* message_differencer)
205     : message_differencer_(message_differencer) {}
206 
IsMatch(const Message & message1,const Message & message2,const std::vector<SpecificField> & parent_fields) const207 bool MessageDifferencer::MapEntryKeyComparator::IsMatch(
208     const Message& message1, const Message& message2,
209     const std::vector<SpecificField>& parent_fields) const {
210   // Map entry has its key in the field with tag 1.  See the comment for
211   // map_entry in MessageOptions.
212   const FieldDescriptor* key = message1.GetDescriptor()->FindFieldByNumber(1);
213   // If key is not present in message1 and we're doing partial comparison or if
214   // map key is explicitly ignored treat the field as set instead,
215   const bool treat_as_set =
216       (message_differencer_->scope() == PARTIAL &&
217        !message1.GetReflection()->HasField(message1, key)) ||
218       message_differencer_->IsIgnored(message1, message2, key, parent_fields);
219 
220   std::vector<SpecificField> current_parent_fields(parent_fields);
221   if (treat_as_set) {
222     return message_differencer_->Compare(message1, message2,
223                                          &current_parent_fields);
224   }
225   return message_differencer_->CompareFieldValueUsingParentFields(
226       message1, message2, key, -1, -1, &current_parent_fields);
227 }
228 
Equals(const Message & message1,const Message & message2)229 bool MessageDifferencer::Equals(const Message& message1,
230                                 const Message& message2) {
231   MessageDifferencer differencer;
232 
233   return differencer.Compare(message1, message2);
234 }
235 
Equivalent(const Message & message1,const Message & message2)236 bool MessageDifferencer::Equivalent(const Message& message1,
237                                     const Message& message2) {
238   MessageDifferencer differencer;
239   differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
240 
241   return differencer.Compare(message1, message2);
242 }
243 
ApproximatelyEquals(const Message & message1,const Message & message2)244 bool MessageDifferencer::ApproximatelyEquals(const Message& message1,
245                                              const Message& message2) {
246   MessageDifferencer differencer;
247   differencer.set_float_comparison(MessageDifferencer::APPROXIMATE);
248 
249   return differencer.Compare(message1, message2);
250 }
251 
ApproximatelyEquivalent(const Message & message1,const Message & message2)252 bool MessageDifferencer::ApproximatelyEquivalent(const Message& message1,
253                                                  const Message& message2) {
254   MessageDifferencer differencer;
255   differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
256   differencer.set_float_comparison(MessageDifferencer::APPROXIMATE);
257 
258   return differencer.Compare(message1, message2);
259 }
260 
261 // ===========================================================================
262 
MessageDifferencer()263 MessageDifferencer::MessageDifferencer()
264     : reporter_(NULL),
265       field_comparator_(NULL),
266       message_field_comparison_(EQUAL),
267       scope_(FULL),
268       repeated_field_comparison_(AS_LIST),
269       map_entry_key_comparator_(this),
270       report_matches_(false),
271       report_moves_(true),
272       report_ignores_(true),
273       output_string_(nullptr),
274       match_indices_for_smart_list_callback_(
275           MatchIndicesPostProcessorForSmartList) {}
276 
~MessageDifferencer()277 MessageDifferencer::~MessageDifferencer() {
278   for (MapKeyComparator* comparator : owned_key_comparators_) {
279     delete comparator;
280   }
281   for (IgnoreCriteria* criteria : ignore_criteria_) {
282     delete criteria;
283   }
284 }
285 
set_field_comparator(FieldComparator * comparator)286 void MessageDifferencer::set_field_comparator(FieldComparator* comparator) {
287   GOOGLE_CHECK(comparator) << "Field comparator can't be NULL.";
288   field_comparator_ = comparator;
289 }
290 
set_message_field_comparison(MessageFieldComparison comparison)291 void MessageDifferencer::set_message_field_comparison(
292     MessageFieldComparison comparison) {
293   message_field_comparison_ = comparison;
294 }
295 
set_scope(Scope scope)296 void MessageDifferencer::set_scope(Scope scope) { scope_ = scope; }
297 
scope()298 MessageDifferencer::Scope MessageDifferencer::scope() { return scope_; }
299 
set_float_comparison(FloatComparison comparison)300 void MessageDifferencer::set_float_comparison(FloatComparison comparison) {
301   default_field_comparator_.set_float_comparison(
302       comparison == EXACT ? DefaultFieldComparator::EXACT
303                           : DefaultFieldComparator::APPROXIMATE);
304 }
305 
set_repeated_field_comparison(RepeatedFieldComparison comparison)306 void MessageDifferencer::set_repeated_field_comparison(
307     RepeatedFieldComparison comparison) {
308   repeated_field_comparison_ = comparison;
309 }
310 
311 MessageDifferencer::RepeatedFieldComparison
repeated_field_comparison()312 MessageDifferencer::repeated_field_comparison() {
313   return repeated_field_comparison_;
314 }
315 
CheckRepeatedFieldComparisons(const FieldDescriptor * field,const RepeatedFieldComparison & new_comparison)316 void MessageDifferencer::CheckRepeatedFieldComparisons(
317     const FieldDescriptor* field,
318     const RepeatedFieldComparison& new_comparison) {
319   GOOGLE_CHECK(field->is_repeated())
320       << "Field must be repeated: " << field->full_name();
321   const MapKeyComparator* key_comparator = GetMapKeyComparator(field);
322   GOOGLE_CHECK(key_comparator == NULL)
323       << "Cannot treat this repeated field as both MAP and " << new_comparison
324       << " for"
325       << " comparison.  Field name is: " << field->full_name();
326   GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
327             repeated_field_comparisons_.end() ||
328         repeated_field_comparisons_[field] == new_comparison)
329       << "Cannot treat the same field as both "
330       << repeated_field_comparisons_[field] << " and " << new_comparison
331       << ". Field name is: " << field->full_name();
332 }
333 
TreatAsSet(const FieldDescriptor * field)334 void MessageDifferencer::TreatAsSet(const FieldDescriptor* field) {
335   CheckRepeatedFieldComparisons(field, AS_SET);
336   repeated_field_comparisons_[field] = AS_SET;
337 }
338 
TreatAsSmartSet(const FieldDescriptor * field)339 void MessageDifferencer::TreatAsSmartSet(const FieldDescriptor* field) {
340   CheckRepeatedFieldComparisons(field, AS_SMART_SET);
341   repeated_field_comparisons_[field] = AS_SMART_SET;
342 }
343 
SetMatchIndicesForSmartListCallback(std::function<void (std::vector<int> *,std::vector<int> *)> callback)344 void MessageDifferencer::SetMatchIndicesForSmartListCallback(
345     std::function<void(std::vector<int>*, std::vector<int>*)> callback) {
346   match_indices_for_smart_list_callback_ = callback;
347 }
348 
TreatAsList(const FieldDescriptor * field)349 void MessageDifferencer::TreatAsList(const FieldDescriptor* field) {
350   CheckRepeatedFieldComparisons(field, AS_LIST);
351   repeated_field_comparisons_[field] = AS_LIST;
352 }
353 
TreatAsSmartList(const FieldDescriptor * field)354 void MessageDifferencer::TreatAsSmartList(const FieldDescriptor* field) {
355   CheckRepeatedFieldComparisons(field, AS_SMART_LIST);
356   repeated_field_comparisons_[field] = AS_SMART_LIST;
357 }
358 
TreatAsMap(const FieldDescriptor * field,const FieldDescriptor * key)359 void MessageDifferencer::TreatAsMap(const FieldDescriptor* field,
360                                     const FieldDescriptor* key) {
361   GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
362       << "Field has to be message type.  Field name is: " << field->full_name();
363   GOOGLE_CHECK(key->containing_type() == field->message_type())
364       << key->full_name()
365       << " must be a direct subfield within the repeated field "
366       << field->full_name() << ", not " << key->containing_type()->full_name();
367   GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
368         repeated_field_comparisons_.end())
369       << "Cannot treat the same field as both "
370       << repeated_field_comparisons_[field]
371       << " and MAP. Field name is: " << field->full_name();
372   MapKeyComparator* key_comparator =
373       new MultipleFieldsMapKeyComparator(this, key);
374   owned_key_comparators_.push_back(key_comparator);
375   map_field_key_comparator_[field] = key_comparator;
376 }
377 
TreatAsMapWithMultipleFieldsAsKey(const FieldDescriptor * field,const std::vector<const FieldDescriptor * > & key_fields)378 void MessageDifferencer::TreatAsMapWithMultipleFieldsAsKey(
379     const FieldDescriptor* field,
380     const std::vector<const FieldDescriptor*>& key_fields) {
381   std::vector<std::vector<const FieldDescriptor*> > key_field_paths;
382   for (const FieldDescriptor* key_filed : key_fields) {
383     std::vector<const FieldDescriptor*> key_field_path;
384     key_field_path.push_back(key_filed);
385     key_field_paths.push_back(key_field_path);
386   }
387   TreatAsMapWithMultipleFieldPathsAsKey(field, key_field_paths);
388 }
389 
TreatAsMapWithMultipleFieldPathsAsKey(const FieldDescriptor * field,const std::vector<std::vector<const FieldDescriptor * >> & key_field_paths)390 void MessageDifferencer::TreatAsMapWithMultipleFieldPathsAsKey(
391     const FieldDescriptor* field,
392     const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths) {
393   GOOGLE_CHECK(field->is_repeated())
394       << "Field must be repeated: " << field->full_name();
395   GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
396       << "Field has to be message type.  Field name is: " << field->full_name();
397   for (const auto& key_field_path : key_field_paths) {
398     for (int j = 0; j < key_field_path.size(); ++j) {
399       const FieldDescriptor* parent_field =
400           j == 0 ? field : key_field_path[j - 1];
401       const FieldDescriptor* child_field = key_field_path[j];
402       GOOGLE_CHECK(child_field->containing_type() == parent_field->message_type())
403           << child_field->full_name()
404           << " must be a direct subfield within the field: "
405           << parent_field->full_name();
406       if (j != 0) {
407         GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, parent_field->cpp_type())
408             << parent_field->full_name() << " has to be of type message.";
409         GOOGLE_CHECK(!parent_field->is_repeated())
410             << parent_field->full_name() << " cannot be a repeated field.";
411       }
412     }
413   }
414   GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
415         repeated_field_comparisons_.end())
416       << "Cannot treat the same field as both "
417       << repeated_field_comparisons_[field]
418       << " and MAP. Field name is: " << field->full_name();
419   MapKeyComparator* key_comparator =
420       new MultipleFieldsMapKeyComparator(this, key_field_paths);
421   owned_key_comparators_.push_back(key_comparator);
422   map_field_key_comparator_[field] = key_comparator;
423 }
424 
TreatAsMapUsingKeyComparator(const FieldDescriptor * field,const MapKeyComparator * key_comparator)425 void MessageDifferencer::TreatAsMapUsingKeyComparator(
426     const FieldDescriptor* field, const MapKeyComparator* key_comparator) {
427   GOOGLE_CHECK(field->is_repeated())
428       << "Field must be repeated: " << field->full_name();
429   GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
430         repeated_field_comparisons_.end())
431       << "Cannot treat the same field as both "
432       << repeated_field_comparisons_[field]
433       << " and MAP. Field name is: " << field->full_name();
434   map_field_key_comparator_[field] = key_comparator;
435 }
436 
AddIgnoreCriteria(IgnoreCriteria * ignore_criteria)437 void MessageDifferencer::AddIgnoreCriteria(IgnoreCriteria* ignore_criteria) {
438   ignore_criteria_.push_back(ignore_criteria);
439 }
440 
IgnoreField(const FieldDescriptor * field)441 void MessageDifferencer::IgnoreField(const FieldDescriptor* field) {
442   ignored_fields_.insert(field);
443 }
444 
SetFractionAndMargin(const FieldDescriptor * field,double fraction,double margin)445 void MessageDifferencer::SetFractionAndMargin(const FieldDescriptor* field,
446                                               double fraction, double margin) {
447   default_field_comparator_.SetFractionAndMargin(field, fraction, margin);
448 }
449 
ReportDifferencesToString(std::string * output)450 void MessageDifferencer::ReportDifferencesToString(std::string* output) {
451   GOOGLE_DCHECK(output) << "Specified output string was NULL";
452 
453   output_string_ = output;
454   output_string_->clear();
455 }
456 
ReportDifferencesTo(Reporter * reporter)457 void MessageDifferencer::ReportDifferencesTo(Reporter* reporter) {
458   // If an output string is set, clear it to prevent
459   // it superseding the specified reporter.
460   if (output_string_) {
461     output_string_ = NULL;
462   }
463 
464   reporter_ = reporter;
465 }
466 
FieldBefore(const FieldDescriptor * field1,const FieldDescriptor * field2)467 bool MessageDifferencer::FieldBefore(const FieldDescriptor* field1,
468                                      const FieldDescriptor* field2) {
469   // Handle sentinel values (i.e. make sure NULLs are always ordered
470   // at the end of the list).
471   if (field1 == NULL) {
472     return false;
473   }
474 
475   if (field2 == NULL) {
476     return true;
477   }
478 
479   // Always order fields by their tag number
480   return (field1->number() < field2->number());
481 }
482 
Compare(const Message & message1,const Message & message2)483 bool MessageDifferencer::Compare(const Message& message1,
484                                  const Message& message2) {
485   std::vector<SpecificField> parent_fields;
486 
487   bool result = false;
488 
489   // Setup the internal reporter if need be.
490   if (output_string_) {
491     io::StringOutputStream output_stream(output_string_);
492     StreamReporter reporter(&output_stream);
493     reporter_ = &reporter;
494     result = Compare(message1, message2, &parent_fields);
495     reporter_ = NULL;
496   } else {
497     result = Compare(message1, message2, &parent_fields);
498   }
499 
500   return result;
501 }
502 
CompareWithFields(const Message & message1,const Message & message2,const std::vector<const FieldDescriptor * > & message1_fields_arg,const std::vector<const FieldDescriptor * > & message2_fields_arg)503 bool MessageDifferencer::CompareWithFields(
504     const Message& message1, const Message& message2,
505     const std::vector<const FieldDescriptor*>& message1_fields_arg,
506     const std::vector<const FieldDescriptor*>& message2_fields_arg) {
507   if (message1.GetDescriptor() != message2.GetDescriptor()) {
508     GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
509                 << "descriptors.";
510     return false;
511   }
512 
513   std::vector<SpecificField> parent_fields;
514 
515   bool result = false;
516 
517   FieldDescriptorArray message1_fields(message1_fields_arg.size() + 1);
518   FieldDescriptorArray message2_fields(message2_fields_arg.size() + 1);
519 
520   std::copy(message1_fields_arg.cbegin(), message1_fields_arg.cend(),
521             message1_fields.begin());
522   std::copy(message2_fields_arg.cbegin(), message2_fields_arg.cend(),
523             message2_fields.begin());
524 
525   // Append sentinel values.
526   message1_fields[message1_fields_arg.size()] = nullptr;
527   message2_fields[message2_fields_arg.size()] = nullptr;
528 
529   std::sort(message1_fields.begin(), message1_fields.end(), FieldBefore);
530   std::sort(message2_fields.begin(), message2_fields.end(), FieldBefore);
531 
532   // Setup the internal reporter if need be.
533   if (output_string_) {
534     io::StringOutputStream output_stream(output_string_);
535     StreamReporter reporter(&output_stream);
536     reporter_ = &reporter;
537     result = CompareRequestedFieldsUsingSettings(
538         message1, message2, message1_fields, message2_fields, &parent_fields);
539     reporter_ = NULL;
540   } else {
541     result = CompareRequestedFieldsUsingSettings(
542         message1, message2, message1_fields, message2_fields, &parent_fields);
543   }
544 
545   return result;
546 }
547 
Compare(const Message & message1,const Message & message2,std::vector<SpecificField> * parent_fields)548 bool MessageDifferencer::Compare(const Message& message1,
549                                  const Message& message2,
550                                  std::vector<SpecificField>* parent_fields) {
551   const Descriptor* descriptor1 = message1.GetDescriptor();
552   const Descriptor* descriptor2 = message2.GetDescriptor();
553   if (descriptor1 != descriptor2) {
554     GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
555                 << "descriptors. " << descriptor1->full_name() << " vs "
556                 << descriptor2->full_name();
557     return false;
558   }
559   // Expand google.protobuf.Any payload if possible.
560   if (descriptor1->full_name() == internal::kAnyFullTypeName) {
561     std::unique_ptr<Message> data1;
562     std::unique_ptr<Message> data2;
563     if (UnpackAny(message1, &data1) && UnpackAny(message2, &data2)) {
564       // Avoid DFATAL for different descriptors in google.protobuf.Any payloads.
565       if (data1->GetDescriptor() != data2->GetDescriptor()) {
566         return false;
567       }
568       return Compare(*data1, *data2, parent_fields);
569     }
570   }
571   const Reflection* reflection1 = message1.GetReflection();
572   const Reflection* reflection2 = message2.GetReflection();
573 
574   bool unknown_compare_result = true;
575   // Ignore unknown fields in EQUIVALENT mode
576   if (message_field_comparison_ != EQUIVALENT) {
577     const UnknownFieldSet& unknown_field_set1 =
578         reflection1->GetUnknownFields(message1);
579     const UnknownFieldSet& unknown_field_set2 =
580         reflection2->GetUnknownFields(message2);
581     if (!CompareUnknownFields(message1, message2, unknown_field_set1,
582                               unknown_field_set2, parent_fields)) {
583       if (reporter_ == NULL) {
584         return false;
585       }
586       unknown_compare_result = false;
587     }
588   }
589 
590   FieldDescriptorArray message1_fields = RetrieveFields(message1, true);
591   FieldDescriptorArray message2_fields = RetrieveFields(message2, false);
592 
593   return CompareRequestedFieldsUsingSettings(
594       message1, message2,
595       message1_fields, message2_fields,
596       parent_fields) && unknown_compare_result;
597 }
598 
RetrieveFields(const Message & message,bool base_message)599 FieldDescriptorArray MessageDifferencer::RetrieveFields(const Message& message,
600                                                         bool base_message) {
601   const Descriptor* descriptor = message.GetDescriptor();
602 
603   tmp_message_fields_.clear();
604   tmp_message_fields_.reserve(descriptor->field_count() + 1);
605 
606   const Reflection* reflection = message.GetReflection();
607   if (descriptor->options().map_entry()) {
608     if (this->scope_ == PARTIAL && base_message) {
609       reflection->ListFields(message, &tmp_message_fields_);
610     } else {
611       // Map entry fields are always considered present.
612       for (int i = 0; i < descriptor->field_count(); i++) {
613         tmp_message_fields_.push_back(descriptor->field(i));
614       }
615     }
616   } else {
617     reflection->ListFields(message, &tmp_message_fields_);
618   }
619   // Add sentinel values to deal with the
620   // case where the number of the fields in
621   // each list are different.
622   tmp_message_fields_.push_back(nullptr);
623 
624   FieldDescriptorArray message_fields(tmp_message_fields_.begin(),
625                                       tmp_message_fields_.end());
626 
627   return message_fields;
628 }
629 
CompareRequestedFieldsUsingSettings(const Message & message1,const Message & message2,const FieldDescriptorArray & message1_fields,const FieldDescriptorArray & message2_fields,std::vector<SpecificField> * parent_fields)630 bool MessageDifferencer::CompareRequestedFieldsUsingSettings(
631     const Message& message1, const Message& message2,
632     const FieldDescriptorArray& message1_fields,
633     const FieldDescriptorArray& message2_fields,
634     std::vector<SpecificField>* parent_fields) {
635   if (scope_ == FULL) {
636     if (message_field_comparison_ == EQUIVALENT) {
637       // We need to merge the field lists of both messages (i.e.
638       // we are merely checking for a difference in field values,
639       // rather than the addition or deletion of fields).
640       FieldDescriptorArray fields_union =
641           CombineFields(message1_fields, FULL, message2_fields, FULL);
642       return CompareWithFieldsInternal(message1, message2, fields_union,
643                                        fields_union, parent_fields);
644     } else {
645       // Simple equality comparison, use the unaltered field lists.
646       return CompareWithFieldsInternal(message1, message2, message1_fields,
647                                        message2_fields, parent_fields);
648     }
649   } else {
650     if (message_field_comparison_ == EQUIVALENT) {
651       // We use the list of fields for message1 for both messages when
652       // comparing.  This way, extra fields in message2 are ignored,
653       // and missing fields in message2 use their default value.
654       return CompareWithFieldsInternal(message1, message2, message1_fields,
655                                        message1_fields, parent_fields);
656     } else {
657       // We need to consider the full list of fields for message1
658       // but only the intersection for message2.  This way, any fields
659       // only present in message2 will be ignored, but any fields only
660       // present in message1 will be marked as a difference.
661       FieldDescriptorArray fields_intersection =
662           CombineFields(message1_fields, PARTIAL, message2_fields, PARTIAL);
663       return CompareWithFieldsInternal(message1, message2, message1_fields,
664                                        fields_intersection, parent_fields);
665     }
666   }
667 }
668 
CombineFields(const FieldDescriptorArray & fields1,Scope fields1_scope,const FieldDescriptorArray & fields2,Scope fields2_scope)669 FieldDescriptorArray MessageDifferencer::CombineFields(
670     const FieldDescriptorArray& fields1, Scope fields1_scope,
671     const FieldDescriptorArray& fields2, Scope fields2_scope) {
672   int index1 = 0;
673   int index2 = 0;
674 
675   tmp_message_fields_.clear();
676 
677   while (index1 < fields1.size() && index2 < fields2.size()) {
678     const FieldDescriptor* field1 = fields1[index1];
679     const FieldDescriptor* field2 = fields2[index2];
680 
681     if (FieldBefore(field1, field2)) {
682       if (fields1_scope == FULL) {
683         tmp_message_fields_.push_back(fields1[index1]);
684       }
685       ++index1;
686     } else if (FieldBefore(field2, field1)) {
687       if (fields2_scope == FULL) {
688         tmp_message_fields_.push_back(fields2[index2]);
689       }
690       ++index2;
691     } else {
692       tmp_message_fields_.push_back(fields1[index1]);
693       ++index1;
694       ++index2;
695     }
696   }
697 
698   tmp_message_fields_.push_back(nullptr);
699 
700   FieldDescriptorArray combined_fields(tmp_message_fields_.begin(),
701                                        tmp_message_fields_.end());
702 
703   return combined_fields;
704 }
705 
CompareWithFieldsInternal(const Message & message1,const Message & message2,const FieldDescriptorArray & message1_fields,const FieldDescriptorArray & message2_fields,std::vector<SpecificField> * parent_fields)706 bool MessageDifferencer::CompareWithFieldsInternal(
707     const Message& message1, const Message& message2,
708     const FieldDescriptorArray& message1_fields,
709     const FieldDescriptorArray& message2_fields,
710     std::vector<SpecificField>* parent_fields) {
711   bool isDifferent = false;
712   int field_index1 = 0;
713   int field_index2 = 0;
714 
715   const Reflection* reflection1 = message1.GetReflection();
716   const Reflection* reflection2 = message2.GetReflection();
717 
718   while (true) {
719     const FieldDescriptor* field1 = message1_fields[field_index1];
720     const FieldDescriptor* field2 = message2_fields[field_index2];
721 
722     // Once we have reached sentinel values, we are done the comparison.
723     if (field1 == NULL && field2 == NULL) {
724       break;
725     }
726 
727     // Check for differences in the field itself.
728     if (FieldBefore(field1, field2)) {
729       // Field 1 is not in the field list for message 2.
730       if (IsIgnored(message1, message2, field1, *parent_fields)) {
731         // We are ignoring field1. Report the ignore and move on to
732         // the next field in message1_fields.
733         if (reporter_ != NULL) {
734           SpecificField specific_field;
735           specific_field.field = field1;
736 
737           parent_fields->push_back(specific_field);
738           if (report_ignores_) {
739             reporter_->ReportIgnored(message1, message2, *parent_fields);
740           }
741           parent_fields->pop_back();
742         }
743         ++field_index1;
744         continue;
745       }
746 
747       if (reporter_ != NULL) {
748         assert(field1 != NULL);
749         int count = field1->is_repeated()
750                         ? reflection1->FieldSize(message1, field1)
751                         : 1;
752 
753         for (int i = 0; i < count; ++i) {
754           SpecificField specific_field;
755           specific_field.field = field1;
756           specific_field.index = field1->is_repeated() ? i : -1;
757 
758           parent_fields->push_back(specific_field);
759           reporter_->ReportDeleted(message1, message2, *parent_fields);
760           parent_fields->pop_back();
761         }
762 
763         isDifferent = true;
764       } else {
765         return false;
766       }
767 
768       ++field_index1;
769       continue;
770     } else if (FieldBefore(field2, field1)) {
771       // Field 2 is not in the field list for message 1.
772       if (IsIgnored(message1, message2, field2, *parent_fields)) {
773         // We are ignoring field2. Report the ignore and move on to
774         // the next field in message2_fields.
775         if (reporter_ != NULL) {
776           SpecificField specific_field;
777           specific_field.field = field2;
778 
779           parent_fields->push_back(specific_field);
780           if (report_ignores_) {
781             reporter_->ReportIgnored(message1, message2, *parent_fields);
782           }
783           parent_fields->pop_back();
784         }
785         ++field_index2;
786         continue;
787       }
788 
789       if (reporter_ != NULL) {
790         int count = field2->is_repeated()
791                         ? reflection2->FieldSize(message2, field2)
792                         : 1;
793 
794         for (int i = 0; i < count; ++i) {
795           SpecificField specific_field;
796           specific_field.field = field2;
797           specific_field.index = field2->is_repeated() ? i : -1;
798           specific_field.new_index = specific_field.index;
799 
800           parent_fields->push_back(specific_field);
801           reporter_->ReportAdded(message1, message2, *parent_fields);
802           parent_fields->pop_back();
803         }
804 
805         isDifferent = true;
806       } else {
807         return false;
808       }
809 
810       ++field_index2;
811       continue;
812     }
813 
814     // By this point, field1 and field2 are guaranteed to point to the same
815     // field, so we can now compare the values.
816     if (IsIgnored(message1, message2, field1, *parent_fields)) {
817       // Ignore this field. Report and move on.
818       if (reporter_ != NULL) {
819         SpecificField specific_field;
820         specific_field.field = field1;
821 
822         parent_fields->push_back(specific_field);
823         if (report_ignores_) {
824           reporter_->ReportIgnored(message1, message2, *parent_fields);
825         }
826         parent_fields->pop_back();
827       }
828 
829       ++field_index1;
830       ++field_index2;
831       continue;
832     }
833 
834     bool fieldDifferent = false;
835     assert(field1 != NULL);
836     if (field1->is_repeated()) {
837       fieldDifferent =
838           !CompareRepeatedField(message1, message2, field1, parent_fields);
839       if (fieldDifferent) {
840         if (reporter_ == NULL) return false;
841         isDifferent = true;
842       }
843     } else {
844       fieldDifferent = !CompareFieldValueUsingParentFields(
845           message1, message2, field1, -1, -1, parent_fields);
846 
847       // If we have found differences, either report them or terminate if
848       // no reporter is present.
849       if (fieldDifferent && reporter_ == NULL) {
850         return false;
851       }
852 
853       if (reporter_ != NULL) {
854         SpecificField specific_field;
855         specific_field.field = field1;
856         parent_fields->push_back(specific_field);
857         if (fieldDifferent) {
858           reporter_->ReportModified(message1, message2, *parent_fields);
859           isDifferent = true;
860         } else if (report_matches_) {
861           reporter_->ReportMatched(message1, message2, *parent_fields);
862         }
863         parent_fields->pop_back();
864       }
865     }
866     // Increment the field indices.
867     ++field_index1;
868     ++field_index2;
869   }
870 
871   return !isDifferent;
872 }
873 
IsMatch(const FieldDescriptor * repeated_field,const MapKeyComparator * key_comparator,const Message * message1,const Message * message2,const std::vector<SpecificField> & parent_fields,Reporter * reporter,int index1,int index2)874 bool MessageDifferencer::IsMatch(
875     const FieldDescriptor* repeated_field,
876     const MapKeyComparator* key_comparator, const Message* message1,
877     const Message* message2, const std::vector<SpecificField>& parent_fields,
878     Reporter* reporter, int index1, int index2) {
879   std::vector<SpecificField> current_parent_fields(parent_fields);
880   if (repeated_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
881     return CompareFieldValueUsingParentFields(*message1, *message2,
882                                               repeated_field, index1, index2,
883                                               &current_parent_fields);
884   }
885   // Back up the Reporter and output_string_.  They will be reset in the
886   // following code.
887   Reporter* backup_reporter = reporter_;
888   std::string* output_string = output_string_;
889   reporter_ = reporter;
890   output_string_ = NULL;
891   bool match;
892 
893   if (key_comparator == NULL) {
894     match = CompareFieldValueUsingParentFields(*message1, *message2,
895                                                repeated_field, index1, index2,
896                                                &current_parent_fields);
897   } else {
898     const Reflection* reflection1 = message1->GetReflection();
899     const Reflection* reflection2 = message2->GetReflection();
900     const Message& m1 =
901         reflection1->GetRepeatedMessage(*message1, repeated_field, index1);
902     const Message& m2 =
903         reflection2->GetRepeatedMessage(*message2, repeated_field, index2);
904     SpecificField specific_field;
905     specific_field.field = repeated_field;
906     specific_field.index = index1;
907     specific_field.new_index = index2;
908     current_parent_fields.push_back(specific_field);
909     match = key_comparator->IsMatch(m1, m2, current_parent_fields);
910   }
911 
912   reporter_ = backup_reporter;
913   output_string_ = output_string;
914   return match;
915 }
916 
CompareMapFieldByMapReflection(const Message & message1,const Message & message2,const FieldDescriptor * map_field,std::vector<SpecificField> * parent_fields,DefaultFieldComparator * comparator)917 bool MessageDifferencer::CompareMapFieldByMapReflection(
918     const Message& message1, const Message& message2,
919     const FieldDescriptor* map_field, std::vector<SpecificField>* parent_fields,
920     DefaultFieldComparator* comparator) {
921   const Reflection* reflection1 = message1.GetReflection();
922   const Reflection* reflection2 = message2.GetReflection();
923   const int count1 = reflection1->MapSize(message1, map_field);
924   const int count2 = reflection2->MapSize(message2, map_field);
925   const bool treated_as_subset = IsTreatedAsSubset(map_field);
926   if (count1 != count2 && !treated_as_subset) {
927     return false;
928   }
929   if (count1 > count2) {
930     return false;
931   }
932   const FieldDescriptor* val_des = map_field->message_type()->map_value();
933   switch (val_des->cpp_type()) {
934 #define HANDLE_TYPE(CPPTYPE, METHOD, COMPAREMETHOD)                           \
935   case FieldDescriptor::CPPTYPE_##CPPTYPE: {                                  \
936     for (MapIterator it = reflection1->MapBegin(                              \
937              const_cast<Message*>(&message1), map_field);                     \
938          it !=                                                                \
939          reflection1->MapEnd(const_cast<Message*>(&message1), map_field);     \
940          ++it) {                                                              \
941       if (!reflection2->ContainsMapKey(message2, map_field, it.GetKey())) {   \
942         return false;                                                         \
943       }                                                                       \
944       MapValueConstRef value2;                                                \
945       reflection2->LookupMapValue(message2, map_field, it.GetKey(), &value2); \
946       if (!comparator->Compare##COMPAREMETHOD(*val_des,                       \
947                                               it.GetValueRef().Get##METHOD(), \
948                                               value2.Get##METHOD())) {        \
949         return false;                                                         \
950       }                                                                       \
951     }                                                                         \
952     break;                                                                    \
953   }
954     HANDLE_TYPE(INT32, Int32Value, Int32);
955     HANDLE_TYPE(INT64, Int64Value, Int64);
956     HANDLE_TYPE(UINT32, UInt32Value, UInt32);
957     HANDLE_TYPE(UINT64, UInt64Value, UInt64);
958     HANDLE_TYPE(DOUBLE, DoubleValue, Double);
959     HANDLE_TYPE(FLOAT, FloatValue, Float);
960     HANDLE_TYPE(BOOL, BoolValue, Bool);
961     HANDLE_TYPE(STRING, StringValue, String);
962     HANDLE_TYPE(ENUM, EnumValue, Int32);
963 #undef HANDLE_TYPE
964     case FieldDescriptor::CPPTYPE_MESSAGE: {
965       for (MapIterator it = reflection1->MapBegin(
966                const_cast<Message*>(&message1), map_field);
967            it !=
968            reflection1->MapEnd(const_cast<Message*>(&message1), map_field);
969            ++it) {
970         if (!reflection2->ContainsMapKey(message2, map_field, it.GetKey())) {
971           return false;
972         }
973         bool compare_result;
974         MapValueConstRef value2;
975         reflection2->LookupMapValue(message2, map_field, it.GetKey(), &value2);
976         // Append currently compared field to the end of parent_fields.
977         SpecificField specific_value_field;
978         specific_value_field.field = val_des;
979         parent_fields->push_back(specific_value_field);
980         compare_result = Compare(it.GetValueRef().GetMessageValue(),
981                                  value2.GetMessageValue(), parent_fields);
982         parent_fields->pop_back();
983         if (!compare_result) {
984           return false;
985         }
986       }
987       break;
988     }
989   }
990   return true;
991 }
992 
CompareRepeatedField(const Message & message1,const Message & message2,const FieldDescriptor * repeated_field,std::vector<SpecificField> * parent_fields)993 bool MessageDifferencer::CompareRepeatedField(
994     const Message& message1, const Message& message2,
995     const FieldDescriptor* repeated_field,
996     std::vector<SpecificField>* parent_fields) {
997   // the input FieldDescriptor is guaranteed to be repeated field.
998   const Reflection* reflection1 = message1.GetReflection();
999   const Reflection* reflection2 = message2.GetReflection();
1000 
1001   // When both map fields are on map, do not sync to repeated field.
1002   // TODO(jieluo): Add support for reporter
1003   if (repeated_field->is_map() && reporter_ == nullptr &&
1004       // Users didn't set custom map field key comparator
1005       map_field_key_comparator_.find(repeated_field) ==
1006           map_field_key_comparator_.end() &&
1007       // Users didn't set repeated field comparison
1008       repeated_field_comparison_ == AS_LIST) {
1009     DefaultFieldComparator* map_field_comparator =
1010         field_comparator_ ? nullptr : &default_field_comparator_;
1011 #if PROTOBUF_RTTI
1012     // Inherit class from DefaultFieldComparator can not get the benefit
1013     // because DefaultFieldComparator::Compare() method might be overwrote.
1014     if (field_comparator_ &&
1015         typeid(*field_comparator_) == typeid(default_field_comparator_)) {
1016       map_field_comparator =
1017           static_cast<DefaultFieldComparator*>(field_comparator_);
1018     }
1019 #endif
1020     if (map_field_comparator) {
1021       const FieldDescriptor* key_des =
1022           repeated_field->message_type()->map_key();
1023       const FieldDescriptor* val_des =
1024           repeated_field->message_type()->map_value();
1025       const internal::MapFieldBase* map_field1 =
1026           reflection1->GetMapData(message1, repeated_field);
1027       const internal::MapFieldBase* map_field2 =
1028           reflection2->GetMapData(message2, repeated_field);
1029       std::vector<SpecificField> current_parent_fields(*parent_fields);
1030       SpecificField specific_field;
1031       specific_field.field = repeated_field;
1032       current_parent_fields.push_back(specific_field);
1033       if (map_field1->IsMapValid() && map_field2->IsMapValid() &&
1034           !IsIgnored(message1, message2, key_des, current_parent_fields) &&
1035           !IsIgnored(message1, message2, val_des, current_parent_fields)) {
1036         return CompareMapFieldByMapReflection(
1037             message1, message2, repeated_field, &current_parent_fields,
1038             map_field_comparator);
1039       }
1040     }
1041   }
1042 
1043   const int count1 = reflection1->FieldSize(message1, repeated_field);
1044   const int count2 = reflection2->FieldSize(message2, repeated_field);
1045   const bool treated_as_subset = IsTreatedAsSubset(repeated_field);
1046 
1047   // If the field is not treated as subset and no detailed reports is needed,
1048   // we do a quick check on the number of the elements to avoid unnecessary
1049   // comparison.
1050   if (count1 != count2 && reporter_ == NULL && !treated_as_subset) {
1051     return false;
1052   }
1053   // A match can never be found if message1 has more items than message2.
1054   if (count1 > count2 && reporter_ == NULL) {
1055     return false;
1056   }
1057 
1058   // These two list are used for store the index of the correspondent
1059   // element in peer repeated field.
1060   std::vector<int> match_list1;
1061   std::vector<int> match_list2;
1062 
1063   const MapKeyComparator* key_comparator = GetMapKeyComparator(repeated_field);
1064   bool smart_list = IsTreatedAsSmartList(repeated_field);
1065   bool simple_list = key_comparator == nullptr &&
1066                      !IsTreatedAsSet(repeated_field) &&
1067                      !IsTreatedAsSmartSet(repeated_field) && !smart_list;
1068 
1069   // For simple lists, we avoid matching repeated field indices, saving the
1070   // memory allocations that would otherwise be needed for match_list1 and
1071   // match_list2.
1072   if (!simple_list) {
1073     // Try to match indices of the repeated fields. Return false if match fails.
1074     if (!MatchRepeatedFieldIndices(message1, message2, repeated_field,
1075                                    key_comparator, *parent_fields, &match_list1,
1076                                    &match_list2) &&
1077         reporter_ == nullptr) {
1078       return false;
1079     }
1080   }
1081 
1082   bool fieldDifferent = false;
1083   SpecificField specific_field;
1084   specific_field.field = repeated_field;
1085 
1086   // At this point, we have already matched pairs of fields (with the reporting
1087   // to be done later). Now to check if the paired elements are different.
1088   int next_unmatched_index = 0;
1089   for (int i = 0; i < count1; i++) {
1090     if (simple_list && i >= count2) {
1091       break;
1092     }
1093     if (!simple_list && match_list1[i] == -1) {
1094       if (smart_list) {
1095         if (reporter_ == nullptr) return false;
1096         specific_field.index = i;
1097         parent_fields->push_back(specific_field);
1098         reporter_->ReportDeleted(message1, message2, *parent_fields);
1099         parent_fields->pop_back();
1100         fieldDifferent = true;
1101         // Use -2 to mark this element has been reported.
1102         match_list1[i] = -2;
1103       }
1104       continue;
1105     }
1106     if (smart_list) {
1107       for (int j = next_unmatched_index; j < match_list1[i]; ++j) {
1108         GOOGLE_CHECK_LE(0, j);
1109         if (reporter_ == nullptr) return false;
1110         specific_field.index = j;
1111         specific_field.new_index = j;
1112         parent_fields->push_back(specific_field);
1113         reporter_->ReportAdded(message1, message2, *parent_fields);
1114         parent_fields->pop_back();
1115         fieldDifferent = true;
1116         // Use -2 to mark this element has been reported.
1117         match_list2[j] = -2;
1118       }
1119     }
1120     specific_field.index = i;
1121     if (simple_list) {
1122       specific_field.new_index = i;
1123     } else {
1124       specific_field.new_index = match_list1[i];
1125       next_unmatched_index = match_list1[i] + 1;
1126     }
1127 
1128     const bool result = CompareFieldValueUsingParentFields(
1129         message1, message2, repeated_field, i, specific_field.new_index,
1130         parent_fields);
1131 
1132     // If we have found differences, either report them or terminate if
1133     // no reporter is present. Note that ReportModified, ReportMoved, and
1134     // ReportMatched are all mutually exclusive.
1135     if (!result) {
1136       if (reporter_ == NULL) return false;
1137       parent_fields->push_back(specific_field);
1138       reporter_->ReportModified(message1, message2, *parent_fields);
1139       parent_fields->pop_back();
1140       fieldDifferent = true;
1141     } else if (reporter_ != NULL &&
1142                specific_field.index != specific_field.new_index &&
1143                !specific_field.field->is_map() && report_moves_) {
1144       parent_fields->push_back(specific_field);
1145       reporter_->ReportMoved(message1, message2, *parent_fields);
1146       parent_fields->pop_back();
1147     } else if (report_matches_ && reporter_ != NULL) {
1148       parent_fields->push_back(specific_field);
1149       reporter_->ReportMatched(message1, message2, *parent_fields);
1150       parent_fields->pop_back();
1151     }
1152   }
1153 
1154   // Report any remaining additions or deletions.
1155   for (int i = 0; i < count2; ++i) {
1156     if (!simple_list && match_list2[i] != -1) continue;
1157     if (simple_list && i < count1) continue;
1158     if (!treated_as_subset) {
1159       fieldDifferent = true;
1160     }
1161 
1162     if (reporter_ == NULL) continue;
1163     specific_field.index = i;
1164     specific_field.new_index = i;
1165     parent_fields->push_back(specific_field);
1166     reporter_->ReportAdded(message1, message2, *parent_fields);
1167     parent_fields->pop_back();
1168   }
1169 
1170   for (int i = 0; i < count1; ++i) {
1171     if (!simple_list && match_list1[i] != -1) continue;
1172     if (simple_list && i < count2) continue;
1173     assert(reporter_ != NULL);
1174     specific_field.index = i;
1175     parent_fields->push_back(specific_field);
1176     reporter_->ReportDeleted(message1, message2, *parent_fields);
1177     parent_fields->pop_back();
1178     fieldDifferent = true;
1179   }
1180   return !fieldDifferent;
1181 }
1182 
CompareFieldValue(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2)1183 bool MessageDifferencer::CompareFieldValue(const Message& message1,
1184                                            const Message& message2,
1185                                            const FieldDescriptor* field,
1186                                            int index1, int index2) {
1187   return CompareFieldValueUsingParentFields(message1, message2, field, index1,
1188                                             index2, NULL);
1189 }
1190 
CompareFieldValueUsingParentFields(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2,std::vector<SpecificField> * parent_fields)1191 bool MessageDifferencer::CompareFieldValueUsingParentFields(
1192     const Message& message1, const Message& message2,
1193     const FieldDescriptor* field, int index1, int index2,
1194     std::vector<SpecificField>* parent_fields) {
1195   FieldContext field_context(parent_fields);
1196   FieldComparator::ComparisonResult result = GetFieldComparisonResult(
1197       message1, message2, field, index1, index2, &field_context);
1198 
1199   if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
1200       result == FieldComparator::RECURSE) {
1201     // Get the nested messages and compare them using one of the Compare
1202     // methods.
1203     const Reflection* reflection1 = message1.GetReflection();
1204     const Reflection* reflection2 = message2.GetReflection();
1205     const Message& m1 =
1206         field->is_repeated()
1207             ? reflection1->GetRepeatedMessage(message1, field, index1)
1208             : reflection1->GetMessage(message1, field);
1209     const Message& m2 =
1210         field->is_repeated()
1211             ? reflection2->GetRepeatedMessage(message2, field, index2)
1212             : reflection2->GetMessage(message2, field);
1213 
1214     // parent_fields is used in calls to Reporter methods.
1215     if (parent_fields != NULL) {
1216       // Append currently compared field to the end of parent_fields.
1217       SpecificField specific_field;
1218       specific_field.field = field;
1219       specific_field.index = index1;
1220       specific_field.new_index = index2;
1221       parent_fields->push_back(specific_field);
1222       const bool compare_result = Compare(m1, m2, parent_fields);
1223       parent_fields->pop_back();
1224       return compare_result;
1225     } else {
1226       // Recreates parent_fields as if m1 and m2 had no parents.
1227       return Compare(m1, m2);
1228     }
1229   } else {
1230     return (result == FieldComparator::SAME);
1231   }
1232 }
1233 
CheckPathChanged(const std::vector<SpecificField> & field_path)1234 bool MessageDifferencer::CheckPathChanged(
1235     const std::vector<SpecificField>& field_path) {
1236   for (const SpecificField& specific_field : field_path) {
1237     // Don't check indexes for map entries -- maps are unordered.
1238     if (specific_field.field != nullptr && specific_field.field->is_map())
1239       continue;
1240     if (specific_field.index != specific_field.new_index) return true;
1241   }
1242   return false;
1243 }
1244 
IsTreatedAsSet(const FieldDescriptor * field)1245 bool MessageDifferencer::IsTreatedAsSet(const FieldDescriptor* field) {
1246   if (!field->is_repeated()) return false;
1247   if (repeated_field_comparisons_.find(field) !=
1248       repeated_field_comparisons_.end()) {
1249     return repeated_field_comparisons_[field] == AS_SET;
1250   }
1251   return GetMapKeyComparator(field) == nullptr &&
1252          repeated_field_comparison_ == AS_SET;
1253 }
1254 
IsTreatedAsSmartSet(const FieldDescriptor * field)1255 bool MessageDifferencer::IsTreatedAsSmartSet(const FieldDescriptor* field) {
1256   if (!field->is_repeated()) return false;
1257   if (repeated_field_comparisons_.find(field) !=
1258       repeated_field_comparisons_.end()) {
1259     return repeated_field_comparisons_[field] == AS_SMART_SET;
1260   }
1261   return GetMapKeyComparator(field) == nullptr &&
1262          repeated_field_comparison_ == AS_SMART_SET;
1263 }
1264 
IsTreatedAsSmartList(const FieldDescriptor * field)1265 bool MessageDifferencer::IsTreatedAsSmartList(const FieldDescriptor* field) {
1266   if (!field->is_repeated()) return false;
1267   if (repeated_field_comparisons_.find(field) !=
1268       repeated_field_comparisons_.end()) {
1269     return repeated_field_comparisons_[field] == AS_SMART_LIST;
1270   }
1271   return GetMapKeyComparator(field) == nullptr &&
1272          repeated_field_comparison_ == AS_SMART_LIST;
1273 }
1274 
IsTreatedAsSubset(const FieldDescriptor * field)1275 bool MessageDifferencer::IsTreatedAsSubset(const FieldDescriptor* field) {
1276   return scope_ == PARTIAL &&
1277          (IsTreatedAsSet(field) || GetMapKeyComparator(field) != NULL);
1278 }
1279 
IsIgnored(const Message & message1,const Message & message2,const FieldDescriptor * field,const std::vector<SpecificField> & parent_fields)1280 bool MessageDifferencer::IsIgnored(
1281     const Message& message1, const Message& message2,
1282     const FieldDescriptor* field,
1283     const std::vector<SpecificField>& parent_fields) {
1284   if (ignored_fields_.find(field) != ignored_fields_.end()) {
1285     return true;
1286   }
1287   for (IgnoreCriteria* criteria : ignore_criteria_) {
1288     if (criteria->IsIgnored(message1, message2, field, parent_fields)) {
1289       return true;
1290     }
1291   }
1292   return false;
1293 }
1294 
IsUnknownFieldIgnored(const Message & message1,const Message & message2,const SpecificField & field,const std::vector<SpecificField> & parent_fields)1295 bool MessageDifferencer::IsUnknownFieldIgnored(
1296     const Message& message1, const Message& message2,
1297     const SpecificField& field,
1298     const std::vector<SpecificField>& parent_fields) {
1299   for (IgnoreCriteria* criteria : ignore_criteria_) {
1300     if (criteria->IsUnknownFieldIgnored(message1, message2, field,
1301                                         parent_fields)) {
1302       return true;
1303     }
1304   }
1305   return false;
1306 }
1307 
1308 const MessageDifferencer::MapKeyComparator*
GetMapKeyComparator(const FieldDescriptor * field) const1309 MessageDifferencer ::GetMapKeyComparator(const FieldDescriptor* field) const {
1310   if (!field->is_repeated()) return NULL;
1311   FieldKeyComparatorMap::const_iterator it =
1312       map_field_key_comparator_.find(field);
1313   if (it != map_field_key_comparator_.end()) {
1314     return it->second;
1315   }
1316   if (field->is_map()) {
1317     // field cannot already be treated as list or set since TreatAsList() and
1318     // TreatAsSet() call GetMapKeyComparator() and fail if it returns non-NULL.
1319     return &map_entry_key_comparator_;
1320   }
1321   return NULL;
1322 }
1323 
1324 namespace {
1325 
1326 typedef std::pair<int, const UnknownField*> IndexUnknownFieldPair;
1327 
1328 struct UnknownFieldOrdering {
operator ()google::protobuf::util::__anon0b44c3070111::UnknownFieldOrdering1329   inline bool operator()(const IndexUnknownFieldPair& a,
1330                          const IndexUnknownFieldPair& b) const {
1331     if (a.second->number() < b.second->number()) return true;
1332     if (a.second->number() > b.second->number()) return false;
1333     return a.second->type() < b.second->type();
1334   }
1335 };
1336 
1337 }  // namespace
1338 
UnpackAny(const Message & any,std::unique_ptr<Message> * data)1339 bool MessageDifferencer::UnpackAny(const Message& any,
1340                                    std::unique_ptr<Message>* data) {
1341   const Reflection* reflection = any.GetReflection();
1342   const FieldDescriptor* type_url_field;
1343   const FieldDescriptor* value_field;
1344   if (!internal::GetAnyFieldDescriptors(any, &type_url_field, &value_field)) {
1345     return false;
1346   }
1347   const std::string& type_url = reflection->GetString(any, type_url_field);
1348   std::string full_type_name;
1349   if (!internal::ParseAnyTypeUrl(type_url, &full_type_name)) {
1350     return false;
1351   }
1352 
1353   const Descriptor* desc =
1354       any.GetDescriptor()->file()->pool()->FindMessageTypeByName(
1355           full_type_name);
1356   if (desc == NULL) {
1357     GOOGLE_DLOG(ERROR) << "Proto type '" << full_type_name << "' not found";
1358     return false;
1359   }
1360 
1361   if (dynamic_message_factory_ == NULL) {
1362     dynamic_message_factory_.reset(new DynamicMessageFactory());
1363   }
1364   data->reset(dynamic_message_factory_->GetPrototype(desc)->New());
1365   std::string serialized_value = reflection->GetString(any, value_field);
1366   if (!(*data)->ParsePartialFromString(serialized_value)) {
1367     GOOGLE_DLOG(ERROR) << "Failed to parse value for " << full_type_name;
1368     return false;
1369   }
1370   return true;
1371 }
1372 
CompareUnknownFields(const Message & message1,const Message & message2,const UnknownFieldSet & unknown_field_set1,const UnknownFieldSet & unknown_field_set2,std::vector<SpecificField> * parent_field)1373 bool MessageDifferencer::CompareUnknownFields(
1374     const Message& message1, const Message& message2,
1375     const UnknownFieldSet& unknown_field_set1,
1376     const UnknownFieldSet& unknown_field_set2,
1377     std::vector<SpecificField>* parent_field) {
1378   // Ignore unknown fields in EQUIVALENT mode.
1379   if (message_field_comparison_ == EQUIVALENT) return true;
1380 
1381   if (unknown_field_set1.empty() && unknown_field_set2.empty()) {
1382     return true;
1383   }
1384 
1385   bool is_different = false;
1386 
1387   // We first sort the unknown fields by field number and type (in other words,
1388   // in tag order), making sure to preserve ordering of values with the same
1389   // tag.  This allows us to report only meaningful differences between the
1390   // two sets -- that is, differing values for the same tag.  We use
1391   // IndexUnknownFieldPairs to keep track of the field's original index for
1392   // reporting purposes.
1393   std::vector<IndexUnknownFieldPair> fields1;  // unknown_field_set1, sorted
1394   std::vector<IndexUnknownFieldPair> fields2;  // unknown_field_set2, sorted
1395   fields1.reserve(unknown_field_set1.field_count());
1396   fields2.reserve(unknown_field_set2.field_count());
1397 
1398   for (int i = 0; i < unknown_field_set1.field_count(); i++) {
1399     fields1.push_back(std::make_pair(i, &unknown_field_set1.field(i)));
1400   }
1401   for (int i = 0; i < unknown_field_set2.field_count(); i++) {
1402     fields2.push_back(std::make_pair(i, &unknown_field_set2.field(i)));
1403   }
1404 
1405   UnknownFieldOrdering is_before;
1406   std::stable_sort(fields1.begin(), fields1.end(), is_before);
1407   std::stable_sort(fields2.begin(), fields2.end(), is_before);
1408 
1409   // In order to fill in SpecificField::index, we have to keep track of how
1410   // many values we've seen with the same field number and type.
1411   // current_repeated points at the first field in this range, and
1412   // current_repeated_start{1,2} are the indexes of the first field in the
1413   // range within fields1 and fields2.
1414   const UnknownField* current_repeated = NULL;
1415   int current_repeated_start1 = 0;
1416   int current_repeated_start2 = 0;
1417 
1418   // Now that we have two sorted lists, we can detect fields which appear only
1419   // in one list or the other by traversing them simultaneously.
1420   int index1 = 0;
1421   int index2 = 0;
1422   while (index1 < fields1.size() || index2 < fields2.size()) {
1423     enum {
1424       ADDITION,
1425       DELETION,
1426       MODIFICATION,
1427       COMPARE_GROUPS,
1428       NO_CHANGE
1429     } change_type;
1430 
1431     // focus_field is the field we're currently reporting on.  (In the case
1432     // of a modification, it's the field on the left side.)
1433     const UnknownField* focus_field;
1434     bool match = false;
1435 
1436     if (index2 == fields2.size() ||
1437         (index1 < fields1.size() &&
1438          is_before(fields1[index1], fields2[index2]))) {
1439       // fields1[index1] is not present in fields2.
1440       change_type = DELETION;
1441       focus_field = fields1[index1].second;
1442     } else if (index1 == fields1.size() ||
1443                is_before(fields2[index2], fields1[index1])) {
1444       // fields2[index2] is not present in fields1.
1445       if (scope_ == PARTIAL) {
1446         // Ignore.
1447         ++index2;
1448         continue;
1449       }
1450       change_type = ADDITION;
1451       focus_field = fields2[index2].second;
1452     } else {
1453       // Field type and number are the same.  See if the values differ.
1454       change_type = MODIFICATION;
1455       focus_field = fields1[index1].second;
1456 
1457       switch (focus_field->type()) {
1458         case UnknownField::TYPE_VARINT:
1459           match = fields1[index1].second->varint() ==
1460                   fields2[index2].second->varint();
1461           break;
1462         case UnknownField::TYPE_FIXED32:
1463           match = fields1[index1].second->fixed32() ==
1464                   fields2[index2].second->fixed32();
1465           break;
1466         case UnknownField::TYPE_FIXED64:
1467           match = fields1[index1].second->fixed64() ==
1468                   fields2[index2].second->fixed64();
1469           break;
1470         case UnknownField::TYPE_LENGTH_DELIMITED:
1471           match = fields1[index1].second->length_delimited() ==
1472                   fields2[index2].second->length_delimited();
1473           break;
1474         case UnknownField::TYPE_GROUP:
1475           // We must deal with this later, after building the SpecificField.
1476           change_type = COMPARE_GROUPS;
1477           break;
1478       }
1479       if (match && change_type != COMPARE_GROUPS) {
1480         change_type = NO_CHANGE;
1481       }
1482     }
1483 
1484     if (current_repeated == NULL ||
1485         focus_field->number() != current_repeated->number() ||
1486         focus_field->type() != current_repeated->type()) {
1487       // We've started a new repeated field.
1488       current_repeated = focus_field;
1489       current_repeated_start1 = index1;
1490       current_repeated_start2 = index2;
1491     }
1492 
1493     if (change_type == NO_CHANGE && reporter_ == NULL) {
1494       // Fields were already compared and matched and we have no reporter.
1495       ++index1;
1496       ++index2;
1497       continue;
1498     }
1499 
1500     // Build the SpecificField.  This is slightly complicated.
1501     SpecificField specific_field;
1502     specific_field.unknown_field_number = focus_field->number();
1503     specific_field.unknown_field_type = focus_field->type();
1504 
1505     specific_field.unknown_field_set1 = &unknown_field_set1;
1506     specific_field.unknown_field_set2 = &unknown_field_set2;
1507 
1508     if (change_type != ADDITION) {
1509       specific_field.unknown_field_index1 = fields1[index1].first;
1510     }
1511     if (change_type != DELETION) {
1512       specific_field.unknown_field_index2 = fields2[index2].first;
1513     }
1514 
1515     // Calculate the field index.
1516     if (change_type == ADDITION) {
1517       specific_field.index = index2 - current_repeated_start2;
1518       specific_field.new_index = index2 - current_repeated_start2;
1519     } else {
1520       specific_field.index = index1 - current_repeated_start1;
1521       specific_field.new_index = index2 - current_repeated_start2;
1522     }
1523 
1524     if (IsUnknownFieldIgnored(message1, message2, specific_field,
1525                               *parent_field)) {
1526       if (reporter_ != NULL) {
1527         parent_field->push_back(specific_field);
1528         reporter_->ReportUnknownFieldIgnored(message1, message2, *parent_field);
1529         parent_field->pop_back();
1530       }
1531       return true;
1532     }
1533 
1534     if (change_type == ADDITION || change_type == DELETION ||
1535         change_type == MODIFICATION) {
1536       if (reporter_ == NULL) {
1537         // We found a difference and we have no reporter.
1538         return false;
1539       }
1540       is_different = true;
1541     }
1542 
1543     parent_field->push_back(specific_field);
1544 
1545     switch (change_type) {
1546       case ADDITION:
1547         reporter_->ReportAdded(message1, message2, *parent_field);
1548         ++index2;
1549         break;
1550       case DELETION:
1551         reporter_->ReportDeleted(message1, message2, *parent_field);
1552         ++index1;
1553         break;
1554       case MODIFICATION:
1555         reporter_->ReportModified(message1, message2, *parent_field);
1556         ++index1;
1557         ++index2;
1558         break;
1559       case COMPARE_GROUPS:
1560         if (!CompareUnknownFields(
1561                 message1, message2, fields1[index1].second->group(),
1562                 fields2[index2].second->group(), parent_field)) {
1563           if (reporter_ == NULL) return false;
1564           is_different = true;
1565           reporter_->ReportModified(message1, message2, *parent_field);
1566         }
1567         ++index1;
1568         ++index2;
1569         break;
1570       case NO_CHANGE:
1571         ++index1;
1572         ++index2;
1573         if (report_matches_) {
1574           reporter_->ReportMatched(message1, message2, *parent_field);
1575         }
1576     }
1577 
1578     parent_field->pop_back();
1579   }
1580 
1581   return !is_different;
1582 }
1583 
1584 namespace {
1585 
1586 // Find maximum bipartite matching using the argumenting path algorithm.
1587 class MaximumMatcher {
1588  public:
1589   typedef std::function<bool(int, int)> NodeMatchCallback;
1590   // MaximumMatcher takes ownership of the passed in callback and uses it to
1591   // determine whether a node on the left side of the bipartial graph matches
1592   // a node on the right side. count1 is the number of nodes on the left side
1593   // of the graph and count2 to is the number of nodes on the right side.
1594   // Every node is referred to using 0-based indices.
1595   // If a maximum match is found, the result will be stored in match_list1 and
1596   // match_list2. match_list1[i] == j means the i-th node on the left side is
1597   // matched to the j-th node on the right side and match_list2[x] == y means
1598   // the x-th node on the right side is matched to y-th node on the left side.
1599   // match_list1[i] == -1 means the node is not matched. Same with match_list2.
1600   MaximumMatcher(int count1, int count2, NodeMatchCallback callback,
1601                  std::vector<int>* match_list1, std::vector<int>* match_list2);
1602   // Find a maximum match and return the number of matched node pairs.
1603   // If early_return is true, this method will return 0 immediately when it
1604   // finds that not all nodes on the left side can be matched.
1605   int FindMaximumMatch(bool early_return);
1606 
1607  private:
1608   // Determines whether the node on the left side of the bipartial graph
1609   // matches the one on the right side.
1610   bool Match(int left, int right);
1611   // Find an argumenting path starting from the node v on the left side. If a
1612   // path can be found, update match_list2_ to reflect the path and return
1613   // true.
1614   bool FindArgumentPathDFS(int v, std::vector<bool>* visited);
1615 
1616   int count1_;
1617   int count2_;
1618   NodeMatchCallback match_callback_;
1619   std::map<std::pair<int, int>, bool> cached_match_results_;
1620   std::vector<int>* match_list1_;
1621   std::vector<int>* match_list2_;
1622   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MaximumMatcher);
1623 };
1624 
MaximumMatcher(int count1,int count2,NodeMatchCallback callback,std::vector<int> * match_list1,std::vector<int> * match_list2)1625 MaximumMatcher::MaximumMatcher(int count1, int count2,
1626                                NodeMatchCallback callback,
1627                                std::vector<int>* match_list1,
1628                                std::vector<int>* match_list2)
1629     : count1_(count1),
1630       count2_(count2),
1631       match_callback_(std::move(callback)),
1632       match_list1_(match_list1),
1633       match_list2_(match_list2) {
1634   match_list1_->assign(count1, -1);
1635   match_list2_->assign(count2, -1);
1636 }
1637 
FindMaximumMatch(bool early_return)1638 int MaximumMatcher::FindMaximumMatch(bool early_return) {
1639   int result = 0;
1640   for (int i = 0; i < count1_; ++i) {
1641     std::vector<bool> visited(count1_);
1642     if (FindArgumentPathDFS(i, &visited)) {
1643       ++result;
1644     } else if (early_return) {
1645       return 0;
1646     }
1647   }
1648   // Backfill match_list1_ as we only filled match_list2_ when finding
1649   // argumenting paths.
1650   for (int i = 0; i < count2_; ++i) {
1651     if ((*match_list2_)[i] != -1) {
1652       (*match_list1_)[(*match_list2_)[i]] = i;
1653     }
1654   }
1655   return result;
1656 }
1657 
Match(int left,int right)1658 bool MaximumMatcher::Match(int left, int right) {
1659   std::pair<int, int> p(left, right);
1660   std::map<std::pair<int, int>, bool>::iterator it =
1661       cached_match_results_.find(p);
1662   if (it != cached_match_results_.end()) {
1663     return it->second;
1664   }
1665   cached_match_results_[p] = match_callback_(left, right);
1666   return cached_match_results_[p];
1667 }
1668 
FindArgumentPathDFS(int v,std::vector<bool> * visited)1669 bool MaximumMatcher::FindArgumentPathDFS(int v, std::vector<bool>* visited) {
1670   (*visited)[v] = true;
1671   // We try to match those un-matched nodes on the right side first. This is
1672   // the step that the naive greedy matching algorithm uses. In the best cases
1673   // where the greedy algorithm can find a maximum matching, we will always
1674   // find a match in this step and the performance will be identical to the
1675   // greedy algorithm.
1676   for (int i = 0; i < count2_; ++i) {
1677     int matched = (*match_list2_)[i];
1678     if (matched == -1 && Match(v, i)) {
1679       (*match_list2_)[i] = v;
1680       return true;
1681     }
1682   }
1683   // Then we try those already matched nodes and see if we can find an
1684   // alternative match for the node matched to them.
1685   // The greedy algorithm will stop before this and fail to produce the
1686   // correct result.
1687   for (int i = 0; i < count2_; ++i) {
1688     int matched = (*match_list2_)[i];
1689     if (matched != -1 && Match(v, i)) {
1690       if (!(*visited)[matched] && FindArgumentPathDFS(matched, visited)) {
1691         (*match_list2_)[i] = v;
1692         return true;
1693       }
1694     }
1695   }
1696   return false;
1697 }
1698 
1699 }  // namespace
1700 
MatchRepeatedFieldIndices(const Message & message1,const Message & message2,const FieldDescriptor * repeated_field,const MapKeyComparator * key_comparator,const std::vector<SpecificField> & parent_fields,std::vector<int> * match_list1,std::vector<int> * match_list2)1701 bool MessageDifferencer::MatchRepeatedFieldIndices(
1702     const Message& message1, const Message& message2,
1703     const FieldDescriptor* repeated_field,
1704     const MapKeyComparator* key_comparator,
1705     const std::vector<SpecificField>& parent_fields,
1706     std::vector<int>* match_list1, std::vector<int>* match_list2) {
1707   const int count1 =
1708       message1.GetReflection()->FieldSize(message1, repeated_field);
1709   const int count2 =
1710       message2.GetReflection()->FieldSize(message2, repeated_field);
1711   const bool is_treated_as_smart_set = IsTreatedAsSmartSet(repeated_field);
1712 
1713   match_list1->assign(count1, -1);
1714   match_list2->assign(count2, -1);
1715   // Ensure that we don't report differences during the matching process. Since
1716   // field comparators could potentially use this message differencer object to
1717   // perform further comparisons, turn off reporting here and re-enable it
1718   // before returning.
1719   Reporter* reporter = reporter_;
1720   reporter_ = NULL;
1721   NumDiffsReporter num_diffs_reporter;
1722   std::vector<int32> num_diffs_list1;
1723   if (is_treated_as_smart_set) {
1724     num_diffs_list1.assign(count1, kint32max);
1725   }
1726 
1727   bool success = true;
1728   // Find potential match if this is a special repeated field.
1729   if (scope_ == PARTIAL) {
1730     // When partial matching is enabled, Compare(a, b) && Compare(a, c)
1731     // doesn't necessarily imply Compare(b, c). Therefore a naive greedy
1732     // algorithm will fail to find a maximum matching.
1733     // Here we use the augmenting path algorithm.
1734     auto callback = [&](int i1, int i2) {
1735       return IsMatch(repeated_field, key_comparator, &message1, &message2,
1736                      parent_fields, nullptr, i1, i2);
1737     };
1738     MaximumMatcher matcher(count1, count2, std::move(callback), match_list1,
1739                            match_list2);
1740     // If diff info is not needed, we should end the matching process as
1741     // soon as possible if not all items can be matched.
1742     bool early_return = (reporter == nullptr);
1743     int match_count = matcher.FindMaximumMatch(early_return);
1744     if (match_count != count1 && early_return) return false;
1745     success = success && (match_count == count1);
1746   } else {
1747     int start_offset = 0;
1748     // If the two repeated fields are treated as sets, optimize for the case
1749     // where both start with same items stored in the same order.
1750     if (IsTreatedAsSet(repeated_field) || is_treated_as_smart_set ||
1751         IsTreatedAsSmartList(repeated_field)) {
1752       start_offset = std::min(count1, count2);
1753       for (int i = 0; i < count1 && i < count2; i++) {
1754         if (IsMatch(repeated_field, key_comparator, &message1, &message2,
1755                     parent_fields, nullptr, i, i)) {
1756           match_list1->at(i) = i;
1757           match_list2->at(i) = i;
1758         } else {
1759           start_offset = i;
1760           break;
1761         }
1762       }
1763     }
1764     for (int i = start_offset; i < count1; ++i) {
1765       // Indicates any matched elements for this repeated field.
1766       bool match = false;
1767       int matched_j = -1;
1768 
1769       for (int j = start_offset; j < count2; j++) {
1770         if (match_list2->at(j) != -1) {
1771           if (!is_treated_as_smart_set || num_diffs_list1[i] == 0 ||
1772               num_diffs_list1[match_list2->at(j)] == 0) {
1773             continue;
1774           }
1775         }
1776 
1777         if (is_treated_as_smart_set) {
1778           num_diffs_reporter.Reset();
1779           match = IsMatch(repeated_field, key_comparator, &message1, &message2,
1780                           parent_fields, &num_diffs_reporter, i, j);
1781         } else {
1782           match = IsMatch(repeated_field, key_comparator, &message1, &message2,
1783                           parent_fields, nullptr, i, j);
1784         }
1785 
1786         if (is_treated_as_smart_set) {
1787           if (match) {
1788             num_diffs_list1[i] = 0;
1789           } else if (repeated_field->cpp_type() ==
1790                      FieldDescriptor::CPPTYPE_MESSAGE) {
1791             // Replace with the one with fewer diffs.
1792             const int32 num_diffs = num_diffs_reporter.GetNumDiffs();
1793             if (num_diffs < num_diffs_list1[i]) {
1794               // If j has been already matched to some element, ensure the
1795               // current num_diffs is smaller.
1796               if (match_list2->at(j) == -1 ||
1797                   num_diffs < num_diffs_list1[match_list2->at(j)]) {
1798                 num_diffs_list1[i] = num_diffs;
1799                 match = true;
1800               }
1801             }
1802           }
1803         }
1804 
1805         if (match) {
1806           matched_j = j;
1807           if (!is_treated_as_smart_set || num_diffs_list1[i] == 0) {
1808             break;
1809           }
1810         }
1811       }
1812 
1813       match = (matched_j != -1);
1814       if (match) {
1815         if (is_treated_as_smart_set && match_list2->at(matched_j) != -1) {
1816           // This is to revert the previously matched index in list2.
1817           match_list1->at(match_list2->at(matched_j)) = -1;
1818           match = false;
1819         }
1820         match_list1->at(i) = matched_j;
1821         match_list2->at(matched_j) = i;
1822       }
1823       if (!match && reporter == nullptr) return false;
1824       success = success && match;
1825     }
1826   }
1827 
1828   if (IsTreatedAsSmartList(repeated_field)) {
1829     match_indices_for_smart_list_callback_(match_list1, match_list2);
1830   }
1831 
1832   reporter_ = reporter;
1833 
1834   return success;
1835 }
1836 
GetFieldComparisonResult(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2,const FieldContext * field_context)1837 FieldComparator::ComparisonResult MessageDifferencer::GetFieldComparisonResult(
1838     const Message& message1, const Message& message2,
1839     const FieldDescriptor* field, int index1, int index2,
1840     const FieldContext* field_context) {
1841   FieldComparator* comparator = field_comparator_ != NULL
1842                                     ? field_comparator_
1843                                     : &default_field_comparator_;
1844   return comparator->Compare(message1, message2, field, index1, index2,
1845                              field_context);
1846 }
1847 
1848 // ===========================================================================
1849 
Reporter()1850 MessageDifferencer::Reporter::Reporter() {}
~Reporter()1851 MessageDifferencer::Reporter::~Reporter() {}
1852 
1853 // ===========================================================================
1854 
MapKeyComparator()1855 MessageDifferencer::MapKeyComparator::MapKeyComparator() {}
~MapKeyComparator()1856 MessageDifferencer::MapKeyComparator::~MapKeyComparator() {}
1857 
1858 // ===========================================================================
1859 
IgnoreCriteria()1860 MessageDifferencer::IgnoreCriteria::IgnoreCriteria() {}
~IgnoreCriteria()1861 MessageDifferencer::IgnoreCriteria::~IgnoreCriteria() {}
1862 
1863 // ===========================================================================
1864 
1865 // Note that the printer's delimiter is not used, because if we are given a
1866 // printer, we don't know its delimiter.
StreamReporter(io::ZeroCopyOutputStream * output)1867 MessageDifferencer::StreamReporter::StreamReporter(
1868     io::ZeroCopyOutputStream* output)
1869     : printer_(new io::Printer(output, '$')),
1870       delete_printer_(true),
1871       report_modified_aggregates_(false) {}
1872 
StreamReporter(io::Printer * printer)1873 MessageDifferencer::StreamReporter::StreamReporter(io::Printer* printer)
1874     : printer_(printer),
1875       delete_printer_(false),
1876       report_modified_aggregates_(false) {}
1877 
~StreamReporter()1878 MessageDifferencer::StreamReporter::~StreamReporter() {
1879   if (delete_printer_) delete printer_;
1880 }
1881 
PrintPath(const std::vector<SpecificField> & field_path,bool left_side)1882 void MessageDifferencer::StreamReporter::PrintPath(
1883     const std::vector<SpecificField>& field_path, bool left_side) {
1884   for (int i = 0; i < field_path.size(); ++i) {
1885     if (i > 0) {
1886       printer_->Print(".");
1887     }
1888 
1889     SpecificField specific_field = field_path[i];
1890 
1891     if (specific_field.field != NULL) {
1892       if (specific_field.field->is_extension()) {
1893         printer_->Print("($name$)", "name", specific_field.field->full_name());
1894       } else {
1895         printer_->PrintRaw(specific_field.field->name());
1896       }
1897       if (specific_field.field->is_map()) {
1898         // Don't print index in a map field; they are semantically unordered.
1899         continue;
1900       }
1901     } else {
1902       printer_->PrintRaw(StrCat(specific_field.unknown_field_number));
1903     }
1904     if (left_side && specific_field.index >= 0) {
1905       printer_->Print("[$name$]", "name", StrCat(specific_field.index));
1906     }
1907     if (!left_side && specific_field.new_index >= 0) {
1908       printer_->Print("[$name$]", "name",
1909                       StrCat(specific_field.new_index));
1910     }
1911   }
1912 }
1913 
PrintPath(const std::vector<SpecificField> & field_path,bool left_side,const Message & message)1914 void MessageDifferencer::StreamReporter::PrintPath(
1915     const std::vector<SpecificField>& field_path, bool left_side,
1916     const Message& message) {
1917   PrintPath(field_path, left_side);
1918 }
1919 
PrintValue(const Message & message,const std::vector<SpecificField> & field_path,bool left_side)1920 void MessageDifferencer::StreamReporter::PrintValue(
1921     const Message& message, const std::vector<SpecificField>& field_path,
1922     bool left_side) {
1923   const SpecificField& specific_field = field_path.back();
1924   const FieldDescriptor* field = specific_field.field;
1925   if (field != NULL) {
1926     std::string output;
1927     int index = left_side ? specific_field.index : specific_field.new_index;
1928     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1929       const Reflection* reflection = message.GetReflection();
1930       const Message& field_message =
1931           field->is_repeated()
1932               ? reflection->GetRepeatedMessage(message, field, index)
1933               : reflection->GetMessage(message, field);
1934       output = field_message.ShortDebugString();
1935       if (output.empty()) {
1936         printer_->Print("{ }");
1937       } else {
1938         printer_->Print("{ $name$ }", "name", output);
1939       }
1940     } else {
1941       TextFormat::PrintFieldValueToString(message, field, index, &output);
1942       printer_->PrintRaw(output);
1943     }
1944   } else {
1945     const UnknownFieldSet* unknown_fields =
1946         (left_side ? specific_field.unknown_field_set1
1947                    : specific_field.unknown_field_set2);
1948     const UnknownField* unknown_field =
1949         &unknown_fields->field(left_side ? specific_field.unknown_field_index1
1950                                          : specific_field.unknown_field_index2);
1951     PrintUnknownFieldValue(unknown_field);
1952   }
1953 }
1954 
PrintUnknownFieldValue(const UnknownField * unknown_field)1955 void MessageDifferencer::StreamReporter::PrintUnknownFieldValue(
1956     const UnknownField* unknown_field) {
1957   GOOGLE_CHECK(unknown_field != NULL) << " Cannot print NULL unknown_field.";
1958 
1959   std::string output;
1960   switch (unknown_field->type()) {
1961     case UnknownField::TYPE_VARINT:
1962       output = StrCat(unknown_field->varint());
1963       break;
1964     case UnknownField::TYPE_FIXED32:
1965       output = StrCat(
1966           "0x", strings::Hex(unknown_field->fixed32(), strings::ZERO_PAD_8));
1967       break;
1968     case UnknownField::TYPE_FIXED64:
1969       output = StrCat(
1970           "0x", strings::Hex(unknown_field->fixed64(), strings::ZERO_PAD_16));
1971       break;
1972     case UnknownField::TYPE_LENGTH_DELIMITED:
1973       output = StringPrintf(
1974           "\"%s\"", CEscape(unknown_field->length_delimited()).c_str());
1975       break;
1976     case UnknownField::TYPE_GROUP:
1977       // TODO(kenton):  Print the contents of the group like we do for
1978       //   messages.  Requires an equivalent of ShortDebugString() for
1979       //   UnknownFieldSet.
1980       output = "{ ... }";
1981       break;
1982   }
1983   printer_->PrintRaw(output);
1984 }
1985 
Print(const std::string & str)1986 void MessageDifferencer::StreamReporter::Print(const std::string& str) {
1987   printer_->Print(str.c_str());
1988 }
1989 
ReportAdded(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1990 void MessageDifferencer::StreamReporter::ReportAdded(
1991     const Message& message1, const Message& message2,
1992     const std::vector<SpecificField>& field_path) {
1993   printer_->Print("added: ");
1994   PrintPath(field_path, false, message2);
1995   printer_->Print(": ");
1996   PrintValue(message2, field_path, false);
1997   printer_->Print("\n");  // Print for newlines.
1998 }
1999 
ReportDeleted(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)2000 void MessageDifferencer::StreamReporter::ReportDeleted(
2001     const Message& message1, const Message& message2,
2002     const std::vector<SpecificField>& field_path) {
2003   printer_->Print("deleted: ");
2004   PrintPath(field_path, true, message1);
2005   printer_->Print(": ");
2006   PrintValue(message1, field_path, true);
2007   printer_->Print("\n");  // Print for newlines
2008 }
2009 
ReportModified(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)2010 void MessageDifferencer::StreamReporter::ReportModified(
2011     const Message& message1, const Message& message2,
2012     const std::vector<SpecificField>& field_path) {
2013   if (!report_modified_aggregates_ && field_path.back().field == NULL) {
2014     if (field_path.back().unknown_field_type == UnknownField::TYPE_GROUP) {
2015       // Any changes to the subfields have already been printed.
2016       return;
2017     }
2018   } else if (!report_modified_aggregates_) {
2019     if (field_path.back().field->cpp_type() ==
2020         FieldDescriptor::CPPTYPE_MESSAGE) {
2021       // Any changes to the subfields have already been printed.
2022       return;
2023     }
2024   }
2025 
2026   printer_->Print("modified: ");
2027   PrintPath(field_path, true, message1);
2028   if (CheckPathChanged(field_path)) {
2029     printer_->Print(" -> ");
2030     PrintPath(field_path, false, message2);
2031   }
2032   printer_->Print(": ");
2033   PrintValue(message1, field_path, true);
2034   printer_->Print(" -> ");
2035   PrintValue(message2, field_path, false);
2036   printer_->Print("\n");  // Print for newlines.
2037 }
2038 
ReportMoved(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)2039 void MessageDifferencer::StreamReporter::ReportMoved(
2040     const Message& message1, const Message& message2,
2041     const std::vector<SpecificField>& field_path) {
2042   printer_->Print("moved: ");
2043   PrintPath(field_path, true, message1);
2044   printer_->Print(" -> ");
2045   PrintPath(field_path, false, message2);
2046   printer_->Print(" : ");
2047   PrintValue(message1, field_path, true);
2048   printer_->Print("\n");  // Print for newlines.
2049 }
2050 
ReportMatched(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)2051 void MessageDifferencer::StreamReporter::ReportMatched(
2052     const Message& message1, const Message& message2,
2053     const std::vector<SpecificField>& field_path) {
2054   printer_->Print("matched: ");
2055   PrintPath(field_path, true, message1);
2056   if (CheckPathChanged(field_path)) {
2057     printer_->Print(" -> ");
2058     PrintPath(field_path, false, message2);
2059   }
2060   printer_->Print(" : ");
2061   PrintValue(message1, field_path, true);
2062   printer_->Print("\n");  // Print for newlines.
2063 }
2064 
ReportIgnored(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)2065 void MessageDifferencer::StreamReporter::ReportIgnored(
2066     const Message& message1, const Message& message2,
2067     const std::vector<SpecificField>& field_path) {
2068   printer_->Print("ignored: ");
2069   PrintPath(field_path, true, message1);
2070   if (CheckPathChanged(field_path)) {
2071     printer_->Print(" -> ");
2072     PrintPath(field_path, false, message2);
2073   }
2074   printer_->Print("\n");  // Print for newlines.
2075 }
2076 
ReportUnknownFieldIgnored(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)2077 void MessageDifferencer::StreamReporter::ReportUnknownFieldIgnored(
2078     const Message& message1, const Message& message2,
2079     const std::vector<SpecificField>& field_path) {
2080   printer_->Print("ignored: ");
2081   PrintPath(field_path, true, message1);
2082   if (CheckPathChanged(field_path)) {
2083     printer_->Print(" -> ");
2084     PrintPath(field_path, false, message2);
2085   }
2086   printer_->Print("\n");  // Print for newlines.
2087 }
2088 
2089 MessageDifferencer::MapKeyComparator*
CreateMultipleFieldsMapKeyComparator(const std::vector<std::vector<const FieldDescriptor * >> & key_field_paths)2090 MessageDifferencer::CreateMultipleFieldsMapKeyComparator(
2091     const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths) {
2092   return new MultipleFieldsMapKeyComparator(this, key_field_paths);
2093 }
2094 
2095 }  // namespace util
2096 }  // namespace protobuf
2097 }  // namespace google
2098