• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // Author: jschorr@google.com (Joseph Schorr)
32 //  Based on original Protocol Buffers design by
33 //  Sanjay Ghemawat, Jeff Dean, and others.
34 
35 #include <google/protobuf/util/message_differencer.h>
36 
37 #include <algorithm>
38 #include <functional>
39 #include <memory>
40 #include <utility>
41 
42 #include <google/protobuf/stubs/logging.h>
43 #include <google/protobuf/stubs/common.h>
44 #include <google/protobuf/stubs/stringprintf.h>
45 #include <google/protobuf/io/printer.h>
46 #include <google/protobuf/io/zero_copy_stream.h>
47 #include <google/protobuf/io/zero_copy_stream_impl.h>
48 #include <google/protobuf/descriptor.pb.h>
49 #include <google/protobuf/dynamic_message.h>
50 #include <google/protobuf/text_format.h>
51 #include <google/protobuf/util/field_comparator.h>
52 #include <google/protobuf/stubs/strutil.h>
53 
54 // Always include as last one, otherwise it can break compilation
55 #include <google/protobuf/port_def.inc>
56 
57 namespace google {
58 namespace protobuf {
59 
60 namespace util {
61 
62 // A reporter to report the total number of diffs.
63 // TODO(ykzhu): we can improve this to take into account the value differencers.
64 class NumDiffsReporter : public google::protobuf::util::MessageDifferencer::Reporter {
65  public:
NumDiffsReporter()66   NumDiffsReporter() : num_diffs_(0) {}
67 
68   // Returns the total number of diffs.
GetNumDiffs() const69   int32 GetNumDiffs() const { return num_diffs_; }
Reset()70   void Reset() { num_diffs_ = 0; }
71 
72   // Report that a field has been added into Message2.
ReportAdded(const google::protobuf::Message & message1,const google::protobuf::Message & message2,const std::vector<google::protobuf::util::MessageDifferencer::SpecificField> & field_path)73   void ReportAdded(
74       const google::protobuf::Message& message1, const google::protobuf::Message& message2,
75       const std::vector<google::protobuf::util::MessageDifferencer::SpecificField>&
76           field_path) override {
77     ++num_diffs_;
78   }
79 
80   // Report that a field has been deleted from Message1.
ReportDeleted(const google::protobuf::Message & message1,const google::protobuf::Message & message2,const std::vector<google::protobuf::util::MessageDifferencer::SpecificField> & field_path)81   void ReportDeleted(
82       const google::protobuf::Message& message1, const google::protobuf::Message& message2,
83       const std::vector<google::protobuf::util::MessageDifferencer::SpecificField>&
84           field_path) override {
85     ++num_diffs_;
86   }
87 
88   // Report that the value of a field has been modified.
ReportModified(const google::protobuf::Message & message1,const google::protobuf::Message & message2,const std::vector<google::protobuf::util::MessageDifferencer::SpecificField> & field_path)89   void ReportModified(
90       const google::protobuf::Message& message1, const google::protobuf::Message& message2,
91       const std::vector<google::protobuf::util::MessageDifferencer::SpecificField>&
92           field_path) override {
93     ++num_diffs_;
94   }
95 
96  private:
97   int32 num_diffs_;
98 };
99 
100 // When comparing a repeated field as map, MultipleFieldMapKeyComparator can
101 // be used to specify multiple fields as key for key comparison.
102 // Two elements of a repeated field will be regarded as having the same key
103 // iff they have the same value for every specified key field.
104 // Note that you can also specify only one field as key.
105 class MessageDifferencer::MultipleFieldsMapKeyComparator
106     : public MessageDifferencer::MapKeyComparator {
107  public:
MultipleFieldsMapKeyComparator(MessageDifferencer * message_differencer,const std::vector<std::vector<const FieldDescriptor * >> & key_field_paths)108   MultipleFieldsMapKeyComparator(
109       MessageDifferencer* message_differencer,
110       const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths)
111       : message_differencer_(message_differencer),
112         key_field_paths_(key_field_paths) {
113     GOOGLE_CHECK(!key_field_paths_.empty());
114     for (int i = 0; i < key_field_paths_.size(); ++i) {
115       GOOGLE_CHECK(!key_field_paths_[i].empty());
116     }
117   }
MultipleFieldsMapKeyComparator(MessageDifferencer * message_differencer,const FieldDescriptor * key)118   MultipleFieldsMapKeyComparator(MessageDifferencer* message_differencer,
119                                  const FieldDescriptor* key)
120       : message_differencer_(message_differencer) {
121     std::vector<const FieldDescriptor*> key_field_path;
122     key_field_path.push_back(key);
123     key_field_paths_.push_back(key_field_path);
124   }
IsMatch(const Message & message1,const Message & message2,const std::vector<SpecificField> & parent_fields) const125   bool IsMatch(const Message& message1, const Message& message2,
126                const std::vector<SpecificField>& parent_fields) const override {
127     for (int i = 0; i < key_field_paths_.size(); ++i) {
128       if (!IsMatchInternal(message1, message2, parent_fields,
129                            key_field_paths_[i], 0)) {
130         return false;
131       }
132     }
133     return true;
134   }
135 
136  private:
IsMatchInternal(const Message & message1,const Message & message2,const std::vector<SpecificField> & parent_fields,const std::vector<const FieldDescriptor * > & key_field_path,int path_index) const137   bool IsMatchInternal(
138       const Message& message1, const Message& message2,
139       const std::vector<SpecificField>& parent_fields,
140       const std::vector<const FieldDescriptor*>& key_field_path,
141       int path_index) const {
142     const FieldDescriptor* field = key_field_path[path_index];
143     std::vector<SpecificField> current_parent_fields(parent_fields);
144     if (path_index == key_field_path.size() - 1) {
145       if (field->is_repeated()) {
146         if (!message_differencer_->CompareRepeatedField(
147                 message1, message2, field, &current_parent_fields)) {
148           return false;
149         }
150       } else {
151         if (!message_differencer_->CompareFieldValueUsingParentFields(
152                 message1, message2, field, -1, -1, &current_parent_fields)) {
153           return false;
154         }
155       }
156       return true;
157     } else {
158       const Reflection* reflection1 = message1.GetReflection();
159       const Reflection* reflection2 = message2.GetReflection();
160       bool has_field1 = reflection1->HasField(message1, field);
161       bool has_field2 = reflection2->HasField(message2, field);
162       if (!has_field1 && !has_field2) {
163         return true;
164       }
165       if (has_field1 != has_field2) {
166         return false;
167       }
168       SpecificField specific_field;
169       specific_field.field = field;
170       current_parent_fields.push_back(specific_field);
171       return IsMatchInternal(reflection1->GetMessage(message1, field),
172                              reflection2->GetMessage(message2, field),
173                              current_parent_fields, key_field_path,
174                              path_index + 1);
175     }
176   }
177   MessageDifferencer* message_differencer_;
178   std::vector<std::vector<const FieldDescriptor*> > key_field_paths_;
179   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MultipleFieldsMapKeyComparator);
180 };
181 
182 // Preserve the order when treating repeated field as SMART_LIST. The current
183 // implementation is to find the longest matching sequence from the first
184 // element. The optimal solution requires to use //util/diff/lcs.h, which is
185 // not open sourced yet. Overwrite this method if you want to have that.
186 // TODO(ykzhu): change to use LCS once it is open sourced.
MatchIndicesPostProcessorForSmartList(std::vector<int> * match_list1,std::vector<int> * match_list2)187 void MatchIndicesPostProcessorForSmartList(
188     std::vector<int>* match_list1, std::vector<int>* match_list2) {
189   int last_matched_index = -1;
190   for (int i = 0; i < match_list1->size(); ++i) {
191     if (match_list1->at(i) < 0) {
192       continue;
193     }
194     if (last_matched_index < 0 || match_list1->at(i) > last_matched_index) {
195       last_matched_index = match_list1->at(i);
196     } else {
197       match_list2->at(match_list1->at(i)) = -1;
198       match_list1->at(i) = -1;
199     }
200   }
201 }
202 
MapEntryKeyComparator(MessageDifferencer * message_differencer)203 MessageDifferencer::MapEntryKeyComparator::MapEntryKeyComparator(
204     MessageDifferencer* message_differencer)
205     : message_differencer_(message_differencer) {}
206 
IsMatch(const Message & message1,const Message & message2,const std::vector<SpecificField> & parent_fields) const207 bool MessageDifferencer::MapEntryKeyComparator::IsMatch(
208     const Message& message1, const Message& message2,
209     const std::vector<SpecificField>& parent_fields) const {
210   // Map entry has its key in the field with tag 1.  See the comment for
211   // map_entry in MessageOptions.
212   const FieldDescriptor* key = message1.GetDescriptor()->FindFieldByNumber(1);
213   // If key is not present in message1 and we're doing partial comparison or if
214   // map key is explicitly ignored treat the field as set instead,
215   const bool treat_as_set =
216       (message_differencer_->scope() == PARTIAL &&
217        !message1.GetReflection()->HasField(message1, key)) ||
218       message_differencer_->IsIgnored(message1, message2, key, parent_fields);
219 
220   std::vector<SpecificField> current_parent_fields(parent_fields);
221   if (treat_as_set) {
222     return message_differencer_->Compare(message1, message2,
223                                          &current_parent_fields);
224   }
225   return message_differencer_->CompareFieldValueUsingParentFields(
226       message1, message2, key, -1, -1, &current_parent_fields);
227 }
228 
Equals(const Message & message1,const Message & message2)229 bool MessageDifferencer::Equals(const Message& message1,
230                                 const Message& message2) {
231   MessageDifferencer differencer;
232 
233   return differencer.Compare(message1, message2);
234 }
235 
Equivalent(const Message & message1,const Message & message2)236 bool MessageDifferencer::Equivalent(const Message& message1,
237                                     const Message& message2) {
238   MessageDifferencer differencer;
239   differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
240 
241   return differencer.Compare(message1, message2);
242 }
243 
ApproximatelyEquals(const Message & message1,const Message & message2)244 bool MessageDifferencer::ApproximatelyEquals(const Message& message1,
245                                              const Message& message2) {
246   MessageDifferencer differencer;
247   differencer.set_float_comparison(MessageDifferencer::APPROXIMATE);
248 
249   return differencer.Compare(message1, message2);
250 }
251 
ApproximatelyEquivalent(const Message & message1,const Message & message2)252 bool MessageDifferencer::ApproximatelyEquivalent(const Message& message1,
253                                                  const Message& message2) {
254   MessageDifferencer differencer;
255   differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
256   differencer.set_float_comparison(MessageDifferencer::APPROXIMATE);
257 
258   return differencer.Compare(message1, message2);
259 }
260 
261 // ===========================================================================
262 
MessageDifferencer()263 MessageDifferencer::MessageDifferencer()
264     : reporter_(NULL),
265       field_comparator_(NULL),
266       message_field_comparison_(EQUAL),
267       scope_(FULL),
268       repeated_field_comparison_(AS_LIST),
269       map_entry_key_comparator_(this),
270       report_matches_(false),
271       report_moves_(true),
272       report_ignores_(true),
273       output_string_(nullptr),
274       match_indices_for_smart_list_callback_(
275           MatchIndicesPostProcessorForSmartList) {}
276 
~MessageDifferencer()277 MessageDifferencer::~MessageDifferencer() {
278   for (int i = 0; i < owned_key_comparators_.size(); ++i) {
279     delete owned_key_comparators_[i];
280   }
281   for (int i = 0; i < ignore_criteria_.size(); ++i) {
282     delete ignore_criteria_[i];
283   }
284 }
285 
set_field_comparator(FieldComparator * comparator)286 void MessageDifferencer::set_field_comparator(FieldComparator* comparator) {
287   GOOGLE_CHECK(comparator) << "Field comparator can't be NULL.";
288   field_comparator_ = comparator;
289 }
290 
set_message_field_comparison(MessageFieldComparison comparison)291 void MessageDifferencer::set_message_field_comparison(
292     MessageFieldComparison comparison) {
293   message_field_comparison_ = comparison;
294 }
295 
set_scope(Scope scope)296 void MessageDifferencer::set_scope(Scope scope) { scope_ = scope; }
297 
scope()298 MessageDifferencer::Scope MessageDifferencer::scope() { return scope_; }
299 
set_float_comparison(FloatComparison comparison)300 void MessageDifferencer::set_float_comparison(FloatComparison comparison) {
301   default_field_comparator_.set_float_comparison(
302       comparison == EXACT ? DefaultFieldComparator::EXACT
303                           : DefaultFieldComparator::APPROXIMATE);
304 }
305 
set_repeated_field_comparison(RepeatedFieldComparison comparison)306 void MessageDifferencer::set_repeated_field_comparison(
307     RepeatedFieldComparison comparison) {
308   repeated_field_comparison_ = comparison;
309 }
310 
311 MessageDifferencer::RepeatedFieldComparison
repeated_field_comparison()312 MessageDifferencer::repeated_field_comparison() {
313   return repeated_field_comparison_;
314 }
315 
CheckRepeatedFieldComparisons(const FieldDescriptor * field,const RepeatedFieldComparison & new_comparison)316 void MessageDifferencer::CheckRepeatedFieldComparisons(
317     const FieldDescriptor* field,
318     const RepeatedFieldComparison& new_comparison) {
319   GOOGLE_CHECK(field->is_repeated())
320       << "Field must be repeated: " << field->full_name();
321   const MapKeyComparator* key_comparator = GetMapKeyComparator(field);
322   GOOGLE_CHECK(key_comparator == NULL)
323       << "Cannot treat this repeated field as both MAP and " << new_comparison
324       << " for"
325       << " comparison.  Field name is: " << field->full_name();
326   GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
327             repeated_field_comparisons_.end() ||
328         repeated_field_comparisons_[field] == new_comparison)
329       << "Cannot treat the same field as both "
330       << repeated_field_comparisons_[field] << " and " << new_comparison
331       << ". Field name is: " << field->full_name();
332 }
333 
TreatAsSet(const FieldDescriptor * field)334 void MessageDifferencer::TreatAsSet(const FieldDescriptor* field) {
335   CheckRepeatedFieldComparisons(field, AS_SET);
336   repeated_field_comparisons_[field] = AS_SET;
337 }
338 
TreatAsSmartSet(const FieldDescriptor * field)339 void MessageDifferencer::TreatAsSmartSet(const FieldDescriptor* field) {
340   CheckRepeatedFieldComparisons(field, AS_SMART_SET);
341   repeated_field_comparisons_[field] = AS_SMART_SET;
342 }
343 
SetMatchIndicesForSmartListCallback(std::function<void (std::vector<int> *,std::vector<int> *)> callback)344 void MessageDifferencer::SetMatchIndicesForSmartListCallback(
345     std::function<void(std::vector<int>*, std::vector<int>*)> callback) {
346   match_indices_for_smart_list_callback_ = callback;
347 }
348 
TreatAsList(const FieldDescriptor * field)349 void MessageDifferencer::TreatAsList(const FieldDescriptor* field) {
350   CheckRepeatedFieldComparisons(field, AS_LIST);
351   repeated_field_comparisons_[field] = AS_LIST;
352 }
353 
TreatAsSmartList(const FieldDescriptor * field)354 void MessageDifferencer::TreatAsSmartList(const FieldDescriptor* field) {
355   CheckRepeatedFieldComparisons(field, AS_SMART_LIST);
356   repeated_field_comparisons_[field] = AS_SMART_LIST;
357 }
358 
TreatAsMap(const FieldDescriptor * field,const FieldDescriptor * key)359 void MessageDifferencer::TreatAsMap(const FieldDescriptor* field,
360                                     const FieldDescriptor* key) {
361   GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
362       << "Field has to be message type.  Field name is: " << field->full_name();
363   GOOGLE_CHECK(key->containing_type() == field->message_type())
364       << key->full_name()
365       << " must be a direct subfield within the repeated field "
366       << field->full_name() << ", not " << key->containing_type()->full_name();
367   GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
368         repeated_field_comparisons_.end())
369       << "Cannot treat the same field as both "
370       << repeated_field_comparisons_[field]
371       << " and MAP. Field name is: " << field->full_name();
372   MapKeyComparator* key_comparator =
373       new MultipleFieldsMapKeyComparator(this, key);
374   owned_key_comparators_.push_back(key_comparator);
375   map_field_key_comparator_[field] = key_comparator;
376 }
377 
TreatAsMapWithMultipleFieldsAsKey(const FieldDescriptor * field,const std::vector<const FieldDescriptor * > & key_fields)378 void MessageDifferencer::TreatAsMapWithMultipleFieldsAsKey(
379     const FieldDescriptor* field,
380     const std::vector<const FieldDescriptor*>& key_fields) {
381   std::vector<std::vector<const FieldDescriptor*> > key_field_paths;
382   for (int i = 0; i < key_fields.size(); ++i) {
383     std::vector<const FieldDescriptor*> key_field_path;
384     key_field_path.push_back(key_fields[i]);
385     key_field_paths.push_back(key_field_path);
386   }
387   TreatAsMapWithMultipleFieldPathsAsKey(field, key_field_paths);
388 }
389 
TreatAsMapWithMultipleFieldPathsAsKey(const FieldDescriptor * field,const std::vector<std::vector<const FieldDescriptor * >> & key_field_paths)390 void MessageDifferencer::TreatAsMapWithMultipleFieldPathsAsKey(
391     const FieldDescriptor* field,
392     const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths) {
393   GOOGLE_CHECK(field->is_repeated())
394       << "Field must be repeated: " << field->full_name();
395   GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
396       << "Field has to be message type.  Field name is: " << field->full_name();
397   for (int i = 0; i < key_field_paths.size(); ++i) {
398     const std::vector<const FieldDescriptor*>& key_field_path =
399         key_field_paths[i];
400     for (int j = 0; j < key_field_path.size(); ++j) {
401       const FieldDescriptor* parent_field =
402           j == 0 ? field : key_field_path[j - 1];
403       const FieldDescriptor* child_field = key_field_path[j];
404       GOOGLE_CHECK(child_field->containing_type() == parent_field->message_type())
405           << child_field->full_name()
406           << " must be a direct subfield within the field: "
407           << parent_field->full_name();
408       if (j != 0) {
409         GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, parent_field->cpp_type())
410             << parent_field->full_name() << " has to be of type message.";
411         GOOGLE_CHECK(!parent_field->is_repeated())
412             << parent_field->full_name() << " cannot be a repeated field.";
413       }
414     }
415   }
416   GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
417         repeated_field_comparisons_.end())
418       << "Cannot treat the same field as both "
419       << repeated_field_comparisons_[field]
420       << " and MAP. Field name is: " << field->full_name();
421   MapKeyComparator* key_comparator =
422       new MultipleFieldsMapKeyComparator(this, key_field_paths);
423   owned_key_comparators_.push_back(key_comparator);
424   map_field_key_comparator_[field] = key_comparator;
425 }
426 
TreatAsMapUsingKeyComparator(const FieldDescriptor * field,const MapKeyComparator * key_comparator)427 void MessageDifferencer::TreatAsMapUsingKeyComparator(
428     const FieldDescriptor* field, const MapKeyComparator* key_comparator) {
429   GOOGLE_CHECK(field->is_repeated())
430       << "Field must be repeated: " << field->full_name();
431   GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
432         repeated_field_comparisons_.end())
433       << "Cannot treat the same field as both "
434       << repeated_field_comparisons_[field]
435       << " and MAP. Field name is: " << field->full_name();
436   map_field_key_comparator_[field] = key_comparator;
437 }
438 
AddIgnoreCriteria(IgnoreCriteria * ignore_criteria)439 void MessageDifferencer::AddIgnoreCriteria(IgnoreCriteria* ignore_criteria) {
440   ignore_criteria_.push_back(ignore_criteria);
441 }
442 
IgnoreField(const FieldDescriptor * field)443 void MessageDifferencer::IgnoreField(const FieldDescriptor* field) {
444   ignored_fields_.insert(field);
445 }
446 
SetFractionAndMargin(const FieldDescriptor * field,double fraction,double margin)447 void MessageDifferencer::SetFractionAndMargin(const FieldDescriptor* field,
448                                               double fraction, double margin) {
449   default_field_comparator_.SetFractionAndMargin(field, fraction, margin);
450 }
451 
ReportDifferencesToString(std::string * output)452 void MessageDifferencer::ReportDifferencesToString(std::string* output) {
453   GOOGLE_DCHECK(output) << "Specified output string was NULL";
454 
455   output_string_ = output;
456   output_string_->clear();
457 }
458 
ReportDifferencesTo(Reporter * reporter)459 void MessageDifferencer::ReportDifferencesTo(Reporter* reporter) {
460   // If an output string is set, clear it to prevent
461   // it superseding the specified reporter.
462   if (output_string_) {
463     output_string_ = NULL;
464   }
465 
466   reporter_ = reporter;
467 }
468 
FieldBefore(const FieldDescriptor * field1,const FieldDescriptor * field2)469 bool MessageDifferencer::FieldBefore(const FieldDescriptor* field1,
470                                      const FieldDescriptor* field2) {
471   // Handle sentinel values (i.e. make sure NULLs are always ordered
472   // at the end of the list).
473   if (field1 == NULL) {
474     return false;
475   }
476 
477   if (field2 == NULL) {
478     return true;
479   }
480 
481   // Always order fields by their tag number
482   return (field1->number() < field2->number());
483 }
484 
Compare(const Message & message1,const Message & message2)485 bool MessageDifferencer::Compare(const Message& message1,
486                                  const Message& message2) {
487   std::vector<SpecificField> parent_fields;
488 
489   bool result = false;
490 
491   // Setup the internal reporter if need be.
492   if (output_string_) {
493     io::StringOutputStream output_stream(output_string_);
494     StreamReporter reporter(&output_stream);
495     reporter_ = &reporter;
496     result = Compare(message1, message2, &parent_fields);
497     reporter_ = NULL;
498   } else {
499     result = Compare(message1, message2, &parent_fields);
500   }
501 
502   return result;
503 }
504 
CompareWithFields(const Message & message1,const Message & message2,const std::vector<const FieldDescriptor * > & message1_fields_arg,const std::vector<const FieldDescriptor * > & message2_fields_arg)505 bool MessageDifferencer::CompareWithFields(
506     const Message& message1, const Message& message2,
507     const std::vector<const FieldDescriptor*>& message1_fields_arg,
508     const std::vector<const FieldDescriptor*>& message2_fields_arg) {
509   if (message1.GetDescriptor() != message2.GetDescriptor()) {
510     GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
511                 << "descriptors.";
512     return false;
513   }
514 
515   std::vector<SpecificField> parent_fields;
516 
517   bool result = false;
518 
519   FieldDescriptorArray message1_fields(message1_fields_arg.size() + 1);
520   FieldDescriptorArray message2_fields(message2_fields_arg.size() + 1);
521 
522   std::copy(message1_fields_arg.cbegin(), message1_fields_arg.cend(),
523             message1_fields.begin());
524   std::copy(message2_fields_arg.cbegin(), message2_fields_arg.cend(),
525             message2_fields.begin());
526 
527   // Append sentinel values.
528   message1_fields[message1_fields_arg.size()] = nullptr;
529   message2_fields[message2_fields_arg.size()] = nullptr;
530 
531   std::sort(message1_fields.begin(), message1_fields.end(), FieldBefore);
532   std::sort(message2_fields.begin(), message2_fields.end(), FieldBefore);
533 
534   // Setup the internal reporter if need be.
535   if (output_string_) {
536     io::StringOutputStream output_stream(output_string_);
537     StreamReporter reporter(&output_stream);
538     reporter_ = &reporter;
539     result = CompareRequestedFieldsUsingSettings(
540         message1, message2, message1_fields, message2_fields, &parent_fields);
541     reporter_ = NULL;
542   } else {
543     result = CompareRequestedFieldsUsingSettings(
544         message1, message2, message1_fields, message2_fields, &parent_fields);
545   }
546 
547   return result;
548 }
549 
Compare(const Message & message1,const Message & message2,std::vector<SpecificField> * parent_fields)550 bool MessageDifferencer::Compare(const Message& message1,
551                                  const Message& message2,
552                                  std::vector<SpecificField>* parent_fields) {
553   const Descriptor* descriptor1 = message1.GetDescriptor();
554   const Descriptor* descriptor2 = message2.GetDescriptor();
555   if (descriptor1 != descriptor2) {
556     GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
557                 << "descriptors. " << descriptor1->full_name() << " vs "
558                 << descriptor2->full_name();
559     return false;
560   }
561   // Expand google.protobuf.Any payload if possible.
562   if (descriptor1->full_name() == internal::kAnyFullTypeName) {
563     std::unique_ptr<Message> data1;
564     std::unique_ptr<Message> data2;
565     if (UnpackAny(message1, &data1) && UnpackAny(message2, &data2)) {
566       // Avoid DFATAL for different descriptors in google.protobuf.Any payloads.
567       if (data1->GetDescriptor() != data2->GetDescriptor()) {
568         return false;
569       }
570       return Compare(*data1, *data2, parent_fields);
571     }
572   }
573   const Reflection* reflection1 = message1.GetReflection();
574   const Reflection* reflection2 = message2.GetReflection();
575 
576   bool unknown_compare_result = true;
577   // Ignore unknown fields in EQUIVALENT mode
578   if (message_field_comparison_ != EQUIVALENT) {
579     const UnknownFieldSet& unknown_field_set1 =
580         reflection1->GetUnknownFields(message1);
581     const UnknownFieldSet& unknown_field_set2 =
582         reflection2->GetUnknownFields(message2);
583     if (!CompareUnknownFields(message1, message2, unknown_field_set1,
584                               unknown_field_set2, parent_fields)) {
585       if (reporter_ == NULL) {
586         return false;
587       }
588       unknown_compare_result = false;
589     }
590   }
591 
592   FieldDescriptorArray message1_fields = RetrieveFields(message1, true);
593   FieldDescriptorArray message2_fields = RetrieveFields(message2, false);
594 
595   return CompareRequestedFieldsUsingSettings(
596       message1, message2,
597       message1_fields, message2_fields,
598       parent_fields) && unknown_compare_result;
599 }
600 
RetrieveFields(const Message & message,bool base_message)601 FieldDescriptorArray MessageDifferencer::RetrieveFields(const Message& message,
602                                                         bool base_message) {
603   const Descriptor* descriptor = message.GetDescriptor();
604 
605   tmp_message_fields_.clear();
606   tmp_message_fields_.reserve(descriptor->field_count() + 1);
607 
608   const Reflection* reflection = message.GetReflection();
609   if (descriptor->options().map_entry()) {
610     if (this->scope_ == PARTIAL && base_message) {
611       reflection->ListFields(message, &tmp_message_fields_);
612     } else {
613       // Map entry fields are always considered present.
614       for (int i = 0; i < descriptor->field_count(); i++) {
615         tmp_message_fields_.push_back(descriptor->field(i));
616       }
617     }
618   } else {
619     reflection->ListFields(message, &tmp_message_fields_);
620   }
621   // Add sentinel values to deal with the
622   // case where the number of the fields in
623   // each list are different.
624   tmp_message_fields_.push_back(nullptr);
625 
626   FieldDescriptorArray message_fields(tmp_message_fields_.begin(),
627                                       tmp_message_fields_.end());
628 
629   return message_fields;
630 }
631 
CompareRequestedFieldsUsingSettings(const Message & message1,const Message & message2,const FieldDescriptorArray & message1_fields,const FieldDescriptorArray & message2_fields,std::vector<SpecificField> * parent_fields)632 bool MessageDifferencer::CompareRequestedFieldsUsingSettings(
633     const Message& message1, const Message& message2,
634     const FieldDescriptorArray& message1_fields,
635     const FieldDescriptorArray& message2_fields,
636     std::vector<SpecificField>* parent_fields) {
637   if (scope_ == FULL) {
638     if (message_field_comparison_ == EQUIVALENT) {
639       // We need to merge the field lists of both messages (i.e.
640       // we are merely checking for a difference in field values,
641       // rather than the addition or deletion of fields).
642       FieldDescriptorArray fields_union =
643           CombineFields(message1_fields, FULL, message2_fields, FULL);
644       return CompareWithFieldsInternal(message1, message2, fields_union,
645                                        fields_union, parent_fields);
646     } else {
647       // Simple equality comparison, use the unaltered field lists.
648       return CompareWithFieldsInternal(message1, message2, message1_fields,
649                                        message2_fields, parent_fields);
650     }
651   } else {
652     if (message_field_comparison_ == EQUIVALENT) {
653       // We use the list of fields for message1 for both messages when
654       // comparing.  This way, extra fields in message2 are ignored,
655       // and missing fields in message2 use their default value.
656       return CompareWithFieldsInternal(message1, message2, message1_fields,
657                                        message1_fields, parent_fields);
658     } else {
659       // We need to consider the full list of fields for message1
660       // but only the intersection for message2.  This way, any fields
661       // only present in message2 will be ignored, but any fields only
662       // present in message1 will be marked as a difference.
663       FieldDescriptorArray fields_intersection =
664           CombineFields(message1_fields, PARTIAL, message2_fields, PARTIAL);
665       return CompareWithFieldsInternal(message1, message2, message1_fields,
666                                        fields_intersection, parent_fields);
667     }
668   }
669 }
670 
CombineFields(const FieldDescriptorArray & fields1,Scope fields1_scope,const FieldDescriptorArray & fields2,Scope fields2_scope)671 FieldDescriptorArray MessageDifferencer::CombineFields(
672     const FieldDescriptorArray& fields1, Scope fields1_scope,
673     const FieldDescriptorArray& fields2, Scope fields2_scope) {
674   int index1 = 0;
675   int index2 = 0;
676 
677   tmp_message_fields_.clear();
678 
679   while (index1 < fields1.size() && index2 < fields2.size()) {
680     const FieldDescriptor* field1 = fields1[index1];
681     const FieldDescriptor* field2 = fields2[index2];
682 
683     if (FieldBefore(field1, field2)) {
684       if (fields1_scope == FULL) {
685         tmp_message_fields_.push_back(fields1[index1]);
686       }
687       ++index1;
688     } else if (FieldBefore(field2, field1)) {
689       if (fields2_scope == FULL) {
690         tmp_message_fields_.push_back(fields2[index2]);
691       }
692       ++index2;
693     } else {
694       tmp_message_fields_.push_back(fields1[index1]);
695       ++index1;
696       ++index2;
697     }
698   }
699 
700   tmp_message_fields_.push_back(nullptr);
701 
702   FieldDescriptorArray combined_fields(tmp_message_fields_.begin(),
703                                        tmp_message_fields_.end());
704 
705   return combined_fields;
706 }
707 
CompareWithFieldsInternal(const Message & message1,const Message & message2,const FieldDescriptorArray & message1_fields,const FieldDescriptorArray & message2_fields,std::vector<SpecificField> * parent_fields)708 bool MessageDifferencer::CompareWithFieldsInternal(
709     const Message& message1, const Message& message2,
710     const FieldDescriptorArray& message1_fields,
711     const FieldDescriptorArray& message2_fields,
712     std::vector<SpecificField>* parent_fields) {
713   bool isDifferent = false;
714   int field_index1 = 0;
715   int field_index2 = 0;
716 
717   const Reflection* reflection1 = message1.GetReflection();
718   const Reflection* reflection2 = message2.GetReflection();
719 
720   while (true) {
721     const FieldDescriptor* field1 = message1_fields[field_index1];
722     const FieldDescriptor* field2 = message2_fields[field_index2];
723 
724     // Once we have reached sentinel values, we are done the comparison.
725     if (field1 == NULL && field2 == NULL) {
726       break;
727     }
728 
729     // Check for differences in the field itself.
730     if (FieldBefore(field1, field2)) {
731       // Field 1 is not in the field list for message 2.
732       if (IsIgnored(message1, message2, field1, *parent_fields)) {
733         // We are ignoring field1. Report the ignore and move on to
734         // the next field in message1_fields.
735         if (reporter_ != NULL) {
736           SpecificField specific_field;
737           specific_field.field = field1;
738 
739           parent_fields->push_back(specific_field);
740           if (report_ignores_) {
741             reporter_->ReportIgnored(message1, message2, *parent_fields);
742           }
743           parent_fields->pop_back();
744         }
745         ++field_index1;
746         continue;
747       }
748 
749       if (reporter_ != NULL) {
750         assert(field1 != NULL);
751         int count = field1->is_repeated()
752                         ? reflection1->FieldSize(message1, field1)
753                         : 1;
754 
755         for (int i = 0; i < count; ++i) {
756           SpecificField specific_field;
757           specific_field.field = field1;
758           specific_field.index = field1->is_repeated() ? i : -1;
759 
760           parent_fields->push_back(specific_field);
761           reporter_->ReportDeleted(message1, message2, *parent_fields);
762           parent_fields->pop_back();
763         }
764 
765         isDifferent = true;
766       } else {
767         return false;
768       }
769 
770       ++field_index1;
771       continue;
772     } else if (FieldBefore(field2, field1)) {
773       // Field 2 is not in the field list for message 1.
774       if (IsIgnored(message1, message2, field2, *parent_fields)) {
775         // We are ignoring field2. Report the ignore and move on to
776         // the next field in message2_fields.
777         if (reporter_ != NULL) {
778           SpecificField specific_field;
779           specific_field.field = field2;
780 
781           parent_fields->push_back(specific_field);
782           if (report_ignores_) {
783             reporter_->ReportIgnored(message1, message2, *parent_fields);
784           }
785           parent_fields->pop_back();
786         }
787         ++field_index2;
788         continue;
789       }
790 
791       if (reporter_ != NULL) {
792         int count = field2->is_repeated()
793                         ? reflection2->FieldSize(message2, field2)
794                         : 1;
795 
796         for (int i = 0; i < count; ++i) {
797           SpecificField specific_field;
798           specific_field.field = field2;
799           specific_field.index = field2->is_repeated() ? i : -1;
800           specific_field.new_index = specific_field.index;
801 
802           parent_fields->push_back(specific_field);
803           reporter_->ReportAdded(message1, message2, *parent_fields);
804           parent_fields->pop_back();
805         }
806 
807         isDifferent = true;
808       } else {
809         return false;
810       }
811 
812       ++field_index2;
813       continue;
814     }
815 
816     // By this point, field1 and field2 are guaranteed to point to the same
817     // field, so we can now compare the values.
818     if (IsIgnored(message1, message2, field1, *parent_fields)) {
819       // Ignore this field. Report and move on.
820       if (reporter_ != NULL) {
821         SpecificField specific_field;
822         specific_field.field = field1;
823 
824         parent_fields->push_back(specific_field);
825         if (report_ignores_) {
826           reporter_->ReportIgnored(message1, message2, *parent_fields);
827         }
828         parent_fields->pop_back();
829       }
830 
831       ++field_index1;
832       ++field_index2;
833       continue;
834     }
835 
836     bool fieldDifferent = false;
837     assert(field1 != NULL);
838     if (field1->is_repeated()) {
839       fieldDifferent =
840           !CompareRepeatedField(message1, message2, field1, parent_fields);
841       if (fieldDifferent) {
842         if (reporter_ == NULL) return false;
843         isDifferent = true;
844       }
845     } else {
846       fieldDifferent = !CompareFieldValueUsingParentFields(
847           message1, message2, field1, -1, -1, parent_fields);
848 
849       // If we have found differences, either report them or terminate if
850       // no reporter is present.
851       if (fieldDifferent && reporter_ == NULL) {
852         return false;
853       }
854 
855       if (reporter_ != NULL) {
856         SpecificField specific_field;
857         specific_field.field = field1;
858         parent_fields->push_back(specific_field);
859         if (fieldDifferent) {
860           reporter_->ReportModified(message1, message2, *parent_fields);
861           isDifferent = true;
862         } else if (report_matches_) {
863           reporter_->ReportMatched(message1, message2, *parent_fields);
864         }
865         parent_fields->pop_back();
866       }
867     }
868     // Increment the field indices.
869     ++field_index1;
870     ++field_index2;
871   }
872 
873   return !isDifferent;
874 }
875 
IsMatch(const FieldDescriptor * repeated_field,const MapKeyComparator * key_comparator,const Message * message1,const Message * message2,const std::vector<SpecificField> & parent_fields,Reporter * reporter,int index1,int index2)876 bool MessageDifferencer::IsMatch(
877     const FieldDescriptor* repeated_field,
878     const MapKeyComparator* key_comparator, const Message* message1,
879     const Message* message2, const std::vector<SpecificField>& parent_fields,
880     Reporter* reporter, int index1, int index2) {
881   std::vector<SpecificField> current_parent_fields(parent_fields);
882   if (repeated_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
883     return CompareFieldValueUsingParentFields(*message1, *message2,
884                                               repeated_field, index1, index2,
885                                               &current_parent_fields);
886   }
887   // Back up the Reporter and output_string_.  They will be reset in the
888   // following code.
889   Reporter* backup_reporter = reporter_;
890   std::string* output_string = output_string_;
891   reporter_ = reporter;
892   output_string_ = NULL;
893   bool match;
894 
895   if (key_comparator == NULL) {
896     match = CompareFieldValueUsingParentFields(*message1, *message2,
897                                                repeated_field, index1, index2,
898                                                &current_parent_fields);
899   } else {
900     const Reflection* reflection1 = message1->GetReflection();
901     const Reflection* reflection2 = message2->GetReflection();
902     const Message& m1 =
903         reflection1->GetRepeatedMessage(*message1, repeated_field, index1);
904     const Message& m2 =
905         reflection2->GetRepeatedMessage(*message2, repeated_field, index2);
906     SpecificField specific_field;
907     specific_field.field = repeated_field;
908     specific_field.index = index1;
909     specific_field.new_index = index2;
910     current_parent_fields.push_back(specific_field);
911     match = key_comparator->IsMatch(m1, m2, current_parent_fields);
912   }
913 
914   reporter_ = backup_reporter;
915   output_string_ = output_string;
916   return match;
917 }
918 
CompareRepeatedField(const Message & message1,const Message & message2,const FieldDescriptor * repeated_field,std::vector<SpecificField> * parent_fields)919 bool MessageDifferencer::CompareRepeatedField(
920     const Message& message1, const Message& message2,
921     const FieldDescriptor* repeated_field,
922     std::vector<SpecificField>* parent_fields) {
923   // the input FieldDescriptor is guaranteed to be repeated field.
924   const Reflection* reflection1 = message1.GetReflection();
925   const Reflection* reflection2 = message2.GetReflection();
926   const int count1 = reflection1->FieldSize(message1, repeated_field);
927   const int count2 = reflection2->FieldSize(message2, repeated_field);
928   const bool treated_as_subset = IsTreatedAsSubset(repeated_field);
929 
930   // If the field is not treated as subset and no detailed reports is needed,
931   // we do a quick check on the number of the elements to avoid unnecessary
932   // comparison.
933   if (count1 != count2 && reporter_ == NULL && !treated_as_subset) {
934     return false;
935   }
936   // A match can never be found if message1 has more items than message2.
937   if (count1 > count2 && reporter_ == NULL) {
938     return false;
939   }
940 
941   // These two list are used for store the index of the correspondent
942   // element in peer repeated field.
943   std::vector<int> match_list1;
944   std::vector<int> match_list2;
945 
946   const MapKeyComparator* key_comparator = GetMapKeyComparator(repeated_field);
947   bool smart_list = IsTreatedAsSmartList(repeated_field);
948   bool simple_list = key_comparator == nullptr &&
949                      !IsTreatedAsSet(repeated_field) &&
950                      !IsTreatedAsSmartSet(repeated_field) && !smart_list;
951 
952   // For simple lists, we avoid matching repeated field indices, saving the
953   // memory allocations that would otherwise be needed for match_list1 and
954   // match_list2.
955   if (!simple_list) {
956     // Try to match indices of the repeated fields. Return false if match fails.
957     if (!MatchRepeatedFieldIndices(message1, message2, repeated_field,
958                                    key_comparator, *parent_fields, &match_list1,
959                                    &match_list2) &&
960         reporter_ == nullptr) {
961       return false;
962     }
963   }
964 
965   bool fieldDifferent = false;
966   SpecificField specific_field;
967   specific_field.field = repeated_field;
968 
969   // At this point, we have already matched pairs of fields (with the reporting
970   // to be done later). Now to check if the paired elements are different.
971   int next_unmatched_index = 0;
972   for (int i = 0; i < count1; i++) {
973     if (simple_list && i >= count2) {
974       break;
975     }
976     if (!simple_list && match_list1[i] == -1) {
977       if (smart_list) {
978         if (reporter_ == nullptr) return false;
979         specific_field.index = i;
980         parent_fields->push_back(specific_field);
981         reporter_->ReportDeleted(message1, message2, *parent_fields);
982         parent_fields->pop_back();
983         fieldDifferent = true;
984         // Use -2 to mark this element has been reported.
985         match_list1[i] = -2;
986       }
987       continue;
988     }
989     if (smart_list) {
990       for (int j = next_unmatched_index; j < match_list1[i]; ++j) {
991         GOOGLE_CHECK_LE(0, j);
992         if (reporter_ == nullptr) return false;
993         specific_field.index = j;
994         specific_field.new_index = j;
995         parent_fields->push_back(specific_field);
996         reporter_->ReportAdded(message1, message2, *parent_fields);
997         parent_fields->pop_back();
998         fieldDifferent = true;
999         // Use -2 to mark this element has been reported.
1000         match_list2[j] = -2;
1001       }
1002     }
1003     specific_field.index = i;
1004     if (simple_list) {
1005       specific_field.new_index = i;
1006     } else {
1007       specific_field.new_index = match_list1[i];
1008       next_unmatched_index = match_list1[i] + 1;
1009     }
1010 
1011     const bool result = CompareFieldValueUsingParentFields(
1012         message1, message2, repeated_field, i, specific_field.new_index,
1013         parent_fields);
1014 
1015     // If we have found differences, either report them or terminate if
1016     // no reporter is present. Note that ReportModified, ReportMoved, and
1017     // ReportMatched are all mutually exclusive.
1018     if (!result) {
1019       if (reporter_ == NULL) return false;
1020       parent_fields->push_back(specific_field);
1021       reporter_->ReportModified(message1, message2, *parent_fields);
1022       parent_fields->pop_back();
1023       fieldDifferent = true;
1024     } else if (reporter_ != NULL &&
1025                specific_field.index != specific_field.new_index &&
1026                !specific_field.field->is_map() && report_moves_) {
1027       parent_fields->push_back(specific_field);
1028       reporter_->ReportMoved(message1, message2, *parent_fields);
1029       parent_fields->pop_back();
1030     } else if (report_matches_ && reporter_ != NULL) {
1031       parent_fields->push_back(specific_field);
1032       reporter_->ReportMatched(message1, message2, *parent_fields);
1033       parent_fields->pop_back();
1034     }
1035   }
1036 
1037   // Report any remaining additions or deletions.
1038   for (int i = 0; i < count2; ++i) {
1039     if (!simple_list && match_list2[i] != -1) continue;
1040     if (simple_list && i < count1) continue;
1041     if (!treated_as_subset) {
1042       fieldDifferent = true;
1043     }
1044 
1045     if (reporter_ == NULL) continue;
1046     specific_field.index = i;
1047     specific_field.new_index = i;
1048     parent_fields->push_back(specific_field);
1049     reporter_->ReportAdded(message1, message2, *parent_fields);
1050     parent_fields->pop_back();
1051   }
1052 
1053   for (int i = 0; i < count1; ++i) {
1054     if (!simple_list && match_list1[i] != -1) continue;
1055     if (simple_list && i < count2) continue;
1056     assert(reporter_ != NULL);
1057     specific_field.index = i;
1058     parent_fields->push_back(specific_field);
1059     reporter_->ReportDeleted(message1, message2, *parent_fields);
1060     parent_fields->pop_back();
1061     fieldDifferent = true;
1062   }
1063   return !fieldDifferent;
1064 }
1065 
CompareFieldValue(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2)1066 bool MessageDifferencer::CompareFieldValue(const Message& message1,
1067                                            const Message& message2,
1068                                            const FieldDescriptor* field,
1069                                            int index1, int index2) {
1070   return CompareFieldValueUsingParentFields(message1, message2, field, index1,
1071                                             index2, NULL);
1072 }
1073 
CompareFieldValueUsingParentFields(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2,std::vector<SpecificField> * parent_fields)1074 bool MessageDifferencer::CompareFieldValueUsingParentFields(
1075     const Message& message1, const Message& message2,
1076     const FieldDescriptor* field, int index1, int index2,
1077     std::vector<SpecificField>* parent_fields) {
1078   FieldContext field_context(parent_fields);
1079   FieldComparator::ComparisonResult result = GetFieldComparisonResult(
1080       message1, message2, field, index1, index2, &field_context);
1081 
1082   if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
1083       result == FieldComparator::RECURSE) {
1084     // Get the nested messages and compare them using one of the Compare
1085     // methods.
1086     const Reflection* reflection1 = message1.GetReflection();
1087     const Reflection* reflection2 = message2.GetReflection();
1088     const Message& m1 =
1089         field->is_repeated()
1090             ? reflection1->GetRepeatedMessage(message1, field, index1)
1091             : reflection1->GetMessage(message1, field);
1092     const Message& m2 =
1093         field->is_repeated()
1094             ? reflection2->GetRepeatedMessage(message2, field, index2)
1095             : reflection2->GetMessage(message2, field);
1096 
1097     // parent_fields is used in calls to Reporter methods.
1098     if (parent_fields != NULL) {
1099       // Append currently compared field to the end of parent_fields.
1100       SpecificField specific_field;
1101       specific_field.field = field;
1102       specific_field.index = index1;
1103       specific_field.new_index = index2;
1104       parent_fields->push_back(specific_field);
1105       const bool compare_result = Compare(m1, m2, parent_fields);
1106       parent_fields->pop_back();
1107       return compare_result;
1108     } else {
1109       // Recreates parent_fields as if m1 and m2 had no parents.
1110       return Compare(m1, m2);
1111     }
1112   } else {
1113     return (result == FieldComparator::SAME);
1114   }
1115 }
1116 
CheckPathChanged(const std::vector<SpecificField> & field_path)1117 bool MessageDifferencer::CheckPathChanged(
1118     const std::vector<SpecificField>& field_path) {
1119   for (int i = 0; i < field_path.size(); ++i) {
1120     // Don't check indexes for map entries -- maps are unordered.
1121     if (field_path[i].field != NULL && field_path[i].field->is_map()) continue;
1122     if (field_path[i].index != field_path[i].new_index) return true;
1123   }
1124   return false;
1125 }
1126 
IsTreatedAsSet(const FieldDescriptor * field)1127 bool MessageDifferencer::IsTreatedAsSet(const FieldDescriptor* field) {
1128   if (!field->is_repeated()) return false;
1129   if (repeated_field_comparisons_.find(field) !=
1130       repeated_field_comparisons_.end()) {
1131     return repeated_field_comparisons_[field] == AS_SET;
1132   }
1133   return repeated_field_comparison_ == AS_SET;
1134 }
1135 
IsTreatedAsSmartSet(const FieldDescriptor * field)1136 bool MessageDifferencer::IsTreatedAsSmartSet(const FieldDescriptor* field) {
1137   if (!field->is_repeated()) return false;
1138   if (repeated_field_comparisons_.find(field) !=
1139       repeated_field_comparisons_.end()) {
1140     return repeated_field_comparisons_[field] == AS_SMART_SET;
1141   }
1142   return repeated_field_comparison_ == AS_SMART_SET;
1143 }
1144 
IsTreatedAsSmartList(const FieldDescriptor * field)1145 bool MessageDifferencer::IsTreatedAsSmartList(const FieldDescriptor* field) {
1146   if (!field->is_repeated()) return false;
1147   if (repeated_field_comparisons_.find(field) !=
1148       repeated_field_comparisons_.end()) {
1149     return repeated_field_comparisons_[field] == AS_SMART_LIST;
1150   }
1151   return repeated_field_comparison_ == AS_SMART_LIST;
1152 }
1153 
IsTreatedAsSubset(const FieldDescriptor * field)1154 bool MessageDifferencer::IsTreatedAsSubset(const FieldDescriptor* field) {
1155   return scope_ == PARTIAL &&
1156          (IsTreatedAsSet(field) || GetMapKeyComparator(field) != NULL);
1157 }
1158 
IsIgnored(const Message & message1,const Message & message2,const FieldDescriptor * field,const std::vector<SpecificField> & parent_fields)1159 bool MessageDifferencer::IsIgnored(
1160     const Message& message1, const Message& message2,
1161     const FieldDescriptor* field,
1162     const std::vector<SpecificField>& parent_fields) {
1163   if (ignored_fields_.find(field) != ignored_fields_.end()) {
1164     return true;
1165   }
1166   for (int i = 0; i < ignore_criteria_.size(); ++i) {
1167     if (ignore_criteria_[i]->IsIgnored(message1, message2, field,
1168                                        parent_fields)) {
1169       return true;
1170     }
1171   }
1172   return false;
1173 }
1174 
IsUnknownFieldIgnored(const Message & message1,const Message & message2,const SpecificField & field,const std::vector<SpecificField> & parent_fields)1175 bool MessageDifferencer::IsUnknownFieldIgnored(
1176     const Message& message1, const Message& message2,
1177     const SpecificField& field,
1178     const std::vector<SpecificField>& parent_fields) {
1179   for (int i = 0; i < ignore_criteria_.size(); ++i) {
1180     if (ignore_criteria_[i]->IsUnknownFieldIgnored(message1, message2, field,
1181                                                    parent_fields)) {
1182       return true;
1183     }
1184   }
1185   return false;
1186 }
1187 
1188 const MessageDifferencer::MapKeyComparator*
GetMapKeyComparator(const FieldDescriptor * field) const1189 MessageDifferencer ::GetMapKeyComparator(const FieldDescriptor* field) const {
1190   if (!field->is_repeated()) return NULL;
1191   FieldKeyComparatorMap::const_iterator it =
1192       map_field_key_comparator_.find(field);
1193   if (it != map_field_key_comparator_.end()) {
1194     return it->second;
1195   }
1196   if (field->is_map()) {
1197     // field cannot already be treated as list or set since TreatAsList() and
1198     // TreatAsSet() call GetMapKeyComparator() and fail if it returns non-NULL.
1199     return &map_entry_key_comparator_;
1200   }
1201   return NULL;
1202 }
1203 
1204 namespace {
1205 
1206 typedef std::pair<int, const UnknownField*> IndexUnknownFieldPair;
1207 
1208 struct UnknownFieldOrdering {
operator ()google::protobuf::util::__anon934161890111::UnknownFieldOrdering1209   inline bool operator()(const IndexUnknownFieldPair& a,
1210                          const IndexUnknownFieldPair& b) const {
1211     if (a.second->number() < b.second->number()) return true;
1212     if (a.second->number() > b.second->number()) return false;
1213     return a.second->type() < b.second->type();
1214   }
1215 };
1216 
1217 }  // namespace
1218 
UnpackAny(const Message & any,std::unique_ptr<Message> * data)1219 bool MessageDifferencer::UnpackAny(const Message& any,
1220                                    std::unique_ptr<Message>* data) {
1221   const Reflection* reflection = any.GetReflection();
1222   const FieldDescriptor* type_url_field;
1223   const FieldDescriptor* value_field;
1224   if (!internal::GetAnyFieldDescriptors(any, &type_url_field, &value_field)) {
1225     return false;
1226   }
1227   const std::string& type_url = reflection->GetString(any, type_url_field);
1228   std::string full_type_name;
1229   if (!internal::ParseAnyTypeUrl(type_url, &full_type_name)) {
1230     return false;
1231   }
1232 
1233   const Descriptor* desc =
1234       any.GetDescriptor()->file()->pool()->FindMessageTypeByName(
1235           full_type_name);
1236   if (desc == NULL) {
1237     GOOGLE_DLOG(ERROR) << "Proto type '" << full_type_name << "' not found";
1238     return false;
1239   }
1240 
1241   if (dynamic_message_factory_ == NULL) {
1242     dynamic_message_factory_.reset(new DynamicMessageFactory());
1243   }
1244   data->reset(dynamic_message_factory_->GetPrototype(desc)->New());
1245   std::string serialized_value = reflection->GetString(any, value_field);
1246   if (!(*data)->ParsePartialFromString(serialized_value)) {
1247     GOOGLE_DLOG(ERROR) << "Failed to parse value for " << full_type_name;
1248     return false;
1249   }
1250   return true;
1251 }
1252 
CompareUnknownFields(const Message & message1,const Message & message2,const UnknownFieldSet & unknown_field_set1,const UnknownFieldSet & unknown_field_set2,std::vector<SpecificField> * parent_field)1253 bool MessageDifferencer::CompareUnknownFields(
1254     const Message& message1, const Message& message2,
1255     const UnknownFieldSet& unknown_field_set1,
1256     const UnknownFieldSet& unknown_field_set2,
1257     std::vector<SpecificField>* parent_field) {
1258   // Ignore unknown fields in EQUIVALENT mode.
1259   if (message_field_comparison_ == EQUIVALENT) return true;
1260 
1261   if (unknown_field_set1.empty() && unknown_field_set2.empty()) {
1262     return true;
1263   }
1264 
1265   bool is_different = false;
1266 
1267   // We first sort the unknown fields by field number and type (in other words,
1268   // in tag order), making sure to preserve ordering of values with the same
1269   // tag.  This allows us to report only meaningful differences between the
1270   // two sets -- that is, differing values for the same tag.  We use
1271   // IndexUnknownFieldPairs to keep track of the field's original index for
1272   // reporting purposes.
1273   std::vector<IndexUnknownFieldPair> fields1;  // unknown_field_set1, sorted
1274   std::vector<IndexUnknownFieldPair> fields2;  // unknown_field_set2, sorted
1275   fields1.reserve(unknown_field_set1.field_count());
1276   fields2.reserve(unknown_field_set2.field_count());
1277 
1278   for (int i = 0; i < unknown_field_set1.field_count(); i++) {
1279     fields1.push_back(std::make_pair(i, &unknown_field_set1.field(i)));
1280   }
1281   for (int i = 0; i < unknown_field_set2.field_count(); i++) {
1282     fields2.push_back(std::make_pair(i, &unknown_field_set2.field(i)));
1283   }
1284 
1285   UnknownFieldOrdering is_before;
1286   std::stable_sort(fields1.begin(), fields1.end(), is_before);
1287   std::stable_sort(fields2.begin(), fields2.end(), is_before);
1288 
1289   // In order to fill in SpecificField::index, we have to keep track of how
1290   // many values we've seen with the same field number and type.
1291   // current_repeated points at the first field in this range, and
1292   // current_repeated_start{1,2} are the indexes of the first field in the
1293   // range within fields1 and fields2.
1294   const UnknownField* current_repeated = NULL;
1295   int current_repeated_start1 = 0;
1296   int current_repeated_start2 = 0;
1297 
1298   // Now that we have two sorted lists, we can detect fields which appear only
1299   // in one list or the other by traversing them simultaneously.
1300   int index1 = 0;
1301   int index2 = 0;
1302   while (index1 < fields1.size() || index2 < fields2.size()) {
1303     enum {
1304       ADDITION,
1305       DELETION,
1306       MODIFICATION,
1307       COMPARE_GROUPS,
1308       NO_CHANGE
1309     } change_type;
1310 
1311     // focus_field is the field we're currently reporting on.  (In the case
1312     // of a modification, it's the field on the left side.)
1313     const UnknownField* focus_field;
1314     bool match = false;
1315 
1316     if (index2 == fields2.size() ||
1317         (index1 < fields1.size() &&
1318          is_before(fields1[index1], fields2[index2]))) {
1319       // fields1[index1] is not present in fields2.
1320       change_type = DELETION;
1321       focus_field = fields1[index1].second;
1322     } else if (index1 == fields1.size() ||
1323                is_before(fields2[index2], fields1[index1])) {
1324       // fields2[index2] is not present in fields1.
1325       if (scope_ == PARTIAL) {
1326         // Ignore.
1327         ++index2;
1328         continue;
1329       }
1330       change_type = ADDITION;
1331       focus_field = fields2[index2].second;
1332     } else {
1333       // Field type and number are the same.  See if the values differ.
1334       change_type = MODIFICATION;
1335       focus_field = fields1[index1].second;
1336 
1337       switch (focus_field->type()) {
1338         case UnknownField::TYPE_VARINT:
1339           match = fields1[index1].second->varint() ==
1340                   fields2[index2].second->varint();
1341           break;
1342         case UnknownField::TYPE_FIXED32:
1343           match = fields1[index1].second->fixed32() ==
1344                   fields2[index2].second->fixed32();
1345           break;
1346         case UnknownField::TYPE_FIXED64:
1347           match = fields1[index1].second->fixed64() ==
1348                   fields2[index2].second->fixed64();
1349           break;
1350         case UnknownField::TYPE_LENGTH_DELIMITED:
1351           match = fields1[index1].second->length_delimited() ==
1352                   fields2[index2].second->length_delimited();
1353           break;
1354         case UnknownField::TYPE_GROUP:
1355           // We must deal with this later, after building the SpecificField.
1356           change_type = COMPARE_GROUPS;
1357           break;
1358       }
1359       if (match && change_type != COMPARE_GROUPS) {
1360         change_type = NO_CHANGE;
1361       }
1362     }
1363 
1364     if (current_repeated == NULL ||
1365         focus_field->number() != current_repeated->number() ||
1366         focus_field->type() != current_repeated->type()) {
1367       // We've started a new repeated field.
1368       current_repeated = focus_field;
1369       current_repeated_start1 = index1;
1370       current_repeated_start2 = index2;
1371     }
1372 
1373     if (change_type == NO_CHANGE && reporter_ == NULL) {
1374       // Fields were already compared and matched and we have no reporter.
1375       ++index1;
1376       ++index2;
1377       continue;
1378     }
1379 
1380     // Build the SpecificField.  This is slightly complicated.
1381     SpecificField specific_field;
1382     specific_field.unknown_field_number = focus_field->number();
1383     specific_field.unknown_field_type = focus_field->type();
1384 
1385     specific_field.unknown_field_set1 = &unknown_field_set1;
1386     specific_field.unknown_field_set2 = &unknown_field_set2;
1387 
1388     if (change_type != ADDITION) {
1389       specific_field.unknown_field_index1 = fields1[index1].first;
1390     }
1391     if (change_type != DELETION) {
1392       specific_field.unknown_field_index2 = fields2[index2].first;
1393     }
1394 
1395     // Calculate the field index.
1396     if (change_type == ADDITION) {
1397       specific_field.index = index2 - current_repeated_start2;
1398       specific_field.new_index = index2 - current_repeated_start2;
1399     } else {
1400       specific_field.index = index1 - current_repeated_start1;
1401       specific_field.new_index = index2 - current_repeated_start2;
1402     }
1403 
1404     if (IsUnknownFieldIgnored(message1, message2, specific_field,
1405                               *parent_field)) {
1406       if (reporter_ != NULL) {
1407         parent_field->push_back(specific_field);
1408         reporter_->ReportUnknownFieldIgnored(message1, message2, *parent_field);
1409         parent_field->pop_back();
1410       }
1411       return true;
1412     }
1413 
1414     if (change_type == ADDITION || change_type == DELETION ||
1415         change_type == MODIFICATION) {
1416       if (reporter_ == NULL) {
1417         // We found a difference and we have no reporter.
1418         return false;
1419       }
1420       is_different = true;
1421     }
1422 
1423     parent_field->push_back(specific_field);
1424 
1425     switch (change_type) {
1426       case ADDITION:
1427         reporter_->ReportAdded(message1, message2, *parent_field);
1428         ++index2;
1429         break;
1430       case DELETION:
1431         reporter_->ReportDeleted(message1, message2, *parent_field);
1432         ++index1;
1433         break;
1434       case MODIFICATION:
1435         reporter_->ReportModified(message1, message2, *parent_field);
1436         ++index1;
1437         ++index2;
1438         break;
1439       case COMPARE_GROUPS:
1440         if (!CompareUnknownFields(
1441                 message1, message2, fields1[index1].second->group(),
1442                 fields2[index2].second->group(), parent_field)) {
1443           if (reporter_ == NULL) return false;
1444           is_different = true;
1445           reporter_->ReportModified(message1, message2, *parent_field);
1446         }
1447         ++index1;
1448         ++index2;
1449         break;
1450       case NO_CHANGE:
1451         ++index1;
1452         ++index2;
1453         if (report_matches_) {
1454           reporter_->ReportMatched(message1, message2, *parent_field);
1455         }
1456     }
1457 
1458     parent_field->pop_back();
1459   }
1460 
1461   return !is_different;
1462 }
1463 
1464 namespace {
1465 
1466 // Find maximum bipartite matching using the argumenting path algorithm.
1467 class MaximumMatcher {
1468  public:
1469   typedef std::function<bool(int, int)> NodeMatchCallback;
1470   // MaximumMatcher takes ownership of the passed in callback and uses it to
1471   // determine whether a node on the left side of the bipartial graph matches
1472   // a node on the right side. count1 is the number of nodes on the left side
1473   // of the graph and count2 to is the number of nodes on the right side.
1474   // Every node is referred to using 0-based indices.
1475   // If a maximum match is found, the result will be stored in match_list1 and
1476   // match_list2. match_list1[i] == j means the i-th node on the left side is
1477   // matched to the j-th node on the right side and match_list2[x] == y means
1478   // the x-th node on the right side is matched to y-th node on the left side.
1479   // match_list1[i] == -1 means the node is not matched. Same with match_list2.
1480   MaximumMatcher(int count1, int count2, NodeMatchCallback callback,
1481                  std::vector<int>* match_list1, std::vector<int>* match_list2);
1482   // Find a maximum match and return the number of matched node pairs.
1483   // If early_return is true, this method will return 0 immediately when it
1484   // finds that not all nodes on the left side can be matched.
1485   int FindMaximumMatch(bool early_return);
1486 
1487  private:
1488   // Determines whether the node on the left side of the bipartial graph
1489   // matches the one on the right side.
1490   bool Match(int left, int right);
1491   // Find an argumenting path starting from the node v on the left side. If a
1492   // path can be found, update match_list2_ to reflect the path and return
1493   // true.
1494   bool FindArgumentPathDFS(int v, std::vector<bool>* visited);
1495 
1496   int count1_;
1497   int count2_;
1498   NodeMatchCallback match_callback_;
1499   std::map<std::pair<int, int>, bool> cached_match_results_;
1500   std::vector<int>* match_list1_;
1501   std::vector<int>* match_list2_;
1502   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MaximumMatcher);
1503 };
1504 
MaximumMatcher(int count1,int count2,NodeMatchCallback callback,std::vector<int> * match_list1,std::vector<int> * match_list2)1505 MaximumMatcher::MaximumMatcher(int count1, int count2,
1506                                NodeMatchCallback callback,
1507                                std::vector<int>* match_list1,
1508                                std::vector<int>* match_list2)
1509     : count1_(count1),
1510       count2_(count2),
1511       match_callback_(std::move(callback)),
1512       match_list1_(match_list1),
1513       match_list2_(match_list2) {
1514   match_list1_->assign(count1, -1);
1515   match_list2_->assign(count2, -1);
1516 }
1517 
FindMaximumMatch(bool early_return)1518 int MaximumMatcher::FindMaximumMatch(bool early_return) {
1519   int result = 0;
1520   for (int i = 0; i < count1_; ++i) {
1521     std::vector<bool> visited(count1_);
1522     if (FindArgumentPathDFS(i, &visited)) {
1523       ++result;
1524     } else if (early_return) {
1525       return 0;
1526     }
1527   }
1528   // Backfill match_list1_ as we only filled match_list2_ when finding
1529   // argumenting pathes.
1530   for (int i = 0; i < count2_; ++i) {
1531     if ((*match_list2_)[i] != -1) {
1532       (*match_list1_)[(*match_list2_)[i]] = i;
1533     }
1534   }
1535   return result;
1536 }
1537 
Match(int left,int right)1538 bool MaximumMatcher::Match(int left, int right) {
1539   std::pair<int, int> p(left, right);
1540   std::map<std::pair<int, int>, bool>::iterator it =
1541       cached_match_results_.find(p);
1542   if (it != cached_match_results_.end()) {
1543     return it->second;
1544   }
1545   cached_match_results_[p] = match_callback_(left, right);
1546   return cached_match_results_[p];
1547 }
1548 
FindArgumentPathDFS(int v,std::vector<bool> * visited)1549 bool MaximumMatcher::FindArgumentPathDFS(int v, std::vector<bool>* visited) {
1550   (*visited)[v] = true;
1551   // We try to match those un-matched nodes on the right side first. This is
1552   // the step that the naive greedy matching algorithm uses. In the best cases
1553   // where the greedy algorithm can find a maximum matching, we will always
1554   // find a match in this step and the performance will be identical to the
1555   // greedy algorithm.
1556   for (int i = 0; i < count2_; ++i) {
1557     int matched = (*match_list2_)[i];
1558     if (matched == -1 && Match(v, i)) {
1559       (*match_list2_)[i] = v;
1560       return true;
1561     }
1562   }
1563   // Then we try those already matched nodes and see if we can find an
1564   // alternative match for the node matched to them.
1565   // The greedy algorithm will stop before this and fail to produce the
1566   // correct result.
1567   for (int i = 0; i < count2_; ++i) {
1568     int matched = (*match_list2_)[i];
1569     if (matched != -1 && Match(v, i)) {
1570       if (!(*visited)[matched] && FindArgumentPathDFS(matched, visited)) {
1571         (*match_list2_)[i] = v;
1572         return true;
1573       }
1574     }
1575   }
1576   return false;
1577 }
1578 
1579 }  // namespace
1580 
MatchRepeatedFieldIndices(const Message & message1,const Message & message2,const FieldDescriptor * repeated_field,const MapKeyComparator * key_comparator,const std::vector<SpecificField> & parent_fields,std::vector<int> * match_list1,std::vector<int> * match_list2)1581 bool MessageDifferencer::MatchRepeatedFieldIndices(
1582     const Message& message1, const Message& message2,
1583     const FieldDescriptor* repeated_field,
1584     const MapKeyComparator* key_comparator,
1585     const std::vector<SpecificField>& parent_fields,
1586     std::vector<int>* match_list1, std::vector<int>* match_list2) {
1587   const int count1 =
1588       message1.GetReflection()->FieldSize(message1, repeated_field);
1589   const int count2 =
1590       message2.GetReflection()->FieldSize(message2, repeated_field);
1591   const bool is_treated_as_smart_set = IsTreatedAsSmartSet(repeated_field);
1592 
1593   match_list1->assign(count1, -1);
1594   match_list2->assign(count2, -1);
1595   // Ensure that we don't report differences during the matching process. Since
1596   // field comparators could potentially use this message differencer object to
1597   // perform further comparisons, turn off reporting here and re-enable it
1598   // before returning.
1599   Reporter* reporter = reporter_;
1600   reporter_ = NULL;
1601   NumDiffsReporter num_diffs_reporter;
1602   std::vector<int32> num_diffs_list1;
1603   if (is_treated_as_smart_set) {
1604     num_diffs_list1.assign(count1, kint32max);
1605   }
1606 
1607   bool success = true;
1608   // Find potential match if this is a special repeated field.
1609   if (scope_ == PARTIAL) {
1610     // When partial matching is enabled, Compare(a, b) && Compare(a, c)
1611     // doesn't necessarily imply Compare(b, c). Therefore a naive greedy
1612     // algorithm will fail to find a maximum matching.
1613     // Here we use the augmenting path algorithm.
1614     auto callback = [&](int i1, int i2) {
1615       return IsMatch(repeated_field, key_comparator, &message1, &message2,
1616                      parent_fields, nullptr, i1, i2);
1617     };
1618     MaximumMatcher matcher(count1, count2, std::move(callback), match_list1,
1619                            match_list2);
1620     // If diff info is not needed, we should end the matching process as
1621     // soon as possible if not all items can be matched.
1622     bool early_return = (reporter == nullptr);
1623     int match_count = matcher.FindMaximumMatch(early_return);
1624     if (match_count != count1 && early_return) return false;
1625     success = success && (match_count == count1);
1626   } else {
1627     int start_offset = 0;
1628     // If the two repeated fields are treated as sets, optimize for the case
1629     // where both start with same items stored in the same order.
1630     if (IsTreatedAsSet(repeated_field) || is_treated_as_smart_set ||
1631         IsTreatedAsSmartList(repeated_field)) {
1632       start_offset = std::min(count1, count2);
1633       for (int i = 0; i < count1 && i < count2; i++) {
1634         if (IsMatch(repeated_field, key_comparator, &message1, &message2,
1635                     parent_fields, nullptr, i, i)) {
1636           match_list1->at(i) = i;
1637           match_list2->at(i) = i;
1638         } else {
1639           start_offset = i;
1640           break;
1641         }
1642       }
1643     }
1644     for (int i = start_offset; i < count1; ++i) {
1645       // Indicates any matched elements for this repeated field.
1646       bool match = false;
1647       int matched_j = -1;
1648 
1649       for (int j = start_offset; j < count2; j++) {
1650         if (match_list2->at(j) != -1) {
1651           if (!is_treated_as_smart_set || num_diffs_list1[i] == 0 ||
1652               num_diffs_list1[match_list2->at(j)] == 0) {
1653             continue;
1654           }
1655         }
1656 
1657         if (is_treated_as_smart_set) {
1658           num_diffs_reporter.Reset();
1659           match = IsMatch(repeated_field, key_comparator, &message1, &message2,
1660                           parent_fields, &num_diffs_reporter, i, j);
1661         } else {
1662           match = IsMatch(repeated_field, key_comparator, &message1, &message2,
1663                           parent_fields, nullptr, i, j);
1664         }
1665 
1666         if (is_treated_as_smart_set) {
1667           if (match) {
1668             num_diffs_list1[i] = 0;
1669           } else if (repeated_field->cpp_type() ==
1670                      FieldDescriptor::CPPTYPE_MESSAGE) {
1671             // Replace with the one with fewer diffs.
1672             const int32 num_diffs = num_diffs_reporter.GetNumDiffs();
1673             if (num_diffs < num_diffs_list1[i]) {
1674               // If j has been already matched to some element, ensure the
1675               // current num_diffs is smaller.
1676               if (match_list2->at(j) == -1 ||
1677                   num_diffs < num_diffs_list1[match_list2->at(j)]) {
1678                 num_diffs_list1[i] = num_diffs;
1679                 match = true;
1680               }
1681             }
1682           }
1683         }
1684 
1685         if (match) {
1686           matched_j = j;
1687           if (!is_treated_as_smart_set || num_diffs_list1[i] == 0) {
1688             break;
1689           }
1690         }
1691       }
1692 
1693       match = (matched_j != -1);
1694       if (match) {
1695         if (is_treated_as_smart_set && match_list2->at(matched_j) != -1) {
1696           // This is to revert the previously matched index in list2.
1697           match_list1->at(match_list2->at(matched_j)) = -1;
1698           match = false;
1699         }
1700         match_list1->at(i) = matched_j;
1701         match_list2->at(matched_j) = i;
1702       }
1703       if (!match && reporter == nullptr) return false;
1704       success = success && match;
1705     }
1706   }
1707 
1708   if (IsTreatedAsSmartList(repeated_field)) {
1709     match_indices_for_smart_list_callback_(match_list1, match_list2);
1710   }
1711 
1712   reporter_ = reporter;
1713 
1714   return success;
1715 }
1716 
GetFieldComparisonResult(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2,const FieldContext * field_context)1717 FieldComparator::ComparisonResult MessageDifferencer::GetFieldComparisonResult(
1718     const Message& message1, const Message& message2,
1719     const FieldDescriptor* field, int index1, int index2,
1720     const FieldContext* field_context) {
1721   FieldComparator* comparator = field_comparator_ != NULL
1722                                     ? field_comparator_
1723                                     : &default_field_comparator_;
1724   return comparator->Compare(message1, message2, field, index1, index2,
1725                              field_context);
1726 }
1727 
1728 // ===========================================================================
1729 
Reporter()1730 MessageDifferencer::Reporter::Reporter() {}
~Reporter()1731 MessageDifferencer::Reporter::~Reporter() {}
1732 
1733 // ===========================================================================
1734 
MapKeyComparator()1735 MessageDifferencer::MapKeyComparator::MapKeyComparator() {}
~MapKeyComparator()1736 MessageDifferencer::MapKeyComparator::~MapKeyComparator() {}
1737 
1738 // ===========================================================================
1739 
IgnoreCriteria()1740 MessageDifferencer::IgnoreCriteria::IgnoreCriteria() {}
~IgnoreCriteria()1741 MessageDifferencer::IgnoreCriteria::~IgnoreCriteria() {}
1742 
1743 // ===========================================================================
1744 
1745 // Note that the printer's delimiter is not used, because if we are given a
1746 // printer, we don't know its delimiter.
StreamReporter(io::ZeroCopyOutputStream * output)1747 MessageDifferencer::StreamReporter::StreamReporter(
1748     io::ZeroCopyOutputStream* output)
1749     : printer_(new io::Printer(output, '$')),
1750       delete_printer_(true),
1751       report_modified_aggregates_(false) {}
1752 
StreamReporter(io::Printer * printer)1753 MessageDifferencer::StreamReporter::StreamReporter(io::Printer* printer)
1754     : printer_(printer),
1755       delete_printer_(false),
1756       report_modified_aggregates_(false) {}
1757 
~StreamReporter()1758 MessageDifferencer::StreamReporter::~StreamReporter() {
1759   if (delete_printer_) delete printer_;
1760 }
1761 
PrintPath(const std::vector<SpecificField> & field_path,bool left_side)1762 void MessageDifferencer::StreamReporter::PrintPath(
1763     const std::vector<SpecificField>& field_path, bool left_side) {
1764   for (int i = 0; i < field_path.size(); ++i) {
1765     if (i > 0) {
1766       printer_->Print(".");
1767     }
1768 
1769     SpecificField specific_field = field_path[i];
1770 
1771     if (specific_field.field != NULL) {
1772       if (specific_field.field->is_extension()) {
1773         printer_->Print("($name$)", "name", specific_field.field->full_name());
1774       } else {
1775         printer_->PrintRaw(specific_field.field->name());
1776       }
1777       if (specific_field.field->is_map()) {
1778         // Don't print index in a map field; they are semantically unordered.
1779         continue;
1780       }
1781     } else {
1782       printer_->PrintRaw(StrCat(specific_field.unknown_field_number));
1783     }
1784     if (left_side && specific_field.index >= 0) {
1785       printer_->Print("[$name$]", "name", StrCat(specific_field.index));
1786     }
1787     if (!left_side && specific_field.new_index >= 0) {
1788       printer_->Print("[$name$]", "name",
1789                       StrCat(specific_field.new_index));
1790     }
1791   }
1792 }
1793 
PrintPath(const std::vector<SpecificField> & field_path,bool left_side,const Message & message)1794 void MessageDifferencer::StreamReporter::PrintPath(
1795     const std::vector<SpecificField>& field_path, bool left_side,
1796     const Message& message) {
1797   PrintPath(field_path, left_side);
1798 }
1799 
PrintValue(const Message & message,const std::vector<SpecificField> & field_path,bool left_side)1800 void MessageDifferencer::StreamReporter::PrintValue(
1801     const Message& message, const std::vector<SpecificField>& field_path,
1802     bool left_side) {
1803   const SpecificField& specific_field = field_path.back();
1804   const FieldDescriptor* field = specific_field.field;
1805   if (field != NULL) {
1806     std::string output;
1807     int index = left_side ? specific_field.index : specific_field.new_index;
1808     if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1809       const Reflection* reflection = message.GetReflection();
1810       const Message& field_message =
1811           field->is_repeated()
1812               ? reflection->GetRepeatedMessage(message, field, index)
1813               : reflection->GetMessage(message, field);
1814       output = field_message.ShortDebugString();
1815       if (output.empty()) {
1816         printer_->Print("{ }");
1817       } else {
1818         printer_->Print("{ $name$ }", "name", output);
1819       }
1820     } else {
1821       TextFormat::PrintFieldValueToString(message, field, index, &output);
1822       printer_->PrintRaw(output);
1823     }
1824   } else {
1825     const UnknownFieldSet* unknown_fields =
1826         (left_side ? specific_field.unknown_field_set1
1827                    : specific_field.unknown_field_set2);
1828     const UnknownField* unknown_field =
1829         &unknown_fields->field(left_side ? specific_field.unknown_field_index1
1830                                          : specific_field.unknown_field_index2);
1831     PrintUnknownFieldValue(unknown_field);
1832   }
1833 }
1834 
PrintUnknownFieldValue(const UnknownField * unknown_field)1835 void MessageDifferencer::StreamReporter::PrintUnknownFieldValue(
1836     const UnknownField* unknown_field) {
1837   GOOGLE_CHECK(unknown_field != NULL) << " Cannot print NULL unknown_field.";
1838 
1839   std::string output;
1840   switch (unknown_field->type()) {
1841     case UnknownField::TYPE_VARINT:
1842       output = StrCat(unknown_field->varint());
1843       break;
1844     case UnknownField::TYPE_FIXED32:
1845       output = StrCat(
1846           "0x", strings::Hex(unknown_field->fixed32(), strings::ZERO_PAD_8));
1847       break;
1848     case UnknownField::TYPE_FIXED64:
1849       output = StrCat(
1850           "0x", strings::Hex(unknown_field->fixed64(), strings::ZERO_PAD_16));
1851       break;
1852     case UnknownField::TYPE_LENGTH_DELIMITED:
1853       output = StringPrintf(
1854           "\"%s\"", CEscape(unknown_field->length_delimited()).c_str());
1855       break;
1856     case UnknownField::TYPE_GROUP:
1857       // TODO(kenton):  Print the contents of the group like we do for
1858       //   messages.  Requires an equivalent of ShortDebugString() for
1859       //   UnknownFieldSet.
1860       output = "{ ... }";
1861       break;
1862   }
1863   printer_->PrintRaw(output);
1864 }
1865 
Print(const std::string & str)1866 void MessageDifferencer::StreamReporter::Print(const std::string& str) {
1867   printer_->Print(str.c_str());
1868 }
1869 
ReportAdded(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1870 void MessageDifferencer::StreamReporter::ReportAdded(
1871     const Message& message1, const Message& message2,
1872     const std::vector<SpecificField>& field_path) {
1873   printer_->Print("added: ");
1874   PrintPath(field_path, false, message2);
1875   printer_->Print(": ");
1876   PrintValue(message2, field_path, false);
1877   printer_->Print("\n");  // Print for newlines.
1878 }
1879 
ReportDeleted(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1880 void MessageDifferencer::StreamReporter::ReportDeleted(
1881     const Message& message1, const Message& message2,
1882     const std::vector<SpecificField>& field_path) {
1883   printer_->Print("deleted: ");
1884   PrintPath(field_path, true, message1);
1885   printer_->Print(": ");
1886   PrintValue(message1, field_path, true);
1887   printer_->Print("\n");  // Print for newlines
1888 }
1889 
ReportModified(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1890 void MessageDifferencer::StreamReporter::ReportModified(
1891     const Message& message1, const Message& message2,
1892     const std::vector<SpecificField>& field_path) {
1893   if (!report_modified_aggregates_ && field_path.back().field == NULL) {
1894     if (field_path.back().unknown_field_type == UnknownField::TYPE_GROUP) {
1895       // Any changes to the subfields have already been printed.
1896       return;
1897     }
1898   } else if (!report_modified_aggregates_) {
1899     if (field_path.back().field->cpp_type() ==
1900         FieldDescriptor::CPPTYPE_MESSAGE) {
1901       // Any changes to the subfields have already been printed.
1902       return;
1903     }
1904   }
1905 
1906   printer_->Print("modified: ");
1907   PrintPath(field_path, true, message1);
1908   if (CheckPathChanged(field_path)) {
1909     printer_->Print(" -> ");
1910     PrintPath(field_path, false, message2);
1911   }
1912   printer_->Print(": ");
1913   PrintValue(message1, field_path, true);
1914   printer_->Print(" -> ");
1915   PrintValue(message2, field_path, false);
1916   printer_->Print("\n");  // Print for newlines.
1917 }
1918 
ReportMoved(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1919 void MessageDifferencer::StreamReporter::ReportMoved(
1920     const Message& message1, const Message& message2,
1921     const std::vector<SpecificField>& field_path) {
1922   printer_->Print("moved: ");
1923   PrintPath(field_path, true, message1);
1924   printer_->Print(" -> ");
1925   PrintPath(field_path, false, message2);
1926   printer_->Print(" : ");
1927   PrintValue(message1, field_path, true);
1928   printer_->Print("\n");  // Print for newlines.
1929 }
1930 
ReportMatched(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1931 void MessageDifferencer::StreamReporter::ReportMatched(
1932     const Message& message1, const Message& message2,
1933     const std::vector<SpecificField>& field_path) {
1934   printer_->Print("matched: ");
1935   PrintPath(field_path, true, message1);
1936   if (CheckPathChanged(field_path)) {
1937     printer_->Print(" -> ");
1938     PrintPath(field_path, false, message2);
1939   }
1940   printer_->Print(" : ");
1941   PrintValue(message1, field_path, true);
1942   printer_->Print("\n");  // Print for newlines.
1943 }
1944 
ReportIgnored(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1945 void MessageDifferencer::StreamReporter::ReportIgnored(
1946     const Message& message1, const Message& message2,
1947     const std::vector<SpecificField>& field_path) {
1948   printer_->Print("ignored: ");
1949   PrintPath(field_path, true, message1);
1950   if (CheckPathChanged(field_path)) {
1951     printer_->Print(" -> ");
1952     PrintPath(field_path, false, message2);
1953   }
1954   printer_->Print("\n");  // Print for newlines.
1955 }
1956 
ReportUnknownFieldIgnored(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1957 void MessageDifferencer::StreamReporter::ReportUnknownFieldIgnored(
1958     const Message& message1, const Message& message2,
1959     const std::vector<SpecificField>& field_path) {
1960   printer_->Print("ignored: ");
1961   PrintPath(field_path, true, message1);
1962   if (CheckPathChanged(field_path)) {
1963     printer_->Print(" -> ");
1964     PrintPath(field_path, false, message2);
1965   }
1966   printer_->Print("\n");  // Print for newlines.
1967 }
1968 
1969 MessageDifferencer::MapKeyComparator*
CreateMultipleFieldsMapKeyComparator(const std::vector<std::vector<const FieldDescriptor * >> & key_field_paths)1970 MessageDifferencer::CreateMultipleFieldsMapKeyComparator(
1971     const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths) {
1972   return new MultipleFieldsMapKeyComparator(this, key_field_paths);
1973 }
1974 
1975 }  // namespace util
1976 }  // namespace protobuf
1977 }  // namespace google
1978