1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc. All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // Author: jschorr@google.com (Joseph Schorr)
32 // Based on original Protocol Buffers design by
33 // Sanjay Ghemawat, Jeff Dean, and others.
34
35 #include <google/protobuf/util/message_differencer.h>
36
37 #include <algorithm>
38 #include <functional>
39 #include <memory>
40 #include <utility>
41
42 #include <google/protobuf/stubs/logging.h>
43 #include <google/protobuf/stubs/common.h>
44 #include <google/protobuf/stubs/stringprintf.h>
45 #include <google/protobuf/io/printer.h>
46 #include <google/protobuf/io/zero_copy_stream.h>
47 #include <google/protobuf/io/zero_copy_stream_impl.h>
48 #include <google/protobuf/descriptor.pb.h>
49 #include <google/protobuf/dynamic_message.h>
50 #include <google/protobuf/map_field.h>
51 #include <google/protobuf/text_format.h>
52 #include <google/protobuf/util/field_comparator.h>
53 #include <google/protobuf/stubs/strutil.h>
54
55 // Always include as last one, otherwise it can break compilation
56 #include <google/protobuf/port_def.inc>
57
58 namespace google {
59 namespace protobuf {
60
61 namespace util {
62
63 // A reporter to report the total number of diffs.
64 // TODO(ykzhu): we can improve this to take into account the value differencers.
65 class NumDiffsReporter : public google::protobuf::util::MessageDifferencer::Reporter {
66 public:
NumDiffsReporter()67 NumDiffsReporter() : num_diffs_(0) {}
68
69 // Returns the total number of diffs.
GetNumDiffs() const70 int32 GetNumDiffs() const { return num_diffs_; }
Reset()71 void Reset() { num_diffs_ = 0; }
72
73 // Report that a field has been added into Message2.
ReportAdded(const google::protobuf::Message & message1,const google::protobuf::Message & message2,const std::vector<google::protobuf::util::MessageDifferencer::SpecificField> & field_path)74 void ReportAdded(
75 const google::protobuf::Message& message1, const google::protobuf::Message& message2,
76 const std::vector<google::protobuf::util::MessageDifferencer::SpecificField>&
77 field_path) override {
78 ++num_diffs_;
79 }
80
81 // Report that a field has been deleted from Message1.
ReportDeleted(const google::protobuf::Message & message1,const google::protobuf::Message & message2,const std::vector<google::protobuf::util::MessageDifferencer::SpecificField> & field_path)82 void ReportDeleted(
83 const google::protobuf::Message& message1, const google::protobuf::Message& message2,
84 const std::vector<google::protobuf::util::MessageDifferencer::SpecificField>&
85 field_path) override {
86 ++num_diffs_;
87 }
88
89 // Report that the value of a field has been modified.
ReportModified(const google::protobuf::Message & message1,const google::protobuf::Message & message2,const std::vector<google::protobuf::util::MessageDifferencer::SpecificField> & field_path)90 void ReportModified(
91 const google::protobuf::Message& message1, const google::protobuf::Message& message2,
92 const std::vector<google::protobuf::util::MessageDifferencer::SpecificField>&
93 field_path) override {
94 ++num_diffs_;
95 }
96
97 private:
98 int32 num_diffs_;
99 };
100
101 // When comparing a repeated field as map, MultipleFieldMapKeyComparator can
102 // be used to specify multiple fields as key for key comparison.
103 // Two elements of a repeated field will be regarded as having the same key
104 // iff they have the same value for every specified key field.
105 // Note that you can also specify only one field as key.
106 class MessageDifferencer::MultipleFieldsMapKeyComparator
107 : public MessageDifferencer::MapKeyComparator {
108 public:
MultipleFieldsMapKeyComparator(MessageDifferencer * message_differencer,const std::vector<std::vector<const FieldDescriptor * >> & key_field_paths)109 MultipleFieldsMapKeyComparator(
110 MessageDifferencer* message_differencer,
111 const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths)
112 : message_differencer_(message_differencer),
113 key_field_paths_(key_field_paths) {
114 GOOGLE_CHECK(!key_field_paths_.empty());
115 for (const auto& path : key_field_paths_) {
116 GOOGLE_CHECK(!path.empty());
117 }
118 }
MultipleFieldsMapKeyComparator(MessageDifferencer * message_differencer,const FieldDescriptor * key)119 MultipleFieldsMapKeyComparator(MessageDifferencer* message_differencer,
120 const FieldDescriptor* key)
121 : message_differencer_(message_differencer) {
122 std::vector<const FieldDescriptor*> key_field_path;
123 key_field_path.push_back(key);
124 key_field_paths_.push_back(key_field_path);
125 }
IsMatch(const Message & message1,const Message & message2,const std::vector<SpecificField> & parent_fields) const126 bool IsMatch(const Message& message1, const Message& message2,
127 const std::vector<SpecificField>& parent_fields) const override {
128 for (const auto& path : key_field_paths_) {
129 if (!IsMatchInternal(message1, message2, parent_fields, path, 0)) {
130 return false;
131 }
132 }
133 return true;
134 }
135
136 private:
IsMatchInternal(const Message & message1,const Message & message2,const std::vector<SpecificField> & parent_fields,const std::vector<const FieldDescriptor * > & key_field_path,int path_index) const137 bool IsMatchInternal(
138 const Message& message1, const Message& message2,
139 const std::vector<SpecificField>& parent_fields,
140 const std::vector<const FieldDescriptor*>& key_field_path,
141 int path_index) const {
142 const FieldDescriptor* field = key_field_path[path_index];
143 std::vector<SpecificField> current_parent_fields(parent_fields);
144 if (path_index == key_field_path.size() - 1) {
145 if (field->is_repeated()) {
146 if (!message_differencer_->CompareRepeatedField(
147 message1, message2, field, ¤t_parent_fields)) {
148 return false;
149 }
150 } else {
151 if (!message_differencer_->CompareFieldValueUsingParentFields(
152 message1, message2, field, -1, -1, ¤t_parent_fields)) {
153 return false;
154 }
155 }
156 return true;
157 } else {
158 const Reflection* reflection1 = message1.GetReflection();
159 const Reflection* reflection2 = message2.GetReflection();
160 bool has_field1 = reflection1->HasField(message1, field);
161 bool has_field2 = reflection2->HasField(message2, field);
162 if (!has_field1 && !has_field2) {
163 return true;
164 }
165 if (has_field1 != has_field2) {
166 return false;
167 }
168 SpecificField specific_field;
169 specific_field.field = field;
170 current_parent_fields.push_back(specific_field);
171 return IsMatchInternal(reflection1->GetMessage(message1, field),
172 reflection2->GetMessage(message2, field),
173 current_parent_fields, key_field_path,
174 path_index + 1);
175 }
176 }
177 MessageDifferencer* message_differencer_;
178 std::vector<std::vector<const FieldDescriptor*> > key_field_paths_;
179 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MultipleFieldsMapKeyComparator);
180 };
181
182 // Preserve the order when treating repeated field as SMART_LIST. The current
183 // implementation is to find the longest matching sequence from the first
184 // element. The optimal solution requires to use //util/diff/lcs.h, which is
185 // not open sourced yet. Overwrite this method if you want to have that.
186 // TODO(ykzhu): change to use LCS once it is open sourced.
MatchIndicesPostProcessorForSmartList(std::vector<int> * match_list1,std::vector<int> * match_list2)187 void MatchIndicesPostProcessorForSmartList(
188 std::vector<int>* match_list1, std::vector<int>* match_list2) {
189 int last_matched_index = -1;
190 for (int i = 0; i < match_list1->size(); ++i) {
191 if (match_list1->at(i) < 0) {
192 continue;
193 }
194 if (last_matched_index < 0 || match_list1->at(i) > last_matched_index) {
195 last_matched_index = match_list1->at(i);
196 } else {
197 match_list2->at(match_list1->at(i)) = -1;
198 match_list1->at(i) = -1;
199 }
200 }
201 }
202
MapEntryKeyComparator(MessageDifferencer * message_differencer)203 MessageDifferencer::MapEntryKeyComparator::MapEntryKeyComparator(
204 MessageDifferencer* message_differencer)
205 : message_differencer_(message_differencer) {}
206
IsMatch(const Message & message1,const Message & message2,const std::vector<SpecificField> & parent_fields) const207 bool MessageDifferencer::MapEntryKeyComparator::IsMatch(
208 const Message& message1, const Message& message2,
209 const std::vector<SpecificField>& parent_fields) const {
210 // Map entry has its key in the field with tag 1. See the comment for
211 // map_entry in MessageOptions.
212 const FieldDescriptor* key = message1.GetDescriptor()->FindFieldByNumber(1);
213 // If key is not present in message1 and we're doing partial comparison or if
214 // map key is explicitly ignored treat the field as set instead,
215 const bool treat_as_set =
216 (message_differencer_->scope() == PARTIAL &&
217 !message1.GetReflection()->HasField(message1, key)) ||
218 message_differencer_->IsIgnored(message1, message2, key, parent_fields);
219
220 std::vector<SpecificField> current_parent_fields(parent_fields);
221 if (treat_as_set) {
222 return message_differencer_->Compare(message1, message2,
223 ¤t_parent_fields);
224 }
225 return message_differencer_->CompareFieldValueUsingParentFields(
226 message1, message2, key, -1, -1, ¤t_parent_fields);
227 }
228
Equals(const Message & message1,const Message & message2)229 bool MessageDifferencer::Equals(const Message& message1,
230 const Message& message2) {
231 MessageDifferencer differencer;
232
233 return differencer.Compare(message1, message2);
234 }
235
Equivalent(const Message & message1,const Message & message2)236 bool MessageDifferencer::Equivalent(const Message& message1,
237 const Message& message2) {
238 MessageDifferencer differencer;
239 differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
240
241 return differencer.Compare(message1, message2);
242 }
243
ApproximatelyEquals(const Message & message1,const Message & message2)244 bool MessageDifferencer::ApproximatelyEquals(const Message& message1,
245 const Message& message2) {
246 MessageDifferencer differencer;
247 differencer.set_float_comparison(MessageDifferencer::APPROXIMATE);
248
249 return differencer.Compare(message1, message2);
250 }
251
ApproximatelyEquivalent(const Message & message1,const Message & message2)252 bool MessageDifferencer::ApproximatelyEquivalent(const Message& message1,
253 const Message& message2) {
254 MessageDifferencer differencer;
255 differencer.set_message_field_comparison(MessageDifferencer::EQUIVALENT);
256 differencer.set_float_comparison(MessageDifferencer::APPROXIMATE);
257
258 return differencer.Compare(message1, message2);
259 }
260
261 // ===========================================================================
262
MessageDifferencer()263 MessageDifferencer::MessageDifferencer()
264 : reporter_(NULL),
265 field_comparator_(NULL),
266 message_field_comparison_(EQUAL),
267 scope_(FULL),
268 repeated_field_comparison_(AS_LIST),
269 map_entry_key_comparator_(this),
270 report_matches_(false),
271 report_moves_(true),
272 report_ignores_(true),
273 output_string_(nullptr),
274 match_indices_for_smart_list_callback_(
275 MatchIndicesPostProcessorForSmartList) {}
276
~MessageDifferencer()277 MessageDifferencer::~MessageDifferencer() {
278 for (MapKeyComparator* comparator : owned_key_comparators_) {
279 delete comparator;
280 }
281 for (IgnoreCriteria* criteria : ignore_criteria_) {
282 delete criteria;
283 }
284 }
285
set_field_comparator(FieldComparator * comparator)286 void MessageDifferencer::set_field_comparator(FieldComparator* comparator) {
287 GOOGLE_CHECK(comparator) << "Field comparator can't be NULL.";
288 field_comparator_ = comparator;
289 }
290
set_message_field_comparison(MessageFieldComparison comparison)291 void MessageDifferencer::set_message_field_comparison(
292 MessageFieldComparison comparison) {
293 message_field_comparison_ = comparison;
294 }
295
set_scope(Scope scope)296 void MessageDifferencer::set_scope(Scope scope) { scope_ = scope; }
297
scope()298 MessageDifferencer::Scope MessageDifferencer::scope() { return scope_; }
299
set_float_comparison(FloatComparison comparison)300 void MessageDifferencer::set_float_comparison(FloatComparison comparison) {
301 default_field_comparator_.set_float_comparison(
302 comparison == EXACT ? DefaultFieldComparator::EXACT
303 : DefaultFieldComparator::APPROXIMATE);
304 }
305
set_repeated_field_comparison(RepeatedFieldComparison comparison)306 void MessageDifferencer::set_repeated_field_comparison(
307 RepeatedFieldComparison comparison) {
308 repeated_field_comparison_ = comparison;
309 }
310
311 MessageDifferencer::RepeatedFieldComparison
repeated_field_comparison()312 MessageDifferencer::repeated_field_comparison() {
313 return repeated_field_comparison_;
314 }
315
CheckRepeatedFieldComparisons(const FieldDescriptor * field,const RepeatedFieldComparison & new_comparison)316 void MessageDifferencer::CheckRepeatedFieldComparisons(
317 const FieldDescriptor* field,
318 const RepeatedFieldComparison& new_comparison) {
319 GOOGLE_CHECK(field->is_repeated())
320 << "Field must be repeated: " << field->full_name();
321 const MapKeyComparator* key_comparator = GetMapKeyComparator(field);
322 GOOGLE_CHECK(key_comparator == NULL)
323 << "Cannot treat this repeated field as both MAP and " << new_comparison
324 << " for"
325 << " comparison. Field name is: " << field->full_name();
326 GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
327 repeated_field_comparisons_.end() ||
328 repeated_field_comparisons_[field] == new_comparison)
329 << "Cannot treat the same field as both "
330 << repeated_field_comparisons_[field] << " and " << new_comparison
331 << ". Field name is: " << field->full_name();
332 }
333
TreatAsSet(const FieldDescriptor * field)334 void MessageDifferencer::TreatAsSet(const FieldDescriptor* field) {
335 CheckRepeatedFieldComparisons(field, AS_SET);
336 repeated_field_comparisons_[field] = AS_SET;
337 }
338
TreatAsSmartSet(const FieldDescriptor * field)339 void MessageDifferencer::TreatAsSmartSet(const FieldDescriptor* field) {
340 CheckRepeatedFieldComparisons(field, AS_SMART_SET);
341 repeated_field_comparisons_[field] = AS_SMART_SET;
342 }
343
SetMatchIndicesForSmartListCallback(std::function<void (std::vector<int> *,std::vector<int> *)> callback)344 void MessageDifferencer::SetMatchIndicesForSmartListCallback(
345 std::function<void(std::vector<int>*, std::vector<int>*)> callback) {
346 match_indices_for_smart_list_callback_ = callback;
347 }
348
TreatAsList(const FieldDescriptor * field)349 void MessageDifferencer::TreatAsList(const FieldDescriptor* field) {
350 CheckRepeatedFieldComparisons(field, AS_LIST);
351 repeated_field_comparisons_[field] = AS_LIST;
352 }
353
TreatAsSmartList(const FieldDescriptor * field)354 void MessageDifferencer::TreatAsSmartList(const FieldDescriptor* field) {
355 CheckRepeatedFieldComparisons(field, AS_SMART_LIST);
356 repeated_field_comparisons_[field] = AS_SMART_LIST;
357 }
358
TreatAsMap(const FieldDescriptor * field,const FieldDescriptor * key)359 void MessageDifferencer::TreatAsMap(const FieldDescriptor* field,
360 const FieldDescriptor* key) {
361 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
362 << "Field has to be message type. Field name is: " << field->full_name();
363 GOOGLE_CHECK(key->containing_type() == field->message_type())
364 << key->full_name()
365 << " must be a direct subfield within the repeated field "
366 << field->full_name() << ", not " << key->containing_type()->full_name();
367 GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
368 repeated_field_comparisons_.end())
369 << "Cannot treat the same field as both "
370 << repeated_field_comparisons_[field]
371 << " and MAP. Field name is: " << field->full_name();
372 MapKeyComparator* key_comparator =
373 new MultipleFieldsMapKeyComparator(this, key);
374 owned_key_comparators_.push_back(key_comparator);
375 map_field_key_comparator_[field] = key_comparator;
376 }
377
TreatAsMapWithMultipleFieldsAsKey(const FieldDescriptor * field,const std::vector<const FieldDescriptor * > & key_fields)378 void MessageDifferencer::TreatAsMapWithMultipleFieldsAsKey(
379 const FieldDescriptor* field,
380 const std::vector<const FieldDescriptor*>& key_fields) {
381 std::vector<std::vector<const FieldDescriptor*> > key_field_paths;
382 for (const FieldDescriptor* key_filed : key_fields) {
383 std::vector<const FieldDescriptor*> key_field_path;
384 key_field_path.push_back(key_filed);
385 key_field_paths.push_back(key_field_path);
386 }
387 TreatAsMapWithMultipleFieldPathsAsKey(field, key_field_paths);
388 }
389
TreatAsMapWithMultipleFieldPathsAsKey(const FieldDescriptor * field,const std::vector<std::vector<const FieldDescriptor * >> & key_field_paths)390 void MessageDifferencer::TreatAsMapWithMultipleFieldPathsAsKey(
391 const FieldDescriptor* field,
392 const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths) {
393 GOOGLE_CHECK(field->is_repeated())
394 << "Field must be repeated: " << field->full_name();
395 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, field->cpp_type())
396 << "Field has to be message type. Field name is: " << field->full_name();
397 for (const auto& key_field_path : key_field_paths) {
398 for (int j = 0; j < key_field_path.size(); ++j) {
399 const FieldDescriptor* parent_field =
400 j == 0 ? field : key_field_path[j - 1];
401 const FieldDescriptor* child_field = key_field_path[j];
402 GOOGLE_CHECK(child_field->containing_type() == parent_field->message_type())
403 << child_field->full_name()
404 << " must be a direct subfield within the field: "
405 << parent_field->full_name();
406 if (j != 0) {
407 GOOGLE_CHECK_EQ(FieldDescriptor::CPPTYPE_MESSAGE, parent_field->cpp_type())
408 << parent_field->full_name() << " has to be of type message.";
409 GOOGLE_CHECK(!parent_field->is_repeated())
410 << parent_field->full_name() << " cannot be a repeated field.";
411 }
412 }
413 }
414 GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
415 repeated_field_comparisons_.end())
416 << "Cannot treat the same field as both "
417 << repeated_field_comparisons_[field]
418 << " and MAP. Field name is: " << field->full_name();
419 MapKeyComparator* key_comparator =
420 new MultipleFieldsMapKeyComparator(this, key_field_paths);
421 owned_key_comparators_.push_back(key_comparator);
422 map_field_key_comparator_[field] = key_comparator;
423 }
424
TreatAsMapUsingKeyComparator(const FieldDescriptor * field,const MapKeyComparator * key_comparator)425 void MessageDifferencer::TreatAsMapUsingKeyComparator(
426 const FieldDescriptor* field, const MapKeyComparator* key_comparator) {
427 GOOGLE_CHECK(field->is_repeated())
428 << "Field must be repeated: " << field->full_name();
429 GOOGLE_CHECK(repeated_field_comparisons_.find(field) ==
430 repeated_field_comparisons_.end())
431 << "Cannot treat the same field as both "
432 << repeated_field_comparisons_[field]
433 << " and MAP. Field name is: " << field->full_name();
434 map_field_key_comparator_[field] = key_comparator;
435 }
436
AddIgnoreCriteria(IgnoreCriteria * ignore_criteria)437 void MessageDifferencer::AddIgnoreCriteria(IgnoreCriteria* ignore_criteria) {
438 ignore_criteria_.push_back(ignore_criteria);
439 }
440
IgnoreField(const FieldDescriptor * field)441 void MessageDifferencer::IgnoreField(const FieldDescriptor* field) {
442 ignored_fields_.insert(field);
443 }
444
SetFractionAndMargin(const FieldDescriptor * field,double fraction,double margin)445 void MessageDifferencer::SetFractionAndMargin(const FieldDescriptor* field,
446 double fraction, double margin) {
447 default_field_comparator_.SetFractionAndMargin(field, fraction, margin);
448 }
449
ReportDifferencesToString(std::string * output)450 void MessageDifferencer::ReportDifferencesToString(std::string* output) {
451 GOOGLE_DCHECK(output) << "Specified output string was NULL";
452
453 output_string_ = output;
454 output_string_->clear();
455 }
456
ReportDifferencesTo(Reporter * reporter)457 void MessageDifferencer::ReportDifferencesTo(Reporter* reporter) {
458 // If an output string is set, clear it to prevent
459 // it superseding the specified reporter.
460 if (output_string_) {
461 output_string_ = NULL;
462 }
463
464 reporter_ = reporter;
465 }
466
FieldBefore(const FieldDescriptor * field1,const FieldDescriptor * field2)467 bool MessageDifferencer::FieldBefore(const FieldDescriptor* field1,
468 const FieldDescriptor* field2) {
469 // Handle sentinel values (i.e. make sure NULLs are always ordered
470 // at the end of the list).
471 if (field1 == NULL) {
472 return false;
473 }
474
475 if (field2 == NULL) {
476 return true;
477 }
478
479 // Always order fields by their tag number
480 return (field1->number() < field2->number());
481 }
482
Compare(const Message & message1,const Message & message2)483 bool MessageDifferencer::Compare(const Message& message1,
484 const Message& message2) {
485 std::vector<SpecificField> parent_fields;
486
487 bool result = false;
488
489 // Setup the internal reporter if need be.
490 if (output_string_) {
491 io::StringOutputStream output_stream(output_string_);
492 StreamReporter reporter(&output_stream);
493 reporter_ = &reporter;
494 result = Compare(message1, message2, &parent_fields);
495 reporter_ = NULL;
496 } else {
497 result = Compare(message1, message2, &parent_fields);
498 }
499
500 return result;
501 }
502
CompareWithFields(const Message & message1,const Message & message2,const std::vector<const FieldDescriptor * > & message1_fields_arg,const std::vector<const FieldDescriptor * > & message2_fields_arg)503 bool MessageDifferencer::CompareWithFields(
504 const Message& message1, const Message& message2,
505 const std::vector<const FieldDescriptor*>& message1_fields_arg,
506 const std::vector<const FieldDescriptor*>& message2_fields_arg) {
507 if (message1.GetDescriptor() != message2.GetDescriptor()) {
508 GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
509 << "descriptors.";
510 return false;
511 }
512
513 std::vector<SpecificField> parent_fields;
514
515 bool result = false;
516
517 FieldDescriptorArray message1_fields(message1_fields_arg.size() + 1);
518 FieldDescriptorArray message2_fields(message2_fields_arg.size() + 1);
519
520 std::copy(message1_fields_arg.cbegin(), message1_fields_arg.cend(),
521 message1_fields.begin());
522 std::copy(message2_fields_arg.cbegin(), message2_fields_arg.cend(),
523 message2_fields.begin());
524
525 // Append sentinel values.
526 message1_fields[message1_fields_arg.size()] = nullptr;
527 message2_fields[message2_fields_arg.size()] = nullptr;
528
529 std::sort(message1_fields.begin(), message1_fields.end(), FieldBefore);
530 std::sort(message2_fields.begin(), message2_fields.end(), FieldBefore);
531
532 // Setup the internal reporter if need be.
533 if (output_string_) {
534 io::StringOutputStream output_stream(output_string_);
535 StreamReporter reporter(&output_stream);
536 reporter_ = &reporter;
537 result = CompareRequestedFieldsUsingSettings(
538 message1, message2, message1_fields, message2_fields, &parent_fields);
539 reporter_ = NULL;
540 } else {
541 result = CompareRequestedFieldsUsingSettings(
542 message1, message2, message1_fields, message2_fields, &parent_fields);
543 }
544
545 return result;
546 }
547
Compare(const Message & message1,const Message & message2,std::vector<SpecificField> * parent_fields)548 bool MessageDifferencer::Compare(const Message& message1,
549 const Message& message2,
550 std::vector<SpecificField>* parent_fields) {
551 const Descriptor* descriptor1 = message1.GetDescriptor();
552 const Descriptor* descriptor2 = message2.GetDescriptor();
553 if (descriptor1 != descriptor2) {
554 GOOGLE_LOG(DFATAL) << "Comparison between two messages with different "
555 << "descriptors. " << descriptor1->full_name() << " vs "
556 << descriptor2->full_name();
557 return false;
558 }
559 // Expand google.protobuf.Any payload if possible.
560 if (descriptor1->full_name() == internal::kAnyFullTypeName) {
561 std::unique_ptr<Message> data1;
562 std::unique_ptr<Message> data2;
563 if (UnpackAny(message1, &data1) && UnpackAny(message2, &data2)) {
564 // Avoid DFATAL for different descriptors in google.protobuf.Any payloads.
565 if (data1->GetDescriptor() != data2->GetDescriptor()) {
566 return false;
567 }
568 return Compare(*data1, *data2, parent_fields);
569 }
570 }
571 const Reflection* reflection1 = message1.GetReflection();
572 const Reflection* reflection2 = message2.GetReflection();
573
574 bool unknown_compare_result = true;
575 // Ignore unknown fields in EQUIVALENT mode
576 if (message_field_comparison_ != EQUIVALENT) {
577 const UnknownFieldSet& unknown_field_set1 =
578 reflection1->GetUnknownFields(message1);
579 const UnknownFieldSet& unknown_field_set2 =
580 reflection2->GetUnknownFields(message2);
581 if (!CompareUnknownFields(message1, message2, unknown_field_set1,
582 unknown_field_set2, parent_fields)) {
583 if (reporter_ == NULL) {
584 return false;
585 }
586 unknown_compare_result = false;
587 }
588 }
589
590 FieldDescriptorArray message1_fields = RetrieveFields(message1, true);
591 FieldDescriptorArray message2_fields = RetrieveFields(message2, false);
592
593 return CompareRequestedFieldsUsingSettings(
594 message1, message2,
595 message1_fields, message2_fields,
596 parent_fields) && unknown_compare_result;
597 }
598
RetrieveFields(const Message & message,bool base_message)599 FieldDescriptorArray MessageDifferencer::RetrieveFields(const Message& message,
600 bool base_message) {
601 const Descriptor* descriptor = message.GetDescriptor();
602
603 tmp_message_fields_.clear();
604 tmp_message_fields_.reserve(descriptor->field_count() + 1);
605
606 const Reflection* reflection = message.GetReflection();
607 if (descriptor->options().map_entry()) {
608 if (this->scope_ == PARTIAL && base_message) {
609 reflection->ListFields(message, &tmp_message_fields_);
610 } else {
611 // Map entry fields are always considered present.
612 for (int i = 0; i < descriptor->field_count(); i++) {
613 tmp_message_fields_.push_back(descriptor->field(i));
614 }
615 }
616 } else {
617 reflection->ListFields(message, &tmp_message_fields_);
618 }
619 // Add sentinel values to deal with the
620 // case where the number of the fields in
621 // each list are different.
622 tmp_message_fields_.push_back(nullptr);
623
624 FieldDescriptorArray message_fields(tmp_message_fields_.begin(),
625 tmp_message_fields_.end());
626
627 return message_fields;
628 }
629
CompareRequestedFieldsUsingSettings(const Message & message1,const Message & message2,const FieldDescriptorArray & message1_fields,const FieldDescriptorArray & message2_fields,std::vector<SpecificField> * parent_fields)630 bool MessageDifferencer::CompareRequestedFieldsUsingSettings(
631 const Message& message1, const Message& message2,
632 const FieldDescriptorArray& message1_fields,
633 const FieldDescriptorArray& message2_fields,
634 std::vector<SpecificField>* parent_fields) {
635 if (scope_ == FULL) {
636 if (message_field_comparison_ == EQUIVALENT) {
637 // We need to merge the field lists of both messages (i.e.
638 // we are merely checking for a difference in field values,
639 // rather than the addition or deletion of fields).
640 FieldDescriptorArray fields_union =
641 CombineFields(message1_fields, FULL, message2_fields, FULL);
642 return CompareWithFieldsInternal(message1, message2, fields_union,
643 fields_union, parent_fields);
644 } else {
645 // Simple equality comparison, use the unaltered field lists.
646 return CompareWithFieldsInternal(message1, message2, message1_fields,
647 message2_fields, parent_fields);
648 }
649 } else {
650 if (message_field_comparison_ == EQUIVALENT) {
651 // We use the list of fields for message1 for both messages when
652 // comparing. This way, extra fields in message2 are ignored,
653 // and missing fields in message2 use their default value.
654 return CompareWithFieldsInternal(message1, message2, message1_fields,
655 message1_fields, parent_fields);
656 } else {
657 // We need to consider the full list of fields for message1
658 // but only the intersection for message2. This way, any fields
659 // only present in message2 will be ignored, but any fields only
660 // present in message1 will be marked as a difference.
661 FieldDescriptorArray fields_intersection =
662 CombineFields(message1_fields, PARTIAL, message2_fields, PARTIAL);
663 return CompareWithFieldsInternal(message1, message2, message1_fields,
664 fields_intersection, parent_fields);
665 }
666 }
667 }
668
CombineFields(const FieldDescriptorArray & fields1,Scope fields1_scope,const FieldDescriptorArray & fields2,Scope fields2_scope)669 FieldDescriptorArray MessageDifferencer::CombineFields(
670 const FieldDescriptorArray& fields1, Scope fields1_scope,
671 const FieldDescriptorArray& fields2, Scope fields2_scope) {
672 int index1 = 0;
673 int index2 = 0;
674
675 tmp_message_fields_.clear();
676
677 while (index1 < fields1.size() && index2 < fields2.size()) {
678 const FieldDescriptor* field1 = fields1[index1];
679 const FieldDescriptor* field2 = fields2[index2];
680
681 if (FieldBefore(field1, field2)) {
682 if (fields1_scope == FULL) {
683 tmp_message_fields_.push_back(fields1[index1]);
684 }
685 ++index1;
686 } else if (FieldBefore(field2, field1)) {
687 if (fields2_scope == FULL) {
688 tmp_message_fields_.push_back(fields2[index2]);
689 }
690 ++index2;
691 } else {
692 tmp_message_fields_.push_back(fields1[index1]);
693 ++index1;
694 ++index2;
695 }
696 }
697
698 tmp_message_fields_.push_back(nullptr);
699
700 FieldDescriptorArray combined_fields(tmp_message_fields_.begin(),
701 tmp_message_fields_.end());
702
703 return combined_fields;
704 }
705
CompareWithFieldsInternal(const Message & message1,const Message & message2,const FieldDescriptorArray & message1_fields,const FieldDescriptorArray & message2_fields,std::vector<SpecificField> * parent_fields)706 bool MessageDifferencer::CompareWithFieldsInternal(
707 const Message& message1, const Message& message2,
708 const FieldDescriptorArray& message1_fields,
709 const FieldDescriptorArray& message2_fields,
710 std::vector<SpecificField>* parent_fields) {
711 bool isDifferent = false;
712 int field_index1 = 0;
713 int field_index2 = 0;
714
715 const Reflection* reflection1 = message1.GetReflection();
716 const Reflection* reflection2 = message2.GetReflection();
717
718 while (true) {
719 const FieldDescriptor* field1 = message1_fields[field_index1];
720 const FieldDescriptor* field2 = message2_fields[field_index2];
721
722 // Once we have reached sentinel values, we are done the comparison.
723 if (field1 == NULL && field2 == NULL) {
724 break;
725 }
726
727 // Check for differences in the field itself.
728 if (FieldBefore(field1, field2)) {
729 // Field 1 is not in the field list for message 2.
730 if (IsIgnored(message1, message2, field1, *parent_fields)) {
731 // We are ignoring field1. Report the ignore and move on to
732 // the next field in message1_fields.
733 if (reporter_ != NULL) {
734 SpecificField specific_field;
735 specific_field.field = field1;
736
737 parent_fields->push_back(specific_field);
738 if (report_ignores_) {
739 reporter_->ReportIgnored(message1, message2, *parent_fields);
740 }
741 parent_fields->pop_back();
742 }
743 ++field_index1;
744 continue;
745 }
746
747 if (reporter_ != NULL) {
748 assert(field1 != NULL);
749 int count = field1->is_repeated()
750 ? reflection1->FieldSize(message1, field1)
751 : 1;
752
753 for (int i = 0; i < count; ++i) {
754 SpecificField specific_field;
755 specific_field.field = field1;
756 specific_field.index = field1->is_repeated() ? i : -1;
757
758 parent_fields->push_back(specific_field);
759 reporter_->ReportDeleted(message1, message2, *parent_fields);
760 parent_fields->pop_back();
761 }
762
763 isDifferent = true;
764 } else {
765 return false;
766 }
767
768 ++field_index1;
769 continue;
770 } else if (FieldBefore(field2, field1)) {
771 // Field 2 is not in the field list for message 1.
772 if (IsIgnored(message1, message2, field2, *parent_fields)) {
773 // We are ignoring field2. Report the ignore and move on to
774 // the next field in message2_fields.
775 if (reporter_ != NULL) {
776 SpecificField specific_field;
777 specific_field.field = field2;
778
779 parent_fields->push_back(specific_field);
780 if (report_ignores_) {
781 reporter_->ReportIgnored(message1, message2, *parent_fields);
782 }
783 parent_fields->pop_back();
784 }
785 ++field_index2;
786 continue;
787 }
788
789 if (reporter_ != NULL) {
790 int count = field2->is_repeated()
791 ? reflection2->FieldSize(message2, field2)
792 : 1;
793
794 for (int i = 0; i < count; ++i) {
795 SpecificField specific_field;
796 specific_field.field = field2;
797 specific_field.index = field2->is_repeated() ? i : -1;
798 specific_field.new_index = specific_field.index;
799
800 parent_fields->push_back(specific_field);
801 reporter_->ReportAdded(message1, message2, *parent_fields);
802 parent_fields->pop_back();
803 }
804
805 isDifferent = true;
806 } else {
807 return false;
808 }
809
810 ++field_index2;
811 continue;
812 }
813
814 // By this point, field1 and field2 are guaranteed to point to the same
815 // field, so we can now compare the values.
816 if (IsIgnored(message1, message2, field1, *parent_fields)) {
817 // Ignore this field. Report and move on.
818 if (reporter_ != NULL) {
819 SpecificField specific_field;
820 specific_field.field = field1;
821
822 parent_fields->push_back(specific_field);
823 if (report_ignores_) {
824 reporter_->ReportIgnored(message1, message2, *parent_fields);
825 }
826 parent_fields->pop_back();
827 }
828
829 ++field_index1;
830 ++field_index2;
831 continue;
832 }
833
834 bool fieldDifferent = false;
835 assert(field1 != NULL);
836 if (field1->is_repeated()) {
837 fieldDifferent =
838 !CompareRepeatedField(message1, message2, field1, parent_fields);
839 if (fieldDifferent) {
840 if (reporter_ == NULL) return false;
841 isDifferent = true;
842 }
843 } else {
844 fieldDifferent = !CompareFieldValueUsingParentFields(
845 message1, message2, field1, -1, -1, parent_fields);
846
847 // If we have found differences, either report them or terminate if
848 // no reporter is present.
849 if (fieldDifferent && reporter_ == NULL) {
850 return false;
851 }
852
853 if (reporter_ != NULL) {
854 SpecificField specific_field;
855 specific_field.field = field1;
856 parent_fields->push_back(specific_field);
857 if (fieldDifferent) {
858 reporter_->ReportModified(message1, message2, *parent_fields);
859 isDifferent = true;
860 } else if (report_matches_) {
861 reporter_->ReportMatched(message1, message2, *parent_fields);
862 }
863 parent_fields->pop_back();
864 }
865 }
866 // Increment the field indices.
867 ++field_index1;
868 ++field_index2;
869 }
870
871 return !isDifferent;
872 }
873
IsMatch(const FieldDescriptor * repeated_field,const MapKeyComparator * key_comparator,const Message * message1,const Message * message2,const std::vector<SpecificField> & parent_fields,Reporter * reporter,int index1,int index2)874 bool MessageDifferencer::IsMatch(
875 const FieldDescriptor* repeated_field,
876 const MapKeyComparator* key_comparator, const Message* message1,
877 const Message* message2, const std::vector<SpecificField>& parent_fields,
878 Reporter* reporter, int index1, int index2) {
879 std::vector<SpecificField> current_parent_fields(parent_fields);
880 if (repeated_field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
881 return CompareFieldValueUsingParentFields(*message1, *message2,
882 repeated_field, index1, index2,
883 ¤t_parent_fields);
884 }
885 // Back up the Reporter and output_string_. They will be reset in the
886 // following code.
887 Reporter* backup_reporter = reporter_;
888 std::string* output_string = output_string_;
889 reporter_ = reporter;
890 output_string_ = NULL;
891 bool match;
892
893 if (key_comparator == NULL) {
894 match = CompareFieldValueUsingParentFields(*message1, *message2,
895 repeated_field, index1, index2,
896 ¤t_parent_fields);
897 } else {
898 const Reflection* reflection1 = message1->GetReflection();
899 const Reflection* reflection2 = message2->GetReflection();
900 const Message& m1 =
901 reflection1->GetRepeatedMessage(*message1, repeated_field, index1);
902 const Message& m2 =
903 reflection2->GetRepeatedMessage(*message2, repeated_field, index2);
904 SpecificField specific_field;
905 specific_field.field = repeated_field;
906 specific_field.index = index1;
907 specific_field.new_index = index2;
908 current_parent_fields.push_back(specific_field);
909 match = key_comparator->IsMatch(m1, m2, current_parent_fields);
910 }
911
912 reporter_ = backup_reporter;
913 output_string_ = output_string;
914 return match;
915 }
916
CompareMapFieldByMapReflection(const Message & message1,const Message & message2,const FieldDescriptor * map_field,std::vector<SpecificField> * parent_fields,DefaultFieldComparator * comparator)917 bool MessageDifferencer::CompareMapFieldByMapReflection(
918 const Message& message1, const Message& message2,
919 const FieldDescriptor* map_field, std::vector<SpecificField>* parent_fields,
920 DefaultFieldComparator* comparator) {
921 const Reflection* reflection1 = message1.GetReflection();
922 const Reflection* reflection2 = message2.GetReflection();
923 const int count1 = reflection1->MapSize(message1, map_field);
924 const int count2 = reflection2->MapSize(message2, map_field);
925 const bool treated_as_subset = IsTreatedAsSubset(map_field);
926 if (count1 != count2 && !treated_as_subset) {
927 return false;
928 }
929 if (count1 > count2) {
930 return false;
931 }
932 const FieldDescriptor* val_des = map_field->message_type()->map_value();
933 switch (val_des->cpp_type()) {
934 #define HANDLE_TYPE(CPPTYPE, METHOD, COMPAREMETHOD) \
935 case FieldDescriptor::CPPTYPE_##CPPTYPE: { \
936 for (MapIterator it = reflection1->MapBegin( \
937 const_cast<Message*>(&message1), map_field); \
938 it != \
939 reflection1->MapEnd(const_cast<Message*>(&message1), map_field); \
940 ++it) { \
941 if (!reflection2->ContainsMapKey(message2, map_field, it.GetKey())) { \
942 return false; \
943 } \
944 MapValueConstRef value2; \
945 reflection2->LookupMapValue(message2, map_field, it.GetKey(), &value2); \
946 if (!comparator->Compare##COMPAREMETHOD(*val_des, \
947 it.GetValueRef().Get##METHOD(), \
948 value2.Get##METHOD())) { \
949 return false; \
950 } \
951 } \
952 break; \
953 }
954 HANDLE_TYPE(INT32, Int32Value, Int32);
955 HANDLE_TYPE(INT64, Int64Value, Int64);
956 HANDLE_TYPE(UINT32, UInt32Value, UInt32);
957 HANDLE_TYPE(UINT64, UInt64Value, UInt64);
958 HANDLE_TYPE(DOUBLE, DoubleValue, Double);
959 HANDLE_TYPE(FLOAT, FloatValue, Float);
960 HANDLE_TYPE(BOOL, BoolValue, Bool);
961 HANDLE_TYPE(STRING, StringValue, String);
962 HANDLE_TYPE(ENUM, EnumValue, Int32);
963 #undef HANDLE_TYPE
964 case FieldDescriptor::CPPTYPE_MESSAGE: {
965 for (MapIterator it = reflection1->MapBegin(
966 const_cast<Message*>(&message1), map_field);
967 it !=
968 reflection1->MapEnd(const_cast<Message*>(&message1), map_field);
969 ++it) {
970 if (!reflection2->ContainsMapKey(message2, map_field, it.GetKey())) {
971 return false;
972 }
973 bool compare_result;
974 MapValueConstRef value2;
975 reflection2->LookupMapValue(message2, map_field, it.GetKey(), &value2);
976 // Append currently compared field to the end of parent_fields.
977 SpecificField specific_value_field;
978 specific_value_field.field = val_des;
979 parent_fields->push_back(specific_value_field);
980 compare_result = Compare(it.GetValueRef().GetMessageValue(),
981 value2.GetMessageValue(), parent_fields);
982 parent_fields->pop_back();
983 if (!compare_result) {
984 return false;
985 }
986 }
987 break;
988 }
989 }
990 return true;
991 }
992
CompareRepeatedField(const Message & message1,const Message & message2,const FieldDescriptor * repeated_field,std::vector<SpecificField> * parent_fields)993 bool MessageDifferencer::CompareRepeatedField(
994 const Message& message1, const Message& message2,
995 const FieldDescriptor* repeated_field,
996 std::vector<SpecificField>* parent_fields) {
997 // the input FieldDescriptor is guaranteed to be repeated field.
998 const Reflection* reflection1 = message1.GetReflection();
999 const Reflection* reflection2 = message2.GetReflection();
1000
1001 // When both map fields are on map, do not sync to repeated field.
1002 // TODO(jieluo): Add support for reporter
1003 if (repeated_field->is_map() && reporter_ == nullptr &&
1004 // Users didn't set custom map field key comparator
1005 map_field_key_comparator_.find(repeated_field) ==
1006 map_field_key_comparator_.end() &&
1007 // Users didn't set repeated field comparison
1008 repeated_field_comparison_ == AS_LIST) {
1009 DefaultFieldComparator* map_field_comparator =
1010 field_comparator_ ? nullptr : &default_field_comparator_;
1011 #if PROTOBUF_RTTI
1012 // Inherit class from DefaultFieldComparator can not get the benefit
1013 // because DefaultFieldComparator::Compare() method might be overwrote.
1014 if (field_comparator_ &&
1015 typeid(*field_comparator_) == typeid(default_field_comparator_)) {
1016 map_field_comparator =
1017 static_cast<DefaultFieldComparator*>(field_comparator_);
1018 }
1019 #endif
1020 if (map_field_comparator) {
1021 const FieldDescriptor* key_des =
1022 repeated_field->message_type()->map_key();
1023 const FieldDescriptor* val_des =
1024 repeated_field->message_type()->map_value();
1025 const internal::MapFieldBase* map_field1 =
1026 reflection1->GetMapData(message1, repeated_field);
1027 const internal::MapFieldBase* map_field2 =
1028 reflection2->GetMapData(message2, repeated_field);
1029 std::vector<SpecificField> current_parent_fields(*parent_fields);
1030 SpecificField specific_field;
1031 specific_field.field = repeated_field;
1032 current_parent_fields.push_back(specific_field);
1033 if (map_field1->IsMapValid() && map_field2->IsMapValid() &&
1034 !IsIgnored(message1, message2, key_des, current_parent_fields) &&
1035 !IsIgnored(message1, message2, val_des, current_parent_fields)) {
1036 return CompareMapFieldByMapReflection(
1037 message1, message2, repeated_field, ¤t_parent_fields,
1038 map_field_comparator);
1039 }
1040 }
1041 }
1042
1043 const int count1 = reflection1->FieldSize(message1, repeated_field);
1044 const int count2 = reflection2->FieldSize(message2, repeated_field);
1045 const bool treated_as_subset = IsTreatedAsSubset(repeated_field);
1046
1047 // If the field is not treated as subset and no detailed reports is needed,
1048 // we do a quick check on the number of the elements to avoid unnecessary
1049 // comparison.
1050 if (count1 != count2 && reporter_ == NULL && !treated_as_subset) {
1051 return false;
1052 }
1053 // A match can never be found if message1 has more items than message2.
1054 if (count1 > count2 && reporter_ == NULL) {
1055 return false;
1056 }
1057
1058 // These two list are used for store the index of the correspondent
1059 // element in peer repeated field.
1060 std::vector<int> match_list1;
1061 std::vector<int> match_list2;
1062
1063 const MapKeyComparator* key_comparator = GetMapKeyComparator(repeated_field);
1064 bool smart_list = IsTreatedAsSmartList(repeated_field);
1065 bool simple_list = key_comparator == nullptr &&
1066 !IsTreatedAsSet(repeated_field) &&
1067 !IsTreatedAsSmartSet(repeated_field) && !smart_list;
1068
1069 // For simple lists, we avoid matching repeated field indices, saving the
1070 // memory allocations that would otherwise be needed for match_list1 and
1071 // match_list2.
1072 if (!simple_list) {
1073 // Try to match indices of the repeated fields. Return false if match fails.
1074 if (!MatchRepeatedFieldIndices(message1, message2, repeated_field,
1075 key_comparator, *parent_fields, &match_list1,
1076 &match_list2) &&
1077 reporter_ == nullptr) {
1078 return false;
1079 }
1080 }
1081
1082 bool fieldDifferent = false;
1083 SpecificField specific_field;
1084 specific_field.field = repeated_field;
1085
1086 // At this point, we have already matched pairs of fields (with the reporting
1087 // to be done later). Now to check if the paired elements are different.
1088 int next_unmatched_index = 0;
1089 for (int i = 0; i < count1; i++) {
1090 if (simple_list && i >= count2) {
1091 break;
1092 }
1093 if (!simple_list && match_list1[i] == -1) {
1094 if (smart_list) {
1095 if (reporter_ == nullptr) return false;
1096 specific_field.index = i;
1097 parent_fields->push_back(specific_field);
1098 reporter_->ReportDeleted(message1, message2, *parent_fields);
1099 parent_fields->pop_back();
1100 fieldDifferent = true;
1101 // Use -2 to mark this element has been reported.
1102 match_list1[i] = -2;
1103 }
1104 continue;
1105 }
1106 if (smart_list) {
1107 for (int j = next_unmatched_index; j < match_list1[i]; ++j) {
1108 GOOGLE_CHECK_LE(0, j);
1109 if (reporter_ == nullptr) return false;
1110 specific_field.index = j;
1111 specific_field.new_index = j;
1112 parent_fields->push_back(specific_field);
1113 reporter_->ReportAdded(message1, message2, *parent_fields);
1114 parent_fields->pop_back();
1115 fieldDifferent = true;
1116 // Use -2 to mark this element has been reported.
1117 match_list2[j] = -2;
1118 }
1119 }
1120 specific_field.index = i;
1121 if (simple_list) {
1122 specific_field.new_index = i;
1123 } else {
1124 specific_field.new_index = match_list1[i];
1125 next_unmatched_index = match_list1[i] + 1;
1126 }
1127
1128 const bool result = CompareFieldValueUsingParentFields(
1129 message1, message2, repeated_field, i, specific_field.new_index,
1130 parent_fields);
1131
1132 // If we have found differences, either report them or terminate if
1133 // no reporter is present. Note that ReportModified, ReportMoved, and
1134 // ReportMatched are all mutually exclusive.
1135 if (!result) {
1136 if (reporter_ == NULL) return false;
1137 parent_fields->push_back(specific_field);
1138 reporter_->ReportModified(message1, message2, *parent_fields);
1139 parent_fields->pop_back();
1140 fieldDifferent = true;
1141 } else if (reporter_ != NULL &&
1142 specific_field.index != specific_field.new_index &&
1143 !specific_field.field->is_map() && report_moves_) {
1144 parent_fields->push_back(specific_field);
1145 reporter_->ReportMoved(message1, message2, *parent_fields);
1146 parent_fields->pop_back();
1147 } else if (report_matches_ && reporter_ != NULL) {
1148 parent_fields->push_back(specific_field);
1149 reporter_->ReportMatched(message1, message2, *parent_fields);
1150 parent_fields->pop_back();
1151 }
1152 }
1153
1154 // Report any remaining additions or deletions.
1155 for (int i = 0; i < count2; ++i) {
1156 if (!simple_list && match_list2[i] != -1) continue;
1157 if (simple_list && i < count1) continue;
1158 if (!treated_as_subset) {
1159 fieldDifferent = true;
1160 }
1161
1162 if (reporter_ == NULL) continue;
1163 specific_field.index = i;
1164 specific_field.new_index = i;
1165 parent_fields->push_back(specific_field);
1166 reporter_->ReportAdded(message1, message2, *parent_fields);
1167 parent_fields->pop_back();
1168 }
1169
1170 for (int i = 0; i < count1; ++i) {
1171 if (!simple_list && match_list1[i] != -1) continue;
1172 if (simple_list && i < count2) continue;
1173 assert(reporter_ != NULL);
1174 specific_field.index = i;
1175 parent_fields->push_back(specific_field);
1176 reporter_->ReportDeleted(message1, message2, *parent_fields);
1177 parent_fields->pop_back();
1178 fieldDifferent = true;
1179 }
1180 return !fieldDifferent;
1181 }
1182
CompareFieldValue(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2)1183 bool MessageDifferencer::CompareFieldValue(const Message& message1,
1184 const Message& message2,
1185 const FieldDescriptor* field,
1186 int index1, int index2) {
1187 return CompareFieldValueUsingParentFields(message1, message2, field, index1,
1188 index2, NULL);
1189 }
1190
CompareFieldValueUsingParentFields(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2,std::vector<SpecificField> * parent_fields)1191 bool MessageDifferencer::CompareFieldValueUsingParentFields(
1192 const Message& message1, const Message& message2,
1193 const FieldDescriptor* field, int index1, int index2,
1194 std::vector<SpecificField>* parent_fields) {
1195 FieldContext field_context(parent_fields);
1196 FieldComparator::ComparisonResult result = GetFieldComparisonResult(
1197 message1, message2, field, index1, index2, &field_context);
1198
1199 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE &&
1200 result == FieldComparator::RECURSE) {
1201 // Get the nested messages and compare them using one of the Compare
1202 // methods.
1203 const Reflection* reflection1 = message1.GetReflection();
1204 const Reflection* reflection2 = message2.GetReflection();
1205 const Message& m1 =
1206 field->is_repeated()
1207 ? reflection1->GetRepeatedMessage(message1, field, index1)
1208 : reflection1->GetMessage(message1, field);
1209 const Message& m2 =
1210 field->is_repeated()
1211 ? reflection2->GetRepeatedMessage(message2, field, index2)
1212 : reflection2->GetMessage(message2, field);
1213
1214 // parent_fields is used in calls to Reporter methods.
1215 if (parent_fields != NULL) {
1216 // Append currently compared field to the end of parent_fields.
1217 SpecificField specific_field;
1218 specific_field.field = field;
1219 specific_field.index = index1;
1220 specific_field.new_index = index2;
1221 parent_fields->push_back(specific_field);
1222 const bool compare_result = Compare(m1, m2, parent_fields);
1223 parent_fields->pop_back();
1224 return compare_result;
1225 } else {
1226 // Recreates parent_fields as if m1 and m2 had no parents.
1227 return Compare(m1, m2);
1228 }
1229 } else {
1230 return (result == FieldComparator::SAME);
1231 }
1232 }
1233
CheckPathChanged(const std::vector<SpecificField> & field_path)1234 bool MessageDifferencer::CheckPathChanged(
1235 const std::vector<SpecificField>& field_path) {
1236 for (const SpecificField& specific_field : field_path) {
1237 // Don't check indexes for map entries -- maps are unordered.
1238 if (specific_field.field != nullptr && specific_field.field->is_map())
1239 continue;
1240 if (specific_field.index != specific_field.new_index) return true;
1241 }
1242 return false;
1243 }
1244
IsTreatedAsSet(const FieldDescriptor * field)1245 bool MessageDifferencer::IsTreatedAsSet(const FieldDescriptor* field) {
1246 if (!field->is_repeated()) return false;
1247 if (repeated_field_comparisons_.find(field) !=
1248 repeated_field_comparisons_.end()) {
1249 return repeated_field_comparisons_[field] == AS_SET;
1250 }
1251 return GetMapKeyComparator(field) == nullptr &&
1252 repeated_field_comparison_ == AS_SET;
1253 }
1254
IsTreatedAsSmartSet(const FieldDescriptor * field)1255 bool MessageDifferencer::IsTreatedAsSmartSet(const FieldDescriptor* field) {
1256 if (!field->is_repeated()) return false;
1257 if (repeated_field_comparisons_.find(field) !=
1258 repeated_field_comparisons_.end()) {
1259 return repeated_field_comparisons_[field] == AS_SMART_SET;
1260 }
1261 return GetMapKeyComparator(field) == nullptr &&
1262 repeated_field_comparison_ == AS_SMART_SET;
1263 }
1264
IsTreatedAsSmartList(const FieldDescriptor * field)1265 bool MessageDifferencer::IsTreatedAsSmartList(const FieldDescriptor* field) {
1266 if (!field->is_repeated()) return false;
1267 if (repeated_field_comparisons_.find(field) !=
1268 repeated_field_comparisons_.end()) {
1269 return repeated_field_comparisons_[field] == AS_SMART_LIST;
1270 }
1271 return GetMapKeyComparator(field) == nullptr &&
1272 repeated_field_comparison_ == AS_SMART_LIST;
1273 }
1274
IsTreatedAsSubset(const FieldDescriptor * field)1275 bool MessageDifferencer::IsTreatedAsSubset(const FieldDescriptor* field) {
1276 return scope_ == PARTIAL &&
1277 (IsTreatedAsSet(field) || GetMapKeyComparator(field) != NULL);
1278 }
1279
IsIgnored(const Message & message1,const Message & message2,const FieldDescriptor * field,const std::vector<SpecificField> & parent_fields)1280 bool MessageDifferencer::IsIgnored(
1281 const Message& message1, const Message& message2,
1282 const FieldDescriptor* field,
1283 const std::vector<SpecificField>& parent_fields) {
1284 if (ignored_fields_.find(field) != ignored_fields_.end()) {
1285 return true;
1286 }
1287 for (IgnoreCriteria* criteria : ignore_criteria_) {
1288 if (criteria->IsIgnored(message1, message2, field, parent_fields)) {
1289 return true;
1290 }
1291 }
1292 return false;
1293 }
1294
IsUnknownFieldIgnored(const Message & message1,const Message & message2,const SpecificField & field,const std::vector<SpecificField> & parent_fields)1295 bool MessageDifferencer::IsUnknownFieldIgnored(
1296 const Message& message1, const Message& message2,
1297 const SpecificField& field,
1298 const std::vector<SpecificField>& parent_fields) {
1299 for (IgnoreCriteria* criteria : ignore_criteria_) {
1300 if (criteria->IsUnknownFieldIgnored(message1, message2, field,
1301 parent_fields)) {
1302 return true;
1303 }
1304 }
1305 return false;
1306 }
1307
1308 const MessageDifferencer::MapKeyComparator*
GetMapKeyComparator(const FieldDescriptor * field) const1309 MessageDifferencer ::GetMapKeyComparator(const FieldDescriptor* field) const {
1310 if (!field->is_repeated()) return NULL;
1311 FieldKeyComparatorMap::const_iterator it =
1312 map_field_key_comparator_.find(field);
1313 if (it != map_field_key_comparator_.end()) {
1314 return it->second;
1315 }
1316 if (field->is_map()) {
1317 // field cannot already be treated as list or set since TreatAsList() and
1318 // TreatAsSet() call GetMapKeyComparator() and fail if it returns non-NULL.
1319 return &map_entry_key_comparator_;
1320 }
1321 return NULL;
1322 }
1323
1324 namespace {
1325
1326 typedef std::pair<int, const UnknownField*> IndexUnknownFieldPair;
1327
1328 struct UnknownFieldOrdering {
operator ()google::protobuf::util::__anon0b44c3070111::UnknownFieldOrdering1329 inline bool operator()(const IndexUnknownFieldPair& a,
1330 const IndexUnknownFieldPair& b) const {
1331 if (a.second->number() < b.second->number()) return true;
1332 if (a.second->number() > b.second->number()) return false;
1333 return a.second->type() < b.second->type();
1334 }
1335 };
1336
1337 } // namespace
1338
UnpackAny(const Message & any,std::unique_ptr<Message> * data)1339 bool MessageDifferencer::UnpackAny(const Message& any,
1340 std::unique_ptr<Message>* data) {
1341 const Reflection* reflection = any.GetReflection();
1342 const FieldDescriptor* type_url_field;
1343 const FieldDescriptor* value_field;
1344 if (!internal::GetAnyFieldDescriptors(any, &type_url_field, &value_field)) {
1345 return false;
1346 }
1347 const std::string& type_url = reflection->GetString(any, type_url_field);
1348 std::string full_type_name;
1349 if (!internal::ParseAnyTypeUrl(type_url, &full_type_name)) {
1350 return false;
1351 }
1352
1353 const Descriptor* desc =
1354 any.GetDescriptor()->file()->pool()->FindMessageTypeByName(
1355 full_type_name);
1356 if (desc == NULL) {
1357 GOOGLE_DLOG(ERROR) << "Proto type '" << full_type_name << "' not found";
1358 return false;
1359 }
1360
1361 if (dynamic_message_factory_ == NULL) {
1362 dynamic_message_factory_.reset(new DynamicMessageFactory());
1363 }
1364 data->reset(dynamic_message_factory_->GetPrototype(desc)->New());
1365 std::string serialized_value = reflection->GetString(any, value_field);
1366 if (!(*data)->ParsePartialFromString(serialized_value)) {
1367 GOOGLE_DLOG(ERROR) << "Failed to parse value for " << full_type_name;
1368 return false;
1369 }
1370 return true;
1371 }
1372
CompareUnknownFields(const Message & message1,const Message & message2,const UnknownFieldSet & unknown_field_set1,const UnknownFieldSet & unknown_field_set2,std::vector<SpecificField> * parent_field)1373 bool MessageDifferencer::CompareUnknownFields(
1374 const Message& message1, const Message& message2,
1375 const UnknownFieldSet& unknown_field_set1,
1376 const UnknownFieldSet& unknown_field_set2,
1377 std::vector<SpecificField>* parent_field) {
1378 // Ignore unknown fields in EQUIVALENT mode.
1379 if (message_field_comparison_ == EQUIVALENT) return true;
1380
1381 if (unknown_field_set1.empty() && unknown_field_set2.empty()) {
1382 return true;
1383 }
1384
1385 bool is_different = false;
1386
1387 // We first sort the unknown fields by field number and type (in other words,
1388 // in tag order), making sure to preserve ordering of values with the same
1389 // tag. This allows us to report only meaningful differences between the
1390 // two sets -- that is, differing values for the same tag. We use
1391 // IndexUnknownFieldPairs to keep track of the field's original index for
1392 // reporting purposes.
1393 std::vector<IndexUnknownFieldPair> fields1; // unknown_field_set1, sorted
1394 std::vector<IndexUnknownFieldPair> fields2; // unknown_field_set2, sorted
1395 fields1.reserve(unknown_field_set1.field_count());
1396 fields2.reserve(unknown_field_set2.field_count());
1397
1398 for (int i = 0; i < unknown_field_set1.field_count(); i++) {
1399 fields1.push_back(std::make_pair(i, &unknown_field_set1.field(i)));
1400 }
1401 for (int i = 0; i < unknown_field_set2.field_count(); i++) {
1402 fields2.push_back(std::make_pair(i, &unknown_field_set2.field(i)));
1403 }
1404
1405 UnknownFieldOrdering is_before;
1406 std::stable_sort(fields1.begin(), fields1.end(), is_before);
1407 std::stable_sort(fields2.begin(), fields2.end(), is_before);
1408
1409 // In order to fill in SpecificField::index, we have to keep track of how
1410 // many values we've seen with the same field number and type.
1411 // current_repeated points at the first field in this range, and
1412 // current_repeated_start{1,2} are the indexes of the first field in the
1413 // range within fields1 and fields2.
1414 const UnknownField* current_repeated = NULL;
1415 int current_repeated_start1 = 0;
1416 int current_repeated_start2 = 0;
1417
1418 // Now that we have two sorted lists, we can detect fields which appear only
1419 // in one list or the other by traversing them simultaneously.
1420 int index1 = 0;
1421 int index2 = 0;
1422 while (index1 < fields1.size() || index2 < fields2.size()) {
1423 enum {
1424 ADDITION,
1425 DELETION,
1426 MODIFICATION,
1427 COMPARE_GROUPS,
1428 NO_CHANGE
1429 } change_type;
1430
1431 // focus_field is the field we're currently reporting on. (In the case
1432 // of a modification, it's the field on the left side.)
1433 const UnknownField* focus_field;
1434 bool match = false;
1435
1436 if (index2 == fields2.size() ||
1437 (index1 < fields1.size() &&
1438 is_before(fields1[index1], fields2[index2]))) {
1439 // fields1[index1] is not present in fields2.
1440 change_type = DELETION;
1441 focus_field = fields1[index1].second;
1442 } else if (index1 == fields1.size() ||
1443 is_before(fields2[index2], fields1[index1])) {
1444 // fields2[index2] is not present in fields1.
1445 if (scope_ == PARTIAL) {
1446 // Ignore.
1447 ++index2;
1448 continue;
1449 }
1450 change_type = ADDITION;
1451 focus_field = fields2[index2].second;
1452 } else {
1453 // Field type and number are the same. See if the values differ.
1454 change_type = MODIFICATION;
1455 focus_field = fields1[index1].second;
1456
1457 switch (focus_field->type()) {
1458 case UnknownField::TYPE_VARINT:
1459 match = fields1[index1].second->varint() ==
1460 fields2[index2].second->varint();
1461 break;
1462 case UnknownField::TYPE_FIXED32:
1463 match = fields1[index1].second->fixed32() ==
1464 fields2[index2].second->fixed32();
1465 break;
1466 case UnknownField::TYPE_FIXED64:
1467 match = fields1[index1].second->fixed64() ==
1468 fields2[index2].second->fixed64();
1469 break;
1470 case UnknownField::TYPE_LENGTH_DELIMITED:
1471 match = fields1[index1].second->length_delimited() ==
1472 fields2[index2].second->length_delimited();
1473 break;
1474 case UnknownField::TYPE_GROUP:
1475 // We must deal with this later, after building the SpecificField.
1476 change_type = COMPARE_GROUPS;
1477 break;
1478 }
1479 if (match && change_type != COMPARE_GROUPS) {
1480 change_type = NO_CHANGE;
1481 }
1482 }
1483
1484 if (current_repeated == NULL ||
1485 focus_field->number() != current_repeated->number() ||
1486 focus_field->type() != current_repeated->type()) {
1487 // We've started a new repeated field.
1488 current_repeated = focus_field;
1489 current_repeated_start1 = index1;
1490 current_repeated_start2 = index2;
1491 }
1492
1493 if (change_type == NO_CHANGE && reporter_ == NULL) {
1494 // Fields were already compared and matched and we have no reporter.
1495 ++index1;
1496 ++index2;
1497 continue;
1498 }
1499
1500 // Build the SpecificField. This is slightly complicated.
1501 SpecificField specific_field;
1502 specific_field.unknown_field_number = focus_field->number();
1503 specific_field.unknown_field_type = focus_field->type();
1504
1505 specific_field.unknown_field_set1 = &unknown_field_set1;
1506 specific_field.unknown_field_set2 = &unknown_field_set2;
1507
1508 if (change_type != ADDITION) {
1509 specific_field.unknown_field_index1 = fields1[index1].first;
1510 }
1511 if (change_type != DELETION) {
1512 specific_field.unknown_field_index2 = fields2[index2].first;
1513 }
1514
1515 // Calculate the field index.
1516 if (change_type == ADDITION) {
1517 specific_field.index = index2 - current_repeated_start2;
1518 specific_field.new_index = index2 - current_repeated_start2;
1519 } else {
1520 specific_field.index = index1 - current_repeated_start1;
1521 specific_field.new_index = index2 - current_repeated_start2;
1522 }
1523
1524 if (IsUnknownFieldIgnored(message1, message2, specific_field,
1525 *parent_field)) {
1526 if (reporter_ != NULL) {
1527 parent_field->push_back(specific_field);
1528 reporter_->ReportUnknownFieldIgnored(message1, message2, *parent_field);
1529 parent_field->pop_back();
1530 }
1531 return true;
1532 }
1533
1534 if (change_type == ADDITION || change_type == DELETION ||
1535 change_type == MODIFICATION) {
1536 if (reporter_ == NULL) {
1537 // We found a difference and we have no reporter.
1538 return false;
1539 }
1540 is_different = true;
1541 }
1542
1543 parent_field->push_back(specific_field);
1544
1545 switch (change_type) {
1546 case ADDITION:
1547 reporter_->ReportAdded(message1, message2, *parent_field);
1548 ++index2;
1549 break;
1550 case DELETION:
1551 reporter_->ReportDeleted(message1, message2, *parent_field);
1552 ++index1;
1553 break;
1554 case MODIFICATION:
1555 reporter_->ReportModified(message1, message2, *parent_field);
1556 ++index1;
1557 ++index2;
1558 break;
1559 case COMPARE_GROUPS:
1560 if (!CompareUnknownFields(
1561 message1, message2, fields1[index1].second->group(),
1562 fields2[index2].second->group(), parent_field)) {
1563 if (reporter_ == NULL) return false;
1564 is_different = true;
1565 reporter_->ReportModified(message1, message2, *parent_field);
1566 }
1567 ++index1;
1568 ++index2;
1569 break;
1570 case NO_CHANGE:
1571 ++index1;
1572 ++index2;
1573 if (report_matches_) {
1574 reporter_->ReportMatched(message1, message2, *parent_field);
1575 }
1576 }
1577
1578 parent_field->pop_back();
1579 }
1580
1581 return !is_different;
1582 }
1583
1584 namespace {
1585
1586 // Find maximum bipartite matching using the argumenting path algorithm.
1587 class MaximumMatcher {
1588 public:
1589 typedef std::function<bool(int, int)> NodeMatchCallback;
1590 // MaximumMatcher takes ownership of the passed in callback and uses it to
1591 // determine whether a node on the left side of the bipartial graph matches
1592 // a node on the right side. count1 is the number of nodes on the left side
1593 // of the graph and count2 to is the number of nodes on the right side.
1594 // Every node is referred to using 0-based indices.
1595 // If a maximum match is found, the result will be stored in match_list1 and
1596 // match_list2. match_list1[i] == j means the i-th node on the left side is
1597 // matched to the j-th node on the right side and match_list2[x] == y means
1598 // the x-th node on the right side is matched to y-th node on the left side.
1599 // match_list1[i] == -1 means the node is not matched. Same with match_list2.
1600 MaximumMatcher(int count1, int count2, NodeMatchCallback callback,
1601 std::vector<int>* match_list1, std::vector<int>* match_list2);
1602 // Find a maximum match and return the number of matched node pairs.
1603 // If early_return is true, this method will return 0 immediately when it
1604 // finds that not all nodes on the left side can be matched.
1605 int FindMaximumMatch(bool early_return);
1606
1607 private:
1608 // Determines whether the node on the left side of the bipartial graph
1609 // matches the one on the right side.
1610 bool Match(int left, int right);
1611 // Find an argumenting path starting from the node v on the left side. If a
1612 // path can be found, update match_list2_ to reflect the path and return
1613 // true.
1614 bool FindArgumentPathDFS(int v, std::vector<bool>* visited);
1615
1616 int count1_;
1617 int count2_;
1618 NodeMatchCallback match_callback_;
1619 std::map<std::pair<int, int>, bool> cached_match_results_;
1620 std::vector<int>* match_list1_;
1621 std::vector<int>* match_list2_;
1622 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MaximumMatcher);
1623 };
1624
MaximumMatcher(int count1,int count2,NodeMatchCallback callback,std::vector<int> * match_list1,std::vector<int> * match_list2)1625 MaximumMatcher::MaximumMatcher(int count1, int count2,
1626 NodeMatchCallback callback,
1627 std::vector<int>* match_list1,
1628 std::vector<int>* match_list2)
1629 : count1_(count1),
1630 count2_(count2),
1631 match_callback_(std::move(callback)),
1632 match_list1_(match_list1),
1633 match_list2_(match_list2) {
1634 match_list1_->assign(count1, -1);
1635 match_list2_->assign(count2, -1);
1636 }
1637
FindMaximumMatch(bool early_return)1638 int MaximumMatcher::FindMaximumMatch(bool early_return) {
1639 int result = 0;
1640 for (int i = 0; i < count1_; ++i) {
1641 std::vector<bool> visited(count1_);
1642 if (FindArgumentPathDFS(i, &visited)) {
1643 ++result;
1644 } else if (early_return) {
1645 return 0;
1646 }
1647 }
1648 // Backfill match_list1_ as we only filled match_list2_ when finding
1649 // argumenting paths.
1650 for (int i = 0; i < count2_; ++i) {
1651 if ((*match_list2_)[i] != -1) {
1652 (*match_list1_)[(*match_list2_)[i]] = i;
1653 }
1654 }
1655 return result;
1656 }
1657
Match(int left,int right)1658 bool MaximumMatcher::Match(int left, int right) {
1659 std::pair<int, int> p(left, right);
1660 std::map<std::pair<int, int>, bool>::iterator it =
1661 cached_match_results_.find(p);
1662 if (it != cached_match_results_.end()) {
1663 return it->second;
1664 }
1665 cached_match_results_[p] = match_callback_(left, right);
1666 return cached_match_results_[p];
1667 }
1668
FindArgumentPathDFS(int v,std::vector<bool> * visited)1669 bool MaximumMatcher::FindArgumentPathDFS(int v, std::vector<bool>* visited) {
1670 (*visited)[v] = true;
1671 // We try to match those un-matched nodes on the right side first. This is
1672 // the step that the naive greedy matching algorithm uses. In the best cases
1673 // where the greedy algorithm can find a maximum matching, we will always
1674 // find a match in this step and the performance will be identical to the
1675 // greedy algorithm.
1676 for (int i = 0; i < count2_; ++i) {
1677 int matched = (*match_list2_)[i];
1678 if (matched == -1 && Match(v, i)) {
1679 (*match_list2_)[i] = v;
1680 return true;
1681 }
1682 }
1683 // Then we try those already matched nodes and see if we can find an
1684 // alternative match for the node matched to them.
1685 // The greedy algorithm will stop before this and fail to produce the
1686 // correct result.
1687 for (int i = 0; i < count2_; ++i) {
1688 int matched = (*match_list2_)[i];
1689 if (matched != -1 && Match(v, i)) {
1690 if (!(*visited)[matched] && FindArgumentPathDFS(matched, visited)) {
1691 (*match_list2_)[i] = v;
1692 return true;
1693 }
1694 }
1695 }
1696 return false;
1697 }
1698
1699 } // namespace
1700
MatchRepeatedFieldIndices(const Message & message1,const Message & message2,const FieldDescriptor * repeated_field,const MapKeyComparator * key_comparator,const std::vector<SpecificField> & parent_fields,std::vector<int> * match_list1,std::vector<int> * match_list2)1701 bool MessageDifferencer::MatchRepeatedFieldIndices(
1702 const Message& message1, const Message& message2,
1703 const FieldDescriptor* repeated_field,
1704 const MapKeyComparator* key_comparator,
1705 const std::vector<SpecificField>& parent_fields,
1706 std::vector<int>* match_list1, std::vector<int>* match_list2) {
1707 const int count1 =
1708 message1.GetReflection()->FieldSize(message1, repeated_field);
1709 const int count2 =
1710 message2.GetReflection()->FieldSize(message2, repeated_field);
1711 const bool is_treated_as_smart_set = IsTreatedAsSmartSet(repeated_field);
1712
1713 match_list1->assign(count1, -1);
1714 match_list2->assign(count2, -1);
1715 // Ensure that we don't report differences during the matching process. Since
1716 // field comparators could potentially use this message differencer object to
1717 // perform further comparisons, turn off reporting here and re-enable it
1718 // before returning.
1719 Reporter* reporter = reporter_;
1720 reporter_ = NULL;
1721 NumDiffsReporter num_diffs_reporter;
1722 std::vector<int32> num_diffs_list1;
1723 if (is_treated_as_smart_set) {
1724 num_diffs_list1.assign(count1, kint32max);
1725 }
1726
1727 bool success = true;
1728 // Find potential match if this is a special repeated field.
1729 if (scope_ == PARTIAL) {
1730 // When partial matching is enabled, Compare(a, b) && Compare(a, c)
1731 // doesn't necessarily imply Compare(b, c). Therefore a naive greedy
1732 // algorithm will fail to find a maximum matching.
1733 // Here we use the augmenting path algorithm.
1734 auto callback = [&](int i1, int i2) {
1735 return IsMatch(repeated_field, key_comparator, &message1, &message2,
1736 parent_fields, nullptr, i1, i2);
1737 };
1738 MaximumMatcher matcher(count1, count2, std::move(callback), match_list1,
1739 match_list2);
1740 // If diff info is not needed, we should end the matching process as
1741 // soon as possible if not all items can be matched.
1742 bool early_return = (reporter == nullptr);
1743 int match_count = matcher.FindMaximumMatch(early_return);
1744 if (match_count != count1 && early_return) return false;
1745 success = success && (match_count == count1);
1746 } else {
1747 int start_offset = 0;
1748 // If the two repeated fields are treated as sets, optimize for the case
1749 // where both start with same items stored in the same order.
1750 if (IsTreatedAsSet(repeated_field) || is_treated_as_smart_set ||
1751 IsTreatedAsSmartList(repeated_field)) {
1752 start_offset = std::min(count1, count2);
1753 for (int i = 0; i < count1 && i < count2; i++) {
1754 if (IsMatch(repeated_field, key_comparator, &message1, &message2,
1755 parent_fields, nullptr, i, i)) {
1756 match_list1->at(i) = i;
1757 match_list2->at(i) = i;
1758 } else {
1759 start_offset = i;
1760 break;
1761 }
1762 }
1763 }
1764 for (int i = start_offset; i < count1; ++i) {
1765 // Indicates any matched elements for this repeated field.
1766 bool match = false;
1767 int matched_j = -1;
1768
1769 for (int j = start_offset; j < count2; j++) {
1770 if (match_list2->at(j) != -1) {
1771 if (!is_treated_as_smart_set || num_diffs_list1[i] == 0 ||
1772 num_diffs_list1[match_list2->at(j)] == 0) {
1773 continue;
1774 }
1775 }
1776
1777 if (is_treated_as_smart_set) {
1778 num_diffs_reporter.Reset();
1779 match = IsMatch(repeated_field, key_comparator, &message1, &message2,
1780 parent_fields, &num_diffs_reporter, i, j);
1781 } else {
1782 match = IsMatch(repeated_field, key_comparator, &message1, &message2,
1783 parent_fields, nullptr, i, j);
1784 }
1785
1786 if (is_treated_as_smart_set) {
1787 if (match) {
1788 num_diffs_list1[i] = 0;
1789 } else if (repeated_field->cpp_type() ==
1790 FieldDescriptor::CPPTYPE_MESSAGE) {
1791 // Replace with the one with fewer diffs.
1792 const int32 num_diffs = num_diffs_reporter.GetNumDiffs();
1793 if (num_diffs < num_diffs_list1[i]) {
1794 // If j has been already matched to some element, ensure the
1795 // current num_diffs is smaller.
1796 if (match_list2->at(j) == -1 ||
1797 num_diffs < num_diffs_list1[match_list2->at(j)]) {
1798 num_diffs_list1[i] = num_diffs;
1799 match = true;
1800 }
1801 }
1802 }
1803 }
1804
1805 if (match) {
1806 matched_j = j;
1807 if (!is_treated_as_smart_set || num_diffs_list1[i] == 0) {
1808 break;
1809 }
1810 }
1811 }
1812
1813 match = (matched_j != -1);
1814 if (match) {
1815 if (is_treated_as_smart_set && match_list2->at(matched_j) != -1) {
1816 // This is to revert the previously matched index in list2.
1817 match_list1->at(match_list2->at(matched_j)) = -1;
1818 match = false;
1819 }
1820 match_list1->at(i) = matched_j;
1821 match_list2->at(matched_j) = i;
1822 }
1823 if (!match && reporter == nullptr) return false;
1824 success = success && match;
1825 }
1826 }
1827
1828 if (IsTreatedAsSmartList(repeated_field)) {
1829 match_indices_for_smart_list_callback_(match_list1, match_list2);
1830 }
1831
1832 reporter_ = reporter;
1833
1834 return success;
1835 }
1836
GetFieldComparisonResult(const Message & message1,const Message & message2,const FieldDescriptor * field,int index1,int index2,const FieldContext * field_context)1837 FieldComparator::ComparisonResult MessageDifferencer::GetFieldComparisonResult(
1838 const Message& message1, const Message& message2,
1839 const FieldDescriptor* field, int index1, int index2,
1840 const FieldContext* field_context) {
1841 FieldComparator* comparator = field_comparator_ != NULL
1842 ? field_comparator_
1843 : &default_field_comparator_;
1844 return comparator->Compare(message1, message2, field, index1, index2,
1845 field_context);
1846 }
1847
1848 // ===========================================================================
1849
Reporter()1850 MessageDifferencer::Reporter::Reporter() {}
~Reporter()1851 MessageDifferencer::Reporter::~Reporter() {}
1852
1853 // ===========================================================================
1854
MapKeyComparator()1855 MessageDifferencer::MapKeyComparator::MapKeyComparator() {}
~MapKeyComparator()1856 MessageDifferencer::MapKeyComparator::~MapKeyComparator() {}
1857
1858 // ===========================================================================
1859
IgnoreCriteria()1860 MessageDifferencer::IgnoreCriteria::IgnoreCriteria() {}
~IgnoreCriteria()1861 MessageDifferencer::IgnoreCriteria::~IgnoreCriteria() {}
1862
1863 // ===========================================================================
1864
1865 // Note that the printer's delimiter is not used, because if we are given a
1866 // printer, we don't know its delimiter.
StreamReporter(io::ZeroCopyOutputStream * output)1867 MessageDifferencer::StreamReporter::StreamReporter(
1868 io::ZeroCopyOutputStream* output)
1869 : printer_(new io::Printer(output, '$')),
1870 delete_printer_(true),
1871 report_modified_aggregates_(false) {}
1872
StreamReporter(io::Printer * printer)1873 MessageDifferencer::StreamReporter::StreamReporter(io::Printer* printer)
1874 : printer_(printer),
1875 delete_printer_(false),
1876 report_modified_aggregates_(false) {}
1877
~StreamReporter()1878 MessageDifferencer::StreamReporter::~StreamReporter() {
1879 if (delete_printer_) delete printer_;
1880 }
1881
PrintPath(const std::vector<SpecificField> & field_path,bool left_side)1882 void MessageDifferencer::StreamReporter::PrintPath(
1883 const std::vector<SpecificField>& field_path, bool left_side) {
1884 for (int i = 0; i < field_path.size(); ++i) {
1885 if (i > 0) {
1886 printer_->Print(".");
1887 }
1888
1889 SpecificField specific_field = field_path[i];
1890
1891 if (specific_field.field != NULL) {
1892 if (specific_field.field->is_extension()) {
1893 printer_->Print("($name$)", "name", specific_field.field->full_name());
1894 } else {
1895 printer_->PrintRaw(specific_field.field->name());
1896 }
1897 if (specific_field.field->is_map()) {
1898 // Don't print index in a map field; they are semantically unordered.
1899 continue;
1900 }
1901 } else {
1902 printer_->PrintRaw(StrCat(specific_field.unknown_field_number));
1903 }
1904 if (left_side && specific_field.index >= 0) {
1905 printer_->Print("[$name$]", "name", StrCat(specific_field.index));
1906 }
1907 if (!left_side && specific_field.new_index >= 0) {
1908 printer_->Print("[$name$]", "name",
1909 StrCat(specific_field.new_index));
1910 }
1911 }
1912 }
1913
PrintPath(const std::vector<SpecificField> & field_path,bool left_side,const Message & message)1914 void MessageDifferencer::StreamReporter::PrintPath(
1915 const std::vector<SpecificField>& field_path, bool left_side,
1916 const Message& message) {
1917 PrintPath(field_path, left_side);
1918 }
1919
PrintValue(const Message & message,const std::vector<SpecificField> & field_path,bool left_side)1920 void MessageDifferencer::StreamReporter::PrintValue(
1921 const Message& message, const std::vector<SpecificField>& field_path,
1922 bool left_side) {
1923 const SpecificField& specific_field = field_path.back();
1924 const FieldDescriptor* field = specific_field.field;
1925 if (field != NULL) {
1926 std::string output;
1927 int index = left_side ? specific_field.index : specific_field.new_index;
1928 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1929 const Reflection* reflection = message.GetReflection();
1930 const Message& field_message =
1931 field->is_repeated()
1932 ? reflection->GetRepeatedMessage(message, field, index)
1933 : reflection->GetMessage(message, field);
1934 output = field_message.ShortDebugString();
1935 if (output.empty()) {
1936 printer_->Print("{ }");
1937 } else {
1938 printer_->Print("{ $name$ }", "name", output);
1939 }
1940 } else {
1941 TextFormat::PrintFieldValueToString(message, field, index, &output);
1942 printer_->PrintRaw(output);
1943 }
1944 } else {
1945 const UnknownFieldSet* unknown_fields =
1946 (left_side ? specific_field.unknown_field_set1
1947 : specific_field.unknown_field_set2);
1948 const UnknownField* unknown_field =
1949 &unknown_fields->field(left_side ? specific_field.unknown_field_index1
1950 : specific_field.unknown_field_index2);
1951 PrintUnknownFieldValue(unknown_field);
1952 }
1953 }
1954
PrintUnknownFieldValue(const UnknownField * unknown_field)1955 void MessageDifferencer::StreamReporter::PrintUnknownFieldValue(
1956 const UnknownField* unknown_field) {
1957 GOOGLE_CHECK(unknown_field != NULL) << " Cannot print NULL unknown_field.";
1958
1959 std::string output;
1960 switch (unknown_field->type()) {
1961 case UnknownField::TYPE_VARINT:
1962 output = StrCat(unknown_field->varint());
1963 break;
1964 case UnknownField::TYPE_FIXED32:
1965 output = StrCat(
1966 "0x", strings::Hex(unknown_field->fixed32(), strings::ZERO_PAD_8));
1967 break;
1968 case UnknownField::TYPE_FIXED64:
1969 output = StrCat(
1970 "0x", strings::Hex(unknown_field->fixed64(), strings::ZERO_PAD_16));
1971 break;
1972 case UnknownField::TYPE_LENGTH_DELIMITED:
1973 output = StringPrintf(
1974 "\"%s\"", CEscape(unknown_field->length_delimited()).c_str());
1975 break;
1976 case UnknownField::TYPE_GROUP:
1977 // TODO(kenton): Print the contents of the group like we do for
1978 // messages. Requires an equivalent of ShortDebugString() for
1979 // UnknownFieldSet.
1980 output = "{ ... }";
1981 break;
1982 }
1983 printer_->PrintRaw(output);
1984 }
1985
Print(const std::string & str)1986 void MessageDifferencer::StreamReporter::Print(const std::string& str) {
1987 printer_->Print(str.c_str());
1988 }
1989
ReportAdded(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)1990 void MessageDifferencer::StreamReporter::ReportAdded(
1991 const Message& message1, const Message& message2,
1992 const std::vector<SpecificField>& field_path) {
1993 printer_->Print("added: ");
1994 PrintPath(field_path, false, message2);
1995 printer_->Print(": ");
1996 PrintValue(message2, field_path, false);
1997 printer_->Print("\n"); // Print for newlines.
1998 }
1999
ReportDeleted(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)2000 void MessageDifferencer::StreamReporter::ReportDeleted(
2001 const Message& message1, const Message& message2,
2002 const std::vector<SpecificField>& field_path) {
2003 printer_->Print("deleted: ");
2004 PrintPath(field_path, true, message1);
2005 printer_->Print(": ");
2006 PrintValue(message1, field_path, true);
2007 printer_->Print("\n"); // Print for newlines
2008 }
2009
ReportModified(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)2010 void MessageDifferencer::StreamReporter::ReportModified(
2011 const Message& message1, const Message& message2,
2012 const std::vector<SpecificField>& field_path) {
2013 if (!report_modified_aggregates_ && field_path.back().field == NULL) {
2014 if (field_path.back().unknown_field_type == UnknownField::TYPE_GROUP) {
2015 // Any changes to the subfields have already been printed.
2016 return;
2017 }
2018 } else if (!report_modified_aggregates_) {
2019 if (field_path.back().field->cpp_type() ==
2020 FieldDescriptor::CPPTYPE_MESSAGE) {
2021 // Any changes to the subfields have already been printed.
2022 return;
2023 }
2024 }
2025
2026 printer_->Print("modified: ");
2027 PrintPath(field_path, true, message1);
2028 if (CheckPathChanged(field_path)) {
2029 printer_->Print(" -> ");
2030 PrintPath(field_path, false, message2);
2031 }
2032 printer_->Print(": ");
2033 PrintValue(message1, field_path, true);
2034 printer_->Print(" -> ");
2035 PrintValue(message2, field_path, false);
2036 printer_->Print("\n"); // Print for newlines.
2037 }
2038
ReportMoved(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)2039 void MessageDifferencer::StreamReporter::ReportMoved(
2040 const Message& message1, const Message& message2,
2041 const std::vector<SpecificField>& field_path) {
2042 printer_->Print("moved: ");
2043 PrintPath(field_path, true, message1);
2044 printer_->Print(" -> ");
2045 PrintPath(field_path, false, message2);
2046 printer_->Print(" : ");
2047 PrintValue(message1, field_path, true);
2048 printer_->Print("\n"); // Print for newlines.
2049 }
2050
ReportMatched(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)2051 void MessageDifferencer::StreamReporter::ReportMatched(
2052 const Message& message1, const Message& message2,
2053 const std::vector<SpecificField>& field_path) {
2054 printer_->Print("matched: ");
2055 PrintPath(field_path, true, message1);
2056 if (CheckPathChanged(field_path)) {
2057 printer_->Print(" -> ");
2058 PrintPath(field_path, false, message2);
2059 }
2060 printer_->Print(" : ");
2061 PrintValue(message1, field_path, true);
2062 printer_->Print("\n"); // Print for newlines.
2063 }
2064
ReportIgnored(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)2065 void MessageDifferencer::StreamReporter::ReportIgnored(
2066 const Message& message1, const Message& message2,
2067 const std::vector<SpecificField>& field_path) {
2068 printer_->Print("ignored: ");
2069 PrintPath(field_path, true, message1);
2070 if (CheckPathChanged(field_path)) {
2071 printer_->Print(" -> ");
2072 PrintPath(field_path, false, message2);
2073 }
2074 printer_->Print("\n"); // Print for newlines.
2075 }
2076
ReportUnknownFieldIgnored(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)2077 void MessageDifferencer::StreamReporter::ReportUnknownFieldIgnored(
2078 const Message& message1, const Message& message2,
2079 const std::vector<SpecificField>& field_path) {
2080 printer_->Print("ignored: ");
2081 PrintPath(field_path, true, message1);
2082 if (CheckPathChanged(field_path)) {
2083 printer_->Print(" -> ");
2084 PrintPath(field_path, false, message2);
2085 }
2086 printer_->Print("\n"); // Print for newlines.
2087 }
2088
2089 MessageDifferencer::MapKeyComparator*
CreateMultipleFieldsMapKeyComparator(const std::vector<std::vector<const FieldDescriptor * >> & key_field_paths)2090 MessageDifferencer::CreateMultipleFieldsMapKeyComparator(
2091 const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths) {
2092 return new MultipleFieldsMapKeyComparator(this, key_field_paths);
2093 }
2094
2095 } // namespace util
2096 } // namespace protobuf
2097 } // namespace google
2098