1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file or at 6 // https://developers.google.com/open-source/licenses/bsd 7 8 // Author: jschorr@google.com (Joseph Schorr) 9 // Based on original Protocol Buffers design by 10 // Sanjay Ghemawat, Jeff Dean, and others. 11 // 12 // This file defines static methods and classes for comparing Protocol 13 // Messages. 14 // 15 // Aug. 2008: Added Unknown Fields Comparison for messages. 16 // Aug. 2009: Added different options to compare repeated fields. 17 // Apr. 2010: Moved field comparison to FieldComparator 18 // Sep. 2020: Added option to output map keys in path 19 20 #ifndef GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__ 21 #define GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__ 22 23 #include <functional> 24 #include <memory> 25 #include <string> 26 #include <vector> 27 28 #include "absl/base/macros.h" 29 #include "absl/container/fixed_array.h" 30 #include "absl/container/flat_hash_map.h" 31 #include "absl/container/flat_hash_set.h" 32 #include "absl/log/absl_check.h" 33 #include "google/protobuf/descriptor.h" // FieldDescriptor 34 #include "google/protobuf/message.h" // Message 35 #include "google/protobuf/text_format.h" 36 #include "google/protobuf/unknown_field_set.h" 37 #include "google/protobuf/util/field_comparator.h" 38 39 // Always include as last one, otherwise it can break compilation 40 #include "google/protobuf/port_def.inc" 41 42 namespace google { 43 namespace protobuf { 44 45 class DynamicMessageFactory; 46 class FieldDescriptor; 47 48 namespace io { 49 class ZeroCopyOutputStream; 50 class Printer; 51 } // namespace io 52 53 namespace util { 54 55 class DefaultFieldComparator; 56 class FieldContext; // declared below MessageDifferencer 57 58 // A basic differencer that can be used to determine 59 // the differences between two specified Protocol Messages. If any differences 60 // are found, the Compare method will return false, and any differencer reporter 61 // specified via ReportDifferencesTo will have its reporting methods called (see 62 // below for implementation of the report). Based off of the original 63 // ProtocolDifferencer implementation in //net/proto/protocol-differencer.h 64 // (Thanks Todd!). 65 // 66 // MessageDifferencer REQUIRES that compared messages be the same type, defined 67 // as messages that share the same descriptor. If not, the behavior of this 68 // class is undefined. 69 // 70 // People disagree on what MessageDifferencer should do when asked to compare 71 // messages with different descriptors. Some people think it should always 72 // return false. Others expect it to try to look for similar fields and 73 // compare them anyway -- especially if the descriptors happen to be identical. 74 // If we chose either of these behaviors, some set of people would find it 75 // surprising, and could end up writing code expecting the other behavior 76 // without realizing their error. Therefore, we forbid that usage. 77 // 78 // This class is implemented based on the proto2 reflection. The performance 79 // should be good enough for normal usages. However, for places where the 80 // performance is extremely sensitive, there are several alternatives: 81 // - Comparing serialized string 82 // Downside: false negatives (there are messages that are the same but their 83 // serialized strings are different). 84 // - Equals code generator by compiler plugin (net/proto2/contrib/equals_plugin) 85 // Downside: more generated code; maintenance overhead for the additional rule 86 // (must be in sync with the original proto_library). 87 // 88 // Note on handling of google.protobuf.Any: MessageDifferencer automatically 89 // unpacks Any::value into a Message and compares its individual fields. 90 // Messages encoded in a repeated Any cannot be compared using TreatAsMap. 91 // 92 // Note on thread-safety: MessageDifferencer is *not* thread-safe. You need to 93 // guard it with a lock to use the same MessageDifferencer instance from 94 // multiple threads. Note that it's fine to call static comparison methods 95 // (like MessageDifferencer::Equals) concurrently, but it's not recommended for 96 // performance critical code as it leads to extra allocations. 97 class PROTOBUF_EXPORT MessageDifferencer { 98 public: 99 // Determines whether the supplied messages are equal. Equality is defined as 100 // all fields within the two messages being set to the same value. Primitive 101 // fields and strings are compared by value while embedded messages/groups 102 // are compared as if via a recursive call. Use Compare() with IgnoreField() 103 // if some fields should be ignored in the comparison. Use Compare() with 104 // TreatAsSet() if there are repeated fields where ordering does not matter. 105 // 106 // This method REQUIRES that the two messages have the same 107 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 108 static bool Equals(const Message& message1, const Message& message2); 109 110 // Determines whether the supplied messages are equivalent. Equivalency is 111 // defined as all fields within the two messages having the same value. This 112 // differs from the Equals method above in that fields with default values 113 // are considered set to said value automatically. For details on how default 114 // values are defined for each field type, see: 115 // https://developers.google.com/protocol-buffers/docs/proto?csw=1#optional. 116 // Also, Equivalent() ignores unknown fields. Use IgnoreField() and Compare() 117 // if some fields should be ignored in the comparison. 118 // 119 // This method REQUIRES that the two messages have the same 120 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 121 static bool Equivalent(const Message& message1, const Message& message2); 122 123 // Determines whether the supplied messages are approximately equal. 124 // Approximate equality is defined as all fields within the two messages 125 // being approximately equal. Primitive (non-float) fields and strings are 126 // compared by value, floats are compared using MathUtil::AlmostEquals() and 127 // embedded messages/groups are compared as if via a recursive call. Use 128 // IgnoreField() and Compare() if some fields should be ignored in the 129 // comparison. 130 // 131 // This method REQUIRES that the two messages have the same 132 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 133 static bool ApproximatelyEquals(const Message& message1, 134 const Message& message2); 135 136 // Determines whether the supplied messages are approximately equivalent. 137 // Approximate equivalency is defined as all fields within the two messages 138 // being approximately equivalent. As in 139 // MessageDifferencer::ApproximatelyEquals, primitive (non-float) fields and 140 // strings are compared by value, floats are compared using 141 // MathUtil::AlmostEquals() and embedded messages/groups are compared as if 142 // via a recursive call. However, fields with default values are considered 143 // set to said value, as per MessageDiffencer::Equivalent. Use IgnoreField() 144 // and Compare() if some fields should be ignored in the comparison. 145 // 146 // This method REQUIRES that the two messages have the same 147 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 148 static bool ApproximatelyEquivalent(const Message& message1, 149 const Message& message2); 150 151 // Identifies an individual field in a message instance. Used for field_path, 152 // below. 153 struct SpecificField { 154 // The messages that contain this field. They are always set. They are valid 155 // only during a call to Reporter::Report* methods. 156 // 157 // If the original messages are of type google.protobuf.Any, these fields 158 // will store the unpacked payloads, and unpacked_any will become > 0. More 159 // precisely, unpacked_any defines the nesting level of Any. For example, 160 // if the original message packs another Any, then unpacked_any=2, assuming 161 // the differencer unpacked both of them. 162 // 163 // When an Any object packs a non-Any proto object whose field includes 164 // Any, then unpacked_any=1. Thus, in most practical applications, 165 // unpacked_any will be 0 or 1. 166 const Message* message1 = nullptr; 167 const Message* message2 = nullptr; 168 int unpacked_any = 0; 169 170 // For known fields, "field" is filled in and "unknown_field_number" is -1. 171 // For unknown fields, "field" is NULL, "unknown_field_number" is the field 172 // number, and "unknown_field_type" is its type. 173 const FieldDescriptor* field = nullptr; 174 int unknown_field_number = -1; 175 UnknownField::Type unknown_field_type = UnknownField::Type::TYPE_VARINT; 176 177 // If this a repeated field, "index" is the index within it. For unknown 178 // fields, this is the index of the field among all unknown fields of the 179 // same field number and type. 180 int index = -1; 181 182 // If "field" is a repeated field which is being treated as a map or 183 // a set (see TreatAsMap() and TreatAsSet(), below), new_index indicates 184 // the index the position to which the element has moved. If the element 185 // has not moved, "new_index" will have the same value as "index". 186 int new_index = -1; 187 188 // If "field" is a map field, point to the map entry. 189 const Message* map_entry1 = nullptr; 190 const Message* map_entry2 = nullptr; 191 192 // For unknown fields, these are the pointers to the UnknownFieldSet 193 // containing the unknown fields. In certain cases (e.g. proto1's 194 // MessageSet, or nested groups of unknown fields), these may differ from 195 // the messages' internal UnknownFieldSets. 196 const UnknownFieldSet* unknown_field_set1 = nullptr; 197 const UnknownFieldSet* unknown_field_set2 = nullptr; 198 199 // For unknown fields, these are the index of the field within the 200 // UnknownFieldSets. One or the other will be -1 when 201 // reporting an addition or deletion. 202 int unknown_field_index1 = -1; 203 int unknown_field_index2 = -1; 204 205 // Was this field added to the diffing because set_force_compare_no_presence 206 // was called on the MessageDifferencer object. 207 bool forced_compare_no_presence_ = false; 208 }; 209 210 // Abstract base class from which all MessageDifferencer 211 // reporters derive. The five Report* methods below will be called when 212 // a field has been added, deleted, modified, moved, or matched. The third 213 // argument is a vector of FieldDescriptor pointers which describes the chain 214 // of fields that was taken to find the current field. For example, for a 215 // field found in an embedded message, the vector will contain two 216 // FieldDescriptors. The first will be the field of the embedded message 217 // itself and the second will be the actual field in the embedded message 218 // that was added/deleted/modified. 219 // Fields will be reported in PostTraversalOrder. 220 // For example, given following proto, if both baz and mooo are changed. 221 // foo { 222 // bar { 223 // baz: 1 224 // mooo: 2 225 // } 226 // } 227 // ReportModified will be invoked with following order: 228 // 1. foo.bar.baz or foo.bar.mooo 229 // 2. foo.bar.mooo or foo.bar.baz 230 // 2. foo.bar 231 // 3. foo 232 class PROTOBUF_EXPORT Reporter { 233 public: 234 Reporter(); 235 Reporter(const Reporter&) = delete; 236 Reporter& operator=(const Reporter&) = delete; 237 virtual ~Reporter(); 238 239 // Reports that a field has been added into Message2. ReportAdded(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)240 virtual void ReportAdded(const Message& message1, const Message& message2, 241 const std::vector<SpecificField>& field_path) {} 242 243 // Reports that a field has been deleted from Message1. ReportDeleted(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)244 virtual void ReportDeleted(const Message& message1, const Message& message2, 245 const std::vector<SpecificField>& field_path) {} 246 247 // Reports that the value of a field has been modified. ReportModified(const Message & message1,const Message & message2,const std::vector<SpecificField> & field_path)248 virtual void ReportModified(const Message& message1, 249 const Message& message2, 250 const std::vector<SpecificField>& field_path) {} 251 252 // Reports that a repeated field has been moved to another location. This 253 // only applies when using TreatAsSet or TreatAsMap() -- see below. Also 254 // note that for any given field, ReportModified and ReportMoved are 255 // mutually exclusive. If a field has been both moved and modified, then 256 // only ReportModified will be called. ReportMoved(const Message &,const Message &,const std::vector<SpecificField> &)257 virtual void ReportMoved( 258 const Message& /* message1 */, const Message& /* message2 */, 259 const std::vector<SpecificField>& /* field_path */) {} 260 261 // Reports that two fields match. Useful for doing side-by-side diffs. 262 // This function is mutually exclusive with ReportModified and ReportMoved. 263 // Note that you must call set_report_matches(true) before calling Compare 264 // to make use of this function. ReportMatched(const Message &,const Message &,const std::vector<SpecificField> &)265 virtual void ReportMatched( 266 const Message& /* message1 */, const Message& /* message2 */, 267 const std::vector<SpecificField>& /* field_path */) {} 268 269 // Reports that two fields would have been compared, but the 270 // comparison has been skipped because the field was marked as 271 // 'ignored' using IgnoreField(). This function is mutually 272 // exclusive with all the other Report() functions. 273 // 274 // The contract of ReportIgnored is slightly different than the 275 // other Report() functions, in that |field_path.back().index| is 276 // always equal to -1, even if the last field is repeated. This is 277 // because while the other Report() functions indicate where in a 278 // repeated field the action (Addition, Deletion, etc...) 279 // happened, when a repeated field is 'ignored', the differencer 280 // simply calls ReportIgnored on the repeated field as a whole and 281 // moves on without looking at its individual elements. 282 // 283 // Furthermore, ReportIgnored() does not indicate whether the 284 // fields were in fact equal or not, as Compare() does not inspect 285 // these fields at all. It is up to the Reporter to decide whether 286 // the fields are equal or not (perhaps with a second call to 287 // Compare()), if it cares. ReportIgnored(const Message &,const Message &,const std::vector<SpecificField> &)288 virtual void ReportIgnored( 289 const Message& /* message1 */, const Message& /* message2 */, 290 const std::vector<SpecificField>& /* field_path */) {} 291 292 // Report that an unknown field is ignored. (see comment above). 293 // Note this is a different function since the last SpecificField in field 294 // path has a null field. This could break existing Reporter. ReportUnknownFieldIgnored(const Message &,const Message &,const std::vector<SpecificField> &)295 virtual void ReportUnknownFieldIgnored( 296 const Message& /* message1 */, const Message& /* message2 */, 297 const std::vector<SpecificField>& /* field_path */) {} 298 }; 299 300 // MapKeyComparator is used to determine if two elements have the same key 301 // when comparing elements of a repeated field as a map. 302 class PROTOBUF_EXPORT MapKeyComparator { 303 public: 304 MapKeyComparator(); 305 MapKeyComparator(const MapKeyComparator&) = delete; 306 MapKeyComparator& operator=(const MapKeyComparator&) = delete; 307 virtual ~MapKeyComparator(); 308 309 // This method should be overridden by every implementation. The arg 310 // unmapped_any is nonzero the original messages provided by the user are of 311 // type google.protobuf.Any. 312 // 313 // More precisely, unpacked_any defines the nesting level of Any. For 314 // example, if Any packs another Any then unpacked_any=2, assuming the 315 // patcher unpacked both. Note that when an Any object packs a non-Any 316 // proto object whose field includes Any, then unpacked_any=1. Thus, in most 317 // practical applications, unpacked_any will be 0 or 1. IsMatch(const Message & message1,const Message & message2,int,const std::vector<SpecificField> & fields)318 virtual bool IsMatch(const Message& message1, const Message& message2, 319 int /* unmapped_any */, 320 const std::vector<SpecificField>& fields) const { 321 ABSL_CHECK(false) << "IsMatch() is not implemented."; 322 return false; 323 } 324 }; 325 326 // Abstract base class from which all IgnoreCriteria derive. 327 // By adding IgnoreCriteria more complex ignore logic can be implemented. 328 // IgnoreCriteria are registered with AddIgnoreCriteria. For each compared 329 // field IsIgnored is called on each added IgnoreCriteria until one returns 330 // true or all return false. 331 // IsIgnored is called for fields where at least one side has a value. 332 class PROTOBUF_EXPORT IgnoreCriteria { 333 public: 334 IgnoreCriteria(); 335 virtual ~IgnoreCriteria(); 336 337 // Returns true if the field should be ignored. 338 virtual bool IsIgnored( 339 const Message& /* message1 */, const Message& /* message2 */, 340 const FieldDescriptor* /* field */, 341 const std::vector<SpecificField>& /* parent_fields */) = 0; 342 343 // Returns true if the unknown field should be ignored. 344 // Note: This will be called for unknown fields as well in which case 345 // field.field will be null. IsUnknownFieldIgnored(const Message &,const Message &,const SpecificField &,const std::vector<SpecificField> &)346 virtual bool IsUnknownFieldIgnored( 347 const Message& /* message1 */, const Message& /* message2 */, 348 const SpecificField& /* field */, 349 const std::vector<SpecificField>& /* parent_fields */) { 350 return false; 351 } 352 }; 353 354 // To add a Reporter, construct default here, then use ReportDifferencesTo or 355 // ReportDifferencesToString. 356 explicit MessageDifferencer(); 357 MessageDifferencer(const MessageDifferencer&) = delete; 358 MessageDifferencer& operator=(const MessageDifferencer&) = delete; 359 360 ~MessageDifferencer(); 361 362 enum MessageFieldComparison { 363 EQUAL, // Fields must be present in both messages 364 // for the messages to be considered the same. 365 EQUIVALENT, // Fields with default values are considered set 366 // for comparison purposes even if not explicitly 367 // set in the messages themselves. Unknown fields 368 // are ignored. 369 }; 370 371 enum Scope { 372 FULL, // All fields of both messages are considered in the comparison. 373 PARTIAL // Only fields present in the first message are considered; fields 374 // set only in the second message will be skipped during 375 // comparison. 376 }; 377 378 // DEPRECATED. Use FieldComparator::FloatComparison instead. 379 enum FloatComparison { 380 EXACT, // Floats and doubles are compared exactly. 381 APPROXIMATE // Floats and doubles are compared using the 382 // MathUtil::AlmostEquals method. 383 }; 384 385 enum RepeatedFieldComparison { 386 AS_LIST, // Repeated fields are compared in order. Differing values at 387 // the same index are reported using ReportModified(). If the 388 // repeated fields have different numbers of elements, the 389 // unpaired elements are reported using ReportAdded() or 390 // ReportDeleted(). 391 AS_SET, // Treat all the repeated fields as sets. 392 // See TreatAsSet(), as below. 393 AS_SMART_LIST, // Similar to AS_SET, but preserve the order and find the 394 // longest matching sequence from the first matching 395 // element. To use an optimal solution, call 396 // SetMatchIndicesForSmartListCallback() to pass it in. 397 AS_SMART_SET, // Similar to AS_SET, but match elements with fewest diffs. 398 }; 399 400 // The elements of the given repeated field will be treated as a set for 401 // diffing purposes, so different orderings of the same elements will be 402 // considered equal. Elements which are present on both sides of the 403 // comparison but which have changed position will be reported with 404 // ReportMoved(). Elements which only exist on one side or the other are 405 // reported with ReportAdded() and ReportDeleted() regardless of their 406 // positions. ReportModified() is never used for this repeated field. If 407 // the only differences between the compared messages is that some fields 408 // have been moved, then the comparison returns true. 409 // 410 // Note that despite the name of this method, this is really 411 // comparison as multisets: if one side of the comparison has a duplicate 412 // in the repeated field but the other side doesn't, this will count as 413 // a mismatch. 414 // 415 // If the scope of comparison is set to PARTIAL, then in addition to what's 416 // above, extra values added to repeated fields of the second message will 417 // not cause the comparison to fail. 418 // 419 // Note that set comparison is currently O(k * n^2) (where n is the total 420 // number of elements, and k is the average size of each element). In theory 421 // it could be made O(n * k) with a more complex hashing implementation. Feel 422 // free to contribute one if the current implementation is too slow for you. 423 // If partial matching is also enabled, the time complexity will be O(k * n^2 424 // + n^3) in which n^3 is the time complexity of the maximum matching 425 // algorithm. 426 // 427 // REQUIRES: field->is_repeated() and field not registered with TreatAsMap* 428 void TreatAsSet(const FieldDescriptor* field); 429 void TreatAsSmartSet(const FieldDescriptor* field); 430 431 // The elements of the given repeated field will be treated as a list for 432 // diffing purposes, so different orderings of the same elements will NOT be 433 // considered equal. 434 // 435 // REQUIRES: field->is_repeated() and field not registered with TreatAsMap* 436 void TreatAsList(const FieldDescriptor* field); 437 // Note that the complexity is similar to treating as SET. 438 void TreatAsSmartList(const FieldDescriptor* field); 439 440 // The elements of the given repeated field will be treated as a map for 441 // diffing purposes, with |key| being the map key. Thus, elements with the 442 // same key will be compared even if they do not appear at the same index. 443 // Differences are reported similarly to TreatAsSet(), except that 444 // ReportModified() is used to report elements with the same key but 445 // different values. Note that if an element is both moved and modified, 446 // only ReportModified() will be called. As with TreatAsSet, if the only 447 // differences between the compared messages is that some fields have been 448 // moved, then the comparison returns true. See TreatAsSet for notes on 449 // performance. 450 // 451 // REQUIRES: field->is_repeated() 452 // REQUIRES: field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE 453 // REQUIRES: key->containing_type() == field->message_type() 454 void TreatAsMap(const FieldDescriptor* field, const FieldDescriptor* key); 455 // Same as TreatAsMap except that this method will use multiple fields as 456 // the key in comparison. All specified fields in 'key_fields' should be 457 // present in the compared elements. Two elements will be treated as having 458 // the same key iff they have the same value for every specified field. There 459 // are two steps in the comparison process. The first one is key matching. 460 // Every element from one message will be compared to every element from 461 // the other message. Only fields in 'key_fields' are compared in this step 462 // to decide if two elements have the same key. The second step is value 463 // comparison. Those pairs of elements with the same key (with equal value 464 // for every field in 'key_fields') will be compared in this step. 465 // Time complexity of the first step is O(s * m * n ^ 2) where s is the 466 // average size of the fields specified in 'key_fields', m is the number of 467 // fields in 'key_fields' and n is the number of elements. If partial 468 // matching is enabled, an extra O(n^3) will be incured by the maximum 469 // matching algorithm. The second step is O(k * n) where k is the average 470 // size of each element. 471 void TreatAsMapWithMultipleFieldsAsKey( 472 const FieldDescriptor* field, 473 const std::vector<const FieldDescriptor*>& key_fields); 474 // Same as TreatAsMapWithMultipleFieldsAsKey, except that each of the field 475 // do not necessarily need to be a direct subfield. Each element in 476 // key_field_paths indicate a path from the message being compared, listing 477 // successive subfield to reach the key field. 478 // 479 // REQUIRES: 480 // for key_field_path in key_field_paths: 481 // key_field_path[0]->containing_type() == field->message_type() 482 // for i in [0, key_field_path.size() - 1): 483 // key_field_path[i+1]->containing_type() == 484 // key_field_path[i]->message_type() 485 // key_field_path[i]->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE 486 // !key_field_path[i]->is_repeated() 487 void TreatAsMapWithMultipleFieldPathsAsKey( 488 const FieldDescriptor* field, 489 const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths); 490 491 // Uses a custom MapKeyComparator to determine if two elements have the same 492 // key when comparing a repeated field as a map. 493 // The caller is responsible to delete the key_comparator. 494 // This method varies from TreatAsMapWithMultipleFieldsAsKey only in the 495 // first key matching step. Rather than comparing some specified fields, it 496 // will invoke the IsMatch method of the given 'key_comparator' to decide if 497 // two elements have the same key. 498 void TreatAsMapUsingKeyComparator(const FieldDescriptor* field, 499 const MapKeyComparator* key_comparator); 500 501 // Initiates and returns a new instance of MultipleFieldsMapKeyComparator. 502 MapKeyComparator* CreateMultipleFieldsMapKeyComparator( 503 const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths); 504 505 // Add a custom ignore criteria that is evaluated in addition to the 506 // ignored fields added with IgnoreField. 507 // Takes ownership of ignore_criteria. AddIgnoreCriteria(IgnoreCriteria * ignore_criteria)508 void AddIgnoreCriteria(IgnoreCriteria* ignore_criteria) { 509 AddIgnoreCriteria(absl::WrapUnique(ignore_criteria)); 510 } 511 void AddIgnoreCriteria(std::unique_ptr<IgnoreCriteria> ignore_criteria); 512 513 // Indicates that any field with the given descriptor should be 514 // ignored for the purposes of comparing two messages. This applies 515 // to fields nested in the message structure as well as top level 516 // ones. When the MessageDifferencer encounters an ignored field, 517 // ReportIgnored is called on the reporter, if one is specified. 518 // 519 // The only place where the field's 'ignored' status is not applied is when 520 // it is being used as a key in a field passed to TreatAsMap or is one of 521 // the fields passed to TreatAsMapWithMultipleFieldsAsKey. 522 // In this case it is compared in key matching but after that it's ignored 523 // in value comparison. 524 void IgnoreField(const FieldDescriptor* field); 525 526 // Sets the field comparator used to determine differences between protocol 527 // buffer fields. By default it's set to a DefaultFieldComparator instance. 528 // MessageDifferencer doesn't take ownership over the passed object. 529 // Note that this method must be called before Compare for the comparator to 530 // be used. 531 void set_field_comparator(FieldComparator* comparator); 532 void set_field_comparator(DefaultFieldComparator* comparator); 533 534 // DEPRECATED. Pass a DefaultFieldComparator instance instead. 535 // Sets the fraction and margin for the float comparison of a given field. 536 // Uses MathUtil::WithinFractionOrMargin to compare the values. 537 // NOTE: this method does nothing if differencer's field comparator has been 538 // set to a custom object. 539 // 540 // REQUIRES: field->cpp_type == FieldDescriptor::CPPTYPE_DOUBLE or 541 // field->cpp_type == FieldDescriptor::CPPTYPE_FLOAT 542 // REQUIRES: float_comparison_ == APPROXIMATE 543 void SetFractionAndMargin(const FieldDescriptor* field, double fraction, 544 double margin); 545 546 // Sets the type of comparison (as defined in the MessageFieldComparison 547 // enumeration above) that is used by this differencer when determining how 548 // to compare fields in messages. 549 void set_message_field_comparison(MessageFieldComparison comparison); 550 551 // Returns the current message field comparison used in this differencer. 552 MessageFieldComparison message_field_comparison() const; 553 554 // Tells the differencer whether or not to report matches. This method must 555 // be called before Compare. The default for a new differencer is false. set_report_matches(bool report_matches)556 void set_report_matches(bool report_matches) { 557 report_matches_ = report_matches; 558 } 559 560 // Tells the differencer whether or not to report moves (in a set or map 561 // repeated field). This method must be called before Compare. The default for 562 // a new differencer is true. set_report_moves(bool report_moves)563 void set_report_moves(bool report_moves) { report_moves_ = report_moves; } 564 565 // Tells the differencer whether or not to report ignored values. This method 566 // must be called before Compare. The default for a new differencer is true. set_report_ignores(bool report_ignores)567 void set_report_ignores(bool report_ignores) { 568 report_ignores_ = report_ignores; 569 } 570 571 // Sets the scope of the comparison (as defined in the Scope enumeration 572 // above) that is used by this differencer when determining which fields to 573 // compare between the messages. 574 void set_scope(Scope scope); 575 576 // Returns the current scope used by this differencer. 577 Scope scope() const; 578 579 // Only affects PARTIAL diffing. When set, all non-repeated no-presence fields 580 // which are set to their default value (which is the same as being unset) in 581 // message1 but are set to a non-default value in message2 will also be used 582 // in the comparison. 583 void set_force_compare_no_presence(bool value); 584 585 // If set, the fields in message1 that equal the fields passed here will be 586 // treated as required for comparison, even if they are absent. set_require_no_presence_fields(const google::protobuf::TextFormat::Parser::UnsetFieldsMetadata & fields)587 void set_require_no_presence_fields( 588 const google::protobuf::TextFormat::Parser::UnsetFieldsMetadata& fields) { 589 require_no_presence_fields_ = fields; 590 } 591 592 // DEPRECATED. Pass a DefaultFieldComparator instance instead. 593 // Sets the type of comparison (as defined in the FloatComparison enumeration 594 // above) that is used by this differencer when comparing float (and double) 595 // fields in messages. 596 // NOTE: this method does nothing if differencer's field comparator has been 597 // set to a custom object. 598 void set_float_comparison(FloatComparison comparison); 599 600 // Sets the type of comparison for repeated field (as defined in the 601 // RepeatedFieldComparison enumeration above) that is used by this 602 // differencer when compare repeated fields in messages. 603 void set_repeated_field_comparison(RepeatedFieldComparison comparison); 604 605 // Returns the current repeated field comparison used by this differencer. 606 RepeatedFieldComparison repeated_field_comparison() const; 607 608 // Compares the two specified messages, returning true if they are the same, 609 // false otherwise. If this method returns false, any changes between the 610 // two messages will be reported if a Reporter was specified via 611 // ReportDifferencesTo (see also ReportDifferencesToString). 612 // 613 // This method REQUIRES that the two messages have the same 614 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 615 bool Compare(const Message& message1, const Message& message2); 616 617 // Same as above, except comparing only the list of fields specified by the 618 // two vectors of FieldDescriptors. 619 bool CompareWithFields( 620 const Message& message1, const Message& message2, 621 const std::vector<const FieldDescriptor*>& message1_fields, 622 const std::vector<const FieldDescriptor*>& message2_fields); 623 624 // Automatically creates a reporter that will output the differences 625 // found (if any) to the specified output string pointer. Note that this 626 // method must be called before Compare. 627 void ReportDifferencesToString(std::string* output); 628 629 // Tells the MessageDifferencer to report differences via the specified 630 // reporter. Note that this method must be called before Compare for 631 // the reporter to be used. It is the responsibility of the caller to delete 632 // this object. 633 // If the provided pointer equals NULL, the MessageDifferencer stops reporting 634 // differences to any previously set reporters or output strings. 635 void ReportDifferencesTo(Reporter* reporter); 636 637 // Returns the list of fields which was automatically added to the list of 638 // compared fields by calling set_force_compare_no_presence and caused the 639 // last call to Compare to fail. NoPresenceFieldsCausingFailure()640 const absl::flat_hash_set<std::string>& NoPresenceFieldsCausingFailure() { 641 return force_compare_failure_triggering_fields_; 642 } 643 644 private: 645 // Class for processing Any deserialization. This logic is used by both the 646 // MessageDifferencer and StreamReporter classes. 647 class UnpackAnyField { 648 private: 649 std::unique_ptr<DynamicMessageFactory> dynamic_message_factory_; 650 651 public: 652 UnpackAnyField() = default; 653 ~UnpackAnyField() = default; 654 // If "any" is of type google.protobuf.Any, extract its payload using 655 // DynamicMessageFactory and store in "data". 656 bool UnpackAny(const Message& any, std::unique_ptr<Message>* data); 657 }; 658 659 public: 660 // An implementation of the MessageDifferencer Reporter that outputs 661 // any differences found in human-readable form to the supplied 662 // ZeroCopyOutputStream or Printer. If a printer is used, the delimiter 663 // *must* be '$'. 664 // 665 // WARNING: this reporter does not necessarily flush its output until it is 666 // destroyed. As a result, it is not safe to assume the output is valid or 667 // complete until after you destroy the reporter. For example, if you use a 668 // StreamReporter to write to a StringOutputStream, the target string may 669 // contain uninitialized data until the reporter is destroyed. 670 class PROTOBUF_EXPORT StreamReporter : public Reporter { 671 public: 672 explicit StreamReporter(io::ZeroCopyOutputStream* output); 673 explicit StreamReporter(io::Printer* printer); // delimiter '$' 674 StreamReporter(const StreamReporter&) = delete; 675 StreamReporter& operator=(const StreamReporter&) = delete; 676 ~StreamReporter() override; 677 678 // When set to true, the stream reporter will also output aggregates nodes 679 // (i.e. messages and groups) whose subfields have been modified. When 680 // false, will only report the individual subfields. Defaults to false. set_report_modified_aggregates(bool report)681 void set_report_modified_aggregates(bool report) { 682 report_modified_aggregates_ = report; 683 } 684 685 // The following are implementations of the methods described above. 686 687 void ReportAdded(const Message& message1, const Message& message2, 688 const std::vector<SpecificField>& field_path) override; 689 690 void ReportDeleted(const Message& message1, const Message& message2, 691 const std::vector<SpecificField>& field_path) override; 692 693 void ReportModified(const Message& message1, const Message& message2, 694 const std::vector<SpecificField>& field_path) override; 695 696 void ReportMoved(const Message& message1, const Message& message2, 697 const std::vector<SpecificField>& field_path) override; 698 699 void ReportMatched(const Message& message1, const Message& message2, 700 const std::vector<SpecificField>& field_path) override; 701 702 void ReportIgnored(const Message& message1, const Message& message2, 703 const std::vector<SpecificField>& field_path) override; 704 705 void ReportUnknownFieldIgnored( 706 const Message& message1, const Message& message2, 707 const std::vector<SpecificField>& field_path) override; 708 709 // Messages that are being compared must be provided to StreamReporter prior 710 // to processing 711 void SetMessages(const Message& message1, const Message& message2); 712 713 protected: 714 // Prints the specified path of fields to the buffer. 715 virtual void PrintPath(const std::vector<SpecificField>& field_path, 716 bool left_side); 717 718 // Prints the value of fields to the buffer. left_side is true if the 719 // given message is from the left side of the comparison, false if it 720 // was the right. This is relevant only to decide whether to follow 721 // unknown_field_index1 or unknown_field_index2 when an unknown field 722 // is encountered in field_path. 723 virtual void PrintValue(const Message& message, 724 const std::vector<SpecificField>& field_path, 725 bool left_side); 726 727 // Prints the specified path of unknown fields to the buffer. 728 virtual void PrintUnknownFieldValue(const UnknownField* unknown_field); 729 730 // Just print a string 731 void Print(const std::string& str); 732 733 private: 734 // helper function for PrintPath that contains logic for printing maps 735 void PrintMapKey(bool left_side, const SpecificField& specific_field); 736 737 io::Printer* printer_; 738 bool delete_printer_; 739 bool report_modified_aggregates_; 740 const Message* message1_; 741 const Message* message2_; 742 MessageDifferencer::UnpackAnyField unpack_any_field_; 743 }; 744 745 private: 746 friend class SimpleFieldComparator; 747 748 // A MapKeyComparator to be used in TreatAsMapUsingKeyComparator. 749 // Implementation of this class needs to do field value comparison which 750 // relies on some private methods of MessageDifferencer. That's why this 751 // class is declared as a nested class of MessageDifferencer. 752 class MultipleFieldsMapKeyComparator; 753 754 // A MapKeyComparator for use with map_entries. 755 class PROTOBUF_EXPORT MapEntryKeyComparator : public MapKeyComparator { 756 public: 757 explicit MapEntryKeyComparator(MessageDifferencer* message_differencer); 758 bool IsMatch( 759 const Message& message1, const Message& message2, int unpacked_any, 760 const std::vector<SpecificField>& parent_fields) const override; 761 762 private: 763 MessageDifferencer* message_differencer_; 764 }; 765 766 // Returns true if field1's number() is less than field2's. 767 static bool FieldBefore(const FieldDescriptor* field1, 768 const FieldDescriptor* field2); 769 770 // Retrieve all the set fields, including extensions. 771 std::vector<const FieldDescriptor*> RetrieveFields(const Message& message, 772 bool base_message); 773 774 // Combine the two lists of fields into the combined_fields output vector. 775 // All fields present in both lists will always be included in the combined 776 // list. Fields only present in one of the lists will only appear in the 777 // combined list if the corresponding fields_scope option is set to FULL. 778 std::vector<const FieldDescriptor*> CombineFields( 779 const Message& message1, 780 const std::vector<const FieldDescriptor*>& fields1, Scope fields1_scope, 781 const std::vector<const FieldDescriptor*>& fields2, Scope fields2_scope); 782 783 // Internal version of the Compare method which performs the actual 784 // comparison. The parent_fields vector is a vector containing field 785 // descriptors of all fields accessed to get to this comparison operation 786 // (i.e. if the current message is an embedded message, the parent_fields 787 // vector will contain the field that has this embedded message). 788 bool Compare(const Message& message1, const Message& message2, 789 int unpacked_any, std::vector<SpecificField>* parent_fields); 790 791 // Compares all the unknown fields in two messages. 792 bool CompareUnknownFields(const Message& message1, const Message& message2, 793 const UnknownFieldSet&, const UnknownFieldSet&, 794 std::vector<SpecificField>* parent_fields); 795 796 // Compares the specified messages for the requested field lists. The field 797 // lists are modified depending on comparison settings, and then passed to 798 // CompareWithFieldsInternal. 799 bool CompareRequestedFieldsUsingSettings( 800 const Message& message1, const Message& message2, int unpacked_any, 801 const std::vector<const FieldDescriptor*>& message1_fields, 802 const std::vector<const FieldDescriptor*>& message2_fields, 803 std::vector<SpecificField>* parent_fields); 804 805 // Compares the specified messages with the specified field lists. 806 bool CompareWithFieldsInternal( 807 const Message& message1, const Message& message2, int unpacked_any, 808 const std::vector<const FieldDescriptor*>& message1_fields, 809 const std::vector<const FieldDescriptor*>& message2_fields, 810 std::vector<SpecificField>* parent_fields); 811 812 // Compares the repeated fields, and report the error. 813 bool CompareRepeatedField(const Message& message1, const Message& message2, 814 int unpacked_any, const FieldDescriptor* field, 815 std::vector<SpecificField>* parent_fields); 816 817 // Compares map fields, and report the error. 818 bool CompareMapField(const Message& message1, const Message& message2, 819 int unpacked_any, const FieldDescriptor* field, 820 std::vector<SpecificField>* parent_fields); 821 822 // Helper for CompareRepeatedField and CompareMapField: compares and reports 823 // differences element-wise. This is the implementation for non-map fields, 824 // and can also compare map fields by using the underlying representation. 825 bool CompareRepeatedRep(const Message& message1, const Message& message2, 826 int unpacked_any, const FieldDescriptor* field, 827 std::vector<SpecificField>* parent_fields); 828 829 // Helper for CompareMapField: compare the map fields using map reflection 830 // instead of sync to repeated. 831 bool CompareMapFieldByMapReflection(const Message& message1, 832 const Message& message2, int unpacked_any, 833 const FieldDescriptor* field, 834 std::vector<SpecificField>* parent_fields, 835 DefaultFieldComparator* comparator); 836 837 // Shorthand for CompareFieldValueUsingParentFields with NULL parent_fields. 838 bool CompareFieldValue(const Message& message1, const Message& message2, 839 int unpacked_any, const FieldDescriptor* field, 840 int index1, int index2); 841 842 // Compares the specified field on the two messages, returning 843 // true if they are the same, false otherwise. For repeated fields, 844 // this method only compares the value in the specified index. This method 845 // uses Compare functions to recurse into submessages. 846 // The parent_fields vector is used in calls to a Reporter instance calls. 847 // It can be NULL, in which case the MessageDifferencer will create new 848 // list of parent messages if it needs to recursively compare the given field. 849 // To avoid confusing users you should not set it to NULL unless you modified 850 // Reporter to handle the change of parent_fields correctly. 851 bool CompareFieldValueUsingParentFields( 852 const Message& message1, const Message& message2, int unpacked_any, 853 const FieldDescriptor* field, int index1, int index2, 854 std::vector<SpecificField>* parent_fields); 855 856 // Compares the specified field on the two messages, returning comparison 857 // result, as returned by appropriate FieldComparator. 858 FieldComparator::ComparisonResult GetFieldComparisonResult( 859 const Message& message1, const Message& message2, 860 const FieldDescriptor* field, int index1, int index2, 861 const FieldContext* field_context); 862 863 // Check if the two elements in the repeated field are match to each other. 864 // if the key_comprator is NULL, this function returns true when the two 865 // elements are equal. 866 bool IsMatch(const FieldDescriptor* repeated_field, 867 const MapKeyComparator* key_comparator, const Message* message1, 868 const Message* message2, int unpacked_any, 869 const std::vector<SpecificField>& parent_fields, 870 Reporter* reporter, int index1, int index2); 871 872 // Returns true when this repeated field has been configured to be treated 873 // as a Set / SmartSet / SmartList. 874 bool IsTreatedAsSet(const FieldDescriptor* field); 875 bool IsTreatedAsSmartSet(const FieldDescriptor* field); 876 877 bool IsTreatedAsSmartList(const FieldDescriptor* field); 878 // When treating as SMART_LIST, it uses MatchIndicesPostProcessorForSmartList 879 // by default to find the longest matching sequence from the first matching 880 // element. The callback takes two vectors showing the matching indices from 881 // the other vector, where -1 means an unmatch. 882 void SetMatchIndicesForSmartListCallback( 883 std::function<void(std::vector<int>*, std::vector<int>*)> callback); 884 885 // Returns true when this repeated field is to be compared as a subset, ie. 886 // has been configured to be treated as a set or map and scope is set to 887 // PARTIAL. 888 bool IsTreatedAsSubset(const FieldDescriptor* field); 889 890 // Returns true if this field is to be ignored when this 891 // MessageDifferencer compares messages. 892 bool IsIgnored(const Message& message1, const Message& message2, 893 const FieldDescriptor* field, 894 const std::vector<SpecificField>& parent_fields); 895 896 // Returns true if this unknown field is to be ignored when this 897 // MessageDifferencer compares messages. 898 bool IsUnknownFieldIgnored(const Message& message1, const Message& message2, 899 const SpecificField& field, 900 const std::vector<SpecificField>& parent_fields); 901 902 // Returns MapKeyComparator* when this field has been configured to be treated 903 // as a map or its is_map() return true. If not, returns NULL. 904 const MapKeyComparator* GetMapKeyComparator( 905 const FieldDescriptor* field) const; 906 907 // Attempts to match indices of a repeated field, so that the contained values 908 // match. Clears output vectors and sets their values to indices of paired 909 // messages, ie. if message1[0] matches message2[1], then match_list1[0] == 1 910 // and match_list2[1] == 0. The unmatched indices are indicated by -1. 911 // Assumes the repeated field is not treated as a simple list. 912 // This method returns false if the match failed. However, it doesn't mean 913 // that the comparison succeeds when this method returns true (you need to 914 // double-check in this case). 915 bool MatchRepeatedFieldIndices( 916 const Message& message1, const Message& message2, int unpacked_any, 917 const FieldDescriptor* repeated_field, 918 const MapKeyComparator* key_comparator, 919 const std::vector<SpecificField>& parent_fields, 920 std::vector<int>* match_list1, std::vector<int>* match_list2); 921 922 // Checks if index is equal to new_index in all the specific fields. 923 static bool CheckPathChanged(const std::vector<SpecificField>& parent_fields); 924 925 // ABSL_CHECKs that the given repeated field can be compared according to 926 // new_comparison. 927 void CheckRepeatedFieldComparisons( 928 const FieldDescriptor* field, 929 const RepeatedFieldComparison& new_comparison); 930 931 // Whether we should still compare the field despite its absence in message1. 932 bool ShouldCompareNoPresence(const Message& message1, 933 const Reflection& reflection1, 934 const FieldDescriptor* field2) const; 935 936 Reporter* reporter_; 937 DefaultFieldComparator default_field_comparator_; 938 MessageFieldComparison message_field_comparison_; 939 Scope scope_; 940 absl::flat_hash_set<const FieldDescriptor*> force_compare_no_presence_fields_; 941 google::protobuf::TextFormat::Parser::UnsetFieldsMetadata require_no_presence_fields_; 942 absl::flat_hash_set<std::string> force_compare_failure_triggering_fields_; 943 RepeatedFieldComparison repeated_field_comparison_; 944 945 absl::flat_hash_map<const FieldDescriptor*, RepeatedFieldComparison> 946 repeated_field_comparisons_; 947 // Keeps track of MapKeyComparators that are created within 948 // MessageDifferencer. These MapKeyComparators should be deleted 949 // before MessageDifferencer is destroyed. 950 // When TreatAsMap or TreatAsMapWithMultipleFieldsAsKey is called, we don't 951 // store the supplied FieldDescriptors directly. Instead, a new 952 // MapKeyComparator is created for comparison purpose. 953 std::vector<MapKeyComparator*> owned_key_comparators_; 954 absl::flat_hash_map<const FieldDescriptor*, const MapKeyComparator*> 955 map_field_key_comparator_; 956 MapEntryKeyComparator map_entry_key_comparator_; 957 std::vector<std::unique_ptr<IgnoreCriteria>> ignore_criteria_; 958 // Reused multiple times in RetrieveFields to avoid extra allocations 959 std::vector<const FieldDescriptor*> tmp_message_fields_; 960 961 absl::flat_hash_set<const FieldDescriptor*> ignored_fields_; 962 963 union { 964 DefaultFieldComparator* default_impl; 965 FieldComparator* base; 966 } field_comparator_ = {&default_field_comparator_}; 967 enum { kFCDefault, kFCBase } field_comparator_kind_ = kFCDefault; 968 969 bool report_matches_; 970 bool report_moves_; 971 bool report_ignores_; 972 bool force_compare_no_presence_ = false; 973 974 std::string* output_string_; 975 976 // Callback to post-process the matched indices to support SMART_LIST. 977 std::function<void(std::vector<int>*, std::vector<int>*)> 978 match_indices_for_smart_list_callback_; 979 980 MessageDifferencer::UnpackAnyField unpack_any_field_; 981 }; 982 983 // This class provides extra information to the FieldComparator::Compare 984 // function. 985 class PROTOBUF_EXPORT FieldContext { 986 public: FieldContext(std::vector<MessageDifferencer::SpecificField> * parent_fields)987 explicit FieldContext( 988 std::vector<MessageDifferencer::SpecificField>* parent_fields) 989 : parent_fields_(parent_fields) {} 990 parent_fields()991 std::vector<MessageDifferencer::SpecificField>* parent_fields() const { 992 return parent_fields_; 993 } 994 995 private: 996 std::vector<MessageDifferencer::SpecificField>* parent_fields_; 997 }; 998 999 } // namespace util 1000 } // namespace protobuf 1001 } // namespace google 1002 1003 #include "google/protobuf/port_undef.inc" 1004 1005 #endif // GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__ 1006