1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 // Author: jschorr@google.com (Joseph Schorr) 32 // Based on original Protocol Buffers design by 33 // Sanjay Ghemawat, Jeff Dean, and others. 34 // 35 // This file defines static methods and classes for comparing Protocol 36 // Messages. 37 // 38 // Aug. 2008: Added Unknown Fields Comparison for messages. 39 // Aug. 2009: Added different options to compare repeated fields. 40 // Apr. 2010: Moved field comparison to FieldComparator 41 // Sep. 2020: Added option to output map keys in path 42 43 #ifndef GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__ 44 #define GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__ 45 46 47 #include <functional> 48 #include <map> 49 #include <memory> 50 #include <set> 51 #include <string> 52 #include <vector> 53 54 #include <google/protobuf/descriptor.h> // FieldDescriptor 55 #include <google/protobuf/message.h> // Message 56 #include <google/protobuf/unknown_field_set.h> 57 #include <google/protobuf/util/field_comparator.h> 58 59 // Always include as last one, otherwise it can break compilation 60 #include <google/protobuf/port_def.inc> 61 62 namespace google { 63 namespace protobuf { 64 65 class DynamicMessageFactory; 66 class FieldDescriptor; 67 68 namespace io { 69 class ZeroCopyOutputStream; 70 class Printer; 71 } // namespace io 72 73 namespace util { 74 75 class DefaultFieldComparator; 76 class FieldContext; // declared below MessageDifferencer 77 78 // Defines a collection of field descriptors. 79 // In case of internal google codebase we are using absl::FixedArray instead 80 // of vector. It significantly speeds up proto comparison (by ~30%) by 81 // reducing the number of malloc/free operations 82 typedef std::vector<const FieldDescriptor*> FieldDescriptorArray; 83 84 // A basic differencer that can be used to determine 85 // the differences between two specified Protocol Messages. If any differences 86 // are found, the Compare method will return false, and any differencer reporter 87 // specified via ReportDifferencesTo will have its reporting methods called (see 88 // below for implementation of the report). Based off of the original 89 // ProtocolDifferencer implementation in //net/proto/protocol-differencer.h 90 // (Thanks Todd!). 91 // 92 // MessageDifferencer REQUIRES that compared messages be the same type, defined 93 // as messages that share the same descriptor. If not, the behavior of this 94 // class is undefined. 95 // 96 // People disagree on what MessageDifferencer should do when asked to compare 97 // messages with different descriptors. Some people think it should always 98 // return false. Others expect it to try to look for similar fields and 99 // compare them anyway -- especially if the descriptors happen to be identical. 100 // If we chose either of these behaviors, some set of people would find it 101 // surprising, and could end up writing code expecting the other behavior 102 // without realizing their error. Therefore, we forbid that usage. 103 // 104 // This class is implemented based on the proto2 reflection. The performance 105 // should be good enough for normal usages. However, for places where the 106 // performance is extremely sensitive, there are several alternatives: 107 // - Comparing serialized string 108 // Downside: false negatives (there are messages that are the same but their 109 // serialized strings are different). 110 // - Equals code generator by compiler plugin (net/proto2/contrib/equals_plugin) 111 // Downside: more generated code; maintenance overhead for the additional rule 112 // (must be in sync with the original proto_library). 113 // 114 // Note on handling of google.protobuf.Any: MessageDifferencer automatically 115 // unpacks Any::value into a Message and compares its individual fields. 116 // Messages encoded in a repeated Any cannot be compared using TreatAsMap. 117 // 118 // Note on thread-safety: MessageDifferencer is *not* thread-safe. You need to 119 // guard it with a lock to use the same MessageDifferencer instance from 120 // multiple threads. Note that it's fine to call static comparison methods 121 // (like MessageDifferencer::Equals) concurrently, but it's not recommended for 122 // performance critical code as it leads to extra allocations. 123 class PROTOBUF_EXPORT MessageDifferencer { 124 public: 125 // Determines whether the supplied messages are equal. Equality is defined as 126 // all fields within the two messages being set to the same value. Primitive 127 // fields and strings are compared by value while embedded messages/groups 128 // are compared as if via a recursive call. Use Compare() with IgnoreField() 129 // if some fields should be ignored in the comparison. Use Compare() with 130 // TreatAsSet() if there are repeated fields where ordering does not matter. 131 // 132 // This method REQUIRES that the two messages have the same 133 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 134 static bool Equals(const Message& message1, const Message& message2); 135 136 // Determines whether the supplied messages are equivalent. Equivalency is 137 // defined as all fields within the two messages having the same value. This 138 // differs from the Equals method above in that fields with default values 139 // are considered set to said value automatically. For details on how default 140 // values are defined for each field type, see: 141 // https://developers.google.com/protocol-buffers/docs/proto?csw=1#optional. 142 // Also, Equivalent() ignores unknown fields. Use IgnoreField() and Compare() 143 // if some fields should be ignored in the comparison. 144 // 145 // This method REQUIRES that the two messages have the same 146 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 147 static bool Equivalent(const Message& message1, const Message& message2); 148 149 // Determines whether the supplied messages are approximately equal. 150 // Approximate equality is defined as all fields within the two messages 151 // being approximately equal. Primitive (non-float) fields and strings are 152 // compared by value, floats are compared using MathUtil::AlmostEquals() and 153 // embedded messages/groups are compared as if via a recursive call. Use 154 // IgnoreField() and Compare() if some fields should be ignored in the 155 // comparison. 156 // 157 // This method REQUIRES that the two messages have the same 158 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 159 static bool ApproximatelyEquals(const Message& message1, 160 const Message& message2); 161 162 // Determines whether the supplied messages are approximately equivalent. 163 // Approximate equivalency is defined as all fields within the two messages 164 // being approximately equivalent. As in 165 // MessageDifferencer::ApproximatelyEquals, primitive (non-float) fields and 166 // strings are compared by value, floats are compared using 167 // MathUtil::AlmostEquals() and embedded messages/groups are compared as if 168 // via a recursive call. However, fields with default values are considered 169 // set to said value, as per MessageDiffencer::Equivalent. Use IgnoreField() 170 // and Compare() if some fields should be ignored in the comparison. 171 // 172 // This method REQUIRES that the two messages have the same 173 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 174 static bool ApproximatelyEquivalent(const Message& message1, 175 const Message& message2); 176 177 // Identifies an individual field in a message instance. Used for field_path, 178 // below. 179 struct SpecificField { 180 // For known fields, "field" is filled in and "unknown_field_number" is -1. 181 // For unknown fields, "field" is NULL, "unknown_field_number" is the field 182 // number, and "unknown_field_type" is its type. 183 const FieldDescriptor* field = nullptr; 184 int unknown_field_number = -1; 185 UnknownField::Type unknown_field_type = UnknownField::Type::TYPE_VARINT; 186 187 // If this a repeated field, "index" is the index within it. For unknown 188 // fields, this is the index of the field among all unknown fields of the 189 // same field number and type. 190 int index = -1; 191 192 // If "field" is a repeated field which is being treated as a map or 193 // a set (see TreatAsMap() and TreatAsSet(), below), new_index indicates 194 // the index the position to which the element has moved. If the element 195 // has not moved, "new_index" will have the same value as "index". 196 int new_index = -1; 197 198 // If "field" is a map field, point to the map entry. 199 const Message* map_entry1 = nullptr; 200 const Message* map_entry2 = nullptr; 201 202 // For unknown fields, these are the pointers to the UnknownFieldSet 203 // containing the unknown fields. In certain cases (e.g. proto1's 204 // MessageSet, or nested groups of unknown fields), these may differ from 205 // the messages' internal UnknownFieldSets. 206 const UnknownFieldSet* unknown_field_set1 = nullptr; 207 const UnknownFieldSet* unknown_field_set2 = nullptr; 208 209 // For unknown fields, these are the index of the field within the 210 // UnknownFieldSets. One or the other will be -1 when 211 // reporting an addition or deletion. 212 int unknown_field_index1 = -1; 213 int unknown_field_index2 = -1; 214 }; 215 216 // Abstract base class from which all MessageDifferencer 217 // reporters derive. The five Report* methods below will be called when 218 // a field has been added, deleted, modified, moved, or matched. The third 219 // argument is a vector of FieldDescriptor pointers which describes the chain 220 // of fields that was taken to find the current field. For example, for a 221 // field found in an embedded message, the vector will contain two 222 // FieldDescriptors. The first will be the field of the embedded message 223 // itself and the second will be the actual field in the embedded message 224 // that was added/deleted/modified. 225 // Fields will be reported in PostTraversalOrder. 226 // For example, given following proto, if both baz and mooo are changed. 227 // foo { 228 // bar { 229 // baz: 1 230 // mooo: 2 231 // } 232 // } 233 // ReportModified will be invoked with following order: 234 // 1. foo.bar.baz or foo.bar.mooo 235 // 2. foo.bar.mooo or foo.bar.baz 236 // 2. foo.bar 237 // 3. foo 238 class PROTOBUF_EXPORT Reporter { 239 public: 240 Reporter(); 241 virtual ~Reporter(); 242 243 // Reports that a field has been added into Message2. 244 virtual void ReportAdded(const Message& message1, const Message& message2, 245 const std::vector<SpecificField>& field_path) = 0; 246 247 // Reports that a field has been deleted from Message1. 248 virtual void ReportDeleted( 249 const Message& message1, const Message& message2, 250 const std::vector<SpecificField>& field_path) = 0; 251 252 // Reports that the value of a field has been modified. 253 virtual void ReportModified( 254 const Message& message1, const Message& message2, 255 const std::vector<SpecificField>& field_path) = 0; 256 257 // Reports that a repeated field has been moved to another location. This 258 // only applies when using TreatAsSet or TreatAsMap() -- see below. Also 259 // note that for any given field, ReportModified and ReportMoved are 260 // mutually exclusive. If a field has been both moved and modified, then 261 // only ReportModified will be called. ReportMoved(const Message &,const Message &,const std::vector<SpecificField> &)262 virtual void ReportMoved( 263 const Message& /* message1 */, const Message& /* message2 */, 264 const std::vector<SpecificField>& /* field_path */) {} 265 266 // Reports that two fields match. Useful for doing side-by-side diffs. 267 // This function is mutually exclusive with ReportModified and ReportMoved. 268 // Note that you must call set_report_matches(true) before calling Compare 269 // to make use of this function. ReportMatched(const Message &,const Message &,const std::vector<SpecificField> &)270 virtual void ReportMatched( 271 const Message& /* message1 */, const Message& /* message2 */, 272 const std::vector<SpecificField>& /* field_path */) {} 273 274 // Reports that two fields would have been compared, but the 275 // comparison has been skipped because the field was marked as 276 // 'ignored' using IgnoreField(). This function is mutually 277 // exclusive with all the other Report() functions. 278 // 279 // The contract of ReportIgnored is slightly different than the 280 // other Report() functions, in that |field_path.back().index| is 281 // always equal to -1, even if the last field is repeated. This is 282 // because while the other Report() functions indicate where in a 283 // repeated field the action (Addition, Deletion, etc...) 284 // happened, when a repeated field is 'ignored', the differencer 285 // simply calls ReportIgnored on the repeated field as a whole and 286 // moves on without looking at its individual elements. 287 // 288 // Furthermore, ReportIgnored() does not indicate whether the 289 // fields were in fact equal or not, as Compare() does not inspect 290 // these fields at all. It is up to the Reporter to decide whether 291 // the fields are equal or not (perhaps with a second call to 292 // Compare()), if it cares. ReportIgnored(const Message &,const Message &,const std::vector<SpecificField> &)293 virtual void ReportIgnored( 294 const Message& /* message1 */, const Message& /* message2 */, 295 const std::vector<SpecificField>& /* field_path */) {} 296 297 // Report that an unknown field is ignored. (see comment above). 298 // Note this is a different function since the last SpecificField in field 299 // path has a null field. This could break existing Reporter. ReportUnknownFieldIgnored(const Message &,const Message &,const std::vector<SpecificField> &)300 virtual void ReportUnknownFieldIgnored( 301 const Message& /* message1 */, const Message& /* message2 */, 302 const std::vector<SpecificField>& /* field_path */) {} 303 304 private: 305 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Reporter); 306 }; 307 308 // MapKeyComparator is used to determine if two elements have the same key 309 // when comparing elements of a repeated field as a map. 310 class PROTOBUF_EXPORT MapKeyComparator { 311 public: 312 MapKeyComparator(); 313 virtual ~MapKeyComparator(); 314 IsMatch(const Message &,const Message &,const std::vector<SpecificField> &)315 virtual bool IsMatch( 316 const Message& /* message1 */, const Message& /* message2 */, 317 const std::vector<SpecificField>& /* parent_fields */) const { 318 GOOGLE_CHECK(false) << "IsMatch() is not implemented."; 319 return false; 320 } 321 322 private: 323 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MapKeyComparator); 324 }; 325 326 // Abstract base class from which all IgnoreCriteria derive. 327 // By adding IgnoreCriteria more complex ignore logic can be implemented. 328 // IgnoreCriteria are registered with AddIgnoreCriteria. For each compared 329 // field IsIgnored is called on each added IgnoreCriteria until one returns 330 // true or all return false. 331 // IsIgnored is called for fields where at least one side has a value. 332 class PROTOBUF_EXPORT IgnoreCriteria { 333 public: 334 IgnoreCriteria(); 335 virtual ~IgnoreCriteria(); 336 337 // Returns true if the field should be ignored. 338 virtual bool IsIgnored( 339 const Message& /* message1 */, const Message& /* message2 */, 340 const FieldDescriptor* /* field */, 341 const std::vector<SpecificField>& /* parent_fields */) = 0; 342 343 // Returns true if the unknown field should be ignored. 344 // Note: This will be called for unknown fields as well in which case 345 // field.field will be null. IsUnknownFieldIgnored(const Message &,const Message &,const SpecificField &,const std::vector<SpecificField> &)346 virtual bool IsUnknownFieldIgnored( 347 const Message& /* message1 */, const Message& /* message2 */, 348 const SpecificField& /* field */, 349 const std::vector<SpecificField>& /* parent_fields */) { 350 return false; 351 } 352 }; 353 354 // To add a Reporter, construct default here, then use ReportDifferencesTo or 355 // ReportDifferencesToString. 356 explicit MessageDifferencer(); 357 358 ~MessageDifferencer(); 359 360 enum MessageFieldComparison { 361 EQUAL, // Fields must be present in both messages 362 // for the messages to be considered the same. 363 EQUIVALENT, // Fields with default values are considered set 364 // for comparison purposes even if not explicitly 365 // set in the messages themselves. Unknown fields 366 // are ignored. 367 }; 368 369 enum Scope { 370 FULL, // All fields of both messages are considered in the comparison. 371 PARTIAL // Only fields present in the first message are considered; fields 372 // set only in the second message will be skipped during 373 // comparison. 374 }; 375 376 // DEPRECATED. Use FieldComparator::FloatComparison instead. 377 enum FloatComparison { 378 EXACT, // Floats and doubles are compared exactly. 379 APPROXIMATE // Floats and doubles are compared using the 380 // MathUtil::AlmostEquals method. 381 }; 382 383 enum RepeatedFieldComparison { 384 AS_LIST, // Repeated fields are compared in order. Differing values at 385 // the same index are reported using ReportModified(). If the 386 // repeated fields have different numbers of elements, the 387 // unpaired elements are reported using ReportAdded() or 388 // ReportDeleted(). 389 AS_SET, // Treat all the repeated fields as sets. 390 // See TreatAsSet(), as below. 391 AS_SMART_LIST, // Similar to AS_SET, but preserve the order and find the 392 // longest matching sequence from the first matching 393 // element. To use an optimal solution, call 394 // SetMatchIndicesForSmartListCallback() to pass it in. 395 AS_SMART_SET, // Similar to AS_SET, but match elements with fewest diffs. 396 }; 397 398 // The elements of the given repeated field will be treated as a set for 399 // diffing purposes, so different orderings of the same elements will be 400 // considered equal. Elements which are present on both sides of the 401 // comparison but which have changed position will be reported with 402 // ReportMoved(). Elements which only exist on one side or the other are 403 // reported with ReportAdded() and ReportDeleted() regardless of their 404 // positions. ReportModified() is never used for this repeated field. If 405 // the only differences between the compared messages is that some fields 406 // have been moved, then the comparison returns true. 407 // 408 // Note that despite the name of this method, this is really 409 // comparison as multisets: if one side of the comparison has a duplicate 410 // in the repeated field but the other side doesn't, this will count as 411 // a mismatch. 412 // 413 // If the scope of comparison is set to PARTIAL, then in addition to what's 414 // above, extra values added to repeated fields of the second message will 415 // not cause the comparison to fail. 416 // 417 // Note that set comparison is currently O(k * n^2) (where n is the total 418 // number of elements, and k is the average size of each element). In theory 419 // it could be made O(n * k) with a more complex hashing implementation. Feel 420 // free to contribute one if the current implementation is too slow for you. 421 // If partial matching is also enabled, the time complexity will be O(k * n^2 422 // + n^3) in which n^3 is the time complexity of the maximum matching 423 // algorithm. 424 // 425 // REQUIRES: field->is_repeated() and field not registered with TreatAsMap* 426 void TreatAsSet(const FieldDescriptor* field); 427 void TreatAsSmartSet(const FieldDescriptor* field); 428 429 // The elements of the given repeated field will be treated as a list for 430 // diffing purposes, so different orderings of the same elements will NOT be 431 // considered equal. 432 // 433 // REQUIRES: field->is_repeated() and field not registered with TreatAsMap* 434 void TreatAsList(const FieldDescriptor* field); 435 // Note that the complexity is similar to treating as SET. 436 void TreatAsSmartList(const FieldDescriptor* field); 437 438 // The elements of the given repeated field will be treated as a map for 439 // diffing purposes, with |key| being the map key. Thus, elements with the 440 // same key will be compared even if they do not appear at the same index. 441 // Differences are reported similarly to TreatAsSet(), except that 442 // ReportModified() is used to report elements with the same key but 443 // different values. Note that if an element is both moved and modified, 444 // only ReportModified() will be called. As with TreatAsSet, if the only 445 // differences between the compared messages is that some fields have been 446 // moved, then the comparison returns true. See TreatAsSet for notes on 447 // performance. 448 // 449 // REQUIRES: field->is_repeated() 450 // REQUIRES: field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE 451 // REQUIRES: key->containing_type() == field->message_type() 452 void TreatAsMap(const FieldDescriptor* field, const FieldDescriptor* key); 453 // Same as TreatAsMap except that this method will use multiple fields as 454 // the key in comparison. All specified fields in 'key_fields' should be 455 // present in the compared elements. Two elements will be treated as having 456 // the same key iff they have the same value for every specified field. There 457 // are two steps in the comparison process. The first one is key matching. 458 // Every element from one message will be compared to every element from 459 // the other message. Only fields in 'key_fields' are compared in this step 460 // to decide if two elements have the same key. The second step is value 461 // comparison. Those pairs of elements with the same key (with equal value 462 // for every field in 'key_fields') will be compared in this step. 463 // Time complexity of the first step is O(s * m * n ^ 2) where s is the 464 // average size of the fields specified in 'key_fields', m is the number of 465 // fields in 'key_fields' and n is the number of elements. If partial 466 // matching is enabled, an extra O(n^3) will be incured by the maximum 467 // matching algorithm. The second step is O(k * n) where k is the average 468 // size of each element. 469 void TreatAsMapWithMultipleFieldsAsKey( 470 const FieldDescriptor* field, 471 const std::vector<const FieldDescriptor*>& key_fields); 472 // Same as TreatAsMapWithMultipleFieldsAsKey, except that each of the field 473 // do not necessarily need to be a direct subfield. Each element in 474 // key_field_paths indicate a path from the message being compared, listing 475 // successive subfield to reach the key field. 476 // 477 // REQUIRES: 478 // for key_field_path in key_field_paths: 479 // key_field_path[0]->containing_type() == field->message_type() 480 // for i in [0, key_field_path.size() - 1): 481 // key_field_path[i+1]->containing_type() == 482 // key_field_path[i]->message_type() 483 // key_field_path[i]->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE 484 // !key_field_path[i]->is_repeated() 485 void TreatAsMapWithMultipleFieldPathsAsKey( 486 const FieldDescriptor* field, 487 const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths); 488 489 // Uses a custom MapKeyComparator to determine if two elements have the same 490 // key when comparing a repeated field as a map. 491 // The caller is responsible to delete the key_comparator. 492 // This method varies from TreatAsMapWithMultipleFieldsAsKey only in the 493 // first key matching step. Rather than comparing some specified fields, it 494 // will invoke the IsMatch method of the given 'key_comparator' to decide if 495 // two elements have the same key. 496 void TreatAsMapUsingKeyComparator(const FieldDescriptor* field, 497 const MapKeyComparator* key_comparator); 498 499 // Initiates and returns a new instance of MultipleFieldsMapKeyComparator. 500 MapKeyComparator* CreateMultipleFieldsMapKeyComparator( 501 const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths); 502 503 // Add a custom ignore criteria that is evaluated in addition to the 504 // ignored fields added with IgnoreField. 505 // Takes ownership of ignore_criteria. 506 void AddIgnoreCriteria(IgnoreCriteria* ignore_criteria); 507 508 // Indicates that any field with the given descriptor should be 509 // ignored for the purposes of comparing two messages. This applies 510 // to fields nested in the message structure as well as top level 511 // ones. When the MessageDifferencer encounters an ignored field, 512 // ReportIgnored is called on the reporter, if one is specified. 513 // 514 // The only place where the field's 'ignored' status is not applied is when 515 // it is being used as a key in a field passed to TreatAsMap or is one of 516 // the fields passed to TreatAsMapWithMultipleFieldsAsKey. 517 // In this case it is compared in key matching but after that it's ignored 518 // in value comparison. 519 void IgnoreField(const FieldDescriptor* field); 520 521 // Sets the field comparator used to determine differences between protocol 522 // buffer fields. By default it's set to a DefaultFieldComparator instance. 523 // MessageDifferencer doesn't take ownership over the passed object. 524 // Note that this method must be called before Compare for the comparator to 525 // be used. 526 void set_field_comparator(FieldComparator* comparator); 527 #ifdef PROTOBUF_FUTURE_BREAKING_CHANGES 528 void set_field_comparator(DefaultFieldComparator* comparator); 529 #endif // PROTOBUF_FUTURE_BREAKING_CHANGES 530 531 // DEPRECATED. Pass a DefaultFieldComparator instance instead. 532 // Sets the fraction and margin for the float comparison of a given field. 533 // Uses MathUtil::WithinFractionOrMargin to compare the values. 534 // NOTE: this method does nothing if differencer's field comparator has been 535 // set to a custom object. 536 // 537 // REQUIRES: field->cpp_type == FieldDescriptor::CPPTYPE_DOUBLE or 538 // field->cpp_type == FieldDescriptor::CPPTYPE_FLOAT 539 // REQUIRES: float_comparison_ == APPROXIMATE 540 void SetFractionAndMargin(const FieldDescriptor* field, double fraction, 541 double margin); 542 543 // Sets the type of comparison (as defined in the MessageFieldComparison 544 // enumeration above) that is used by this differencer when determining how 545 // to compare fields in messages. 546 void set_message_field_comparison(MessageFieldComparison comparison); 547 548 // Returns the current message field comparison used in this differencer. 549 MessageFieldComparison message_field_comparison() const; 550 551 // Tells the differencer whether or not to report matches. This method must 552 // be called before Compare. The default for a new differencer is false. set_report_matches(bool report_matches)553 void set_report_matches(bool report_matches) { 554 report_matches_ = report_matches; 555 } 556 557 // Tells the differencer whether or not to report moves (in a set or map 558 // repeated field). This method must be called before Compare. The default for 559 // a new differencer is true. set_report_moves(bool report_moves)560 void set_report_moves(bool report_moves) { report_moves_ = report_moves; } 561 562 // Tells the differencer whether or not to report ignored values. This method 563 // must be called before Compare. The default for a new differencer is true. set_report_ignores(bool report_ignores)564 void set_report_ignores(bool report_ignores) { 565 report_ignores_ = report_ignores; 566 } 567 568 // Sets the scope of the comparison (as defined in the Scope enumeration 569 // above) that is used by this differencer when determining which fields to 570 // compare between the messages. 571 void set_scope(Scope scope); 572 573 // Returns the current scope used by this differencer. 574 Scope scope() const; 575 576 // DEPRECATED. Pass a DefaultFieldComparator instance instead. 577 // Sets the type of comparison (as defined in the FloatComparison enumeration 578 // above) that is used by this differencer when comparing float (and double) 579 // fields in messages. 580 // NOTE: this method does nothing if differencer's field comparator has been 581 // set to a custom object. 582 void set_float_comparison(FloatComparison comparison); 583 584 // Sets the type of comparison for repeated field (as defined in the 585 // RepeatedFieldComparison enumeration above) that is used by this 586 // differencer when compare repeated fields in messages. 587 void set_repeated_field_comparison(RepeatedFieldComparison comparison); 588 589 // Returns the current repeated field comparison used by this differencer. 590 RepeatedFieldComparison repeated_field_comparison() const; 591 592 // Compares the two specified messages, returning true if they are the same, 593 // false otherwise. If this method returns false, any changes between the 594 // two messages will be reported if a Reporter was specified via 595 // ReportDifferencesTo (see also ReportDifferencesToString). 596 // 597 // This method REQUIRES that the two messages have the same 598 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 599 bool Compare(const Message& message1, const Message& message2); 600 601 // Same as above, except comparing only the list of fields specified by the 602 // two vectors of FieldDescriptors. 603 bool CompareWithFields( 604 const Message& message1, const Message& message2, 605 const std::vector<const FieldDescriptor*>& message1_fields, 606 const std::vector<const FieldDescriptor*>& message2_fields); 607 608 // Automatically creates a reporter that will output the differences 609 // found (if any) to the specified output string pointer. Note that this 610 // method must be called before Compare. 611 void ReportDifferencesToString(std::string* output); 612 613 // Tells the MessageDifferencer to report differences via the specified 614 // reporter. Note that this method must be called before Compare for 615 // the reporter to be used. It is the responsibility of the caller to delete 616 // this object. 617 // If the provided pointer equals NULL, the MessageDifferencer stops reporting 618 // differences to any previously set reporters or output strings. 619 void ReportDifferencesTo(Reporter* reporter); 620 621 private: 622 // Class for processing Any deserialization. This logic is used by both the 623 // MessageDifferencer and StreamReporter classes. 624 class UnpackAnyField { 625 private: 626 std::unique_ptr<DynamicMessageFactory> dynamic_message_factory_; 627 628 public: 629 UnpackAnyField() = default; 630 ~UnpackAnyField() = default; 631 // If "any" is of type google.protobuf.Any, extract its payload using 632 // DynamicMessageFactory and store in "data". 633 bool UnpackAny(const Message& any, std::unique_ptr<Message>* data); 634 }; 635 636 public: 637 // An implementation of the MessageDifferencer Reporter that outputs 638 // any differences found in human-readable form to the supplied 639 // ZeroCopyOutputStream or Printer. If a printer is used, the delimiter 640 // *must* be '$'. 641 // 642 // WARNING: this reporter does not necessarily flush its output until it is 643 // destroyed. As a result, it is not safe to assume the output is valid or 644 // complete until after you destroy the reporter. For example, if you use a 645 // StreamReporter to write to a StringOutputStream, the target string may 646 // contain uninitialized data until the reporter is destroyed. 647 class PROTOBUF_EXPORT StreamReporter : public Reporter { 648 public: 649 explicit StreamReporter(io::ZeroCopyOutputStream* output); 650 explicit StreamReporter(io::Printer* printer); // delimiter '$' 651 ~StreamReporter() override; 652 653 // When set to true, the stream reporter will also output aggregates nodes 654 // (i.e. messages and groups) whose subfields have been modified. When 655 // false, will only report the individual subfields. Defaults to false. set_report_modified_aggregates(bool report)656 void set_report_modified_aggregates(bool report) { 657 report_modified_aggregates_ = report; 658 } 659 660 // The following are implementations of the methods described above. 661 662 void ReportAdded(const Message& message1, const Message& message2, 663 const std::vector<SpecificField>& field_path) override; 664 665 void ReportDeleted(const Message& message1, const Message& message2, 666 const std::vector<SpecificField>& field_path) override; 667 668 void ReportModified(const Message& message1, const Message& message2, 669 const std::vector<SpecificField>& field_path) override; 670 671 void ReportMoved(const Message& message1, const Message& message2, 672 const std::vector<SpecificField>& field_path) override; 673 674 void ReportMatched(const Message& message1, const Message& message2, 675 const std::vector<SpecificField>& field_path) override; 676 677 void ReportIgnored(const Message& message1, const Message& message2, 678 const std::vector<SpecificField>& field_path) override; 679 680 void ReportUnknownFieldIgnored( 681 const Message& message1, const Message& message2, 682 const std::vector<SpecificField>& field_path) override; 683 684 // Messages that are being compared must be provided to StreamReporter prior 685 // to processing 686 void SetMessages(const Message& message1, const Message& message2); 687 688 protected: 689 // Prints the specified path of fields to the buffer. 690 virtual void PrintPath(const std::vector<SpecificField>& field_path, 691 bool left_side); 692 693 // Prints the value of fields to the buffer. left_side is true if the 694 // given message is from the left side of the comparison, false if it 695 // was the right. This is relevant only to decide whether to follow 696 // unknown_field_index1 or unknown_field_index2 when an unknown field 697 // is encountered in field_path. 698 virtual void PrintValue(const Message& message, 699 const std::vector<SpecificField>& field_path, 700 bool left_side); 701 702 // Prints the specified path of unknown fields to the buffer. 703 virtual void PrintUnknownFieldValue(const UnknownField* unknown_field); 704 705 // Just print a string 706 void Print(const std::string& str); 707 708 private: 709 // helper function for PrintPath that contains logic for printing maps 710 void PrintMapKey(bool left_side, const SpecificField& specific_field); 711 712 io::Printer* printer_; 713 bool delete_printer_; 714 bool report_modified_aggregates_; 715 const Message* message1_; 716 const Message* message2_; 717 MessageDifferencer::UnpackAnyField unpack_any_field_; 718 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StreamReporter); 719 }; 720 721 private: 722 friend class SimpleFieldComparator; 723 724 // A MapKeyComparator to be used in TreatAsMapUsingKeyComparator. 725 // Implementation of this class needs to do field value comparison which 726 // relies on some private methods of MessageDifferencer. That's why this 727 // class is declared as a nested class of MessageDifferencer. 728 class MultipleFieldsMapKeyComparator; 729 730 // A MapKeyComparator for use with map_entries. 731 class PROTOBUF_EXPORT MapEntryKeyComparator : public MapKeyComparator { 732 public: 733 explicit MapEntryKeyComparator(MessageDifferencer* message_differencer); 734 bool IsMatch( 735 const Message& message1, const Message& message2, 736 const std::vector<SpecificField>& parent_fields) const override; 737 738 private: 739 MessageDifferencer* message_differencer_; 740 }; 741 742 // Returns true if field1's number() is less than field2's. 743 static bool FieldBefore(const FieldDescriptor* field1, 744 const FieldDescriptor* field2); 745 746 // Retrieve all the set fields, including extensions. 747 FieldDescriptorArray RetrieveFields(const Message& message, 748 bool base_message); 749 750 // Combine the two lists of fields into the combined_fields output vector. 751 // All fields present in both lists will always be included in the combined 752 // list. Fields only present in one of the lists will only appear in the 753 // combined list if the corresponding fields_scope option is set to FULL. 754 FieldDescriptorArray CombineFields(const FieldDescriptorArray& fields1, 755 Scope fields1_scope, 756 const FieldDescriptorArray& fields2, 757 Scope fields2_scope); 758 759 // Internal version of the Compare method which performs the actual 760 // comparison. The parent_fields vector is a vector containing field 761 // descriptors of all fields accessed to get to this comparison operation 762 // (i.e. if the current message is an embedded message, the parent_fields 763 // vector will contain the field that has this embedded message). 764 bool Compare(const Message& message1, const Message& message2, 765 std::vector<SpecificField>* parent_fields); 766 767 // Compares all the unknown fields in two messages. 768 bool CompareUnknownFields(const Message& message1, const Message& message2, 769 const UnknownFieldSet&, const UnknownFieldSet&, 770 std::vector<SpecificField>* parent_fields); 771 772 // Compares the specified messages for the requested field lists. The field 773 // lists are modified depending on comparison settings, and then passed to 774 // CompareWithFieldsInternal. 775 bool CompareRequestedFieldsUsingSettings( 776 const Message& message1, const Message& message2, 777 const FieldDescriptorArray& message1_fields, 778 const FieldDescriptorArray& message2_fields, 779 std::vector<SpecificField>* parent_fields); 780 781 // Compares the specified messages with the specified field lists. 782 bool CompareWithFieldsInternal(const Message& message1, 783 const Message& message2, 784 const FieldDescriptorArray& message1_fields, 785 const FieldDescriptorArray& message2_fields, 786 std::vector<SpecificField>* parent_fields); 787 788 // Compares the repeated fields, and report the error. 789 bool CompareRepeatedField(const Message& message1, const Message& message2, 790 const FieldDescriptor* field, 791 std::vector<SpecificField>* parent_fields); 792 793 // Compares map fields, and report the error. 794 bool CompareMapField(const Message& message1, const Message& message2, 795 const FieldDescriptor* field, 796 std::vector<SpecificField>* parent_fields); 797 798 // Helper for CompareRepeatedField and CompareMapField: compares and reports 799 // differences element-wise. This is the implementation for non-map fields, 800 // and can also compare map fields by using the underlying representation. 801 bool CompareRepeatedRep(const Message& message1, const Message& message2, 802 const FieldDescriptor* field, 803 std::vector<SpecificField>* parent_fields); 804 805 // Helper for CompareMapField: compare the map fields using map reflection 806 // instead of sync to repeated. 807 bool CompareMapFieldByMapReflection(const Message& message1, 808 const Message& message2, 809 const FieldDescriptor* field, 810 std::vector<SpecificField>* parent_fields, 811 DefaultFieldComparator* comparator); 812 813 // Shorthand for CompareFieldValueUsingParentFields with NULL parent_fields. 814 bool CompareFieldValue(const Message& message1, const Message& message2, 815 const FieldDescriptor* field, int index1, int index2); 816 817 // Compares the specified field on the two messages, returning 818 // true if they are the same, false otherwise. For repeated fields, 819 // this method only compares the value in the specified index. This method 820 // uses Compare functions to recurse into submessages. 821 // The parent_fields vector is used in calls to a Reporter instance calls. 822 // It can be NULL, in which case the MessageDifferencer will create new 823 // list of parent messages if it needs to recursively compare the given field. 824 // To avoid confusing users you should not set it to NULL unless you modified 825 // Reporter to handle the change of parent_fields correctly. 826 bool CompareFieldValueUsingParentFields( 827 const Message& message1, const Message& message2, 828 const FieldDescriptor* field, int index1, int index2, 829 std::vector<SpecificField>* parent_fields); 830 831 // Compares the specified field on the two messages, returning comparison 832 // result, as returned by appropriate FieldComparator. 833 FieldComparator::ComparisonResult GetFieldComparisonResult( 834 const Message& message1, const Message& message2, 835 const FieldDescriptor* field, int index1, int index2, 836 const FieldContext* field_context); 837 838 // Check if the two elements in the repeated field are match to each other. 839 // if the key_comprator is NULL, this function returns true when the two 840 // elements are equal. 841 bool IsMatch(const FieldDescriptor* repeated_field, 842 const MapKeyComparator* key_comparator, const Message* message1, 843 const Message* message2, 844 const std::vector<SpecificField>& parent_fields, 845 Reporter* reporter, int index1, int index2); 846 847 // Returns true when this repeated field has been configured to be treated 848 // as a Set / SmartSet / SmartList. 849 bool IsTreatedAsSet(const FieldDescriptor* field); 850 bool IsTreatedAsSmartSet(const FieldDescriptor* field); 851 852 bool IsTreatedAsSmartList(const FieldDescriptor* field); 853 // When treating as SMART_LIST, it uses MatchIndicesPostProcessorForSmartList 854 // by default to find the longest matching sequence from the first matching 855 // element. The callback takes two vectors showing the matching indices from 856 // the other vector, where -1 means an unmatch. 857 void SetMatchIndicesForSmartListCallback( 858 std::function<void(std::vector<int>*, std::vector<int>*)> callback); 859 860 // Returns true when this repeated field is to be compared as a subset, ie. 861 // has been configured to be treated as a set or map and scope is set to 862 // PARTIAL. 863 bool IsTreatedAsSubset(const FieldDescriptor* field); 864 865 // Returns true if this field is to be ignored when this 866 // MessageDifferencer compares messages. 867 bool IsIgnored(const Message& message1, const Message& message2, 868 const FieldDescriptor* field, 869 const std::vector<SpecificField>& parent_fields); 870 871 // Returns true if this unknown field is to be ignored when this 872 // MessageDifferencer compares messages. 873 bool IsUnknownFieldIgnored(const Message& message1, const Message& message2, 874 const SpecificField& field, 875 const std::vector<SpecificField>& parent_fields); 876 877 // Returns MapKeyComparator* when this field has been configured to be treated 878 // as a map or its is_map() return true. If not, returns NULL. 879 const MapKeyComparator* GetMapKeyComparator( 880 const FieldDescriptor* field) const; 881 882 // Attempts to match indices of a repeated field, so that the contained values 883 // match. Clears output vectors and sets their values to indices of paired 884 // messages, ie. if message1[0] matches message2[1], then match_list1[0] == 1 885 // and match_list2[1] == 0. The unmatched indices are indicated by -1. 886 // Assumes the repeated field is not treated as a simple list. 887 // This method returns false if the match failed. However, it doesn't mean 888 // that the comparison succeeds when this method returns true (you need to 889 // double-check in this case). 890 bool MatchRepeatedFieldIndices( 891 const Message& message1, const Message& message2, 892 const FieldDescriptor* repeated_field, 893 const MapKeyComparator* key_comparator, 894 const std::vector<SpecificField>& parent_fields, 895 std::vector<int>* match_list1, std::vector<int>* match_list2); 896 897 // Checks if index is equal to new_index in all the specific fields. 898 static bool CheckPathChanged(const std::vector<SpecificField>& parent_fields); 899 900 // CHECKs that the given repeated field can be compared according to 901 // new_comparison. 902 void CheckRepeatedFieldComparisons( 903 const FieldDescriptor* field, 904 const RepeatedFieldComparison& new_comparison); 905 906 // Defines a map between field descriptors and their MapKeyComparators. 907 // Used for repeated fields when they are configured as TreatAsMap. 908 typedef std::map<const FieldDescriptor*, const MapKeyComparator*> 909 FieldKeyComparatorMap; 910 911 // Defines a set to store field descriptors. Used for repeated fields when 912 // they are configured as TreatAsSet. 913 typedef std::set<const FieldDescriptor*> FieldSet; 914 typedef std::map<const FieldDescriptor*, RepeatedFieldComparison> FieldMap; 915 916 Reporter* reporter_; 917 DefaultFieldComparator default_field_comparator_; 918 MessageFieldComparison message_field_comparison_; 919 Scope scope_; 920 RepeatedFieldComparison repeated_field_comparison_; 921 922 FieldMap repeated_field_comparisons_; 923 // Keeps track of MapKeyComparators that are created within 924 // MessageDifferencer. These MapKeyComparators should be deleted 925 // before MessageDifferencer is destroyed. 926 // When TreatAsMap or TreatAsMapWithMultipleFieldsAsKey is called, we don't 927 // store the supplied FieldDescriptors directly. Instead, a new 928 // MapKeyComparator is created for comparison purpose. 929 std::vector<MapKeyComparator*> owned_key_comparators_; 930 FieldKeyComparatorMap map_field_key_comparator_; 931 MapEntryKeyComparator map_entry_key_comparator_; 932 std::vector<IgnoreCriteria*> ignore_criteria_; 933 // Reused multiple times in RetrieveFields to avoid extra allocations 934 std::vector<const FieldDescriptor*> tmp_message_fields_; 935 936 FieldSet ignored_fields_; 937 938 union { 939 DefaultFieldComparator* default_impl; 940 FieldComparator* base; 941 } field_comparator_ = {&default_field_comparator_}; 942 enum { kFCDefault, kFCBase } field_comparator_kind_ = kFCDefault; 943 944 bool report_matches_; 945 bool report_moves_; 946 bool report_ignores_; 947 948 std::string* output_string_; 949 950 // Callback to post-process the matched indices to support SMART_LIST. 951 std::function<void(std::vector<int>*, std::vector<int>*)> 952 match_indices_for_smart_list_callback_; 953 954 MessageDifferencer::UnpackAnyField unpack_any_field_; 955 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MessageDifferencer); 956 }; 957 958 // This class provides extra information to the FieldComparator::Compare 959 // function. 960 class PROTOBUF_EXPORT FieldContext { 961 public: FieldContext(std::vector<MessageDifferencer::SpecificField> * parent_fields)962 explicit FieldContext( 963 std::vector<MessageDifferencer::SpecificField>* parent_fields) 964 : parent_fields_(parent_fields) {} 965 parent_fields()966 std::vector<MessageDifferencer::SpecificField>* parent_fields() const { 967 return parent_fields_; 968 } 969 970 private: 971 std::vector<MessageDifferencer::SpecificField>* parent_fields_; 972 }; 973 974 } // namespace util 975 } // namespace protobuf 976 } // namespace google 977 978 #include <google/protobuf/port_undef.inc> 979 980 #endif // GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__ 981