1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // https://developers.google.com/protocol-buffers/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 // Author: jschorr@google.com (Joseph Schorr) 32 // Based on original Protocol Buffers design by 33 // Sanjay Ghemawat, Jeff Dean, and others. 34 // 35 // This file defines static methods and classes for comparing Protocol 36 // Messages. 37 // 38 // Aug. 
2008: Added Unknown Fields Comparison for messages. 39 // Aug. 2009: Added different options to compare repeated fields. 40 // Apr. 2010: Moved field comparison to FieldComparator. 41 42 #ifndef GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__ 43 #define GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__ 44 45 #include <functional> 46 #include <map> 47 #include <set> 48 #include <string> 49 #include <vector> 50 51 #include <google/protobuf/descriptor.h> // FieldDescriptor 52 #include <google/protobuf/message.h> // Message 53 #include <google/protobuf/unknown_field_set.h> 54 #include <google/protobuf/util/field_comparator.h> 55 56 // Always include as last one, otherwise it can break compilation 57 #include <google/protobuf/port_def.inc> 58 59 namespace google { 60 namespace protobuf { 61 62 class DynamicMessageFactory; 63 class FieldDescriptor; 64 65 namespace io { 66 class ZeroCopyOutputStream; 67 class Printer; 68 } // namespace io 69 70 namespace util { 71 72 class DefaultFieldComparator; 73 class FieldContext; // declared below MessageDifferencer 74 75 // Defines a collection of field descriptors. 76 // In case of internal google codebase we are using absl::FixedArray instead 77 // of vector. It significantly speeds up proto comparison (by ~30%) by 78 // reducing the number of malloc/free operations 79 typedef std::vector<const FieldDescriptor*> FieldDescriptorArray; 80 81 // A basic differencer that can be used to determine 82 // the differences between two specified Protocol Messages. If any differences 83 // are found, the Compare method will return false, and any differencer reporter 84 // specified via ReportDifferencesTo will have its reporting methods called (see 85 // below for implementation of the report). Based off of the original 86 // ProtocolDifferencer implementation in //net/proto/protocol-differencer.h 87 // (Thanks Todd!). 
88 // 89 // MessageDifferencer REQUIRES that compared messages be the same type, defined 90 // as messages that share the same descriptor. If not, the behavior of this 91 // class is undefined. 92 // 93 // People disagree on what MessageDifferencer should do when asked to compare 94 // messages with different descriptors. Some people think it should always 95 // return false. Others expect it to try to look for similar fields and 96 // compare them anyway -- especially if the descriptors happen to be identical. 97 // If we chose either of these behaviors, some set of people would find it 98 // surprising, and could end up writing code expecting the other behavior 99 // without realizing their error. Therefore, we forbid that usage. 100 // 101 // This class is implemented based on the proto2 reflection. The performance 102 // should be good enough for normal usages. However, for places where the 103 // performance is extremely sensitive, there are several alternatives: 104 // - Comparing serialized string 105 // Downside: false negatives (there are messages that are the same but their 106 // serialized strings are different). 107 // - Equals code generator by compiler plugin (net/proto2/contrib/equals_plugin) 108 // Downside: more generated code; maintenance overhead for the additional rule 109 // (must be in sync with the original proto_library). 110 // 111 // Note on handling of google.protobuf.Any: MessageDifferencer automatically 112 // unpacks Any::value into a Message and compares its individual fields. 113 // Messages encoded in a repeated Any cannot be compared using TreatAsMap. 114 // 115 // Note on thread-safety: MessageDifferencer is *not* thread-safe. You need to 116 // guard it with a lock to use the same MessageDifferencer instance from 117 // multiple threads. 
Note that it's fine to call static comparison methods 118 // (like MessageDifferencer::Equals) concurrently, but it's not recommended for 119 // performance critical code as it leads to extra allocations. 120 class PROTOBUF_EXPORT MessageDifferencer { 121 public: 122 // Determines whether the supplied messages are equal. Equality is defined as 123 // all fields within the two messages being set to the same value. Primitive 124 // fields and strings are compared by value while embedded messages/groups 125 // are compared as if via a recursive call. Use Compare() with IgnoreField() 126 // if some fields should be ignored in the comparison. Use Compare() with 127 // TreatAsSet() if there are repeated fields where ordering does not matter. 128 // 129 // This method REQUIRES that the two messages have the same 130 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 131 static bool Equals(const Message& message1, const Message& message2); 132 133 // Determines whether the supplied messages are equivalent. Equivalency is 134 // defined as all fields within the two messages having the same value. This 135 // differs from the Equals method above in that fields with default values 136 // are considered set to said value automatically. For details on how default 137 // values are defined for each field type, see: 138 // https://developers.google.com/protocol-buffers/docs/proto?csw=1#optional. 139 // Also, Equivalent() ignores unknown fields. Use IgnoreField() and Compare() 140 // if some fields should be ignored in the comparison. 141 // 142 // This method REQUIRES that the two messages have the same 143 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 144 static bool Equivalent(const Message& message1, const Message& message2); 145 146 // Determines whether the supplied messages are approximately equal. 147 // Approximate equality is defined as all fields within the two messages 148 // being approximately equal. 
Primitive (non-float) fields and strings are 149 // compared by value, floats are compared using MathUtil::AlmostEquals() and 150 // embedded messages/groups are compared as if via a recursive call. Use 151 // IgnoreField() and Compare() if some fields should be ignored in the 152 // comparison. 153 // 154 // This method REQUIRES that the two messages have the same 155 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 156 static bool ApproximatelyEquals(const Message& message1, 157 const Message& message2); 158 159 // Determines whether the supplied messages are approximately equivalent. 160 // Approximate equivalency is defined as all fields within the two messages 161 // being approximately equivalent. As in 162 // MessageDifferencer::ApproximatelyEquals, primitive (non-float) fields and 163 // strings are compared by value, floats are compared using 164 // MathUtil::AlmostEquals() and embedded messages/groups are compared as if 165 // via a recursive call. However, fields with default values are considered 166 // set to said value, as per MessageDiffencer::Equivalent. Use IgnoreField() 167 // and Compare() if some fields should be ignored in the comparison. 168 // 169 // This method REQUIRES that the two messages have the same 170 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 171 static bool ApproximatelyEquivalent(const Message& message1, 172 const Message& message2); 173 174 // Identifies an individual field in a message instance. Used for field_path, 175 // below. 176 struct SpecificField { 177 // For known fields, "field" is filled in and "unknown_field_number" is -1. 178 // For unknown fields, "field" is NULL, "unknown_field_number" is the field 179 // number, and "unknown_field_type" is its type. 180 const FieldDescriptor* field; 181 int unknown_field_number; 182 UnknownField::Type unknown_field_type; 183 184 // If this a repeated field, "index" is the index within it. 
For unknown 185 // fields, this is the index of the field among all unknown fields of the 186 // same field number and type. 187 int index; 188 189 // If "field" is a repeated field which is being treated as a map or 190 // a set (see TreatAsMap() and TreatAsSet(), below), new_index indicates 191 // the index the position to which the element has moved. If the element 192 // has not moved, "new_index" will have the same value as "index". 193 int new_index; 194 195 // For unknown fields, these are the pointers to the UnknownFieldSet 196 // containing the unknown fields. In certain cases (e.g. proto1's 197 // MessageSet, or nested groups of unknown fields), these may differ from 198 // the messages' internal UnknownFieldSets. 199 const UnknownFieldSet* unknown_field_set1; 200 const UnknownFieldSet* unknown_field_set2; 201 202 // For unknown fields, these are the index of the field within the 203 // UnknownFieldSets. One or the other will be -1 when 204 // reporting an addition or deletion. 205 int unknown_field_index1; 206 int unknown_field_index2; 207 SpecificFieldSpecificField208 SpecificField() 209 : field(NULL), 210 unknown_field_number(-1), 211 index(-1), 212 new_index(-1), 213 unknown_field_set1(NULL), 214 unknown_field_set2(NULL), 215 unknown_field_index1(-1), 216 unknown_field_index2(-1) {} 217 }; 218 219 // Abstract base class from which all MessageDifferencer 220 // reporters derive. The five Report* methods below will be called when 221 // a field has been added, deleted, modified, moved, or matched. The third 222 // argument is a vector of FieldDescriptor pointers which describes the chain 223 // of fields that was taken to find the current field. For example, for a 224 // field found in an embedded message, the vector will contain two 225 // FieldDescriptors. The first will be the field of the embedded message 226 // itself and the second will be the actual field in the embedded message 227 // that was added/deleted/modified. 
228 // Fields will be reported in PostTraversalOrder. 229 // For example, given following proto, if both baz and quux are changed. 230 // foo { 231 // bar { 232 // baz: 1 233 // quux: 2 234 // } 235 // } 236 // ReportModified will be invoked with following order: 237 // 1. foo.bar.baz or foo.bar.quux 238 // 2. foo.bar.quux or foo.bar.baz 239 // 2. foo.bar 240 // 3. foo 241 class PROTOBUF_EXPORT Reporter { 242 public: 243 Reporter(); 244 virtual ~Reporter(); 245 246 // Reports that a field has been added into Message2. 247 virtual void ReportAdded(const Message& message1, const Message& message2, 248 const std::vector<SpecificField>& field_path) = 0; 249 250 // Reports that a field has been deleted from Message1. 251 virtual void ReportDeleted( 252 const Message& message1, const Message& message2, 253 const std::vector<SpecificField>& field_path) = 0; 254 255 // Reports that the value of a field has been modified. 256 virtual void ReportModified( 257 const Message& message1, const Message& message2, 258 const std::vector<SpecificField>& field_path) = 0; 259 260 // Reports that a repeated field has been moved to another location. This 261 // only applies when using TreatAsSet or TreatAsMap() -- see below. Also 262 // note that for any given field, ReportModified and ReportMoved are 263 // mutually exclusive. If a field has been both moved and modified, then 264 // only ReportModified will be called. ReportMoved(const Message &,const Message &,const std::vector<SpecificField> &)265 virtual void ReportMoved( 266 const Message& /* message1 */, const Message& /* message2 */, 267 const std::vector<SpecificField>& /* field_path */) {} 268 269 // Reports that two fields match. Useful for doing side-by-side diffs. 270 // This function is mutually exclusive with ReportModified and ReportMoved. 271 // Note that you must call set_report_matches(true) before calling Compare 272 // to make use of this function. 
ReportMatched(const Message &,const Message &,const std::vector<SpecificField> &)273 virtual void ReportMatched( 274 const Message& /* message1 */, const Message& /* message2 */, 275 const std::vector<SpecificField>& /* field_path */) {} 276 277 // Reports that two fields would have been compared, but the 278 // comparison has been skipped because the field was marked as 279 // 'ignored' using IgnoreField(). This function is mutually 280 // exclusive with all the other Report() functions. 281 // 282 // The contract of ReportIgnored is slightly different than the 283 // other Report() functions, in that |field_path.back().index| is 284 // always equal to -1, even if the last field is repeated. This is 285 // because while the other Report() functions indicate where in a 286 // repeated field the action (Addition, Deletion, etc...) 287 // happened, when a repeated field is 'ignored', the differencer 288 // simply calls ReportIgnored on the repeated field as a whole and 289 // moves on without looking at its individual elements. 290 // 291 // Furthermore, ReportIgnored() does not indicate whether the 292 // fields were in fact equal or not, as Compare() does not inspect 293 // these fields at all. It is up to the Reporter to decide whether 294 // the fields are equal or not (perhaps with a second call to 295 // Compare()), if it cares. ReportIgnored(const Message &,const Message &,const std::vector<SpecificField> &)296 virtual void ReportIgnored( 297 const Message& /* message1 */, const Message& /* message2 */, 298 const std::vector<SpecificField>& /* field_path */) {} 299 300 // Report that an unknown field is ignored. (see comment above). 301 // Note this is a different function since the last SpecificField in field 302 // path has a null field. This could break existing Reporter. 
ReportUnknownFieldIgnored(const Message &,const Message &,const std::vector<SpecificField> &)303 virtual void ReportUnknownFieldIgnored( 304 const Message& /* message1 */, const Message& /* message2 */, 305 const std::vector<SpecificField>& /* field_path */) {} 306 307 private: 308 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Reporter); 309 }; 310 311 // MapKeyComparator is used to determine if two elements have the same key 312 // when comparing elements of a repeated field as a map. 313 class PROTOBUF_EXPORT MapKeyComparator { 314 public: 315 MapKeyComparator(); 316 virtual ~MapKeyComparator(); 317 IsMatch(const Message &,const Message &,const std::vector<SpecificField> &)318 virtual bool IsMatch( 319 const Message& /* message1 */, const Message& /* message2 */, 320 const std::vector<SpecificField>& /* parent_fields */) const { 321 GOOGLE_CHECK(false) << "IsMatch() is not implemented."; 322 return false; 323 } 324 325 private: 326 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MapKeyComparator); 327 }; 328 329 // Abstract base class from which all IgnoreCriteria derive. 330 // By adding IgnoreCriteria more complex ignore logic can be implemented. 331 // IgnoreCriteria are registed with AddIgnoreCriteria. For each compared 332 // field IsIgnored is called on each added IgnoreCriteria until one returns 333 // true or all return false. 334 // IsIgnored is called for fields where at least one side has a value. 335 class PROTOBUF_EXPORT IgnoreCriteria { 336 public: 337 IgnoreCriteria(); 338 virtual ~IgnoreCriteria(); 339 340 // Returns true if the field should be ignored. 341 virtual bool IsIgnored( 342 const Message& /* message1 */, const Message& /* message2 */, 343 const FieldDescriptor* /* field */, 344 const std::vector<SpecificField>& /* parent_fields */) = 0; 345 346 // Returns true if the unknown field should be ignored. 347 // Note: This will be called for unknown fields as well in which case 348 // field.field will be null. 
IsUnknownFieldIgnored(const Message &,const Message &,const SpecificField &,const std::vector<SpecificField> &)349 virtual bool IsUnknownFieldIgnored( 350 const Message& /* message1 */, const Message& /* message2 */, 351 const SpecificField& /* field */, 352 const std::vector<SpecificField>& /* parent_fields */) { 353 return false; 354 } 355 }; 356 357 // To add a Reporter, construct default here, then use ReportDifferencesTo or 358 // ReportDifferencesToString. 359 explicit MessageDifferencer(); 360 361 ~MessageDifferencer(); 362 363 enum MessageFieldComparison { 364 EQUAL, // Fields must be present in both messages 365 // for the messages to be considered the same. 366 EQUIVALENT, // Fields with default values are considered set 367 // for comparison purposes even if not explicitly 368 // set in the messages themselves. Unknown fields 369 // are ignored. 370 }; 371 372 enum Scope { 373 FULL, // All fields of both messages are considered in the comparison. 374 PARTIAL // Only fields present in the first message are considered; fields 375 // set only in the second message will be skipped during 376 // comparison. 377 }; 378 379 // DEPRECATED. Use FieldComparator::FloatComparison instead. 380 enum FloatComparison { 381 EXACT, // Floats and doubles are compared exactly. 382 APPROXIMATE // Floats and doubles are compared using the 383 // MathUtil::AlmostEquals method. 384 }; 385 386 enum RepeatedFieldComparison { 387 AS_LIST, // Repeated fields are compared in order. Differing values at 388 // the same index are reported using ReportModified(). If the 389 // repeated fields have different numbers of elements, the 390 // unpaired elements are reported using ReportAdded() or 391 // ReportDeleted(). 392 AS_SET, // Treat all the repeated fields as sets. 393 // See TreatAsSet(), as below. 394 AS_SMART_LIST, // Similar to AS_SET, but preserve the order and find the 395 // longest matching sequence from the first matching 396 // element. 
To use an optimal solution, call 397 // SetMatchIndicesForSmartListCallback() to pass it in. 398 AS_SMART_SET, // Similar to AS_SET, but match elements with fewest diffs. 399 }; 400 401 // The elements of the given repeated field will be treated as a set for 402 // diffing purposes, so different orderings of the same elements will be 403 // considered equal. Elements which are present on both sides of the 404 // comparison but which have changed position will be reported with 405 // ReportMoved(). Elements which only exist on one side or the other are 406 // reported with ReportAdded() and ReportDeleted() regardless of their 407 // positions. ReportModified() is never used for this repeated field. If 408 // the only differences between the compared messages is that some fields 409 // have been moved, then the comparison returns true. 410 // 411 // Note that despite the name of this method, this is really 412 // comparison as multisets: if one side of the comparison has a duplicate 413 // in the repeated field but the other side doesn't, this will count as 414 // a mismatch. 415 // 416 // If the scope of comparison is set to PARTIAL, then in addition to what's 417 // above, extra values added to repeated fields of the second message will 418 // not cause the comparison to fail. 419 // 420 // Note that set comparison is currently O(k * n^2) (where n is the total 421 // number of elements, and k is the average size of each element). In theory 422 // it could be made O(n * k) with a more complex hashing implementation. Feel 423 // free to contribute one if the current implementation is too slow for you. 424 // If partial matching is also enabled, the time complexity will be O(k * n^2 425 // + n^3) in which n^3 is the time complexity of the maximum matching 426 // algorithm. 
427 // 428 // REQUIRES: field->is_repeated() and field not registered with TreatAsList 429 void TreatAsSet(const FieldDescriptor* field); 430 void TreatAsSmartSet(const FieldDescriptor* field); 431 432 // The elements of the given repeated field will be treated as a list for 433 // diffing purposes, so different orderings of the same elements will NOT be 434 // considered equal. 435 // 436 // REQUIRED: field->is_repeated() and field not registered with TreatAsSet 437 void TreatAsList(const FieldDescriptor* field); 438 // Note that the complexity is similar to treating as SET. 439 void TreatAsSmartList(const FieldDescriptor* field); 440 441 // The elements of the given repeated field will be treated as a map for 442 // diffing purposes, with |key| being the map key. Thus, elements with the 443 // same key will be compared even if they do not appear at the same index. 444 // Differences are reported similarly to TreatAsSet(), except that 445 // ReportModified() is used to report elements with the same key but 446 // different values. Note that if an element is both moved and modified, 447 // only ReportModified() will be called. As with TreatAsSet, if the only 448 // differences between the compared messages is that some fields have been 449 // moved, then the comparison returns true. See TreatAsSet for notes on 450 // performance. 451 // 452 // REQUIRES: field->is_repeated() 453 // REQUIRES: field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE 454 // REQUIRES: key->containing_type() == field->message_type() 455 void TreatAsMap(const FieldDescriptor* field, const FieldDescriptor* key); 456 // Same as TreatAsMap except that this method will use multiple fields as 457 // the key in comparison. All specified fields in 'key_fields' should be 458 // present in the compared elements. Two elements will be treated as having 459 // the same key iff they have the same value for every specified field. There 460 // are two steps in the comparison process. 
The first one is key matching. 461 // Every element from one message will be compared to every element from 462 // the other message. Only fields in 'key_fields' are compared in this step 463 // to decide if two elements have the same key. The second step is value 464 // comparison. Those pairs of elements with the same key (with equal value 465 // for every field in 'key_fields') will be compared in this step. 466 // Time complexity of the first step is O(s * m * n ^ 2) where s is the 467 // average size of the fields specified in 'key_fields', m is the number of 468 // fields in 'key_fields' and n is the number of elements. If partial 469 // matching is enabled, an extra O(n^3) will be incured by the maximum 470 // matching algorithm. The second step is O(k * n) where k is the average 471 // size of each element. 472 void TreatAsMapWithMultipleFieldsAsKey( 473 const FieldDescriptor* field, 474 const std::vector<const FieldDescriptor*>& key_fields); 475 // Same as TreatAsMapWithMultipleFieldsAsKey, except that each of the field 476 // do not necessarily need to be a direct subfield. Each element in 477 // key_field_paths indicate a path from the message being compared, listing 478 // successive subfield to reach the key field. 479 // 480 // REQUIRES: 481 // for key_field_path in key_field_paths: 482 // key_field_path[0]->containing_type() == field->message_type() 483 // for i in [0, key_field_path.size() - 1): 484 // key_field_path[i+1]->containing_type() == 485 // key_field_path[i]->message_type() 486 // key_field_path[i]->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE 487 // !key_field_path[i]->is_repeated() 488 void TreatAsMapWithMultipleFieldPathsAsKey( 489 const FieldDescriptor* field, 490 const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths); 491 492 // Uses a custom MapKeyComparator to determine if two elements have the same 493 // key when comparing a repeated field as a map. 
494 // The caller is responsible to delete the key_comparator. 495 // This method varies from TreatAsMapWithMultipleFieldsAsKey only in the 496 // first key matching step. Rather than comparing some specified fields, it 497 // will invoke the IsMatch method of the given 'key_comparator' to decide if 498 // two elements have the same key. 499 void TreatAsMapUsingKeyComparator(const FieldDescriptor* field, 500 const MapKeyComparator* key_comparator); 501 502 // Initiates and returns a new instance of MultipleFieldsMapKeyComparator. 503 MapKeyComparator* CreateMultipleFieldsMapKeyComparator( 504 const std::vector<std::vector<const FieldDescriptor*> >& key_field_paths); 505 506 // Add a custom ignore criteria that is evaluated in addition to the 507 // ignored fields added with IgnoreField. 508 // Takes ownership of ignore_criteria. 509 void AddIgnoreCriteria(IgnoreCriteria* ignore_criteria); 510 511 // Indicates that any field with the given descriptor should be 512 // ignored for the purposes of comparing two messages. This applies 513 // to fields nested in the message structure as well as top level 514 // ones. When the MessageDifferencer encounters an ignored field, 515 // ReportIgnored is called on the reporter, if one is specified. 516 // 517 // The only place where the field's 'ignored' status is not applied is when 518 // it is being used as a key in a field passed to TreatAsMap or is one of 519 // the fields passed to TreatAsMapWithMultipleFieldsAsKey. 520 // In this case it is compared in key matching but after that it's ignored 521 // in value comparison. 522 void IgnoreField(const FieldDescriptor* field); 523 524 // Sets the field comparator used to determine differences between protocol 525 // buffer fields. By default it's set to a DefaultFieldComparator instance. 526 // MessageDifferencer doesn't take ownership over the passed object. 527 // Note that this method must be called before Compare for the comparator to 528 // be used. 
529 void set_field_comparator(FieldComparator* comparator); 530 531 // DEPRECATED. Pass a DefaultFieldComparator instance instead. 532 // Sets the fraction and margin for the float comparison of a given field. 533 // Uses MathUtil::WithinFractionOrMargin to compare the values. 534 // NOTE: this method does nothing if differencer's field comparator has been 535 // set to a custom object. 536 // 537 // REQUIRES: field->cpp_type == FieldDescriptor::CPPTYPE_DOUBLE or 538 // field->cpp_type == FieldDescriptor::CPPTYPE_FLOAT 539 // REQUIRES: float_comparison_ == APPROXIMATE 540 void SetFractionAndMargin(const FieldDescriptor* field, double fraction, 541 double margin); 542 543 // Sets the type of comparison (as defined in the MessageFieldComparison 544 // enumeration above) that is used by this differencer when determining how 545 // to compare fields in messages. 546 void set_message_field_comparison(MessageFieldComparison comparison); 547 548 // Tells the differencer whether or not to report matches. This method must 549 // be called before Compare. The default for a new differencer is false. set_report_matches(bool report_matches)550 void set_report_matches(bool report_matches) { 551 report_matches_ = report_matches; 552 } 553 554 // Tells the differencer whether or not to report moves (in a set or map 555 // repeated field). This method must be called before Compare. The default for 556 // a new differencer is true. set_report_moves(bool report_moves)557 void set_report_moves(bool report_moves) { report_moves_ = report_moves; } 558 559 // Tells the differencer whether or not to report ignored values. This method 560 // must be called before Compare. The default for a new differencer is true. 
set_report_ignores(bool report_ignores)561 void set_report_ignores(bool report_ignores) { 562 report_ignores_ = report_ignores; 563 } 564 565 // Sets the scope of the comparison (as defined in the Scope enumeration 566 // above) that is used by this differencer when determining which fields to 567 // compare between the messages. 568 void set_scope(Scope scope); 569 570 // Returns the current scope used by this differencer. 571 Scope scope(); 572 573 // DEPRECATED. Pass a DefaultFieldComparator instance instead. 574 // Sets the type of comparison (as defined in the FloatComparison enumeration 575 // above) that is used by this differencer when comparing float (and double) 576 // fields in messages. 577 // NOTE: this method does nothing if differencer's field comparator has been 578 // set to a custom object. 579 void set_float_comparison(FloatComparison comparison); 580 581 // Sets the type of comparison for repeated field (as defined in the 582 // RepeatedFieldComparison enumeration above) that is used by this 583 // differencer when compare repeated fields in messages. 584 void set_repeated_field_comparison(RepeatedFieldComparison comparison); 585 586 // Returns the current repeated field comparison used by this differencer. 587 RepeatedFieldComparison repeated_field_comparison(); 588 589 // Compares the two specified messages, returning true if they are the same, 590 // false otherwise. If this method returns false, any changes between the 591 // two messages will be reported if a Reporter was specified via 592 // ReportDifferencesTo (see also ReportDifferencesToString). 593 // 594 // This method REQUIRES that the two messages have the same 595 // Descriptor (message1.GetDescriptor() == message2.GetDescriptor()). 596 bool Compare(const Message& message1, const Message& message2); 597 598 // Same as above, except comparing only the list of fields specified by the 599 // two vectors of FieldDescriptors. 
// message1_fields and message2_fields are the two field lists mentioned
// above (presumably the fields to consider on message1 and on message2
// respectively -- confirm against the implementation).
bool CompareWithFields(
    const Message& message1, const Message& message2,
    const std::vector<const FieldDescriptor*>& message1_fields,
    const std::vector<const FieldDescriptor*>& message2_fields);

// Automatically creates a reporter that will output the differences
// found (if any) to the specified output string pointer. Note that this
// method must be called before Compare.
void ReportDifferencesToString(std::string* output);

// Tells the MessageDifferencer to report differences via the specified
// reporter. Note that this method must be called before Compare for
// the reporter to be used. It is the responsibility of the caller to delete
// this object.
// If the provided pointer equals NULL, the MessageDifferencer stops reporting
// differences to any previously set reporters or output strings.
void ReportDifferencesTo(Reporter* reporter);

// An implementation of the MessageDifferencer Reporter that outputs
// any differences found in human-readable form to the supplied
// ZeroCopyOutputStream or Printer. If a printer is used, the delimiter
// *must* be '$'.
//
// WARNING: this reporter does not necessarily flush its output until it is
// destroyed. As a result, it is not safe to assume the output is valid or
// complete until after you destroy the reporter. For example, if you use a
// StreamReporter to write to a StringOutputStream, the target string may
// contain uninitialized data until the reporter is destroyed.
class PROTOBUF_EXPORT StreamReporter : public Reporter {
 public:
  // Both constructors are explicit, so a stream or printer pointer never
  // converts to a StreamReporter implicitly.
  explicit StreamReporter(io::ZeroCopyOutputStream* output);
  explicit StreamReporter(io::Printer* printer);  // delimiter '$'
  ~StreamReporter() override;

  // When set to true, the stream reporter will also output aggregate nodes
  // (i.e. messages and groups) whose subfields have been modified. When
  // false, will only report the individual subfields. Defaults to false.
  void set_report_modified_aggregates(bool report) {
    report_modified_aggregates_ = report;
  }

  // The following are implementations of the Reporter methods described
  // above. Each receives both messages and the path of fields leading to the
  // reported location.

  void ReportAdded(const Message& message1, const Message& message2,
                   const std::vector<SpecificField>& field_path) override;

  void ReportDeleted(const Message& message1, const Message& message2,
                     const std::vector<SpecificField>& field_path) override;

  void ReportModified(const Message& message1, const Message& message2,
                      const std::vector<SpecificField>& field_path) override;

  void ReportMoved(const Message& message1, const Message& message2,
                   const std::vector<SpecificField>& field_path) override;

  void ReportMatched(const Message& message1, const Message& message2,
                     const std::vector<SpecificField>& field_path) override;

  void ReportIgnored(const Message& message1, const Message& message2,
                     const std::vector<SpecificField>& field_path) override;

  void ReportUnknownFieldIgnored(
      const Message& message1, const Message& message2,
      const std::vector<SpecificField>& field_path) override;

 protected:
  // The Print* hooks below are virtual (except Print), so subclasses can
  // customize how paths and values are rendered.

  // Prints the specified path of fields to the buffer. message is used to
  // print map keys.
  virtual void PrintPath(const std::vector<SpecificField>& field_path,
                         bool left_side, const Message& message);

  // Prints the specified path of fields to the buffer.
  virtual void PrintPath(const std::vector<SpecificField>& field_path,
                         bool left_side);

  // Prints the value of fields to the buffer. left_side is true if the
  // given message is from the left side of the comparison, false if it
  // was the right. This is relevant only to decide whether to follow
  // unknown_field_index1 or unknown_field_index2 when an unknown field
  // is encountered in field_path.
  virtual void PrintValue(const Message& message,
                          const std::vector<SpecificField>& field_path,
                          bool left_side);

  // Prints the specified path of unknown fields to the buffer.
  // NOTE(review): this wording mirrors PrintPath, but the signature takes a
  // single UnknownField rather than a path -- presumably it prints that
  // field's value; confirm against the implementation.
  virtual void PrintUnknownFieldValue(const UnknownField* unknown_field);

  // Prints the given string as-is.
  void Print(const std::string& str);

 private:
  io::Printer* printer_;
  // NOTE(review): presumably records whether printer_ was created by this
  // reporter and must be deleted by it -- confirm in the .cc file.
  bool delete_printer_;
  // Written by set_report_modified_aggregates().
  bool report_modified_aggregates_;

  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StreamReporter);
};

private:
// Lets DefaultFieldComparator reach the private members declared below.
friend class DefaultFieldComparator;

// A MapKeyComparator to be used in TreatAsMapUsingKeyComparator.
// Implementation of this class needs to do field value comparison which
// relies on some private methods of MessageDifferencer. That's why this
// class is declared as a nested class of MessageDifferencer.
class MultipleFieldsMapKeyComparator;

// A MapKeyComparator for use with map_entries.
class PROTOBUF_EXPORT MapEntryKeyComparator : public MapKeyComparator {
 public:
  explicit MapEntryKeyComparator(MessageDifferencer* message_differencer);
  bool IsMatch(
      const Message& message1, const Message& message2,
      const std::vector<SpecificField>& parent_fields) const override;

 private:
  // The differencer passed to the constructor; stored as a raw pointer.
  MessageDifferencer* message_differencer_;
};

// Returns true if field1's number() is less than field2's.
static bool FieldBefore(const FieldDescriptor* field1,
                        const FieldDescriptor* field2);

// Retrieves all the set fields, including extensions.
// NOTE(review): the meaning of base_message is not documented here; check
// the implementation before relying on it.
FieldDescriptorArray RetrieveFields(const Message& message,
                                    bool base_message);

// Combines the two lists of fields and returns the combined list. (Older
// wording referred to a combined_fields output vector; the signature now
// returns the result by value.)
// All fields present in both lists will always be included in the combined
// list. Fields only present in one of the lists will only appear in the
// combined list if the corresponding fields_scope option is set to FULL.
FieldDescriptorArray CombineFields(const FieldDescriptorArray& fields1,
                                   Scope fields1_scope,
                                   const FieldDescriptorArray& fields2,
                                   Scope fields2_scope);

// Internal version of the Compare method which performs the actual
// comparison. The parent_fields vector is a vector containing field
// descriptors of all fields accessed to get to this comparison operation
// (i.e. if the current message is an embedded message, the parent_fields
// vector will contain the field that has this embedded message).
bool Compare(const Message& message1, const Message& message2,
             std::vector<SpecificField>* parent_fields);

// Compares all the unknown fields in two messages.
// NOTE(review): the two unnamed UnknownFieldSet parameters are presumably
// the unknown-field sets belonging to message1 and message2 respectively.
bool CompareUnknownFields(const Message& message1, const Message& message2,
                          const UnknownFieldSet&, const UnknownFieldSet&,
                          std::vector<SpecificField>* parent_fields);

// Compares the specified messages for the requested field lists. The field
// lists are modified depending on comparison settings, and then passed to
// CompareWithFieldsInternal.
bool CompareRequestedFieldsUsingSettings(
    const Message& message1, const Message& message2,
    const FieldDescriptorArray& message1_fields,
    const FieldDescriptorArray& message2_fields,
    std::vector<SpecificField>* parent_fields);

// Compares the specified messages with the specified field lists.
bool CompareWithFieldsInternal(const Message& message1,
                               const Message& message2,
                               const FieldDescriptorArray& message1_fields,
                               const FieldDescriptorArray& message2_fields,
                               std::vector<SpecificField>* parent_fields);

// Compares the repeated fields, and reports the error.
bool CompareRepeatedField(const Message& message1, const Message& message2,
                          const FieldDescriptor* field,
                          std::vector<SpecificField>* parent_fields);

// Shorthand for CompareFieldValueUsingParentFields with NULL parent_fields.
bool CompareFieldValue(const Message& message1, const Message& message2,
                       const FieldDescriptor* field, int index1, int index2);

// Compares the specified field on the two messages, returning
// true if they are the same, false otherwise. For repeated fields,
// this method only compares the value in the specified index. This method
// uses Compare functions to recurse into submessages.
// The parent_fields vector is passed along in calls to a Reporter instance.
// It can be NULL, in which case the MessageDifferencer will create a new
// list of parent messages if it needs to recursively compare the given field.
// To avoid confusing users you should not set it to NULL unless you modified
// Reporter to handle the change of parent_fields correctly.
bool CompareFieldValueUsingParentFields(
    const Message& message1, const Message& message2,
    const FieldDescriptor* field, int index1, int index2,
    std::vector<SpecificField>* parent_fields);

// Compares the specified field on the two messages, returning the comparison
// result, as returned by the appropriate FieldComparator.
// index1 / index2 and field_context are forwarded context for that
// comparison (presumably the element indices for repeated fields, as in
// CompareFieldValueUsingParentFields above).
FieldComparator::ComparisonResult GetFieldComparisonResult(
    const Message& message1, const Message& message2,
    const FieldDescriptor* field, int index1, int index2,
    const FieldContext* field_context);

// Checks whether the two elements in the repeated field match each other.
// If key_comparator is NULL, this function returns true when the two
// elements are equal.
bool IsMatch(const FieldDescriptor* repeated_field,
             const MapKeyComparator* key_comparator, const Message* message1,
             const Message* message2,
             const std::vector<SpecificField>& parent_fields,
             Reporter* reporter, int index1, int index2);

// Each of the three predicates below returns true when this repeated field
// has been configured to be treated as a Set / SmartSet / SmartList
// respectively.
bool IsTreatedAsSet(const FieldDescriptor* field);
bool IsTreatedAsSmartSet(const FieldDescriptor* field);

bool IsTreatedAsSmartList(const FieldDescriptor* field);

// (Documents SetMatchIndicesForSmartListCallback below.)
// When treating as SMART_LIST, it uses MatchIndicesPostProcessorForSmartList
// by default to find the longest matching sequence from the first matching
// element. The callback takes two vectors showing the matching indices from
// the other vector, where -1 means unmatched.
void SetMatchIndicesForSmartListCallback(
    std::function<void(std::vector<int>*, std::vector<int>*)> callback);

// Returns true when this repeated field is to be compared as a subset, i.e.
// has been configured to be treated as a set or map and scope is set to
// PARTIAL.
bool IsTreatedAsSubset(const FieldDescriptor* field);

// Returns true if this field is to be ignored when this
// MessageDifferencer compares messages.
bool IsIgnored(const Message& message1, const Message& message2,
               const FieldDescriptor* field,
               const std::vector<SpecificField>& parent_fields);

// Returns true if this unknown field is to be ignored when this
// MessageDifferencer compares messages.
bool IsUnknownFieldIgnored(const Message& message1, const Message& message2,
                           const SpecificField& field,
                           const std::vector<SpecificField>& parent_fields);

// Returns MapKeyComparator* when this field has been configured to be treated
// as a map or its is_map() returns true. If not, returns NULL.
const MapKeyComparator* GetMapKeyComparator(
    const FieldDescriptor* field) const;

// Attempts to match indices of a repeated field, so that the contained values
// match. Clears output vectors and sets their values to indices of paired
// messages, i.e. if message1[0] matches message2[1], then match_list1[0] == 1
// and match_list2[1] == 0. The unmatched indices are indicated by -1.
// Assumes the repeated field is not treated as a simple list.
// This method returns false if the match failed. However, it doesn't mean
// that the comparison succeeds when this method returns true (you need to
// double-check in this case).
bool MatchRepeatedFieldIndices(
    const Message& message1, const Message& message2,
    const FieldDescriptor* repeated_field,
    const MapKeyComparator* key_comparator,
    const std::vector<SpecificField>& parent_fields,
    std::vector<int>* match_list1, std::vector<int>* match_list2);

// If "any" is of type google.protobuf.Any, extract its payload using
// DynamicMessageFactory and store in "data".
// NOTE(review): the meaning of the bool result is not documented here;
// presumably false means nothing was stored in "data" -- confirm.
// NOTE(review): std::unique_ptr lives in <memory>, which is not among this
// header's direct includes; it presumably arrives transitively. Consider
// including <memory> explicitly.
bool UnpackAny(const Message& any, std::unique_ptr<Message>* data);

// Checks if index is equal to new_index in all the specific fields.
static bool CheckPathChanged(const std::vector<SpecificField>& parent_fields);

// CHECKs that the given repeated field can be compared according to
// new_comparison.
void CheckRepeatedFieldComparisons(
    const FieldDescriptor* field,
    const RepeatedFieldComparison& new_comparison);

// Defines a map between field descriptors and their MapKeyComparators.
// Used for repeated fields when they are configured as TreatAsMap.
typedef std::map<const FieldDescriptor*, const MapKeyComparator*>
    FieldKeyComparatorMap;

// Defines a set to store field descriptors. Used for repeated fields when
// they are configured as TreatAsSet.
typedef std::set<const FieldDescriptor*> FieldSet;
// Maps a field descriptor to the RepeatedFieldComparison chosen for it.
typedef std::map<const FieldDescriptor*, RepeatedFieldComparison> FieldMap;

// NOTE(review): presumably the reporter installed through
// ReportDifferencesTo / ReportDifferencesToString; held as a raw pointer.
Reporter* reporter_;
// Comparator instance embedded in the differencer itself.
DefaultFieldComparator default_field_comparator_;
// NOTE(review): presumably points either at default_field_comparator_ or at
// a caller-supplied custom comparator -- confirm in the .cc file.
FieldComparator* field_comparator_;
MessageFieldComparison message_field_comparison_;
Scope scope_;
RepeatedFieldComparison repeated_field_comparison_;

// Per-field comparison overrides, keyed by field descriptor (see FieldMap).
FieldMap repeated_field_comparisons_;
// Keeps track of MapKeyComparators that are created within
// MessageDifferencer. These MapKeyComparators should be deleted
// before MessageDifferencer is destroyed.
// When TreatAsMap or TreatAsMapWithMultipleFieldsAsKey is called, we don't
// store the supplied FieldDescriptors directly. Instead, a new
// MapKeyComparator is created for comparison purposes.
std::vector<MapKeyComparator*> owned_key_comparators_;
FieldKeyComparatorMap map_field_key_comparator_;
MapEntryKeyComparator map_entry_key_comparator_;
std::vector<IgnoreCriteria*> ignore_criteria_;
// Reused multiple times in RetrieveFields to avoid extra allocations.
std::vector<const FieldDescriptor*> tmp_message_fields_;

FieldSet ignored_fields_;

bool report_matches_;
bool report_moves_;
// Written by set_report_ignores().
bool report_ignores_;

// NOTE(review): presumably the string handed to ReportDifferencesToString;
// held as a raw pointer.
std::string* output_string_;

// Callback to post-process the matched indices to support SMART_LIST.
std::function<void(std::vector<int>*, std::vector<int>*)>
    match_indices_for_smart_list_callback_;

// NOTE(review): std::unique_ptr is declared in <memory>, which this header
// does not include directly.
std::unique_ptr<DynamicMessageFactory> dynamic_message_factory_;
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MessageDifferencer);
};

// This class provides extra information to the FieldComparator::Compare
// function. Currently that information is the parent-field path exposed by
// parent_fields().
916 class PROTOBUF_EXPORT FieldContext { 917 public: FieldContext(std::vector<MessageDifferencer::SpecificField> * parent_fields)918 explicit FieldContext( 919 std::vector<MessageDifferencer::SpecificField>* parent_fields) 920 : parent_fields_(parent_fields) {} 921 parent_fields()922 std::vector<MessageDifferencer::SpecificField>* parent_fields() const { 923 return parent_fields_; 924 } 925 926 private: 927 std::vector<MessageDifferencer::SpecificField>* parent_fields_; 928 }; 929 930 } // namespace util 931 } // namespace protobuf 932 } // namespace google 933 934 #include <google/protobuf/port_undef.inc> 935 936 #endif // GOOGLE_PROTOBUF_UTIL_MESSAGE_DIFFERENCER_H__ 937