1 // Copyright 2006 Google Inc. 2 // Authors: Sanjay Ghemawat, Jeff Dean, Chandra Chereddi, Lincoln Smith 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #ifndef OPEN_VCDIFF_VCDIFFENGINE_H_ 17 #define OPEN_VCDIFF_VCDIFFENGINE_H_ 18 19 #include <config.h> 20 #include <stddef.h> // size_t 21 #include <stdint.h> // uint32_t 22 23 namespace open_vcdiff { 24 25 class BlockHash; 26 class OutputStringInterface; 27 class CodeTableWriterInterface; 28 29 // The VCDiffEngine class is used to find the optimal encoding (in terms of COPY 30 // and ADD instructions) for a given dictionary and target window. To write the 31 // instructions for this encoding, it calls the Copy() and Add() methods of the 32 // code table writer object which is passed as an argument to Encode(). 33 class VCDiffEngine { 34 public: 35 // The minimum size of a string match that is worth putting into a COPY 36 // instruction. Since this value is more than twice the block size, the 37 // encoder will always discover a match of this size, no matter whether it is 38 // aligned on block boundaries in the dictionary text. 39 static const size_t kMinimumMatchSize = 32; 40 41 VCDiffEngine(const char* dictionary, size_t dictionary_size); 42 43 ~VCDiffEngine(); 44 45 // Initializes the object before use. 46 // This method must be called after constructing a VCDiffEngine object, 47 // and before any other method may be called. It should not be called 48 // twice on the same object. 49 // Returns true if initialization succeeded, or false if an error occurred, 50 // in which case no other method except the destructor may then be used 51 // on the object. 52 // The Init() method is the only one allowed to treat hashed_dictionary_ 53 // as non-const. 54 bool Init(); 55 dictionary_size()56 size_t dictionary_size() const { return dictionary_size_; } 57 58 // Main worker function. Finds the best matches between the dictionary 59 // (source) and target data, and uses the coder to write a 60 // delta file window into *diff. 61 // Because it is a const function, many threads 62 // can call Encode() at once for the same VCDiffEngine object. 63 // All thread-specific data will be stored in the coder and diff arguments. 64 // The coder object must have been fully initialized (by calling its Init() 65 // method, if any) before calling this function. 66 // 67 // look_for_target_matches determines whether to look for matches 68 // within the previously encoded target data, or just within the source 69 // (dictionary) data. Please see vcencoder.h for a full explanation 70 // of this parameter. 71 void Encode(const char* target_data, 72 size_t target_size, 73 bool look_for_target_matches, 74 OutputStringInterface* diff, 75 CodeTableWriterInterface* coder) const; 76 77 private: ShouldGenerateCopyInstructionForMatchOfSize(size_t size)78 static bool ShouldGenerateCopyInstructionForMatchOfSize(size_t size) { 79 return size >= kMinimumMatchSize; 80 } 81 82 // The following two functions use templates to produce two different 83 // versions of the code depending on the value of the option 84 // look_for_target_matches. This approach saves a test-and-branch instruction 85 // within the inner loop of EncodeCopyForBestMatch. 86 template<bool look_for_target_matches> 87 void EncodeInternal(const char* target_data, 88 size_t target_size, 89 OutputStringInterface* diff, 90 CodeTableWriterInterface* coder) const; 91 92 // If look_for_target_matches is true, then target_hash must point to a valid 93 // BlockHash object, and cannot be NULL. If look_for_target_matches is 94 // false, then the value of target_hash is ignored. 95 template<bool look_for_target_matches> 96 size_t EncodeCopyForBestMatch(uint32_t hash_value, 97 const char* target_candidate_start, 98 const char* unencoded_target_start, 99 size_t unencoded_target_size, 100 const BlockHash* target_hash, 101 CodeTableWriterInterface* coder) const; 102 103 void AddUnmatchedRemainder(const char* unencoded_target_start, 104 size_t unencoded_target_size, 105 CodeTableWriterInterface* coder) const; 106 107 void FinishEncoding(size_t target_size, 108 OutputStringInterface* diff, 109 CodeTableWriterInterface* coder) const; 110 111 const char* dictionary_; // A copy of the dictionary contents 112 113 const size_t dictionary_size_; 114 115 // A hash that contains one element for every kBlockSize bytes of dictionary_. 116 // This can be reused to encode many different target strings using the 117 // same dictionary, without the need to compute the hash values each time. 118 const BlockHash* hashed_dictionary_; 119 120 // Making these private avoids implicit copy constructor & assignment operator 121 VCDiffEngine(const VCDiffEngine&); 122 void operator=(const VCDiffEngine&); 123 }; 124 125 } // namespace open_vcdiff 126 127 #endif // OPEN_VCDIFF_VCDIFFENGINE_H_ 128