• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2006 Google Inc.
2 // Authors: Sanjay Ghemawat, Jeff Dean, Chandra Chereddi, Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #ifndef OPEN_VCDIFF_VCDIFFENGINE_H_
17 #define OPEN_VCDIFF_VCDIFFENGINE_H_
18 
19 #include <config.h>
20 #include <stddef.h>  // size_t
21 #include <stdint.h>  // uint32_t
22 
23 namespace open_vcdiff {
24 
25 class BlockHash;
26 class OutputStringInterface;
27 class CodeTableWriterInterface;
28 
29 // The VCDiffEngine class is used to find the optimal encoding (in terms of COPY
30 // and ADD instructions) for a given dictionary and target window.  To write the
31 // instructions for this encoding, it calls the Copy() and Add() methods of the
32 // code table writer object which is passed as an argument to Encode().
33 class VCDiffEngine {
34  public:
35   // The minimum size of a string match that is worth putting into a COPY
36   // instruction.  Since this value is more than twice the block size, the
37   // encoder will always discover a match of this size, no matter whether it is
38   // aligned on block boundaries in the dictionary text.
39   static const size_t kMinimumMatchSize = 32;
40 
41   VCDiffEngine(const char* dictionary, size_t dictionary_size);
42 
43   ~VCDiffEngine();
44 
45   // Initializes the object before use.
46   // This method must be called after constructing a VCDiffEngine object,
47   // and before any other method may be called.  It should not be called
48   // twice on the same object.
49   // Returns true if initialization succeeded, or false if an error occurred,
50   // in which case no other method except the destructor may then be used
51   // on the object.
52   // The Init() method is the only one allowed to treat hashed_dictionary_
53   // as non-const.
54   bool Init();
55 
dictionary_size()56   size_t dictionary_size() const { return dictionary_size_; }
57 
58   // Main worker function.  Finds the best matches between the dictionary
59   // (source) and target data, and uses the coder to write a
60   // delta file window into *diff.
61   // Because it is a const function, many threads
62   // can call Encode() at once for the same VCDiffEngine object.
63   // All thread-specific data will be stored in the coder and diff arguments.
64   // The coder object must have been fully initialized (by calling its Init()
65   // method, if any) before calling this function.
66   //
67   // look_for_target_matches determines whether to look for matches
68   // within the previously encoded target data, or just within the source
69   // (dictionary) data.  Please see vcencoder.h for a full explanation
70   // of this parameter.
71   void Encode(const char* target_data,
72               size_t target_size,
73               bool look_for_target_matches,
74               OutputStringInterface* diff,
75               CodeTableWriterInterface* coder) const;
76 
77  private:
ShouldGenerateCopyInstructionForMatchOfSize(size_t size)78   static bool ShouldGenerateCopyInstructionForMatchOfSize(size_t size) {
79     return size >= kMinimumMatchSize;
80   }
81 
82   // The following two functions use templates to produce two different
83   // versions of the code depending on the value of the option
84   // look_for_target_matches.  This approach saves a test-and-branch instruction
85   // within the inner loop of EncodeCopyForBestMatch.
86   template<bool look_for_target_matches>
87   void EncodeInternal(const char* target_data,
88                       size_t target_size,
89                       OutputStringInterface* diff,
90                       CodeTableWriterInterface* coder) const;
91 
92   // If look_for_target_matches is true, then target_hash must point to a valid
93   // BlockHash object, and cannot be NULL.  If look_for_target_matches is
94   // false, then the value of target_hash is ignored.
95   template<bool look_for_target_matches>
96   size_t EncodeCopyForBestMatch(uint32_t hash_value,
97                                 const char* target_candidate_start,
98                                 const char* unencoded_target_start,
99                                 size_t unencoded_target_size,
100                                 const BlockHash* target_hash,
101                                 CodeTableWriterInterface* coder) const;
102 
103   void AddUnmatchedRemainder(const char* unencoded_target_start,
104                              size_t unencoded_target_size,
105                              CodeTableWriterInterface* coder) const;
106 
107   void FinishEncoding(size_t target_size,
108                       OutputStringInterface* diff,
109                       CodeTableWriterInterface* coder) const;
110 
111   const char* dictionary_;  // A copy of the dictionary contents
112 
113   const size_t dictionary_size_;
114 
115   // A hash that contains one element for every kBlockSize bytes of dictionary_.
116   // This can be reused to encode many different target strings using the
117   // same dictionary, without the need to compute the hash values each time.
118   const BlockHash* hashed_dictionary_;
119 
120   // Making these private avoids implicit copy constructor & assignment operator
121   VCDiffEngine(const VCDiffEngine&);
122   void operator=(const VCDiffEngine&);
123 };
124 
125 }  // namespace open_vcdiff
126 
127 #endif  // OPEN_VCDIFF_VCDIFFENGINE_H_
128