• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2007 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // Classes to implement an Encoder for the format described in
17 // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
18 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
19 //
20 // The RFC describes the possibility of using a secondary compressor
21 // to further reduce the size of each section of the VCDIFF output.
22 // That feature is not supported in this implementation of the encoder
23 // and decoder.
24 // No secondary compressor types have been publicly registered with
25 // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
26 // in the more than five years since the registry was created, so there
27 // is no standard set of compressor IDs which would be generated by other
28 // encoders or accepted by other decoders.
29 
30 #include <config.h>
31 #include "google/vcencoder.h"
32 #include <vector>
33 #include "checksum.h"
34 #include "encodetable.h"
35 #include "logging.h"
36 #include "google/output_string.h"
37 #include "vcdiffengine.h"
38 
39 namespace open_vcdiff {
40 
HashedDictionary(const char * dictionary_contents,size_t dictionary_size)41 HashedDictionary::HashedDictionary(const char* dictionary_contents,
42                                    size_t dictionary_size)
43     : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { }
44 
~HashedDictionary()45 HashedDictionary::~HashedDictionary() { delete engine_; }
46 
Init()47 bool HashedDictionary::Init() {
48   return const_cast<VCDiffEngine*>(engine_)->Init();
49 }
50 
51 class VCDiffStreamingEncoderImpl {
52  public:
53   VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
54                              VCDiffFormatExtensionFlags format_extensions,
55                              bool look_for_target_matches);
56 
57   // These functions are identical to their counterparts
58   // in VCDiffStreamingEncoder.
59   bool StartEncoding(OutputStringInterface* out);
60 
61   bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);
62 
63   bool FinishEncoding(OutputStringInterface* out);
64 
match_counts() const65   const std::vector<int>& match_counts() const {
66     return coder_.match_counts();
67   }
68 
69  private:
70   // Write the header (as defined in section 4.1 of the RFC) to *output.
71   // This includes information that can be gathered
72   // before the first chunk of input is available.
73   void WriteHeader(OutputStringInterface* output) const;
74 
75   const VCDiffEngine* engine_;
76 
77   // This implementation of the encoder uses the default
78   // code table.  A VCDiffCodeTableWriter could also be constructed
79   // using a custom code table.
80   VCDiffCodeTableWriter coder_;
81 
82   const VCDiffFormatExtensionFlags format_extensions_;
83 
84   // Determines whether to look for matches within the previously encoded
85   // target data, or just within the source (dictionary) data.  Please see
86   // vcencoder.h for a full explanation of this parameter.
87   const bool look_for_target_matches_;
88 
89   // This state variable is used to ensure that StartEncoding(), EncodeChunk(),
90   // and FinishEncoding() are called in the correct order.  It will be true
91   // if StartEncoding() has been called, followed by zero or more calls to
92   // EncodeChunk(), but FinishEncoding() has not yet been called.  It will
93   // be false initially, and also after FinishEncoding() has been called.
94   bool encode_chunk_allowed_;
95 
96   // Making these private avoids implicit copy constructor & assignment operator
97   VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&);  // NOLINT
98   void operator=(const VCDiffStreamingEncoderImpl&);
99 };
100 
VCDiffStreamingEncoderImpl(const HashedDictionary * dictionary,VCDiffFormatExtensionFlags format_extensions,bool look_for_target_matches)101 inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
102     const HashedDictionary* dictionary,
103     VCDiffFormatExtensionFlags format_extensions,
104     bool look_for_target_matches)
105     : engine_(dictionary->engine()),
106       coder_((format_extensions & VCD_FORMAT_INTERLEAVED) != 0),
107       format_extensions_(format_extensions),
108       look_for_target_matches_(look_for_target_matches),
109       encode_chunk_allowed_(false) { }
110 
WriteHeader(OutputStringInterface * output) const111 inline void VCDiffStreamingEncoderImpl::WriteHeader(
112     OutputStringInterface* output) const {
113   DeltaFileHeader header_data = {
114     0xD6,  // Header1: "V" | 0x80
115     0xC3,  // Header2: "C" | 0x80
116     0xC4,  // Header3: "D" | 0x80
117     0x00,  // Header4: Draft standard format
118     0x00 };  // Hdr_Indicator:
119              // No compression, no custom code table
120   if (format_extensions_ != VCD_STANDARD_FORMAT) {
121     header_data.header4 = 'S';  // Header4: VCDIFF/SDCH, extensions used
122   }
123   output->append(reinterpret_cast<const char*>(&header_data),
124                  sizeof(header_data));
125   // If custom cache table sizes or a custom code table were used
126   // for encoding, here is where they would be appended to *output.
127   // This implementation of the encoder does not use those features,
128   // although the decoder can understand and interpret them.
129 }
130 
StartEncoding(OutputStringInterface * out)131 inline bool VCDiffStreamingEncoderImpl::StartEncoding(
132     OutputStringInterface* out) {
133   if (!coder_.Init(engine_->dictionary_size())) {
134     LOG(DFATAL) << "Internal error: "
135                    "Initialization of code table writer failed" << LOG_ENDL;
136     return false;
137   }
138   WriteHeader(out);
139   encode_chunk_allowed_ = true;
140   return true;
141 }
142 
EncodeChunk(const char * data,size_t len,OutputStringInterface * out)143 inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
144     const char* data,
145     size_t len,
146     OutputStringInterface* out) {
147   if (!encode_chunk_allowed_) {
148     LOG(ERROR) << "EncodeChunk called before StartEncoding" << LOG_ENDL;
149     return false;
150   }
151   if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) {
152     coder_.AddChecksum(ComputeAdler32(data, len));
153   }
154   engine_->Encode(data, len, look_for_target_matches_, out, &coder_);
155   return true;
156 }
157 
FinishEncoding(OutputStringInterface *)158 inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
159     OutputStringInterface* /*out*/) {
160   if (!encode_chunk_allowed_) {
161     LOG(ERROR) << "FinishEncoding called before StartEncoding" << LOG_ENDL;
162     return false;
163   }
164   encode_chunk_allowed_ = false;
165   // There should not be any need to output more data
166   // since EncodeChunk() encodes a complete target window
167   // and there is no end-of-delta-file marker.
168   return true;
169 }
170 
VCDiffStreamingEncoder(const HashedDictionary * dictionary,VCDiffFormatExtensionFlags format_extensions,bool look_for_target_matches)171 VCDiffStreamingEncoder::VCDiffStreamingEncoder(
172     const HashedDictionary* dictionary,
173     VCDiffFormatExtensionFlags format_extensions,
174     bool look_for_target_matches)
175     : impl_(new VCDiffStreamingEncoderImpl(dictionary,
176                                            format_extensions,
177                                            look_for_target_matches)) { }
178 
~VCDiffStreamingEncoder()179 VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; }
180 
StartEncodingToInterface(OutputStringInterface * out)181 bool VCDiffStreamingEncoder::StartEncodingToInterface(
182     OutputStringInterface* out) {
183   return impl_->StartEncoding(out);
184 }
185 
EncodeChunkToInterface(const char * data,size_t len,OutputStringInterface * out)186 bool VCDiffStreamingEncoder::EncodeChunkToInterface(
187     const char* data,
188     size_t len,
189     OutputStringInterface* out) {
190   return impl_->EncodeChunk(data, len, out);
191 }
192 
FinishEncodingToInterface(OutputStringInterface * out)193 bool VCDiffStreamingEncoder::FinishEncodingToInterface(
194     OutputStringInterface* out) {
195   return impl_->FinishEncoding(out);
196 }
197 
GetMatchCounts(std::vector<int> * match_counts) const198 void VCDiffStreamingEncoder::GetMatchCounts(
199     std::vector<int>* match_counts) const {
200   if (!match_counts) {
201     LOG(DFATAL) << "GetMatchCounts() called with NULL argument" << LOG_ENDL;
202     return;
203   }
204   *match_counts = impl_->match_counts();
205 }
206 
EncodeToInterface(const char * target_data,size_t target_len,OutputStringInterface * out)207 bool VCDiffEncoder::EncodeToInterface(const char* target_data,
208                                       size_t target_len,
209                                       OutputStringInterface* out) {
210   out->clear();
211   if (!encoder_) {
212     if (!dictionary_.Init()) {
213       LOG(ERROR) << "Error initializing HashedDictionary" << LOG_ENDL;
214       return false;
215     }
216     encoder_ = new VCDiffStreamingEncoder(&dictionary_,
217                                           flags_,
218                                           look_for_target_matches_);
219   }
220   if (!encoder_->StartEncodingToInterface(out)) {
221     return false;
222   }
223   if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) {
224     return false;
225   }
226   return encoder_->FinishEncodingToInterface(out);
227 }
228 
229 }  // namespace open_vcdiff
230