1 // Copyright 2007 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // Classes to implement an Encoder for the format described in
17 // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
18 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
19 //
20 // The RFC describes the possibility of using a secondary compressor
21 // to further reduce the size of each section of the VCDIFF output.
22 // That feature is not supported in this implementation of the encoder
23 // and decoder.
24 // No secondary compressor types have been publicly registered with
25 // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
26 // in the more than five years since the registry was created, so there
27 // is no standard set of compressor IDs which would be generated by other
28 // encoders or accepted by other decoders.
29
30 #include <config.h>
31 #include "google/vcencoder.h"
32 #include <vector>
33 #include "checksum.h"
34 #include "encodetable.h"
35 #include "logging.h"
36 #include "google/output_string.h"
37 #include "vcdiffengine.h"
38
39 namespace open_vcdiff {
40
HashedDictionary(const char * dictionary_contents,size_t dictionary_size)41 HashedDictionary::HashedDictionary(const char* dictionary_contents,
42 size_t dictionary_size)
43 : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { }
44
~HashedDictionary()45 HashedDictionary::~HashedDictionary() { delete engine_; }
46
Init()47 bool HashedDictionary::Init() {
48 return const_cast<VCDiffEngine*>(engine_)->Init();
49 }
50
51 class VCDiffStreamingEncoderImpl {
52 public:
53 VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
54 VCDiffFormatExtensionFlags format_extensions,
55 bool look_for_target_matches);
56
57 // These functions are identical to their counterparts
58 // in VCDiffStreamingEncoder.
59 bool StartEncoding(OutputStringInterface* out);
60
61 bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);
62
63 bool FinishEncoding(OutputStringInterface* out);
64
match_counts() const65 const std::vector<int>& match_counts() const {
66 return coder_.match_counts();
67 }
68
69 private:
70 // Write the header (as defined in section 4.1 of the RFC) to *output.
71 // This includes information that can be gathered
72 // before the first chunk of input is available.
73 void WriteHeader(OutputStringInterface* output) const;
74
75 const VCDiffEngine* engine_;
76
77 // This implementation of the encoder uses the default
78 // code table. A VCDiffCodeTableWriter could also be constructed
79 // using a custom code table.
80 VCDiffCodeTableWriter coder_;
81
82 const VCDiffFormatExtensionFlags format_extensions_;
83
84 // Determines whether to look for matches within the previously encoded
85 // target data, or just within the source (dictionary) data. Please see
86 // vcencoder.h for a full explanation of this parameter.
87 const bool look_for_target_matches_;
88
89 // This state variable is used to ensure that StartEncoding(), EncodeChunk(),
90 // and FinishEncoding() are called in the correct order. It will be true
91 // if StartEncoding() has been called, followed by zero or more calls to
92 // EncodeChunk(), but FinishEncoding() has not yet been called. It will
93 // be false initially, and also after FinishEncoding() has been called.
94 bool encode_chunk_allowed_;
95
96 // Making these private avoids implicit copy constructor & assignment operator
97 VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&); // NOLINT
98 void operator=(const VCDiffStreamingEncoderImpl&);
99 };
100
VCDiffStreamingEncoderImpl(const HashedDictionary * dictionary,VCDiffFormatExtensionFlags format_extensions,bool look_for_target_matches)101 inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
102 const HashedDictionary* dictionary,
103 VCDiffFormatExtensionFlags format_extensions,
104 bool look_for_target_matches)
105 : engine_(dictionary->engine()),
106 coder_((format_extensions & VCD_FORMAT_INTERLEAVED) != 0),
107 format_extensions_(format_extensions),
108 look_for_target_matches_(look_for_target_matches),
109 encode_chunk_allowed_(false) { }
110
WriteHeader(OutputStringInterface * output) const111 inline void VCDiffStreamingEncoderImpl::WriteHeader(
112 OutputStringInterface* output) const {
113 DeltaFileHeader header_data = {
114 0xD6, // Header1: "V" | 0x80
115 0xC3, // Header2: "C" | 0x80
116 0xC4, // Header3: "D" | 0x80
117 0x00, // Header4: Draft standard format
118 0x00 }; // Hdr_Indicator:
119 // No compression, no custom code table
120 if (format_extensions_ != VCD_STANDARD_FORMAT) {
121 header_data.header4 = 'S'; // Header4: VCDIFF/SDCH, extensions used
122 }
123 output->append(reinterpret_cast<const char*>(&header_data),
124 sizeof(header_data));
125 // If custom cache table sizes or a custom code table were used
126 // for encoding, here is where they would be appended to *output.
127 // This implementation of the encoder does not use those features,
128 // although the decoder can understand and interpret them.
129 }
130
StartEncoding(OutputStringInterface * out)131 inline bool VCDiffStreamingEncoderImpl::StartEncoding(
132 OutputStringInterface* out) {
133 if (!coder_.Init(engine_->dictionary_size())) {
134 LOG(DFATAL) << "Internal error: "
135 "Initialization of code table writer failed" << LOG_ENDL;
136 return false;
137 }
138 WriteHeader(out);
139 encode_chunk_allowed_ = true;
140 return true;
141 }
142
EncodeChunk(const char * data,size_t len,OutputStringInterface * out)143 inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
144 const char* data,
145 size_t len,
146 OutputStringInterface* out) {
147 if (!encode_chunk_allowed_) {
148 LOG(ERROR) << "EncodeChunk called before StartEncoding" << LOG_ENDL;
149 return false;
150 }
151 if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) {
152 coder_.AddChecksum(ComputeAdler32(data, len));
153 }
154 engine_->Encode(data, len, look_for_target_matches_, out, &coder_);
155 return true;
156 }
157
FinishEncoding(OutputStringInterface *)158 inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
159 OutputStringInterface* /*out*/) {
160 if (!encode_chunk_allowed_) {
161 LOG(ERROR) << "FinishEncoding called before StartEncoding" << LOG_ENDL;
162 return false;
163 }
164 encode_chunk_allowed_ = false;
165 // There should not be any need to output more data
166 // since EncodeChunk() encodes a complete target window
167 // and there is no end-of-delta-file marker.
168 return true;
169 }
170
VCDiffStreamingEncoder(const HashedDictionary * dictionary,VCDiffFormatExtensionFlags format_extensions,bool look_for_target_matches)171 VCDiffStreamingEncoder::VCDiffStreamingEncoder(
172 const HashedDictionary* dictionary,
173 VCDiffFormatExtensionFlags format_extensions,
174 bool look_for_target_matches)
175 : impl_(new VCDiffStreamingEncoderImpl(dictionary,
176 format_extensions,
177 look_for_target_matches)) { }
178
~VCDiffStreamingEncoder()179 VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; }
180
StartEncodingToInterface(OutputStringInterface * out)181 bool VCDiffStreamingEncoder::StartEncodingToInterface(
182 OutputStringInterface* out) {
183 return impl_->StartEncoding(out);
184 }
185
EncodeChunkToInterface(const char * data,size_t len,OutputStringInterface * out)186 bool VCDiffStreamingEncoder::EncodeChunkToInterface(
187 const char* data,
188 size_t len,
189 OutputStringInterface* out) {
190 return impl_->EncodeChunk(data, len, out);
191 }
192
FinishEncodingToInterface(OutputStringInterface * out)193 bool VCDiffStreamingEncoder::FinishEncodingToInterface(
194 OutputStringInterface* out) {
195 return impl_->FinishEncoding(out);
196 }
197
GetMatchCounts(std::vector<int> * match_counts) const198 void VCDiffStreamingEncoder::GetMatchCounts(
199 std::vector<int>* match_counts) const {
200 if (!match_counts) {
201 LOG(DFATAL) << "GetMatchCounts() called with NULL argument" << LOG_ENDL;
202 return;
203 }
204 *match_counts = impl_->match_counts();
205 }
206
EncodeToInterface(const char * target_data,size_t target_len,OutputStringInterface * out)207 bool VCDiffEncoder::EncodeToInterface(const char* target_data,
208 size_t target_len,
209 OutputStringInterface* out) {
210 out->clear();
211 if (!encoder_) {
212 if (!dictionary_.Init()) {
213 LOG(ERROR) << "Error initializing HashedDictionary" << LOG_ENDL;
214 return false;
215 }
216 encoder_ = new VCDiffStreamingEncoder(&dictionary_,
217 flags_,
218 look_for_target_matches_);
219 }
220 if (!encoder_->StartEncodingToInterface(out)) {
221 return false;
222 }
223 if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) {
224 return false;
225 }
226 return encoder_->FinishEncodingToInterface(out);
227 }
228
229 } // namespace open_vcdiff
230