1 // Copyright 2007 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // Classes to implement an Encoder for the format described in
17 // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
18 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
19 //
20 // The RFC describes the possibility of using a secondary compressor
21 // to further reduce the size of each section of the VCDIFF output.
22 // That feature is not supported in this implementation of the encoder
23 // and decoder.
24 // No secondary compressor types have been publicly registered with
25 // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
26 // in the more than five years since the registry was created, so there
27 // is no standard set of compressor IDs which would be generated by other
28 // encoders or accepted by other decoders.
29
30 #include <config.h>
31 #include <memory> // auto_ptr
32 #include "checksum.h"
33 #include "encodetable.h"
34 #include "google/output_string.h"
35 #include "google/vcencoder.h"
36 #include "jsonwriter.h"
37 #include "logging.h"
38 #include "vcdiffengine.h"
39
40 namespace open_vcdiff {
41
HashedDictionary(const char * dictionary_contents,size_t dictionary_size)42 HashedDictionary::HashedDictionary(const char* dictionary_contents,
43 size_t dictionary_size)
44 : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { }
45
~HashedDictionary()46 HashedDictionary::~HashedDictionary() { delete engine_; }
47
Init()48 bool HashedDictionary::Init() {
49 return const_cast<VCDiffEngine*>(engine_)->Init();
50 }
51
52 class VCDiffStreamingEncoderImpl {
53 public:
54 VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
55 VCDiffFormatExtensionFlags format_extensions,
56 bool look_for_target_matches);
57
58 // These functions are identical to their counterparts
59 // in VCDiffStreamingEncoder.
60 bool StartEncoding(OutputStringInterface* out);
61
62 bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);
63
64 bool FinishEncoding(OutputStringInterface* out);
65
66 private:
67 const VCDiffEngine* engine_;
68
69 std::auto_ptr<CodeTableWriterInterface> coder_;
70
71 const VCDiffFormatExtensionFlags format_extensions_;
72
73 // Determines whether to look for matches within the previously encoded
74 // target data, or just within the source (dictionary) data. Please see
75 // vcencoder.h for a full explanation of this parameter.
76 const bool look_for_target_matches_;
77
78 // This state variable is used to ensure that StartEncoding(), EncodeChunk(),
79 // and FinishEncoding() are called in the correct order. It will be true
80 // if StartEncoding() has been called, followed by zero or more calls to
81 // EncodeChunk(), but FinishEncoding() has not yet been called. It will
82 // be false initially, and also after FinishEncoding() has been called.
83 bool encode_chunk_allowed_;
84
85 // Making these private avoids implicit copy constructor & assignment operator
86 VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&); // NOLINT
87 void operator=(const VCDiffStreamingEncoderImpl&);
88 };
89
VCDiffStreamingEncoderImpl(const HashedDictionary * dictionary,VCDiffFormatExtensionFlags format_extensions,bool look_for_target_matches)90 inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
91 const HashedDictionary* dictionary,
92 VCDiffFormatExtensionFlags format_extensions,
93 bool look_for_target_matches)
94 : engine_(dictionary->engine()),
95 format_extensions_(format_extensions),
96 look_for_target_matches_(look_for_target_matches),
97 encode_chunk_allowed_(false) {
98 if (format_extensions & VCD_FORMAT_JSON) {
99 coder_.reset(new JSONCodeTableWriter());
100 } else {
101 // This implementation of the encoder uses the default
102 // code table. A VCDiffCodeTableWriter could also be constructed
103 // using a custom code table.
104 coder_.reset(new VCDiffCodeTableWriter(
105 (format_extensions & VCD_FORMAT_INTERLEAVED) != 0));
106 }
107 }
108
StartEncoding(OutputStringInterface * out)109 inline bool VCDiffStreamingEncoderImpl::StartEncoding(
110 OutputStringInterface* out) {
111 if (!coder_->Init(engine_->dictionary_size())) {
112 VCD_DFATAL << "Internal error: "
113 "Initialization of code table writer failed" << VCD_ENDL;
114 return false;
115 }
116 coder_->WriteHeader(out, format_extensions_);
117 encode_chunk_allowed_ = true;
118 return true;
119 }
120
EncodeChunk(const char * data,size_t len,OutputStringInterface * out)121 inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
122 const char* data,
123 size_t len,
124 OutputStringInterface* out) {
125 if (!encode_chunk_allowed_) {
126 VCD_ERROR << "EncodeChunk called before StartEncoding" << VCD_ENDL;
127 return false;
128 }
129 if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) {
130 coder_->AddChecksum(ComputeAdler32(data, len));
131 }
132 engine_->Encode(data, len, look_for_target_matches_, out, coder_.get());
133 return true;
134 }
135
FinishEncoding(OutputStringInterface * out)136 inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
137 OutputStringInterface* out) {
138 if (!encode_chunk_allowed_) {
139 VCD_ERROR << "FinishEncoding called before StartEncoding" << VCD_ENDL;
140 return false;
141 }
142 encode_chunk_allowed_ = false;
143 coder_->FinishEncoding(out);
144 return true;
145 }
146
VCDiffStreamingEncoder(const HashedDictionary * dictionary,VCDiffFormatExtensionFlags format_extensions,bool look_for_target_matches)147 VCDiffStreamingEncoder::VCDiffStreamingEncoder(
148 const HashedDictionary* dictionary,
149 VCDiffFormatExtensionFlags format_extensions,
150 bool look_for_target_matches)
151 : impl_(new VCDiffStreamingEncoderImpl(dictionary,
152 format_extensions,
153 look_for_target_matches)) { }
154
~VCDiffStreamingEncoder()155 VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; }
156
StartEncodingToInterface(OutputStringInterface * out)157 bool VCDiffStreamingEncoder::StartEncodingToInterface(
158 OutputStringInterface* out) {
159 return impl_->StartEncoding(out);
160 }
161
EncodeChunkToInterface(const char * data,size_t len,OutputStringInterface * out)162 bool VCDiffStreamingEncoder::EncodeChunkToInterface(
163 const char* data,
164 size_t len,
165 OutputStringInterface* out) {
166 return impl_->EncodeChunk(data, len, out);
167 }
168
FinishEncodingToInterface(OutputStringInterface * out)169 bool VCDiffStreamingEncoder::FinishEncodingToInterface(
170 OutputStringInterface* out) {
171 return impl_->FinishEncoding(out);
172 }
173
EncodeToInterface(const char * target_data,size_t target_len,OutputStringInterface * out)174 bool VCDiffEncoder::EncodeToInterface(const char* target_data,
175 size_t target_len,
176 OutputStringInterface* out) {
177 out->clear();
178 if (!encoder_) {
179 if (!dictionary_.Init()) {
180 VCD_ERROR << "Error initializing HashedDictionary" << VCD_ENDL;
181 return false;
182 }
183 encoder_ = new VCDiffStreamingEncoder(&dictionary_,
184 flags_,
185 look_for_target_matches_);
186 }
187 if (!encoder_->StartEncodingToInterface(out)) {
188 return false;
189 }
190 if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) {
191 return false;
192 }
193 return encoder_->FinishEncodingToInterface(out);
194 }
195
196 } // namespace open_vcdiff
197