• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2007 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // Classes to implement an Encoder for the format described in
17 // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
18 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
19 //
20 // The RFC describes the possibility of using a secondary compressor
21 // to further reduce the size of each section of the VCDIFF output.
22 // That feature is not supported in this implementation of the encoder
23 // and decoder.
24 // No secondary compressor types have been publicly registered with
25 // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
26 // in the more than five years since the registry was created, so there
27 // is no standard set of compressor IDs which would be generated by other
28 // encoders or accepted by other decoders.
29 
30 #include <config.h>
31 #include <memory>  // auto_ptr
32 #include "checksum.h"
33 #include "encodetable.h"
34 #include "google/output_string.h"
35 #include "google/vcencoder.h"
36 #include "jsonwriter.h"
37 #include "logging.h"
38 #include "vcdiffengine.h"
39 
40 namespace open_vcdiff {
41 
HashedDictionary(const char * dictionary_contents,size_t dictionary_size)42 HashedDictionary::HashedDictionary(const char* dictionary_contents,
43                                    size_t dictionary_size)
44     : engine_(new VCDiffEngine(dictionary_contents, dictionary_size)) { }
45 
~HashedDictionary()46 HashedDictionary::~HashedDictionary() { delete engine_; }
47 
Init()48 bool HashedDictionary::Init() {
49   return const_cast<VCDiffEngine*>(engine_)->Init();
50 }
51 
52 class VCDiffStreamingEncoderImpl {
53  public:
54   VCDiffStreamingEncoderImpl(const HashedDictionary* dictionary,
55                              VCDiffFormatExtensionFlags format_extensions,
56                              bool look_for_target_matches);
57 
58   // These functions are identical to their counterparts
59   // in VCDiffStreamingEncoder.
60   bool StartEncoding(OutputStringInterface* out);
61 
62   bool EncodeChunk(const char* data, size_t len, OutputStringInterface* out);
63 
64   bool FinishEncoding(OutputStringInterface* out);
65 
66  private:
67   const VCDiffEngine* engine_;
68 
69   std::auto_ptr<CodeTableWriterInterface> coder_;
70 
71   const VCDiffFormatExtensionFlags format_extensions_;
72 
73   // Determines whether to look for matches within the previously encoded
74   // target data, or just within the source (dictionary) data.  Please see
75   // vcencoder.h for a full explanation of this parameter.
76   const bool look_for_target_matches_;
77 
78   // This state variable is used to ensure that StartEncoding(), EncodeChunk(),
79   // and FinishEncoding() are called in the correct order.  It will be true
80   // if StartEncoding() has been called, followed by zero or more calls to
81   // EncodeChunk(), but FinishEncoding() has not yet been called.  It will
82   // be false initially, and also after FinishEncoding() has been called.
83   bool encode_chunk_allowed_;
84 
85   // Making these private avoids implicit copy constructor & assignment operator
86   VCDiffStreamingEncoderImpl(const VCDiffStreamingEncoderImpl&);  // NOLINT
87   void operator=(const VCDiffStreamingEncoderImpl&);
88 };
89 
VCDiffStreamingEncoderImpl(const HashedDictionary * dictionary,VCDiffFormatExtensionFlags format_extensions,bool look_for_target_matches)90 inline VCDiffStreamingEncoderImpl::VCDiffStreamingEncoderImpl(
91     const HashedDictionary* dictionary,
92     VCDiffFormatExtensionFlags format_extensions,
93     bool look_for_target_matches)
94     : engine_(dictionary->engine()),
95       format_extensions_(format_extensions),
96       look_for_target_matches_(look_for_target_matches),
97       encode_chunk_allowed_(false) {
98   if (format_extensions & VCD_FORMAT_JSON) {
99     coder_.reset(new JSONCodeTableWriter());
100   } else {
101     // This implementation of the encoder uses the default
102     // code table.  A VCDiffCodeTableWriter could also be constructed
103     // using a custom code table.
104     coder_.reset(new VCDiffCodeTableWriter(
105         (format_extensions & VCD_FORMAT_INTERLEAVED) != 0));
106   }
107 }
108 
StartEncoding(OutputStringInterface * out)109 inline bool VCDiffStreamingEncoderImpl::StartEncoding(
110     OutputStringInterface* out) {
111   if (!coder_->Init(engine_->dictionary_size())) {
112     VCD_DFATAL << "Internal error: "
113                   "Initialization of code table writer failed" << VCD_ENDL;
114     return false;
115   }
116   coder_->WriteHeader(out, format_extensions_);
117   encode_chunk_allowed_ = true;
118   return true;
119 }
120 
EncodeChunk(const char * data,size_t len,OutputStringInterface * out)121 inline bool VCDiffStreamingEncoderImpl::EncodeChunk(
122     const char* data,
123     size_t len,
124     OutputStringInterface* out) {
125   if (!encode_chunk_allowed_) {
126     VCD_ERROR << "EncodeChunk called before StartEncoding" << VCD_ENDL;
127     return false;
128   }
129   if ((format_extensions_ & VCD_FORMAT_CHECKSUM) != 0) {
130     coder_->AddChecksum(ComputeAdler32(data, len));
131   }
132   engine_->Encode(data, len, look_for_target_matches_, out, coder_.get());
133   return true;
134 }
135 
FinishEncoding(OutputStringInterface * out)136 inline bool VCDiffStreamingEncoderImpl::FinishEncoding(
137     OutputStringInterface* out) {
138   if (!encode_chunk_allowed_) {
139     VCD_ERROR << "FinishEncoding called before StartEncoding" << VCD_ENDL;
140     return false;
141   }
142   encode_chunk_allowed_ = false;
143   coder_->FinishEncoding(out);
144   return true;
145 }
146 
VCDiffStreamingEncoder(const HashedDictionary * dictionary,VCDiffFormatExtensionFlags format_extensions,bool look_for_target_matches)147 VCDiffStreamingEncoder::VCDiffStreamingEncoder(
148     const HashedDictionary* dictionary,
149     VCDiffFormatExtensionFlags format_extensions,
150     bool look_for_target_matches)
151     : impl_(new VCDiffStreamingEncoderImpl(dictionary,
152                                            format_extensions,
153                                            look_for_target_matches)) { }
154 
~VCDiffStreamingEncoder()155 VCDiffStreamingEncoder::~VCDiffStreamingEncoder() { delete impl_; }
156 
StartEncodingToInterface(OutputStringInterface * out)157 bool VCDiffStreamingEncoder::StartEncodingToInterface(
158     OutputStringInterface* out) {
159   return impl_->StartEncoding(out);
160 }
161 
EncodeChunkToInterface(const char * data,size_t len,OutputStringInterface * out)162 bool VCDiffStreamingEncoder::EncodeChunkToInterface(
163     const char* data,
164     size_t len,
165     OutputStringInterface* out) {
166   return impl_->EncodeChunk(data, len, out);
167 }
168 
FinishEncodingToInterface(OutputStringInterface * out)169 bool VCDiffStreamingEncoder::FinishEncodingToInterface(
170     OutputStringInterface* out) {
171   return impl_->FinishEncoding(out);
172 }
173 
EncodeToInterface(const char * target_data,size_t target_len,OutputStringInterface * out)174 bool VCDiffEncoder::EncodeToInterface(const char* target_data,
175                                       size_t target_len,
176                                       OutputStringInterface* out) {
177   out->clear();
178   if (!encoder_) {
179     if (!dictionary_.Init()) {
180       VCD_ERROR << "Error initializing HashedDictionary" << VCD_ENDL;
181       return false;
182     }
183     encoder_ = new VCDiffStreamingEncoder(&dictionary_,
184                                           flags_,
185                                           look_for_target_matches_);
186   }
187   if (!encoder_->StartEncodingToInterface(out)) {
188     return false;
189   }
190   if (!encoder_->EncodeChunkToInterface(target_data, target_len, out)) {
191     return false;
192   }
193   return encoder_->FinishEncodingToInterface(out);
194 }
195 
196 }  // namespace open_vcdiff
197