• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2008 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // Implements a Decoder for the format described in
17 // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
18 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
19 //
20 // The RFC describes the possibility of using a secondary compressor
21 // to further reduce the size of each section of the VCDIFF output.
22 // That feature is not supported in this implementation of the encoder
23 // and decoder.
24 // No secondary compressor types have been publicly registered with
25 // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
26 // in the more than five years since the registry was created, so there
27 // is no standard set of compressor IDs which would be generated by other
28 // encoders or accepted by other decoders.
29 
30 #include <config.h>
31 #include "google/vcdecoder.h"
32 #include <stddef.h>  // size_t, ptrdiff_t
33 #include <stdint.h>  // int32_t
34 #include <string.h>  // memcpy, memset
35 #include <memory>  // auto_ptr
36 #include <string>
37 #include "addrcache.h"
38 #include "checksum.h"
39 #include "codetable.h"
40 #include "decodetable.h"
41 #include "headerparser.h"
42 #include "logging.h"
43 #include "google/output_string.h"
44 #include "varint_bigendian.h"
45 #include "vcdiff_defs.h"
46 
47 namespace open_vcdiff {
48 
49 // This class is used to parse delta file windows as described
50 // in RFC sections 4.2 and 4.3.  Its methods are not thread-safe.
51 //
52 // Here is the window format copied from the RFC:
53 //
54 // Window1
55 //     Win_Indicator                            - byte
56 //     [Source segment size]                    - integer
57 //     [Source segment position]                - integer
58 //     The delta encoding of the target window
59 //         Length of the delta encoding         - integer
60 //         The delta encoding
61 //             Size of the target window        - integer
62 //             Delta_Indicator                  - byte
63 //             Length of data for ADDs and RUNs - integer
64 //             Length of instructions and sizes - integer
65 //             Length of addresses for COPYs    - integer
66 //             Data section for ADDs and RUNs   - array of bytes
67 //             Instructions and sizes section   - array of bytes
68 //             Addresses section for COPYs      - array of bytes
69 // Window2
70 // ...
71 //
72 // Sample usage:
73 //
74 // VCDiffDeltaFileWindow delta_window_;
75 // delta_window_.Init(parent);
76 // ParseableChunk parseable_chunk(input_buffer,
77 //                                input_size,
78 //                                leftover_unencoded_bytes);
79 // while (!parseable_chunk.Empty()) {
80 //   switch (delta_window_.DecodeWindow(&parseable_chunk)) {
81 //     case RESULT_END_OF_DATA:
82 //       <Read more input and retry DecodeWindow later.>
83 //     case RESULT_ERROR:
84 //       <Handle error case.  An error log message has already been generated.>
85 //   }
86 // }
87 //
88 // DecodeWindow consumes only a single window, and needs to be placed within
89 // a loop if multiple windows are to be processed.
90 //
91 class VCDiffDeltaFileWindow {
92  public:
93   VCDiffDeltaFileWindow();
94   ~VCDiffDeltaFileWindow();
95 
96   // Init() should be called immediately after constructing the
97   // VCDiffDeltaFileWindow().  It must be called before DecodeWindow() can be
98   // invoked, or an error will occur.
99   void Init(VCDiffStreamingDecoderImpl* parent);
100 
101   // Resets the pointers to the data sections in the current window.
102   void Reset();
103 
UseCodeTable(const VCDiffCodeTableData & code_table_data,unsigned char max_mode)104   bool UseCodeTable(const VCDiffCodeTableData& code_table_data,
105                     unsigned char max_mode) {
106     return reader_.UseCodeTable(code_table_data, max_mode);
107   }
108 
109   // Decodes a single delta window using the input data from *parseable_chunk.
110   // Appends the decoded target window to parent_->decoded_target().  Returns
111   // RESULT_SUCCESS if an entire window was decoded, or RESULT_END_OF_DATA if
112   // the end of input was reached before the entire window could be decoded and
113   // more input is expected (only possible if IsInterleaved() is true), or
114   // RESULT_ERROR if an error occurred during decoding.  In the RESULT_ERROR
115   // case, the value of parseable_chunk->pointer_ is undefined; otherwise,
116   // parseable_chunk->Advance() is called to point to the input data position
117   // just after the data that has been decoded.
118   //
119   VCDiffResult DecodeWindow(ParseableChunk* parseable_chunk);
120 
FoundWindowHeader() const121   bool FoundWindowHeader() const {
122     return found_header_;
123   }
124 
MoreDataExpected() const125   bool MoreDataExpected() const {
126     // When parsing an interleaved-format delta file,
127     // every time DecodeBody() exits, interleaved_bytes_expected_
128     // will be decremented by the number of bytes parsed.  If it
129     // reaches zero, then there is no more data expected because
130     // the size of the interleaved section (given in the window
131     // header) has been reached.
132     return IsInterleaved() && (interleaved_bytes_expected_ > 0);
133   }
134 
target_window_start_pos() const135   size_t target_window_start_pos() const { return target_window_start_pos_; }
136 
set_target_window_start_pos(size_t new_start_pos)137   void set_target_window_start_pos(size_t new_start_pos) {
138     target_window_start_pos_ = new_start_pos;
139   }
140 
141   // Returns the number of bytes remaining to be decoded in the target window.
142   // If not in the process of decoding a window, returns 0.
143   size_t TargetBytesRemaining();
144 
145  private:
146   // Reads the header of the window section as described in RFC sections 4.2 and
147   // 4.3, up to and including the value "Length of addresses for COPYs".  If the
148   // entire header is found, this function sets up the DeltaWindowSections
149   // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so
150   // that the decoder can begin decoding the opcodes in these sections.  Returns
151   // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of
152   // available data was reached before the entire header could be read.  (The
153   // latter may be an error condition if there is no more data available.)
154   // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the
155   // parsed header.
156   //
157   VCDiffResult ReadHeader(ParseableChunk* parseable_chunk);
158 
159   // After the window header has been parsed as far as the Delta_Indicator,
160   // this function is called to parse the following delta window header fields:
161   //
162   //     Length of data for ADDs and RUNs - integer (VarintBE format)
163   //     Length of instructions and sizes - integer (VarintBE format)
164   //     Length of addresses for COPYs    - integer (VarintBE format)
165   //
166   // If has_checksum_ is true, it also looks for the following element:
167   //
168   //     Adler32 checksum            - unsigned 32-bit integer (VarintBE format)
169   //
170   // It sets up the DeltaWindowSections instructions_and_sizes_,
171   // data_for_add_and_run_, and addresses_for_copy_.  If the interleaved format
172   // is being used, all three sections will include the entire window body; if
173   // the standard format is used, three non-overlapping window sections will be
174   // defined.  Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA
175   // if standard format is being used and there is not enough input data to read
176   // the entire window body.  Otherwise, returns RESULT_SUCCESS.
177   VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser);
178 
179   // Decodes the body of the window section as described in RFC sections 4.3,
180   // including the sections "Data section for ADDs and RUNs", "Instructions
181   // and sizes section", and "Addresses section for COPYs".  These sections
182   // must already have been set up by ReadWindowHeader().  Returns a
183   // non-negative value on success, or RESULT_END_OF_DATA if the end of input
184   // was reached before the entire window could be decoded (only possible if
185   // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
186   // decoding.  Appends as much of the decoded target window as possible to
187   // parent->decoded_target().
188   //
189   int DecodeBody(ParseableChunk* parseable_chunk);
190 
191   // Returns the number of bytes already decoded into the target window.
192   size_t TargetBytesDecoded();
193 
194   // Decodes a single ADD instruction, updating parent_->decoded_target_.
195   VCDiffResult DecodeAdd(size_t size);
196 
197   // Decodes a single RUN instruction, updating parent_->decoded_target_.
198   VCDiffResult DecodeRun(size_t size);
199 
200   // Decodes a single COPY instruction, updating parent_->decoded_target_.
201   VCDiffResult DecodeCopy(size_t size, unsigned char mode);
202 
203   // When using the interleaved format, this function is called both on parsing
204   // the header and on resuming after a RESULT_END_OF_DATA was returned from a
205   // previous call to DecodeBody().  It sets up all three section pointers to
206   // reference the same interleaved stream of instructions, sizes, addresses,
207   // and data.  These pointers must be reset every time that work resumes on a
208   // delta window,  because the input data string may have been changed or
209   // resized since DecodeBody() last returned.
UpdateInterleavedSectionPointers(const char * data_pos,const char * data_end)210   void UpdateInterleavedSectionPointers(const char* data_pos,
211                                         const char* data_end) {
212     const ptrdiff_t available_data = data_end - data_pos;
213     // Don't read past the end of currently-available data
214     if (available_data > interleaved_bytes_expected_) {
215       instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_);
216     } else {
217       instructions_and_sizes_.Init(data_pos, available_data);
218     }
219     data_for_add_and_run_.Init(&instructions_and_sizes_);
220     addresses_for_copy_.Init(&instructions_and_sizes_);
221   }
222 
223   // If true, the interleaved format described in AllowInterleaved() is used
224   // for the current delta file.  Only valid after ReadWindowHeader() has been
225   // called and returned a positive number (i.e., the whole header was parsed),
226   // but before the window has finished decoding.
227   //
IsInterleaved() const228   bool IsInterleaved() const {
229     // If the sections are interleaved, both addresses_for_copy_ and
230     // data_for_add_and_run_ should point at instructions_and_sizes_.
231     return !addresses_for_copy_.IsOwned();
232   }
233 
234   // Executes a single COPY or ADD instruction, appending data to
235   // parent_->decoded_target().
236   void CopyBytes(const char* data, size_t size);
237 
238   // Executes a single RUN instruction, appending data to
239   // parent_->decoded_target().
240   void RunByte(unsigned char byte, size_t size);
241 
242   // Advance *parseable_chunk to point to the current position in the
243   // instructions/sizes section.  If interleaved format is used, then
244   // decrement the number of expected bytes in the instructions/sizes section
245   // by the number of instruction/size bytes parsed.
246   void UpdateInstructionPointer(ParseableChunk* parseable_chunk);
247 
248   // The parent object which was passed to Init().
249   VCDiffStreamingDecoderImpl* parent_;
250 
251   // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader()
252   // has been called and succeeded in parsing the delta window header, but the
253   // entire window has not yet been decoded.
254   bool found_header_;
255 
256   // Contents and length of the current source window.  source_segment_ptr_
257   // will be non-NULL if (a) the window section header for the current window
258   // has been read, but the window has not yet finished decoding; or
259   // (b) the window did not specify a source segment.
260   const char* source_segment_ptr_;
261   size_t source_segment_length_;
262 
263   // The delta encoding window sections as defined in RFC section 4.3.
264   // The pointer for each section will be incremented as data is consumed and
265   // decoded from that section.  If the interleaved format is used,
266   // data_for_add_and_run_ and addresses_for_copy_ will both point to
267   // instructions_and_sizes_; otherwise, they will be separate data sections.
268   //
269   DeltaWindowSection instructions_and_sizes_;
270   DeltaWindowSection data_for_add_and_run_;
271   DeltaWindowSection addresses_for_copy_;
272 
273   // The expected bytes left to decode in instructions_and_sizes_.  Only used
274   // for the interleaved format.
275   int interleaved_bytes_expected_;
276 
277   // The expected length of the target window once it has been decoded.
278   size_t target_window_length_;
279 
280   // The index in decoded_target at which the first byte of the current
281   // target window was/will be written.
282   size_t target_window_start_pos_;
283 
284   // If has_checksum_ is true, then expected_checksum_ contains an Adler32
285   // checksum of the target window data.  This is an extension included in the
286   // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard.
287   bool has_checksum_;
288   VCDChecksum expected_checksum_;
289 
290   VCDiffCodeTableReader reader_;
291 
292   // Making these private avoids implicit copy constructor & assignment operator
293   VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&);  // NOLINT
294   void operator=(const VCDiffDeltaFileWindow&);
295 };
296 
297 // *** Inline methods for VCDiffDeltaFileWindow
298 
VCDiffDeltaFileWindow()299 inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) {
300   Reset();
301 }
302 
~VCDiffDeltaFileWindow()303 inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { }
304 
Init(VCDiffStreamingDecoderImpl * parent)305 inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) {
306   parent_ = parent;
307 }
308 
309 class VCDiffStreamingDecoderImpl {
310  public:
311   typedef std::string string;
312 
313   // The default maximum target file size (and target window size) if
314   // SetMaximumTargetFileSize() is not called.
315   static const size_t kDefaultMaximumTargetFileSize = 67108864U;  // 64 MB
316 
317   // The largest value that can be passed to SetMaximumTargetWindowSize().
318   // Using a larger value will result in an error.
319   static const size_t kTargetSizeLimit = 2147483647U;  // INT32_MAX
320 
321   // A constant that is the default value for planned_target_file_size_,
322   // indicating that the decoder does not have an expected length
323   // for the target data.
324   static const size_t kUnlimitedBytes = static_cast<size_t>(-3);
325 
326   VCDiffStreamingDecoderImpl();
327   ~VCDiffStreamingDecoderImpl();
328 
329   // Resets all member variables to their initial states.
330   void Reset();
331 
332   // These functions are identical to their counterparts
333   // in VCDiffStreamingDecoder.
334   //
335   void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);
336 
337   bool DecodeChunk(const char* data,
338                    size_t len,
339                    OutputStringInterface* output_string);
340 
341   bool FinishDecoding();
342 
343   // If true, the version of VCDIFF used in the current delta file allows
344   // for the interleaved format, in which instructions, addresses and data
345   // are all sent interleaved in the instructions section of each window
346   // rather than being sent in separate sections.  This is not part of
347   // the VCDIFF draft standard, so we've defined a special version code
348   // 'S' which implies that this feature is available.  Even if interleaving
349   // is supported, it is not mandatory; interleaved format will be implied
350   // if the address and data sections are both zero-length.
351   //
AllowInterleaved() const352   bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; }
353 
354   // If true, the version of VCDIFF used in the current delta file allows
355   // each delta window to contain an Adler32 checksum of the target window data.
356   // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then
357   // this checksum will appear as a variable-length integer, just after the
358   // "length of addresses for COPYs" value and before the window data sections.
359   // It is possible for some windows in a delta file to use the checksum feature
360   // and for others not to use it (and leave the flag bit set to 0.)
361   // Just as with AllowInterleaved(), this extension is not part of the draft
362   // standard and is only available when the version code 'S' is specified.
363   //
AllowChecksum() const364   bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; }
365 
SetMaximumTargetFileSize(size_t new_maximum_target_file_size)366   bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) {
367     maximum_target_file_size_ = new_maximum_target_file_size;
368     return true;
369   }
370 
SetMaximumTargetWindowSize(size_t new_maximum_target_window_size)371   bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) {
372     if (new_maximum_target_window_size > kTargetSizeLimit) {
373       VCD_ERROR << "Specified maximum target window size "
374                 << new_maximum_target_window_size << " exceeds limit of "
375                 << kTargetSizeLimit << " bytes" << VCD_ENDL;
376       return false;
377     }
378     maximum_target_window_size_ = new_maximum_target_window_size;
379     return true;
380   }
381 
382   // See description of planned_target_file_size_, below.
HasPlannedTargetFileSize() const383   bool HasPlannedTargetFileSize() const {
384     return planned_target_file_size_ != kUnlimitedBytes;
385   }
386 
SetPlannedTargetFileSize(size_t planned_target_file_size)387   void SetPlannedTargetFileSize(size_t planned_target_file_size) {
388     planned_target_file_size_ = planned_target_file_size;
389   }
390 
AddToTotalTargetWindowSize(size_t window_size)391   void AddToTotalTargetWindowSize(size_t window_size) {
392     total_of_target_window_sizes_ += window_size;
393   }
394 
395   // Checks to see whether the decoded target data has reached its planned size.
ReachedPlannedTargetFileSize() const396   bool ReachedPlannedTargetFileSize() const {
397     if (!HasPlannedTargetFileSize()) {
398       return false;
399     }
400     // The planned target file size should not have been exceeded.
401     // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of
402     // each target window would not make the target file exceed that limit, and
403     // DecodeBody() will return RESULT_ERROR if the actual decoded output ever
404     // exceeds the advertised target window size.
405     if (total_of_target_window_sizes_ > planned_target_file_size_) {
406       VCD_DFATAL << "Internal error: Decoded data size "
407                  << total_of_target_window_sizes_
408                  << " exceeds planned target file size "
409                  << planned_target_file_size_ << VCD_ENDL;
410       return true;
411     }
412     return total_of_target_window_sizes_ == planned_target_file_size_;
413   }
414 
415   // Checks to see whether adding a new target window of the specified size
416   // would exceed the planned target file size, the maximum target file size,
417   // or the maximum target window size.  If so, logs an error and returns true;
418   // otherwise, returns false.
419   bool TargetWindowWouldExceedSizeLimits(size_t window_size) const;
420 
421   // Returns the amount of input data passed to the last DecodeChunk()
422   // that was not consumed by the decoder.  This is essential if
423   // SetPlannedTargetFileSize() is being used, in order to preserve the
424   // remaining input data stream once the planned target file has been decoded.
GetUnconsumedDataSize() const425   size_t GetUnconsumedDataSize() const {
426     return unparsed_bytes_.size();
427   }
428 
429   // This function will return true if the decoder has parsed a complete delta
430   // file header plus zero or more delta file windows, with no data left over.
431   // It will also return true if no delta data at all was decoded.  If these
432   // conditions are not met, then FinishDecoding() should not be called.
IsDecodingComplete() const433   bool IsDecodingComplete() const {
434     if (!FoundFileHeader()) {
435       // No complete delta file header has been parsed yet.  DecodeChunk()
436       // may have received some data that it hasn't yet parsed, in which case
437       // decoding is incomplete.
438       return unparsed_bytes_.empty();
439     } else if (custom_code_table_decoder_.get()) {
440       // The decoder is in the middle of parsing a custom code table.
441       return false;
442     } else if (delta_window_.FoundWindowHeader()) {
443       // The decoder is in the middle of parsing an interleaved format delta
444       // window.
445       return false;
446     } else if (ReachedPlannedTargetFileSize()) {
447       // The decoder found exactly the planned number of bytes.  In this case
448       // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover
449       // data after the end of the delta file.
450       return true;
451     } else {
452       // No complete delta file window has been parsed yet.  DecodeChunk()
453       // may have received some data that it hasn't yet parsed, in which case
454       // decoding is incomplete.
455       return unparsed_bytes_.empty();
456     }
457   }
458 
dictionary_ptr() const459   const char* dictionary_ptr() const { return dictionary_ptr_; }
460 
dictionary_size() const461   size_t dictionary_size() const { return dictionary_size_; }
462 
addr_cache()463   VCDiffAddressCache* addr_cache() { return addr_cache_.get(); }
464 
decoded_target()465   string* decoded_target() { return &decoded_target_; }
466 
allow_vcd_target() const467   bool allow_vcd_target() const { return allow_vcd_target_; }
468 
SetAllowVcdTarget(bool allow_vcd_target)469   void SetAllowVcdTarget(bool allow_vcd_target) {
470     if (start_decoding_was_called_) {
471       VCD_DFATAL << "SetAllowVcdTarget() called after StartDecoding()"
472                  << VCD_ENDL;
473       return;
474     }
475     allow_vcd_target_ = allow_vcd_target;
476   }
477 
478  private:
479   // Reads the VCDiff delta file header section as described in RFC section 4.1,
480   // except the custom code table data.  Returns RESULT_ERROR if an error
481   // occurred, or RESULT_END_OF_DATA if the end of available data was reached
482   // before the entire header could be read.  (The latter may be an error
483   // condition if there is no more data available.)  Otherwise, advances
484   // data->position_ past the header and returns RESULT_SUCCESS.
485   //
486   VCDiffResult ReadDeltaFileHeader(ParseableChunk* data);
487 
488   // Indicates whether or not the header has already been read.
FoundFileHeader() const489   bool FoundFileHeader() const { return addr_cache_.get() != NULL; }
490 
491   // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta
492   // file header, this function parses the custom cache sizes and initializes
493   // a nested VCDiffStreamingDecoderImpl object that will be used to parse the
494   // custom code table in ReadCustomCodeTable().  Returns RESULT_ERROR if an
495   // error occurred, or RESULT_END_OF_DATA if the end of available data was
496   // reached before the custom cache sizes could be read.  Otherwise, returns
497   // the number of bytes read.
498   //
499   int InitCustomCodeTable(const char* data_start, const char* data_end);
500 
501   // If a custom code table was specified in the header section that was parsed
502   // by ReadDeltaFileHeader(), this function makes a recursive call to another
503   // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the
504   // custom code table is expected to be supplied as an embedded VCDIFF
505   // encoding that uses the standard code table.  Returns RESULT_ERROR if an
506   // error occurs, or RESULT_END_OF_DATA if the end of available data was
507   // reached before the entire custom code table could be read.  Otherwise,
508   // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded
509   // custom code table.  If the function returns RESULT_SUCCESS or
510   // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes.
511   //
512   VCDiffResult ReadCustomCodeTable(ParseableChunk* data);
513 
514   // Called after the decoder exhausts all input data.  This function
515   // copies from decoded_target_ into output_string all the data that
516   // has not yet been output.  It sets decoded_target_output_position_
517   // to mark the start of the next data that needs to be output.
518   void AppendNewOutputText(OutputStringInterface* output_string);
519 
520   // Appends to output_string the portion of decoded_target_ that has
521   // not yet been output, then clears decoded_target_.  This function is
522   // called after each complete target window has been decoded if
523   // allow_vcd_target is false.  In that case, there is no need to retain
524   // target data from any window except the current window.
525   void FlushDecodedTarget(OutputStringInterface* output_string);
526 
527   // Contents and length of the source (dictionary) data.
528   const char* dictionary_ptr_;
529   size_t dictionary_size_;
530 
531   // This string will be used to store any unparsed bytes left over when
532   // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA.
533   // It will also be used to concatenate those unparsed bytes with the data
534   // supplied to the next call to DecodeChunk(), so that they appear in
535   // contiguous memory.
536   string unparsed_bytes_;
537 
538   // The portion of the target file that has been decoded so far.  This will be
539   // used to fill the output string for DecodeChunk(), and will also be used to
540   // execute COPY instructions that reference target data.  Since the source
541   // window can come from a range of addresses in the previously decoded target
542   // data, the entire target file needs to be available to the decoder, not just
543   // the current target window.
544   string decoded_target_;
545 
546   // The VCDIFF version byte (also known as "header4") from the
547   // delta file header.
548   unsigned char vcdiff_version_code_;
549 
550   VCDiffDeltaFileWindow delta_window_;
551 
552   std::auto_ptr<VCDiffAddressCache> addr_cache_;
553 
554   // Will be NULL unless a custom code table has been defined.
555   std::auto_ptr<VCDiffCodeTableData> custom_code_table_;
556 
557   // Used to receive the decoded custom code table.
558   string custom_code_table_string_;
559 
560   // If a custom code table is specified, it will be expressed
561   // as an embedded VCDIFF delta file which uses the default code table
562   // as the source file (dictionary).  Use a child decoder object
563   // to decode that delta file.
564   std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_;
565 
566   // If set, then the decoder is expecting *exactly* this number of
567   // target bytes to be decoded from one or more delta file windows.
568   // If this number is exceeded while decoding a window, but was not met
569   // before starting on that window, an error will be reported.
570   // If FinishDecoding() is called before this number is met, an error
571   // will also be reported.  This feature is used for decoding the
572   // embedded code table data within a VCDIFF delta file; we want to
573   // stop processing the embedded data once the entire code table has
574   // been decoded, and treat the rest of the available data as part
575   // of the enclosing delta file.
576   size_t planned_target_file_size_;
577 
578   size_t maximum_target_file_size_;
579 
580   size_t maximum_target_window_size_;
581 
582   // Contains the sum of the decoded sizes of all target windows seen so far,
583   // including the expected total size of the current target window in progress
584   // (even if some of the current target window has not yet been decoded.)
585   size_t total_of_target_window_sizes_;
586 
587   // Contains the byte position within decoded_target_ of the first data that
588   // has not yet been output by AppendNewOutputText().
589   size_t decoded_target_output_position_;
590 
591   // This value is used to ensure the correct order of calls to the interface
592   // functions, i.e., a single call to StartDecoding(), followed by zero or
593   // more calls to DecodeChunk(), followed by a single call to
594   // FinishDecoding().
595   bool start_decoding_was_called_;
596 
597   // If this value is true then the VCD_TARGET flag can be specified to allow
598   // the source segment to be chosen from the previously-decoded target data.
599   // (This is the default behavior.)  If it is false, then specifying the
600   // VCD_TARGET flag is considered an error, and the decoder does not need to
601   // keep in memory any decoded target data prior to the current window.
602   bool allow_vcd_target_;
603 
604   // Making these private avoids implicit copy constructor & assignment operator
605   VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&);  // NOLINT
606   void operator=(const VCDiffStreamingDecoderImpl&);
607 };
608 
609 // *** Methods for VCDiffStreamingDecoderImpl
610 
// Out-of-class definitions for static const members whose initializers are
// given on their in-class declarations.
// NOTE(review): kTargetSizeLimit is declared the same way in the class but has
// no definition in this part of the file -- confirm whether one is needed or
// is provided elsewhere.
const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize;
const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes;
613 
VCDiffStreamingDecoderImpl()614 VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl()
615     : maximum_target_file_size_(kDefaultMaximumTargetFileSize),
616       maximum_target_window_size_(kDefaultMaximumTargetFileSize),
617       allow_vcd_target_(true) {
618   delta_window_.Init(this);
619   Reset();
620 }
621 
622 // Reset() will delete the component objects without reallocating them.
~VCDiffStreamingDecoderImpl()623 VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); }
624 
Reset()625 void VCDiffStreamingDecoderImpl::Reset() {
626   start_decoding_was_called_ = false;
627   dictionary_ptr_ = NULL;
628   dictionary_size_ = 0;
629   vcdiff_version_code_ = '\0';
630   planned_target_file_size_ = kUnlimitedBytes;
631   total_of_target_window_sizes_ = 0;
632   addr_cache_.reset();
633   custom_code_table_.reset();
634   custom_code_table_decoder_.reset();
635   delta_window_.Reset();
636   decoded_target_output_position_ = 0;
637 }
638 
StartDecoding(const char * dictionary_ptr,size_t dictionary_size)639 void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr,
640                                                size_t dictionary_size) {
641   if (start_decoding_was_called_) {
642     VCD_DFATAL << "StartDecoding() called twice without FinishDecoding()"
643                << VCD_ENDL;
644     return;
645   }
646   unparsed_bytes_.clear();
647   decoded_target_.clear();  // delta_window_.Reset() depends on this
648   Reset();
649   dictionary_ptr_ = dictionary_ptr;
650   dictionary_size_ = dictionary_size;
651   start_decoding_was_called_ = true;
652 }
653 
654 // Reads the VCDiff delta file header section as described in RFC section 4.1:
655 //
656 //     Header1                                  - byte = 0xD6 (ASCII 'V' | 0x80)
657 //     Header2                                  - byte = 0xC3 (ASCII 'C' | 0x80)
658 //     Header3                                  - byte = 0xC4 (ASCII 'D' | 0x80)
659 //     Header4                                  - byte
660 //     Hdr_Indicator                            - byte
661 //     [Secondary compressor ID]                - byte
662 //     [Length of code table data]              - integer
663 //     [Code table data]
664 //
665 // Initializes the code table and address cache objects.  Returns RESULT_ERROR
666 // if an error occurred, and RESULT_END_OF_DATA if the end of available data was
667 // reached before the entire header could be read.  (The latter may be an error
668 // condition if there is no more data available.)  Otherwise, returns
669 // RESULT_SUCCESS, and removes the header bytes from the data string.
670 //
671 // It's relatively inefficient to expect this function to parse any number of
672 // input bytes available, down to 1 byte, but it is necessary in case the input
673 // is not a properly formatted VCDIFF delta file.  If the entire input consists
674 // of two bytes "12", then we should recognize that it does not match the
675 // initial VCDIFF magic number "VCD" and report an error, rather than waiting
676 // indefinitely for more input that will never arrive.
677 //
ReadDeltaFileHeader(ParseableChunk * data)678 VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader(
679     ParseableChunk* data) {
680   if (FoundFileHeader()) {
681     return RESULT_SUCCESS;
682   }
683   size_t data_size = data->UnparsedSize();
684   const DeltaFileHeader* header =
685       reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData());
686   bool wrong_magic_number = false;
687   switch (data_size) {
688     // Verify only the bytes that are available.
689     default:
690       // Found header contents up to and including VCDIFF version
691       vcdiff_version_code_ = header->header4;
692       if ((vcdiff_version_code_ != 0x00) &&  // Draft standard VCDIFF (RFC 3284)
693           (vcdiff_version_code_ != 'S')) {   // Enhancements for SDCH protocol
694         VCD_ERROR << "Unrecognized VCDIFF format version" << VCD_ENDL;
695         return RESULT_ERROR;
696       }
697       // fall through
698     case 3:
699       if (header->header3 != 0xC4) {  // magic value 'D' | 0x80
700         wrong_magic_number = true;
701       }
702       // fall through
703     case 2:
704       if (header->header2 != 0xC3) {  // magic value 'C' | 0x80
705         wrong_magic_number = true;
706       }
707       // fall through
708     case 1:
709       if (header->header1 != 0xD6) {  // magic value 'V' | 0x80
710         wrong_magic_number = true;
711       }
712       // fall through
713     case 0:
714       if (wrong_magic_number) {
715         VCD_ERROR << "Did not find VCDIFF header bytes; "
716                       "input is not a VCDIFF delta file" << VCD_ENDL;
717         return RESULT_ERROR;
718       }
719       if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA;
720   }
721   // Secondary compressor not supported.
722   if (header->hdr_indicator & VCD_DECOMPRESS) {
723     VCD_ERROR << "Secondary compression is not supported" << VCD_ENDL;
724     return RESULT_ERROR;
725   }
726   if (header->hdr_indicator & VCD_CODETABLE) {
727     int bytes_parsed = InitCustomCodeTable(
728         data->UnparsedData() + sizeof(DeltaFileHeader),
729         data->End());
730     switch (bytes_parsed) {
731       case RESULT_ERROR:
732         return RESULT_ERROR;
733       case RESULT_END_OF_DATA:
734         return RESULT_END_OF_DATA;
735       default:
736         data->Advance(sizeof(DeltaFileHeader) + bytes_parsed);
737     }
738   } else {
739     addr_cache_.reset(new VCDiffAddressCache);
740     // addr_cache_->Init() will be called
741     // from VCDiffStreamingDecoderImpl::DecodeChunk()
742     data->Advance(sizeof(DeltaFileHeader));
743   }
744   return RESULT_SUCCESS;
745 }
746 
InitCustomCodeTable(const char * data_start,const char * data_end)747 int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start,
748                                                     const char* data_end) {
749   // A custom code table is being specified.  Parse the variable-length
750   // cache sizes and begin parsing the encoded custom code table.
751   int32_t near_cache_size = 0, same_cache_size = 0;
752   VCDiffHeaderParser header_parser(data_start, data_end);
753   if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) {
754     return header_parser.GetResult();
755   }
756   if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) {
757     return header_parser.GetResult();
758   }
759   custom_code_table_.reset(new struct VCDiffCodeTableData);
760   memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData));
761   custom_code_table_string_.clear();
762   addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size));
763   // addr_cache_->Init() will be called
764   // from VCDiffStreamingDecoderImpl::DecodeChunk()
765 
766   // If we reach this point (the start of the custom code table)
767   // without encountering a RESULT_END_OF_DATA condition, then we won't call
768   // ReadDeltaFileHeader() again for this delta file.
769   //
770   // Instantiate a recursive decoder to interpret the custom code table
771   // as a VCDIFF encoding of the default code table.
772   custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl);
773   custom_code_table_decoder_->StartDecoding(
774       reinterpret_cast<const char*>(
775           &VCDiffCodeTableData::kDefaultCodeTableData),
776       sizeof(VCDiffCodeTableData::kDefaultCodeTableData));
777   custom_code_table_decoder_->SetPlannedTargetFileSize(
778       sizeof(*custom_code_table_));
779   return static_cast<int>(header_parser.ParsedSize());
780 }
781 
ReadCustomCodeTable(ParseableChunk * data)782 VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable(
783     ParseableChunk* data) {
784   if (!custom_code_table_decoder_.get()) {
785     return RESULT_SUCCESS;
786   }
787   if (!custom_code_table_.get()) {
788     VCD_DFATAL << "Internal error:  custom_code_table_decoder_ is set,"
789                   " but custom_code_table_ is NULL" << VCD_ENDL;
790     return RESULT_ERROR;
791   }
792   OutputString<string> output_string(&custom_code_table_string_);
793   if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(),
794                                                data->UnparsedSize(),
795                                                &output_string)) {
796     return RESULT_ERROR;
797   }
798   if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) {
799     // Skip over the consumed data.
800     data->Finish();
801     return RESULT_END_OF_DATA;
802   }
803   if (!custom_code_table_decoder_->FinishDecoding()) {
804     return RESULT_ERROR;
805   }
806   if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) {
807     VCD_DFATAL << "Decoded custom code table size ("
808                << custom_code_table_string_.length()
809                << ") does not match size of a code table ("
810                << sizeof(*custom_code_table_) << ")" << VCD_ENDL;
811     return RESULT_ERROR;
812   }
813   memcpy(custom_code_table_.get(),
814          custom_code_table_string_.data(),
815          sizeof(*custom_code_table_));
816   custom_code_table_string_.clear();
817   // Skip over the consumed data.
818   data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize());
819   custom_code_table_decoder_.reset();
820   delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode());
821   return RESULT_SUCCESS;
822 }
823 
FlushDecodedTarget(OutputStringInterface * output_string)824 void VCDiffStreamingDecoderImpl::FlushDecodedTarget(
825     OutputStringInterface* output_string) {
826   output_string->append(
827       decoded_target_.data() + decoded_target_output_position_,
828       decoded_target_.size() - decoded_target_output_position_);
829   decoded_target_.clear();
830   delta_window_.set_target_window_start_pos(0);
831   decoded_target_output_position_ = 0;
832 }
833 
AppendNewOutputText(OutputStringInterface * output_string)834 void VCDiffStreamingDecoderImpl::AppendNewOutputText(
835     OutputStringInterface* output_string) {
836   const size_t bytes_decoded_this_chunk =
837       decoded_target_.size() - decoded_target_output_position_;
838   if (bytes_decoded_this_chunk > 0) {
839     size_t target_bytes_remaining = delta_window_.TargetBytesRemaining();
840     if (target_bytes_remaining > 0) {
841       // The decoder is midway through decoding a target window.  Resize
842       // output_string to match the expected length.  The interface guarantees
843       // not to resize output_string more than once per target window decoded.
844       output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk
845                                             + target_bytes_remaining);
846     }
847     output_string->append(
848         decoded_target_.data() + decoded_target_output_position_,
849         bytes_decoded_this_chunk);
850     decoded_target_output_position_ = decoded_target_.size();
851   }
852 }
853 
DecodeChunk(const char * data,size_t len,OutputStringInterface * output_string)854 bool VCDiffStreamingDecoderImpl::DecodeChunk(
855     const char* data,
856     size_t len,
857     OutputStringInterface* output_string) {
858   if (!start_decoding_was_called_) {
859     VCD_DFATAL << "DecodeChunk() called without StartDecoding()" << VCD_ENDL;
860     Reset();
861     return false;
862   }
863   ParseableChunk parseable_chunk(data, len);
864   if (!unparsed_bytes_.empty()) {
865     unparsed_bytes_.append(data, len);
866     parseable_chunk.SetDataBuffer(unparsed_bytes_.data(),
867                                   unparsed_bytes_.size());
868   }
869   VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk);
870   if (RESULT_SUCCESS == result) {
871     result = ReadCustomCodeTable(&parseable_chunk);
872   }
873   if (RESULT_SUCCESS == result) {
874     while (!parseable_chunk.Empty()) {
875       result = delta_window_.DecodeWindow(&parseable_chunk);
876       if (RESULT_SUCCESS != result) {
877         break;
878       }
879       if (ReachedPlannedTargetFileSize()) {
880         // Found exactly the length we expected.  Stop decoding.
881         break;
882       }
883       if (!allow_vcd_target()) {
884         // VCD_TARGET will never be used to reference target data before the
885         // start of the current window, so flush and clear the contents of
886         // decoded_target_.
887         FlushDecodedTarget(output_string);
888       }
889     }
890   }
891   if (RESULT_ERROR == result) {
892     Reset();  // Don't allow further DecodeChunk calls
893     return false;
894   }
895   unparsed_bytes_.assign(parseable_chunk.UnparsedData(),
896                          parseable_chunk.UnparsedSize());
897   AppendNewOutputText(output_string);
898   return true;
899 }
900 
901 // Finishes decoding after all data has been received.  Returns true
902 // if decoding of the entire stream was successful.
FinishDecoding()903 bool VCDiffStreamingDecoderImpl::FinishDecoding() {
904   bool success = true;
905   if (!start_decoding_was_called_) {
906     VCD_WARNING << "FinishDecoding() called before StartDecoding(),"
907                    " or called after DecodeChunk() returned false"
908                 << VCD_ENDL;
909     success = false;
910   } else if (!IsDecodingComplete()) {
911     VCD_ERROR << "FinishDecoding() called before parsing entire"
912                  " delta file window" << VCD_ENDL;
913     success = false;
914   }
915   // Reset the object state for the next decode operation
916   Reset();
917   return success;
918 }
919 
TargetWindowWouldExceedSizeLimits(size_t window_size) const920 bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits(
921     size_t window_size) const {
922   if (window_size > maximum_target_window_size_) {
923     VCD_ERROR << "Length of target window (" << window_size
924               << ") exceeds limit of " << maximum_target_window_size_
925               << " bytes" << VCD_ENDL;
926     return true;
927   }
928   if (HasPlannedTargetFileSize()) {
929     // The logical expression to check would be:
930     //
931     //   total_of_target_window_sizes_ + window_size > planned_target_file_size_
932     //
933     // but the addition might cause an integer overflow if target_bytes_to_add
934     // is very large.  So it is better to check target_bytes_to_add against
935     // the remaining planned target bytes.
936     size_t remaining_planned_target_file_size =
937         planned_target_file_size_ - total_of_target_window_sizes_;
938     if (window_size > remaining_planned_target_file_size) {
939       VCD_ERROR << "Length of target window (" << window_size
940                 << " bytes) plus previous windows ("
941                 << total_of_target_window_sizes_
942                 << " bytes) would exceed planned size of "
943                 << planned_target_file_size_ << " bytes" << VCD_ENDL;
944       return true;
945     }
946   }
947   size_t remaining_maximum_target_bytes =
948       maximum_target_file_size_ - total_of_target_window_sizes_;
949   if (window_size > remaining_maximum_target_bytes) {
950     VCD_ERROR << "Length of target window (" << window_size
951               << " bytes) plus previous windows ("
952               << total_of_target_window_sizes_
953               << " bytes) would exceed maximum target file size of "
954               << maximum_target_file_size_ << " bytes" << VCD_ENDL;
955     return true;
956   }
957   return false;
958 }
959 
960 // *** Methods for VCDiffDeltaFileWindow
961 
Reset()962 void VCDiffDeltaFileWindow::Reset() {
963   found_header_ = false;
964 
965   // Mark the start of the current target window.
966   target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U;
967   target_window_length_ = 0;
968 
969   source_segment_ptr_ = NULL;
970   source_segment_length_ = 0;
971 
972   instructions_and_sizes_.Invalidate();
973   data_for_add_and_run_.Invalidate();
974   addresses_for_copy_.Invalidate();
975 
976   interleaved_bytes_expected_ = 0;
977 
978   has_checksum_ = false;
979   expected_checksum_ = 0;
980 }
981 
SetUpWindowSections(VCDiffHeaderParser * header_parser)982 VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections(
983     VCDiffHeaderParser* header_parser) {
984   size_t add_and_run_data_length = 0;
985   size_t instructions_and_sizes_length = 0;
986   size_t addresses_length = 0;
987   if (!header_parser->ParseSectionLengths(has_checksum_,
988                                           &add_and_run_data_length,
989                                           &instructions_and_sizes_length,
990                                           &addresses_length,
991                                           &expected_checksum_)) {
992     return header_parser->GetResult();
993   }
994   if (parent_->AllowInterleaved() &&
995       (add_and_run_data_length == 0) &&
996       (addresses_length == 0)) {
997     // The interleaved format is being used.
998     interleaved_bytes_expected_ =
999         static_cast<int>(instructions_and_sizes_length);
1000     UpdateInterleavedSectionPointers(header_parser->UnparsedData(),
1001                                      header_parser->End());
1002   } else {
1003     // If interleaved format is not used, then the whole window contents
1004     // must be available before decoding can begin.  If only part of
1005     // the current window is available, then report end of data
1006     // and re-parse the whole header when DecodeChunk() is called again.
1007     if (header_parser->UnparsedSize() < (add_and_run_data_length +
1008                                          instructions_and_sizes_length +
1009                                          addresses_length)) {
1010       return RESULT_END_OF_DATA;
1011     }
1012     data_for_add_and_run_.Init(header_parser->UnparsedData(),
1013                                add_and_run_data_length);
1014     instructions_and_sizes_.Init(data_for_add_and_run_.End(),
1015                                  instructions_and_sizes_length);
1016     addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length);
1017     if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) {
1018       VCD_ERROR << "The end of the instructions section "
1019                    "does not match the end of the delta window" << VCD_ENDL;
1020       return RESULT_ERROR;
1021     }
1022   }
1023   reader_.Init(instructions_and_sizes_.UnparsedDataAddr(),
1024                instructions_and_sizes_.End());
1025   return RESULT_SUCCESS;
1026 }
1027 
1028 // Here are the elements of the delta window header to be parsed,
1029 // from section 4 of the RFC:
1030 //
1031 //     Window1
1032 //         Win_Indicator                            - byte
1033 //         [Source segment size]                    - integer
1034 //         [Source segment position]                - integer
1035 //         The delta encoding of the target window
1036 //             Length of the delta encoding         - integer
1037 //             The delta encoding
1038 //                 Size of the target window        - integer
1039 //                 Delta_Indicator                  - byte
1040 //                 Length of data for ADDs and RUNs - integer
1041 //                 Length of instructions and sizes - integer
1042 //                 Length of addresses for COPYs    - integer
1043 //                 Data section for ADDs and RUNs   - array of bytes
1044 //                 Instructions and sizes section   - array of bytes
1045 //                 Addresses section for COPYs      - array of bytes
1046 //
ReadHeader(ParseableChunk * parseable_chunk)1047 VCDiffResult VCDiffDeltaFileWindow::ReadHeader(
1048     ParseableChunk* parseable_chunk) {
1049   std::string* decoded_target = parent_->decoded_target();
1050   VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(),
1051                                    parseable_chunk->End());
1052   size_t source_segment_position = 0;
1053   unsigned char win_indicator = 0;
1054   if (!header_parser.ParseWinIndicatorAndSourceSegment(
1055           parent_->dictionary_size(),
1056           decoded_target->size(),
1057           parent_->allow_vcd_target(),
1058           &win_indicator,
1059           &source_segment_length_,
1060           &source_segment_position)) {
1061     return header_parser.GetResult();
1062   }
1063   has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM);
1064   if (!header_parser.ParseWindowLengths(&target_window_length_)) {
1065     return header_parser.GetResult();
1066   }
1067   if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) {
1068     // An error has been logged by TargetWindowWouldExceedSizeLimits().
1069     return RESULT_ERROR;
1070   }
1071   header_parser.ParseDeltaIndicator();
1072   VCDiffResult setup_return_code = SetUpWindowSections(&header_parser);
1073   if (RESULT_SUCCESS != setup_return_code) {
1074     return setup_return_code;
1075   }
1076   // Reserve enough space in the output string for the current target window.
1077   const size_t wanted_capacity =
1078       target_window_start_pos_ + target_window_length_;
1079   if (decoded_target->capacity() < wanted_capacity) {
1080     decoded_target->reserve(wanted_capacity);
1081   }
1082   // Get a pointer to the start of the source segment.
1083   if (win_indicator & VCD_SOURCE) {
1084     source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position;
1085   } else if (win_indicator & VCD_TARGET) {
1086     // This assignment must happen after the reserve().
1087     // decoded_target should not be resized again while processing this window,
1088     // so source_segment_ptr_ should remain valid.
1089     source_segment_ptr_ = decoded_target->data() + source_segment_position;
1090   }
1091   // The whole window header was found and parsed successfully.
1092   found_header_ = true;
1093   parseable_chunk->Advance(header_parser.ParsedSize());
1094   parent_->AddToTotalTargetWindowSize(target_window_length_);
1095   return RESULT_SUCCESS;
1096 }
1097 
UpdateInstructionPointer(ParseableChunk * parseable_chunk)1098 void VCDiffDeltaFileWindow::UpdateInstructionPointer(
1099     ParseableChunk* parseable_chunk) {
1100   if (IsInterleaved()) {
1101     size_t bytes_parsed = instructions_and_sizes_.ParsedSize();
1102     // Reduce expected instruction segment length by bytes parsed
1103     interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed);
1104     parseable_chunk->Advance(bytes_parsed);
1105   }
1106 }
1107 
TargetBytesDecoded()1108 inline size_t VCDiffDeltaFileWindow::TargetBytesDecoded() {
1109   return parent_->decoded_target()->size() - target_window_start_pos_;
1110 }
1111 
TargetBytesRemaining()1112 size_t VCDiffDeltaFileWindow::TargetBytesRemaining() {
1113   if (target_window_length_ == 0) {
1114     // There is no window being decoded at present
1115     return 0;
1116   } else {
1117     return target_window_length_ - TargetBytesDecoded();
1118   }
1119 }
1120 
CopyBytes(const char * data,size_t size)1121 inline void VCDiffDeltaFileWindow::CopyBytes(const char* data, size_t size) {
1122   parent_->decoded_target()->append(data, size);
1123 }
1124 
RunByte(unsigned char byte,size_t size)1125 inline void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) {
1126   parent_->decoded_target()->append(size, byte);
1127 }
1128 
DecodeAdd(size_t size)1129 VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) {
1130   if (size > data_for_add_and_run_.UnparsedSize()) {
1131     return RESULT_END_OF_DATA;
1132   }
1133   // Write the next "size" data bytes
1134   CopyBytes(data_for_add_and_run_.UnparsedData(), size);
1135   data_for_add_and_run_.Advance(size);
1136   return RESULT_SUCCESS;
1137 }
1138 
DecodeRun(size_t size)1139 VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) {
1140   if (data_for_add_and_run_.Empty()) {
1141     return RESULT_END_OF_DATA;
1142   }
1143   // Write "size" copies of the next data byte
1144   RunByte(*data_for_add_and_run_.UnparsedData(), size);
1145   data_for_add_and_run_.Advance(1);
1146   return RESULT_SUCCESS;
1147 }
1148 
DecodeCopy(size_t size,unsigned char mode)1149 VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size,
1150                                                unsigned char mode) {
1151   // Keep track of the number of target bytes decoded as a local variable
1152   // to avoid recalculating it each time it is needed.
1153   size_t target_bytes_decoded = TargetBytesDecoded();
1154   const VCDAddress here_address =
1155       static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded);
1156   const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress(
1157       here_address,
1158       mode,
1159       addresses_for_copy_.UnparsedDataAddr(),
1160       addresses_for_copy_.End());
1161   switch (decoded_address) {
1162     case RESULT_ERROR:
1163       VCD_ERROR << "Unable to decode address for COPY" << VCD_ENDL;
1164       return RESULT_ERROR;
1165     case RESULT_END_OF_DATA:
1166       return RESULT_END_OF_DATA;
1167     default:
1168       if ((decoded_address < 0) || (decoded_address > here_address)) {
1169         VCD_DFATAL << "Internal error: unexpected address " << decoded_address
1170                    << " returned from DecodeAddress, with here_address = "
1171                    << here_address << VCD_ENDL;
1172         return RESULT_ERROR;
1173       }
1174       break;
1175   }
1176   size_t address = static_cast<size_t>(decoded_address);
1177   if ((address + size) <= source_segment_length_) {
1178     // Copy all data from source segment
1179     CopyBytes(&source_segment_ptr_[address], size);
1180     return RESULT_SUCCESS;
1181   }
1182   // Copy some data from target window...
1183   if (address < source_segment_length_) {
1184     // ... plus some data from source segment
1185     const size_t partial_copy_size = source_segment_length_ - address;
1186     CopyBytes(&source_segment_ptr_[address], partial_copy_size);
1187     target_bytes_decoded += partial_copy_size;
1188     address += partial_copy_size;
1189     size -= partial_copy_size;
1190   }
1191   address -= source_segment_length_;
1192   // address is now based at start of target window
1193   const char* const target_segment_ptr = parent_->decoded_target()->data() +
1194                                          target_window_start_pos_;
1195   while (size > (target_bytes_decoded - address)) {
1196     // Recursive copy that extends into the yet-to-be-copied target data
1197     const size_t partial_copy_size = target_bytes_decoded - address;
1198     CopyBytes(&target_segment_ptr[address], partial_copy_size);
1199     target_bytes_decoded += partial_copy_size;
1200     address += partial_copy_size;
1201     size -= partial_copy_size;
1202   }
1203   CopyBytes(&target_segment_ptr[address], size);
1204   return RESULT_SUCCESS;
1205 }
1206 
DecodeBody(ParseableChunk * parseable_chunk)1207 int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) {
1208   if (IsInterleaved() && (instructions_and_sizes_.UnparsedData()
1209                               != parseable_chunk->UnparsedData())) {
1210     VCD_DFATAL << "Internal error: interleaved format is used, but the"
1211                   " input pointer does not point to the instructions section"
1212                << VCD_ENDL;
1213     return RESULT_ERROR;
1214   }
1215   while (TargetBytesDecoded() < target_window_length_) {
1216     int32_t decoded_size = VCD_INSTRUCTION_ERROR;
1217     unsigned char mode = 0;
1218     VCDiffInstructionType instruction =
1219         reader_.GetNextInstruction(&decoded_size, &mode);
1220     switch (instruction) {
1221       case VCD_INSTRUCTION_END_OF_DATA:
1222         UpdateInstructionPointer(parseable_chunk);
1223         return RESULT_END_OF_DATA;
1224       case VCD_INSTRUCTION_ERROR:
1225         return RESULT_ERROR;
1226       default:
1227         break;
1228     }
1229     const size_t size = static_cast<size_t>(decoded_size);
1230     // The value of "size" itself could be enormous (say, INT32_MAX)
1231     // so check it individually against the limit to protect against
1232     // overflow when adding it to something else.
1233     if ((size > target_window_length_) ||
1234         ((size + TargetBytesDecoded()) > target_window_length_)) {
1235       VCD_ERROR << VCDiffInstructionName(instruction)
1236                 << " with size " << size
1237                 << " plus existing " << TargetBytesDecoded()
1238                 << " bytes of target data exceeds length of target"
1239                    " window (" << target_window_length_ << " bytes)"
1240                 << VCD_ENDL;
1241       return RESULT_ERROR;
1242     }
1243     VCDiffResult result = RESULT_SUCCESS;
1244     switch (instruction) {
1245       case VCD_ADD:
1246         result = DecodeAdd(size);
1247         break;
1248       case VCD_RUN:
1249         result = DecodeRun(size);
1250         break;
1251       case VCD_COPY:
1252         result = DecodeCopy(size, mode);
1253         break;
1254       default:
1255         VCD_DFATAL << "Unexpected instruction type " << instruction
1256                    << "in opcode stream" << VCD_ENDL;
1257         return RESULT_ERROR;
1258     }
1259     switch (result) {
1260       case RESULT_END_OF_DATA:
1261         reader_.UnGetInstruction();
1262         UpdateInstructionPointer(parseable_chunk);
1263         return RESULT_END_OF_DATA;
1264       case RESULT_ERROR:
1265         return RESULT_ERROR;
1266       case RESULT_SUCCESS:
1267         break;
1268     }
1269   }
1270   if (TargetBytesDecoded() != target_window_length_) {
1271     VCD_ERROR << "Decoded target window size (" << TargetBytesDecoded()
1272               << " bytes) does not match expected size ("
1273               << target_window_length_ << " bytes)" << VCD_ENDL;
1274     return RESULT_ERROR;
1275   }
1276   const char* const target_window_start =
1277       parent_->decoded_target()->data() + target_window_start_pos_;
1278   if (has_checksum_ &&
1279       (ComputeAdler32(target_window_start, target_window_length_)
1280            != expected_checksum_)) {
1281     VCD_ERROR << "Target data does not match checksum; this could mean "
1282                  "that the wrong dictionary was used" << VCD_ENDL;
1283     return RESULT_ERROR;
1284   }
1285   if (!instructions_and_sizes_.Empty()) {
1286     VCD_ERROR << "Excess instructions and sizes left over "
1287                  "after decoding target window" << VCD_ENDL;
1288       return RESULT_ERROR;
1289   }
1290   if (!IsInterleaved()) {
1291     // Standard format is being used, with three separate sections for the
1292     // instructions, data, and addresses.
1293     if (!data_for_add_and_run_.Empty()) {
1294       VCD_ERROR << "Excess ADD/RUN data left over "
1295                    "after decoding target window" << VCD_ENDL;
1296         return RESULT_ERROR;
1297     }
1298     if (!addresses_for_copy_.Empty()) {
1299       VCD_ERROR << "Excess COPY addresses left over "
1300                    "after decoding target window" << VCD_ENDL;
1301         return RESULT_ERROR;
1302     }
1303     // Reached the end of the window.  Update the ParseableChunk to point to the
1304     // end of the addresses section, which is the last section in the window.
1305     parseable_chunk->SetPosition(addresses_for_copy_.End());
1306   } else {
1307     // Interleaved format is being used.
1308     UpdateInstructionPointer(parseable_chunk);
1309   }
1310   return RESULT_SUCCESS;
1311 }
1312 
DecodeWindow(ParseableChunk * parseable_chunk)1313 VCDiffResult VCDiffDeltaFileWindow::DecodeWindow(
1314     ParseableChunk* parseable_chunk) {
1315   if (!parent_) {
1316     VCD_DFATAL << "Internal error: VCDiffDeltaFileWindow::DecodeWindow() "
1317                   "called before VCDiffDeltaFileWindow::Init()" << VCD_ENDL;
1318     return RESULT_ERROR;
1319   }
1320   if (!found_header_) {
1321     switch (ReadHeader(parseable_chunk)) {
1322       case RESULT_END_OF_DATA:
1323         return RESULT_END_OF_DATA;
1324       case RESULT_ERROR:
1325         return RESULT_ERROR;
1326       default:
1327         // Reset address cache between windows (RFC section 5.1)
1328         if (!parent_->addr_cache()->Init()) {
1329           VCD_DFATAL << "Error initializing address cache" << VCD_ENDL;
1330           return RESULT_ERROR;
1331         }
1332     }
1333   } else {
1334     // We are resuming a window that was partially decoded before a
1335     // RESULT_END_OF_DATA was returned.  This can only happen on the first
1336     // loop iteration, and only if the interleaved format is enabled and used.
1337     if (!IsInterleaved()) {
1338       VCD_DFATAL << "Internal error: Resumed decoding of a delta file window"
1339                     " when interleaved format is not being used" << VCD_ENDL;
1340       return RESULT_ERROR;
1341     }
1342     UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(),
1343                                      parseable_chunk->End());
1344     reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(),
1345                            instructions_and_sizes_.End());
1346   }
1347   switch (DecodeBody(parseable_chunk)) {
1348     case RESULT_END_OF_DATA:
1349       if (MoreDataExpected()) {
1350         return RESULT_END_OF_DATA;
1351       } else {
1352         VCD_ERROR << "End of data reached while decoding VCDIFF delta file"
1353                   << VCD_ENDL;
1354         // fall through to RESULT_ERROR case
1355       }
1356     case RESULT_ERROR:
1357       return RESULT_ERROR;
1358     default:
1359       break;  // DecodeBody succeeded
1360   }
1361   // Get ready to read a new delta window
1362   Reset();
1363   return RESULT_SUCCESS;
1364 }
1365 
1366 // *** Methods for VCDiffStreamingDecoder
1367 
VCDiffStreamingDecoder()1368 VCDiffStreamingDecoder::VCDiffStreamingDecoder()
1369 : impl_(new VCDiffStreamingDecoderImpl) { }
1370 
~VCDiffStreamingDecoder()1371 VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; }
1372 
StartDecoding(const char * source,size_t len)1373 void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) {
1374   impl_->StartDecoding(source, len);
1375 }
1376 
DecodeChunkToInterface(const char * data,size_t len,OutputStringInterface * output_string)1377 bool VCDiffStreamingDecoder::DecodeChunkToInterface(
1378     const char* data,
1379     size_t len,
1380     OutputStringInterface* output_string) {
1381   return impl_->DecodeChunk(data, len, output_string);
1382 }
1383 
FinishDecoding()1384 bool VCDiffStreamingDecoder::FinishDecoding() {
1385   return impl_->FinishDecoding();
1386 }
1387 
SetMaximumTargetFileSize(size_t new_maximum_target_file_size)1388 bool VCDiffStreamingDecoder::SetMaximumTargetFileSize(
1389     size_t new_maximum_target_file_size) {
1390   return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size);
1391 }
1392 
SetMaximumTargetWindowSize(size_t new_maximum_target_window_size)1393 bool VCDiffStreamingDecoder::SetMaximumTargetWindowSize(
1394     size_t new_maximum_target_window_size) {
1395   return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size);
1396 }
1397 
SetAllowVcdTarget(bool allow_vcd_target)1398 void VCDiffStreamingDecoder::SetAllowVcdTarget(bool allow_vcd_target) {
1399   impl_->SetAllowVcdTarget(allow_vcd_target);
1400 }
1401 
DecodeToInterface(const char * dictionary_ptr,size_t dictionary_size,const string & encoding,OutputStringInterface * target)1402 bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr,
1403                                       size_t dictionary_size,
1404                                       const string& encoding,
1405                                       OutputStringInterface* target) {
1406   target->clear();
1407   decoder_.StartDecoding(dictionary_ptr, dictionary_size);
1408   if (!decoder_.DecodeChunkToInterface(encoding.data(),
1409                                        encoding.size(),
1410                                        target)) {
1411     return false;
1412   }
1413   return decoder_.FinishDecoding();
1414 }
1415 
1416 }  // namespace open_vcdiff
1417