// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Implements a Decoder for the format described in
// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
//
// The RFC describes the possibility of using a secondary compressor
// to further reduce the size of each section of the VCDIFF output.
// That feature is not supported in this implementation of the encoder
// and decoder.
// No secondary compressor types have been publicly registered with
// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
// in the more than five years since the registry was created, so there
// is no standard set of compressor IDs which would be generated by other
// encoders or accepted by other decoders.
29 
30 #include <config.h>
31 #include "google/vcdecoder.h"
32 #include <stddef.h>  // size_t, ptrdiff_t
33 #include <stdint.h>  // int32_t
34 #include <string.h>  // memcpy, memset
35 #include <memory>  // auto_ptr
36 #include <string>
37 #include "addrcache.h"
38 #include "checksum.h"
39 #include "codetable.h"
40 #include "decodetable.h"
41 #include "headerparser.h"
42 #include "logging.h"
43 #include "google/output_string.h"
44 #include "varint_bigendian.h"
45 #include "vcdiff_defs.h"
46 
47 namespace open_vcdiff {
48 
// This class is used to parse delta file windows as described
// in RFC sections 4.2 and 4.3.  Its methods are not thread-safe.
//
// Here is the window format copied from the RFC:
//
// Window1
//     Win_Indicator                            - byte
//     [Source segment size]                    - integer
//     [Source segment position]                - integer
//     The delta encoding of the target window
//         Length of the delta encoding         - integer
//         The delta encoding
//             Size of the target window        - integer
//             Delta_Indicator                  - byte
//             Length of data for ADDs and RUNs - integer
//             Length of instructions and sizes - integer
//             Length of addresses for COPYs    - integer
//             Data section for ADDs and RUNs   - array of bytes
//             Instructions and sizes section   - array of bytes
//             Addresses section for COPYs      - array of bytes
// Window2
// ...
//
// Sample usage:
//
// VCDiffDeltaFileWindow delta_window_;
// delta_window_.Init(parent);
// ParseableChunk parseable_chunk(input_buffer,
//                                input_size,
//                                leftover_unencoded_bytes);
// switch (delta_window_.DecodeWindows(&parseable_chunk)) {
//   case RESULT_END_OF_DATA:
//     <Read more input and retry DecodeWindows later.>
//   case RESULT_ERROR:
//     <Handle error case.  An error log message has already been generated.>
// }
//
// DecodeWindows consumes as many windows from the input as it can.  It only
// needs to be placed within a loop if the loop is used to obtain more input
// (delta file) data.
//
90 class VCDiffDeltaFileWindow {
91  public:
92   VCDiffDeltaFileWindow();
93   ~VCDiffDeltaFileWindow();
94 
95   // Init() should be called immediately after constructing the
96   // VCDiffDeltaFileWindow().  It must be called before DecodeWindows() can be
97   // invoked, or an error will occur.
98   void Init(VCDiffStreamingDecoderImpl* parent);
99 
100   // Resets the pointers to the data sections in the current window.
101   void Reset();
102 
UseCodeTable(const VCDiffCodeTableData & code_table_data,unsigned char max_mode)103   bool UseCodeTable(const VCDiffCodeTableData& code_table_data,
104                     unsigned char max_mode) {
105     return reader_.UseCodeTable(code_table_data, max_mode);
106   }
107 
108   // Decodes as many delta windows as possible using the input data from
109   // *parseable_chunk.  Appends the decoded target windows to
110   // parent_->decoded_target().  Returns RESULT_SUCCESS on success, or
111   // RESULT_END_OF_DATA if the end of input was reached before the entire window
112   // could be decoded and more input is expected (only possible if
113   // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
114   // decoding.  In the RESULT_ERROR case, the value of parseable_chunk->pointer_
115   // is undefined; otherwise, parseable_chunk->Advance() is called to point to
116   // the input data position just after the data that has been decoded.
117   //
118   // If planned_target_file_size is not set to kUnlimitedBytes, then the decoder
119   // expects *exactly* this number of target bytes to be decoded from one or
120   // more delta file windows.  If this number is met exactly after finishing a
121   // delta window, this function will return RESULT_SUCCESS without processing
122   // any more bytes from data_pointer.  If this number is exceeded while
123   // decoding a window, but was not met before starting that window,
124   // then RESULT_ERROR will be returned.
125   //
126   VCDiffResult DecodeWindows(ParseableChunk* parseable_chunk);
127 
FoundWindowHeader() const128   bool FoundWindowHeader() const {
129     return found_header_;
130   }
131 
MoreDataExpected() const132   bool MoreDataExpected() const {
133     // When parsing an interleaved-format delta file,
134     // every time DecodeBody() exits, interleaved_bytes_expected_
135     // will be decremented by the number of bytes parsed.  If it
136     // reaches zero, then there is no more data expected because
137     // the size of the interleaved section (given in the window
138     // header) has been reached.
139     return IsInterleaved() && (interleaved_bytes_expected_ > 0);
140   }
141 
target_window_start_pos() const142   size_t target_window_start_pos() const { return target_window_start_pos_; }
143 
set_target_window_start_pos(size_t new_start_pos)144   void set_target_window_start_pos(size_t new_start_pos) {
145     target_window_start_pos_ = new_start_pos;
146   }
147 
148   // Returns the number of bytes remaining to be decoded in the target window.
149   // If not in the process of decoding a window, returns 0.
150   size_t TargetBytesRemaining();
151 
152  private:
153   // Reads the header of the window section as described in RFC sections 4.2 and
154   // 4.3, up to and including the value "Length of addresses for COPYs".  If the
155   // entire header is found, this function sets up the DeltaWindowSections
156   // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so
157   // that the decoder can begin decoding the opcodes in these sections.  Returns
158   // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of
159   // available data was reached before the entire header could be read.  (The
160   // latter may be an error condition if there is no more data available.)
161   // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the
162   // parsed header.
163   //
164   VCDiffResult ReadHeader(ParseableChunk* parseable_chunk);
165 
166   // After the window header has been parsed as far as the Delta_Indicator,
167   // this function is called to parse the following delta window header fields:
168   //
169   //     Length of data for ADDs and RUNs - integer (VarintBE format)
170   //     Length of instructions and sizes - integer (VarintBE format)
171   //     Length of addresses for COPYs    - integer (VarintBE format)
172   //
173   // If has_checksum_ is true, it also looks for the following element:
174   //
175   //     Adler32 checksum            - unsigned 32-bit integer (VarintBE format)
176   //
177   // It sets up the DeltaWindowSections instructions_and_sizes_,
178   // data_for_add_and_run_, and addresses_for_copy_.  If the interleaved format
179   // is being used, all three sections will include the entire window body; if
180   // the standard format is used, three non-overlapping window sections will be
181   // defined.  Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA
182   // if standard format is being used and there is not enough input data to read
183   // the entire window body.  Otherwise, returns RESULT_SUCCESS.
184   VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser);
185 
186   // Decodes the body of the window section as described in RFC sections 4.3,
187   // including the sections "Data section for ADDs and RUNs", "Instructions
188   // and sizes section", and "Addresses section for COPYs".  These sections
189   // must already have been set up by ReadWindowHeader().  Returns a
190   // non-negative value on success, or RESULT_END_OF_DATA if the end of input
191   // was reached before the entire window could be decoded (only possible if
192   // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
193   // decoding.  Appends as much of the decoded target window as possible to
194   // parent->decoded_target().
195   //
196   int DecodeBody(ParseableChunk* parseable_chunk);
197 
198   // Returns the number of bytes already decoded into the target window.
199   size_t TargetBytesDecoded();
200 
201   // Decodes a single ADD instruction, updating parent_->decoded_target_.
202   VCDiffResult DecodeAdd(size_t size);
203 
204   // Decodes a single RUN instruction, updating parent_->decoded_target_.
205   VCDiffResult DecodeRun(size_t size);
206 
207   // Decodes a single COPY instruction, updating parent_->decoded_target_.
208   VCDiffResult DecodeCopy(size_t size, unsigned char mode);
209 
210   // When using the interleaved format, this function is called both on parsing
211   // the header and on resuming after a RESULT_END_OF_DATA was returned from a
212   // previous call to DecodeBody().  It sets up all three section pointers to
213   // reference the same interleaved stream of instructions, sizes, addresses,
214   // and data.  These pointers must be reset every time that work resumes on a
215   // delta window,  because the input data string may have been changed or
216   // resized since DecodeBody() last returned.
UpdateInterleavedSectionPointers(const char * data_pos,const char * data_end)217   void UpdateInterleavedSectionPointers(const char* data_pos,
218                                         const char* data_end) {
219     const ptrdiff_t available_data = data_end - data_pos;
220     // Don't read past the end of currently-available data
221     if (available_data > interleaved_bytes_expected_) {
222       instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_);
223     } else {
224       instructions_and_sizes_.Init(data_pos, available_data);
225     }
226     data_for_add_and_run_.Init(&instructions_and_sizes_);
227     addresses_for_copy_.Init(&instructions_and_sizes_);
228   }
229 
230   // If true, the interleaved format described in AllowInterleaved() is used
231   // for the current delta file.  Only valid after ReadWindowHeader() has been
232   // called and returned a positive number (i.e., the whole header was parsed),
233   // but before the window has finished decoding.
234   //
IsInterleaved() const235   bool IsInterleaved() const {
236     // If the sections are interleaved, both addresses_for_copy_ and
237     // data_for_add_and_run_ should point at instructions_and_sizes_.
238     return !addresses_for_copy_.IsOwned();
239   }
240 
241   // Executes a single COPY or ADD instruction, appending data to
242   // parent_->decoded_target().
243   void CopyBytes(const char* data, size_t size);
244 
245   // Executes a single RUN instruction, appending data to
246   // parent_->decoded_target().
247   void RunByte(unsigned char byte, size_t size);
248 
249   // Advance *parseable_chunk to point to the current position in the
250   // instructions/sizes section.  If interleaved format is used, then
251   // decrement the number of expected bytes in the instructions/sizes section
252   // by the number of instruction/size bytes parsed.
253   void UpdateInstructionPointer(ParseableChunk* parseable_chunk);
254 
255   // The parent object which was passed to Init().
256   VCDiffStreamingDecoderImpl* parent_;
257 
258   // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader()
259   // has been called and succeeded in parsing the delta window header, but the
260   // entire window has not yet been decoded.
261   bool found_header_;
262 
263   // Contents and length of the current source window.  source_segment_ptr_
264   // will be non-NULL if (a) the window section header for the current window
265   // has been read, but the window has not yet finished decoding; or
266   // (b) the window did not specify a source segment.
267   const char* source_segment_ptr_;
268   size_t source_segment_length_;
269 
270   // The delta encoding window sections as defined in RFC section 4.3.
271   // The pointer for each section will be incremented as data is consumed and
272   // decoded from that section.  If the interleaved format is used,
273   // data_for_add_and_run_ and addresses_for_copy_ will both point to
274   // instructions_and_sizes_; otherwise, they will be separate data sections.
275   //
276   DeltaWindowSection instructions_and_sizes_;
277   DeltaWindowSection data_for_add_and_run_;
278   DeltaWindowSection addresses_for_copy_;
279 
280   // The expected bytes left to decode in instructions_and_sizes_.  Only used
281   // for the interleaved format.
282   int interleaved_bytes_expected_;
283 
284   // The expected length of the target window once it has been decoded.
285   size_t target_window_length_;
286 
287   // The index in decoded_target at which the first byte of the current
288   // target window was/will be written.
289   size_t target_window_start_pos_;
290 
291   // If has_checksum_ is true, then expected_checksum_ contains an Adler32
292   // checksum of the target window data.  This is an extension included in the
293   // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard.
294   bool has_checksum_;
295   VCDChecksum expected_checksum_;
296 
297   VCDiffCodeTableReader reader_;
298 
299   // Making these private avoids implicit copy constructor & assignment operator
300   VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&);  // NOLINT
301   void operator=(const VCDiffDeltaFileWindow&);
302 };
303 
// *** Inline methods for VCDiffDeltaFileWindow
305 
VCDiffDeltaFileWindow()306 inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) {
307   Reset();
308 }
309 
~VCDiffDeltaFileWindow()310 inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { }
311 
Init(VCDiffStreamingDecoderImpl * parent)312 inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) {
313   parent_ = parent;
314 }
315 
316 class VCDiffStreamingDecoderImpl {
317  public:
318   typedef std::string string;
319 
320   // The default maximum target file size (and target window size) if
321   // SetMaximumTargetFileSize() is not called.
322   static const size_t kDefaultMaximumTargetFileSize = 67108864U;  // 64 MB
323 
324   // The largest value that can be passed to SetMaximumTargetWindowSize().
325   // Using a larger value will result in an error.
326   static const size_t kTargetSizeLimit = 2147483647U;  // INT32_MAX
327 
328   // A constant that is the default value for planned_target_file_size_,
329   // indicating that the decoder does not have an expected length
330   // for the target data.
331   static const size_t kUnlimitedBytes = static_cast<size_t>(-3);
332 
333   VCDiffStreamingDecoderImpl();
334   ~VCDiffStreamingDecoderImpl();
335 
336   // Resets all member variables to their initial states.
337   void Reset();
338 
339   // These functions are identical to their counterparts
340   // in VCDiffStreamingDecoder.
341   //
342   void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);
343 
344   bool DecodeChunk(const char* data,
345                    size_t len,
346                    OutputStringInterface* output_string);
347 
348   bool FinishDecoding();
349 
350   // If true, the version of VCDIFF used in the current delta file allows
351   // for the interleaved format, in which instructions, addresses and data
352   // are all sent interleaved in the instructions section of each window
353   // rather than being sent in separate sections.  This is not part of
354   // the VCDIFF draft standard, so we've defined a special version code
355   // 'S' which implies that this feature is available.  Even if interleaving
356   // is supported, it is not mandatory; interleaved format will be implied
357   // if the address and data sections are both zero-length.
358   //
AllowInterleaved() const359   bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; }
360 
361   // If true, the version of VCDIFF used in the current delta file allows
362   // each delta window to contain an Adler32 checksum of the target window data.
363   // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then
364   // this checksum will appear as a variable-length integer, just after the
365   // "length of addresses for COPYs" value and before the window data sections.
366   // It is possible for some windows in a delta file to use the checksum feature
367   // and for others not to use it (and leave the flag bit set to 0.)
368   // Just as with AllowInterleaved(), this extension is not part of the draft
369   // standard and is only available when the version code 'S' is specified.
370   //
AllowChecksum() const371   bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; }
372 
SetMaximumTargetFileSize(size_t new_maximum_target_file_size)373   bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) {
374     maximum_target_file_size_ = new_maximum_target_file_size;
375     return true;
376   }
377 
SetMaximumTargetWindowSize(size_t new_maximum_target_window_size)378   bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) {
379     if (new_maximum_target_window_size > kTargetSizeLimit) {
380       LOG(ERROR) << "Specified maximum target window size "
381                  << new_maximum_target_window_size << " exceeds limit of "
382                  << kTargetSizeLimit << " bytes" << LOG_ENDL;
383       return false;
384     }
385     maximum_target_window_size_ = new_maximum_target_window_size;
386     return true;
387   }
388 
389   // See description of planned_target_file_size_, below.
HasPlannedTargetFileSize() const390   bool HasPlannedTargetFileSize() const {
391     return planned_target_file_size_ != kUnlimitedBytes;
392   }
393 
SetPlannedTargetFileSize(size_t planned_target_file_size)394   void SetPlannedTargetFileSize(size_t planned_target_file_size) {
395     planned_target_file_size_ = planned_target_file_size;
396   }
397 
AddToTotalTargetWindowSize(size_t window_size)398   void AddToTotalTargetWindowSize(size_t window_size) {
399     total_of_target_window_sizes_ += window_size;
400   }
401 
402   // Checks to see whether the decoded target data has reached its planned size.
ReachedPlannedTargetFileSize() const403   bool ReachedPlannedTargetFileSize() const {
404     if (!HasPlannedTargetFileSize()) {
405       return false;
406     }
407     // The planned target file size should not have been exceeded.
408     // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of
409     // each target window would not make the target file exceed that limit, and
410     // DecodeBody() will return RESULT_ERROR if the actual decoded output ever
411     // exceeds the advertised target window size.
412     if (total_of_target_window_sizes_ > planned_target_file_size_) {
413       LOG(DFATAL) << "Internal error: Decoded data size "
414                   << total_of_target_window_sizes_
415                   << " exceeds planned target file size "
416                   << planned_target_file_size_ << LOG_ENDL;
417       return true;
418     }
419     return total_of_target_window_sizes_ == planned_target_file_size_;
420   }
421 
422   // Checks to see whether adding a new target window of the specified size
423   // would exceed the planned target file size, the maximum target file size,
424   // or the maximum target window size.  If so, logs an error and returns true;
425   // otherwise, returns false.
426   bool TargetWindowWouldExceedSizeLimits(size_t window_size) const;
427 
428   // Returns the amount of input data passed to the last DecodeChunk()
429   // that was not consumed by the decoder.  This is essential if
430   // SetPlannedTargetFileSize() is being used, in order to preserve the
431   // remaining input data stream once the planned target file has been decoded.
GetUnconsumedDataSize() const432   size_t GetUnconsumedDataSize() const {
433     return unparsed_bytes_.size();
434   }
435 
436   // This function will return true if the decoder has parsed a complete delta
437   // file header plus zero or more delta file windows, with no data left over.
438   // It will also return true if no delta data at all was decoded.  If these
439   // conditions are not met, then FinishDecoding() should not be called.
IsDecodingComplete() const440   bool IsDecodingComplete() const {
441     if (!FoundFileHeader()) {
442       // No complete delta file header has been parsed yet.  DecodeChunk()
443       // may have received some data that it hasn't yet parsed, in which case
444       // decoding is incomplete.
445       return unparsed_bytes_.empty();
446     } else if (custom_code_table_decoder_.get()) {
447       // The decoder is in the middle of parsing a custom code table.
448       return false;
449     } else if (delta_window_.FoundWindowHeader()) {
450       // The decoder is in the middle of parsing an interleaved format delta
451       // window.
452       return false;
453     } else if (ReachedPlannedTargetFileSize()) {
454       // The decoder found exactly the planned number of bytes.  In this case
455       // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover
456       // data after the end of the delta file.
457       return true;
458     } else {
459       // No complete delta file window has been parsed yet.  DecodeChunk()
460       // may have received some data that it hasn't yet parsed, in which case
461       // decoding is incomplete.
462       return unparsed_bytes_.empty();
463     }
464   }
465 
dictionary_ptr() const466   const char* dictionary_ptr() const { return dictionary_ptr_; }
467 
dictionary_size() const468   size_t dictionary_size() const { return dictionary_size_; }
469 
addr_cache()470   VCDiffAddressCache* addr_cache() { return addr_cache_.get(); }
471 
decoded_target()472   string* decoded_target() { return &decoded_target_; }
473 
allow_vcd_target() const474   bool allow_vcd_target() const { return allow_vcd_target_; }
475 
SetAllowVcdTarget(bool allow_vcd_target)476   void SetAllowVcdTarget(bool allow_vcd_target) {
477     if (start_decoding_was_called_) {
478       LOG(DFATAL) << "SetAllowVcdTarget() called after StartDecoding()"
479                   << LOG_ENDL;
480       return;
481     }
482     allow_vcd_target_ = allow_vcd_target;
483   }
484 
485   // Removes the contents of decoded_target_ that precede the beginning of the
486   // current window.
487   void TruncateToBeginningOfWindow();
488 
489  private:
490   // Reads the VCDiff delta file header section as described in RFC section 4.1,
491   // except the custom code table data.  Returns RESULT_ERROR if an error
492   // occurred, or RESULT_END_OF_DATA if the end of available data was reached
493   // before the entire header could be read.  (The latter may be an error
494   // condition if there is no more data available.)  Otherwise, advances
495   // data->position_ past the header and returns RESULT_SUCCESS.
496   //
497   VCDiffResult ReadDeltaFileHeader(ParseableChunk* data);
498 
499   // Indicates whether or not the header has already been read.
FoundFileHeader() const500   bool FoundFileHeader() const { return addr_cache_.get() != NULL; }
501 
502   // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta
503   // file header, this function parses the custom cache sizes and initializes
504   // a nested VCDiffStreamingDecoderImpl object that will be used to parse the
505   // custom code table in ReadCustomCodeTable().  Returns RESULT_ERROR if an
506   // error occurred, or RESULT_END_OF_DATA if the end of available data was
507   // reached before the custom cache sizes could be read.  Otherwise, returns
508   // the number of bytes read.
509   //
510   int InitCustomCodeTable(const char* data_start, const char* data_end);
511 
512   // If a custom code table was specified in the header section that was parsed
513   // by ReadDeltaFileHeader(), this function makes a recursive call to another
514   // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the
515   // custom code table is expected to be supplied as an embedded VCDIFF
516   // encoding that uses the standard code table.  Returns RESULT_ERROR if an
517   // error occurs, or RESULT_END_OF_DATA if the end of available data was
518   // reached before the entire custom code table could be read.  Otherwise,
519   // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded
520   // custom code table.  If the function returns RESULT_SUCCESS or
521   // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes.
522   //
523   VCDiffResult ReadCustomCodeTable(ParseableChunk* data);
524 
525   // Contents and length of the source (dictionary) data.
526   const char* dictionary_ptr_;
527   size_t dictionary_size_;
528 
529   // This string will be used to store any unparsed bytes left over when
530   // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA.
531   // It will also be used to concatenate those unparsed bytes with the data
532   // supplied to the next call to DecodeChunk(), so that they appear in
533   // contiguous memory.
534   string unparsed_bytes_;
535 
536   // The portion of the target file that has been decoded so far.  This will be
537   // used to fill the output string for DecodeChunk(), and will also be used to
538   // execute COPY instructions that reference target data.  Since the source
539   // window can come from a range of addresses in the previously decoded target
540   // data, the entire target file needs to be available to the decoder, not just
541   // the current target window.
542   string decoded_target_;
543 
544   // The VCDIFF version byte (also known as "header4") from the
545   // delta file header.
546   unsigned char vcdiff_version_code_;
547 
548   VCDiffDeltaFileWindow delta_window_;
549 
550   std::auto_ptr<VCDiffAddressCache> addr_cache_;
551 
552   // Will be NULL unless a custom code table has been defined.
553   std::auto_ptr<VCDiffCodeTableData> custom_code_table_;
554 
555   // Used to receive the decoded custom code table.
556   string custom_code_table_string_;
557 
558   // If a custom code table is specified, it will be expressed
559   // as an embedded VCDIFF delta file which uses the default code table
560   // as the source file (dictionary).  Use a child decoder object
561   // to decode that delta file.
562   std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_;
563 
564   // If set, then the decoder is expecting *exactly* this number of
565   // target bytes to be decoded from one or more delta file windows.
566   // If this number is exceeded while decoding a window, but was not met
567   // before starting on that window, an error will be reported.
568   // If FinishDecoding() is called before this number is met, an error
569   // will also be reported.  This feature is used for decoding the
570   // embedded code table data within a VCDIFF delta file; we want to
571   // stop processing the embedded data once the entire code table has
572   // been decoded, and treat the rest of the available data as part
573   // of the enclosing delta file.
574   size_t planned_target_file_size_;
575 
576   size_t maximum_target_file_size_;
577 
578   size_t maximum_target_window_size_;
579 
580   // Contains the sum of the decoded sizes of all target windows seen so far,
581   // including the expected total size of the current target window in progress
582   // (even if some of the current target window has not yet been decoded.)
583   size_t total_of_target_window_sizes_;
584 
585   // This value is used to ensure the correct order of calls to the interface
586   // functions, i.e., a single call to StartDecoding(), followed by zero or
587   // more calls to DecodeChunk(), followed by a single call to
588   // FinishDecoding().
589   bool start_decoding_was_called_;
590 
591   // If this value is true then the VCD_TARGET flag can be specified to allow
592   // the source segment to be chosen from the previously-decoded target data.
593   // (This is the default behavior.)  If it is false, then specifying the
594   // VCD_TARGET flag is considered an error, and the decoder does not need to
595   // keep in memory any decoded target data prior to the current window.
596   bool allow_vcd_target_;
597 
598   // Making these private avoids implicit copy constructor & assignment operator
599   VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&);  // NOLINT
600   void operator=(const VCDiffStreamingDecoderImpl&);
601 };
602 
// *** Methods for VCDiffStreamingDecoderImpl
604 
605 const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize;
606 const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes;
607 
VCDiffStreamingDecoderImpl()608 VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl()
609     : maximum_target_file_size_(kDefaultMaximumTargetFileSize),
610       maximum_target_window_size_(kDefaultMaximumTargetFileSize),
611       allow_vcd_target_(true) {
612   delta_window_.Init(this);
613   Reset();
614 }
615 
616 // Reset() will delete the component objects without reallocating them.
~VCDiffStreamingDecoderImpl()617 VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); }
618 
Reset()619 void VCDiffStreamingDecoderImpl::Reset() {
620   start_decoding_was_called_ = false;
621   dictionary_ptr_ = NULL;
622   dictionary_size_ = 0;
623   vcdiff_version_code_ = '\0';
624   planned_target_file_size_ = kUnlimitedBytes;
625   total_of_target_window_sizes_ = 0;
626   addr_cache_.reset();
627   custom_code_table_.reset();
628   custom_code_table_decoder_.reset();
629   delta_window_.Reset();
630 }
631 
TruncateToBeginningOfWindow()632 void VCDiffStreamingDecoderImpl::TruncateToBeginningOfWindow() {
633   // Conserve the data for the current window that has been partially decoded.
634   decoded_target_.erase(0, delta_window_.target_window_start_pos());
635   delta_window_.set_target_window_start_pos(0);
636 }
637 
StartDecoding(const char * dictionary_ptr,size_t dictionary_size)638 void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr,
639                                                size_t dictionary_size) {
640   if (start_decoding_was_called_) {
641     LOG(DFATAL) << "StartDecoding() called twice without FinishDecoding()"
642                 << LOG_ENDL;
643     return;
644   }
645   unparsed_bytes_.clear();
646   decoded_target_.clear();  // delta_window_.Reset() depends on this
647   Reset();
648   dictionary_ptr_ = dictionary_ptr;
649   dictionary_size_ = dictionary_size;
650   start_decoding_was_called_ = true;
651 }
652 
653 // Reads the VCDiff delta file header section as described in RFC section 4.1:
654 //
655 //     Header1                                  - byte = 0xD6 (ASCII 'V' | 0x80)
656 //     Header2                                  - byte = 0xC3 (ASCII 'C' | 0x80)
657 //     Header3                                  - byte = 0xC4 (ASCII 'D' | 0x80)
658 //     Header4                                  - byte
659 //     Hdr_Indicator                            - byte
660 //     [Secondary compressor ID]                - byte
661 //     [Length of code table data]              - integer
662 //     [Code table data]
663 //
664 // Initializes the code table and address cache objects.  Returns RESULT_ERROR
665 // if an error occurred, and RESULT_END_OF_DATA if the end of available data was
666 // reached before the entire header could be read.  (The latter may be an error
667 // condition if there is no more data available.)  Otherwise, returns
668 // RESULT_SUCCESS, and removes the header bytes from the data string.
669 //
670 // It's relatively inefficient to expect this function to parse any number of
671 // input bytes available, down to 1 byte, but it is necessary in case the input
672 // is not a properly formatted VCDIFF delta file.  If the entire input consists
673 // of two bytes "12", then we should recognize that it does not match the
674 // initial VCDIFF magic number "VCD" and report an error, rather than waiting
675 // indefinitely for more input that will never arrive.
676 //
ReadDeltaFileHeader(ParseableChunk * data)677 VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader(
678     ParseableChunk* data) {
679   if (FoundFileHeader()) {
680     return RESULT_SUCCESS;
681   }
682   size_t data_size = data->UnparsedSize();
683   const DeltaFileHeader* header =
684       reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData());
685   bool wrong_magic_number = false;
686   switch (data_size) {
687     // Verify only the bytes that are available.
688     default:
689       // Found header contents up to and including VCDIFF version
690       vcdiff_version_code_ = header->header4;
691       if ((vcdiff_version_code_ != 0x00) &&  // Draft standard VCDIFF (RFC 3284)
692           (vcdiff_version_code_ != 'S')) {   // Enhancements for SDCH protocol
693         LOG(ERROR) << "Unrecognized VCDIFF format version" << LOG_ENDL;
694         return RESULT_ERROR;
695       }
696       // fall through
697     case 3:
698       if (header->header3 != 0xC4) {  // magic value 'D' | 0x80
699         wrong_magic_number = true;
700       }
701       // fall through
702     case 2:
703       if (header->header2 != 0xC3) {  // magic value 'C' | 0x80
704         wrong_magic_number = true;
705       }
706       // fall through
707     case 1:
708       if (header->header1 != 0xD6) {  // magic value 'V' | 0x80
709         wrong_magic_number = true;
710       }
711       // fall through
712     case 0:
713       if (wrong_magic_number) {
714         LOG(ERROR) << "Did not find VCDIFF header bytes; "
715                       "input is not a VCDIFF delta file" << LOG_ENDL;
716         return RESULT_ERROR;
717       }
718       if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA;
719   }
720   // Secondary compressor not supported.
721   if (header->hdr_indicator & VCD_DECOMPRESS) {
722     LOG(ERROR) << "Secondary compression is not supported" << LOG_ENDL;
723     return RESULT_ERROR;
724   }
725   if (header->hdr_indicator & VCD_CODETABLE) {
726     int bytes_parsed = InitCustomCodeTable(
727         data->UnparsedData() + sizeof(DeltaFileHeader),
728         data->End());
729     switch (bytes_parsed) {
730       case RESULT_ERROR:
731         return RESULT_ERROR;
732       case RESULT_END_OF_DATA:
733         return RESULT_END_OF_DATA;
734       default:
735         data->Advance(sizeof(DeltaFileHeader) + bytes_parsed);
736     }
737   } else {
738     addr_cache_.reset(new VCDiffAddressCache);
739     // addr_cache_->Init() will be called
740     // from VCDiffStreamingDecoderImpl::DecodeChunk()
741     data->Advance(sizeof(DeltaFileHeader));
742   }
743   return RESULT_SUCCESS;
744 }
745 
InitCustomCodeTable(const char * data_start,const char * data_end)746 int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start,
747                                                     const char* data_end) {
748   // A custom code table is being specified.  Parse the variable-length
749   // cache sizes and begin parsing the encoded custom code table.
750   int32_t near_cache_size = 0, same_cache_size = 0;
751   VCDiffHeaderParser header_parser(data_start, data_end);
752   if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) {
753     return header_parser.GetResult();
754   }
755   if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) {
756     return header_parser.GetResult();
757   }
758   custom_code_table_.reset(new struct VCDiffCodeTableData);
759   memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData));
760   custom_code_table_string_.clear();
761   addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size));
762   // addr_cache_->Init() will be called
763   // from VCDiffStreamingDecoderImpl::DecodeChunk()
764 
765   // If we reach this point (the start of the custom code table)
766   // without encountering a RESULT_END_OF_DATA condition, then we won't call
767   // ReadDeltaFileHeader() again for this delta file.
768   //
769   // Instantiate a recursive decoder to interpret the custom code table
770   // as a VCDIFF encoding of the default code table.
771   custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl);
772   custom_code_table_decoder_->StartDecoding(
773       reinterpret_cast<const char*>(
774           &VCDiffCodeTableData::kDefaultCodeTableData),
775       sizeof(VCDiffCodeTableData::kDefaultCodeTableData));
776   custom_code_table_decoder_->SetPlannedTargetFileSize(
777       sizeof(*custom_code_table_));
778   return static_cast<int>(header_parser.ParsedSize());
779 }
780 
ReadCustomCodeTable(ParseableChunk * data)781 VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable(
782     ParseableChunk* data) {
783   if (!custom_code_table_decoder_.get()) {
784     return RESULT_SUCCESS;
785   }
786   if (!custom_code_table_.get()) {
787     LOG(DFATAL) << "Internal error:  custom_code_table_decoder_ is set,"
788                    " but custom_code_table_ is NULL" << LOG_ENDL;
789     return RESULT_ERROR;
790   }
791   OutputString<string> output_string(&custom_code_table_string_);
792   if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(),
793                                                data->UnparsedSize(),
794                                                &output_string)) {
795     return RESULT_ERROR;
796   }
797   if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) {
798     // Skip over the consumed data.
799     data->Finish();
800     return RESULT_END_OF_DATA;
801   }
802   if (!custom_code_table_decoder_->FinishDecoding()) {
803     return RESULT_ERROR;
804   }
805   if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) {
806     LOG(DFATAL) << "Decoded custom code table size ("
807                 << custom_code_table_string_.length()
808                 << ") does not match size of a code table ("
809                 << sizeof(*custom_code_table_) << ")" << LOG_ENDL;
810     return RESULT_ERROR;
811   }
812   memcpy(custom_code_table_.get(),
813          custom_code_table_string_.data(),
814          sizeof(*custom_code_table_));
815   custom_code_table_string_.clear();
816   // Skip over the consumed data.
817   data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize());
818   custom_code_table_decoder_.reset();
819   delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode());
820   return RESULT_SUCCESS;
821 }
822 
823 namespace {
824 
825 class TrackNewOutputText {
826  public:
827   typedef std::string string;
828 
TrackNewOutputText(const string & decoded_target)829   explicit TrackNewOutputText(const string& decoded_target)
830       : decoded_target_(decoded_target),
831       initial_decoded_target_size_(decoded_target.size()) { }
832 
AppendNewOutputText(size_t target_bytes_remaining,OutputStringInterface * output_string)833   void AppendNewOutputText(size_t target_bytes_remaining,
834                            OutputStringInterface* output_string) {
835     const size_t bytes_decoded_this_chunk =
836         decoded_target_.size() - initial_decoded_target_size_;
837     if (bytes_decoded_this_chunk > 0) {
838       if (target_bytes_remaining > 0) {
839         // The decoder is midway through decoding a target window.  Resize
840         // output_string to match the expected length.  The interface guarantees
841         // not to resize the output_string more than once per target window
842         // decoded.
843         output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk
844                                               + target_bytes_remaining);
845       }
846       output_string->append(
847           decoded_target_.data() + initial_decoded_target_size_,
848           bytes_decoded_this_chunk);
849     }
850   }
851 
852  private:
853   const string& decoded_target_;
854   size_t initial_decoded_target_size_;
855 };
856 
857 }  // anonymous namespace
858 
DecodeChunk(const char * data,size_t len,OutputStringInterface * output_string)859 bool VCDiffStreamingDecoderImpl::DecodeChunk(
860     const char* data,
861     size_t len,
862     OutputStringInterface* output_string) {
863   if (!start_decoding_was_called_) {
864     LOG(DFATAL) << "DecodeChunk() called without StartDecoding()" << LOG_ENDL;
865     Reset();
866     return false;
867   }
868   ParseableChunk parseable_chunk(data, len);
869   if (!unparsed_bytes_.empty()) {
870     unparsed_bytes_.append(data, len);
871     parseable_chunk.SetDataBuffer(unparsed_bytes_.data(),
872                                   unparsed_bytes_.size());
873   }
874   TrackNewOutputText output_tracker(decoded_target_);
875   VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk);
876   if (RESULT_SUCCESS == result) {
877     result = ReadCustomCodeTable(&parseable_chunk);
878   }
879   if (RESULT_SUCCESS == result) {
880     result = delta_window_.DecodeWindows(&parseable_chunk);
881   }
882   if (RESULT_ERROR == result) {
883     Reset();  // Don't allow further DecodeChunk calls
884     return false;
885   }
886   unparsed_bytes_.assign(parseable_chunk.UnparsedData(),
887                          parseable_chunk.UnparsedSize());
888   output_tracker.AppendNewOutputText(delta_window_.TargetBytesRemaining(),
889                                      output_string);
890   if (!allow_vcd_target()) {
891     // VCD_TARGET will never be used to reference target data beyond the start
892     // of the current window, so throw away any earlier target data.
893     TruncateToBeginningOfWindow();
894   }
895   return true;
896 }
897 
898 // Finishes decoding after all data has been received.  Returns true
899 // if decoding of the entire stream was successful.
FinishDecoding()900 bool VCDiffStreamingDecoderImpl::FinishDecoding() {
901   bool success = true;
902   if (!start_decoding_was_called_) {
903     LOG(WARNING) << "FinishDecoding() called before StartDecoding(),"
904                     " or called after DecodeChunk() returned false"
905                  << LOG_ENDL;
906     success = false;
907   } else if (!IsDecodingComplete()) {
908     LOG(ERROR) << "FinishDecoding() called before parsing entire"
909                   " delta file window" << LOG_ENDL;
910     success = false;
911   }
912   // Reset the object state for the next decode operation
913   Reset();
914   return success;
915 }
916 
TargetWindowWouldExceedSizeLimits(size_t window_size) const917 bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits(
918     size_t window_size) const {
919   if (window_size > maximum_target_window_size_) {
920     LOG(ERROR) << "Length of target window (" << window_size
921                << ") exceeds limit of " << maximum_target_window_size_
922                << " bytes" << LOG_ENDL;
923     return true;
924   }
925   if (HasPlannedTargetFileSize()) {
926     // The logical expression to check would be:
927     //
928     //   total_of_target_window_sizes_ + window_size > planned_target_file_size_
929     //
930     // but the addition might cause an integer overflow if target_bytes_to_add
931     // is very large.  So it is better to check target_bytes_to_add against
932     // the remaining planned target bytes.
933     size_t remaining_planned_target_file_size =
934         planned_target_file_size_ - total_of_target_window_sizes_;
935     if (window_size > remaining_planned_target_file_size) {
936       LOG(ERROR) << "Length of target window (" << window_size
937                  << " bytes) plus previous windows ("
938                  << total_of_target_window_sizes_
939                  << " bytes) would exceed planned size of "
940                  << planned_target_file_size_ << " bytes" << LOG_ENDL;
941       return true;
942     }
943   }
944   size_t remaining_maximum_target_bytes =
945       maximum_target_file_size_ - total_of_target_window_sizes_;
946   if (window_size > remaining_maximum_target_bytes) {
947     LOG(ERROR) << "Length of target window (" << window_size
948                << " bytes) plus previous windows ("
949                << total_of_target_window_sizes_
950                << " bytes) would exceed maximum target file size of "
951                << maximum_target_file_size_ << " bytes" << LOG_ENDL;
952     return true;
953   }
954   return false;
955 }
956 
957 // *** Methods for VCDiffDeltaFileWindow
958 
Reset()959 void VCDiffDeltaFileWindow::Reset() {
960   found_header_ = false;
961 
962   // Mark the start of the current target window.
963   target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U;
964   target_window_length_ = 0;
965 
966   source_segment_ptr_ = NULL;
967   source_segment_length_ = 0;
968 
969   instructions_and_sizes_.Invalidate();
970   data_for_add_and_run_.Invalidate();
971   addresses_for_copy_.Invalidate();
972 
973   interleaved_bytes_expected_ = 0;
974 
975   has_checksum_ = false;
976   expected_checksum_ = 0;
977 }
978 
SetUpWindowSections(VCDiffHeaderParser * header_parser)979 VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections(
980     VCDiffHeaderParser* header_parser) {
981   size_t add_and_run_data_length = 0;
982   size_t instructions_and_sizes_length = 0;
983   size_t addresses_length = 0;
984   if (!header_parser->ParseSectionLengths(has_checksum_,
985                                           &add_and_run_data_length,
986                                           &instructions_and_sizes_length,
987                                           &addresses_length,
988                                           &expected_checksum_)) {
989     return header_parser->GetResult();
990   }
991   if (parent_->AllowInterleaved() &&
992       (add_and_run_data_length == 0) &&
993       (addresses_length == 0)) {
994     // The interleaved format is being used.
995     interleaved_bytes_expected_ =
996         static_cast<int>(instructions_and_sizes_length);
997     UpdateInterleavedSectionPointers(header_parser->UnparsedData(),
998                                      header_parser->End());
999   } else {
1000     // If interleaved format is not used, then the whole window contents
1001     // must be available before decoding can begin.  If only part of
1002     // the current window is available, then report end of data
1003     // and re-parse the whole header when DecodeChunk() is called again.
1004     if (header_parser->UnparsedSize() < (add_and_run_data_length +
1005                                          instructions_and_sizes_length +
1006                                          addresses_length)) {
1007       return RESULT_END_OF_DATA;
1008     }
1009     data_for_add_and_run_.Init(header_parser->UnparsedData(),
1010                                add_and_run_data_length);
1011     instructions_and_sizes_.Init(data_for_add_and_run_.End(),
1012                                  instructions_and_sizes_length);
1013     addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length);
1014     if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) {
1015       LOG(ERROR) << "The end of the instructions section "
1016                      "does not match the end of the delta window" << LOG_ENDL;
1017       return RESULT_ERROR;
1018     }
1019   }
1020   reader_.Init(instructions_and_sizes_.UnparsedDataAddr(),
1021                instructions_and_sizes_.End());
1022   return RESULT_SUCCESS;
1023 }
1024 
1025 // Here are the elements of the delta window header to be parsed,
1026 // from section 4 of the RFC:
1027 //
1028 //     Window1
1029 //         Win_Indicator                            - byte
1030 //         [Source segment size]                    - integer
1031 //         [Source segment position]                - integer
1032 //         The delta encoding of the target window
1033 //             Length of the delta encoding         - integer
1034 //             The delta encoding
1035 //                 Size of the target window        - integer
1036 //                 Delta_Indicator                  - byte
1037 //                 Length of data for ADDs and RUNs - integer
1038 //                 Length of instructions and sizes - integer
1039 //                 Length of addresses for COPYs    - integer
1040 //                 Data section for ADDs and RUNs   - array of bytes
1041 //                 Instructions and sizes section   - array of bytes
1042 //                 Addresses section for COPYs      - array of bytes
1043 //
ReadHeader(ParseableChunk * parseable_chunk)1044 VCDiffResult VCDiffDeltaFileWindow::ReadHeader(
1045     ParseableChunk* parseable_chunk) {
1046   std::string* decoded_target = parent_->decoded_target();
1047   VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(),
1048                                    parseable_chunk->End());
1049   size_t source_segment_position = 0;
1050   unsigned char win_indicator = 0;
1051   if (!header_parser.ParseWinIndicatorAndSourceSegment(
1052           parent_->dictionary_size(),
1053           decoded_target->size(),
1054           parent_->allow_vcd_target(),
1055           &win_indicator,
1056           &source_segment_length_,
1057           &source_segment_position)) {
1058     return header_parser.GetResult();
1059   }
1060   has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM);
1061   if (!header_parser.ParseWindowLengths(&target_window_length_)) {
1062     return header_parser.GetResult();
1063   }
1064   if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) {
1065     // An error has been logged by TargetWindowWouldExceedSizeLimits().
1066     return RESULT_ERROR;
1067   }
1068   header_parser.ParseDeltaIndicator();
1069   VCDiffResult setup_return_code = SetUpWindowSections(&header_parser);
1070   if (RESULT_SUCCESS != setup_return_code) {
1071     return setup_return_code;
1072   }
1073   // Reserve enough space in the output string for the current target window.
1074   decoded_target->reserve(target_window_start_pos_ + target_window_length_);
1075   // Get a pointer to the start of the source segment.
1076   if (win_indicator & VCD_SOURCE) {
1077     source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position;
1078   } else if (win_indicator & VCD_TARGET) {
1079     // This assignment must happen after the reserve().
1080     // decoded_target should not be resized again while processing this window,
1081     // so source_segment_ptr_ should remain valid.
1082     source_segment_ptr_ = decoded_target->data() + source_segment_position;
1083   }
1084   // The whole window header was found and parsed successfully.
1085   found_header_ = true;
1086   parseable_chunk->Advance(header_parser.ParsedSize());
1087   parent_->AddToTotalTargetWindowSize(target_window_length_);
1088   return RESULT_SUCCESS;
1089 }
1090 
UpdateInstructionPointer(ParseableChunk * parseable_chunk)1091 void VCDiffDeltaFileWindow::UpdateInstructionPointer(
1092     ParseableChunk* parseable_chunk) {
1093   if (IsInterleaved()) {
1094     size_t bytes_parsed = instructions_and_sizes_.ParsedSize();
1095     // Reduce expected instruction segment length by bytes parsed
1096     interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed);
1097     parseable_chunk->Advance(bytes_parsed);
1098   }
1099 }
1100 
TargetBytesDecoded()1101 inline size_t VCDiffDeltaFileWindow::TargetBytesDecoded() {
1102   return parent_->decoded_target()->size() - target_window_start_pos_;
1103 }
1104 
TargetBytesRemaining()1105 size_t VCDiffDeltaFileWindow::TargetBytesRemaining() {
1106   if (target_window_length_ == 0) {
1107     // There is no window being decoded at present
1108     return 0;
1109   } else {
1110     return target_window_length_ - TargetBytesDecoded();
1111   }
1112 }
1113 
CopyBytes(const char * data,size_t size)1114 inline void VCDiffDeltaFileWindow::CopyBytes(const char* data, size_t size) {
1115   parent_->decoded_target()->append(data, size);
1116 }
1117 
RunByte(unsigned char byte,size_t size)1118 inline void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) {
1119   parent_->decoded_target()->append(size, byte);
1120 }
1121 
DecodeAdd(size_t size)1122 VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) {
1123   if (size > data_for_add_and_run_.UnparsedSize()) {
1124     return RESULT_END_OF_DATA;
1125   }
1126   // Write the next "size" data bytes
1127   CopyBytes(data_for_add_and_run_.UnparsedData(), size);
1128   data_for_add_and_run_.Advance(size);
1129   return RESULT_SUCCESS;
1130 }
1131 
DecodeRun(size_t size)1132 VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) {
1133   if (data_for_add_and_run_.Empty()) {
1134     return RESULT_END_OF_DATA;
1135   }
1136   // Write "size" copies of the next data byte
1137   RunByte(*data_for_add_and_run_.UnparsedData(), size);
1138   data_for_add_and_run_.Advance(1);
1139   return RESULT_SUCCESS;
1140 }
1141 
DecodeCopy(size_t size,unsigned char mode)1142 VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size,
1143                                                unsigned char mode) {
1144   // Keep track of the number of target bytes decoded as a local variable
1145   // to avoid recalculating it each time it is needed.
1146   size_t target_bytes_decoded = TargetBytesDecoded();
1147   const VCDAddress here_address =
1148       static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded);
1149   const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress(
1150       here_address,
1151       mode,
1152       addresses_for_copy_.UnparsedDataAddr(),
1153       addresses_for_copy_.End());
1154   switch (decoded_address) {
1155     case RESULT_ERROR:
1156       LOG(ERROR) << "Unable to decode address for COPY" << LOG_ENDL;
1157       return RESULT_ERROR;
1158     case RESULT_END_OF_DATA:
1159       return RESULT_END_OF_DATA;
1160     default:
1161       if ((decoded_address < 0) || (decoded_address > here_address)) {
1162         LOG(DFATAL) << "Internal error: unexpected address " << decoded_address
1163                     << " returned from DecodeAddress, with here_address = "
1164                     << here_address << LOG_ENDL;
1165         return RESULT_ERROR;
1166       }
1167       break;
1168   }
1169   size_t address = static_cast<size_t>(decoded_address);
1170   if ((address + size) <= source_segment_length_) {
1171     // Copy all data from source segment
1172     CopyBytes(&source_segment_ptr_[address], size);
1173     return RESULT_SUCCESS;
1174   }
1175   // Copy some data from target window...
1176   if (address < source_segment_length_) {
1177     // ... plus some data from source segment
1178     const size_t partial_copy_size = source_segment_length_ - address;
1179     CopyBytes(&source_segment_ptr_[address], partial_copy_size);
1180     target_bytes_decoded += partial_copy_size;
1181     address += partial_copy_size;
1182     size -= partial_copy_size;
1183   }
1184   address -= source_segment_length_;
1185   // address is now based at start of target window
1186   const char* const target_segment_ptr = parent_->decoded_target()->data() +
1187                                          target_window_start_pos_;
1188   while (size > (target_bytes_decoded - address)) {
1189     // Recursive copy that extends into the yet-to-be-copied target data
1190     const size_t partial_copy_size = target_bytes_decoded - address;
1191     CopyBytes(&target_segment_ptr[address], partial_copy_size);
1192     target_bytes_decoded += partial_copy_size;
1193     address += partial_copy_size;
1194     size -= partial_copy_size;
1195   }
1196   CopyBytes(&target_segment_ptr[address], size);
1197   return RESULT_SUCCESS;
1198 }
1199 
DecodeBody(ParseableChunk * parseable_chunk)1200 int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) {
1201   if (IsInterleaved() && (instructions_and_sizes_.UnparsedData()
1202                               != parseable_chunk->UnparsedData())) {
1203     LOG(DFATAL) << "Internal error: interleaved format is used, but the"
1204                    " input pointer does not point to the instructions section"
1205                 << LOG_ENDL;
1206     return RESULT_ERROR;
1207   }
1208   while (TargetBytesDecoded() < target_window_length_) {
1209     int32_t decoded_size = VCD_INSTRUCTION_ERROR;
1210     unsigned char mode = 0;
1211     VCDiffInstructionType instruction =
1212         reader_.GetNextInstruction(&decoded_size, &mode);
1213     switch (instruction) {
1214       case VCD_INSTRUCTION_END_OF_DATA:
1215         UpdateInstructionPointer(parseable_chunk);
1216         return RESULT_END_OF_DATA;
1217       case VCD_INSTRUCTION_ERROR:
1218         return RESULT_ERROR;
1219       default:
1220         break;
1221     }
1222     const size_t size = static_cast<size_t>(decoded_size);
1223     // The value of "size" itself could be enormous (say, INT32_MAX)
1224     // so check it individually against the limit to protect against
1225     // overflow when adding it to something else.
1226     if ((size > target_window_length_) ||
1227         ((size + TargetBytesDecoded()) > target_window_length_)) {
1228       LOG(ERROR) << VCDiffInstructionName(instruction)
1229                  << " with size " << size
1230                  << " plus existing " << TargetBytesDecoded()
1231                  << " bytes of target data exceeds length of target"
1232                     " window (" << target_window_length_ << " bytes)"
1233                  << LOG_ENDL;
1234       return RESULT_ERROR;
1235     }
1236     VCDiffResult result = RESULT_SUCCESS;
1237     switch (instruction) {
1238       case VCD_ADD:
1239         result = DecodeAdd(size);
1240         break;
1241       case VCD_RUN:
1242         result = DecodeRun(size);
1243         break;
1244       case VCD_COPY:
1245         result = DecodeCopy(size, mode);
1246         break;
1247       default:
1248         LOG(DFATAL) << "Unexpected instruction type " << instruction
1249                     << "in opcode stream" << LOG_ENDL;
1250         return RESULT_ERROR;
1251     }
1252     switch (result) {
1253       case RESULT_END_OF_DATA:
1254         reader_.UnGetInstruction();
1255         UpdateInstructionPointer(parseable_chunk);
1256         return RESULT_END_OF_DATA;
1257       case RESULT_ERROR:
1258         return RESULT_ERROR;
1259       case RESULT_SUCCESS:
1260         break;
1261     }
1262   }
1263   if (TargetBytesDecoded() != target_window_length_) {
1264     LOG(ERROR) << "Decoded target window size (" << TargetBytesDecoded()
1265                << " bytes) does not match expected size ("
1266                << target_window_length_ << " bytes)" << LOG_ENDL;
1267     return RESULT_ERROR;
1268   }
1269   const char* const target_window_start =
1270       parent_->decoded_target()->data() + target_window_start_pos_;
1271   if (has_checksum_ &&
1272       (ComputeAdler32(target_window_start, target_window_length_)
1273            != expected_checksum_)) {
1274     LOG(ERROR) << "Target data does not match checksum; this could mean "
1275                   "that the wrong dictionary was used" << LOG_ENDL;
1276     return RESULT_ERROR;
1277   }
1278   if (!instructions_and_sizes_.Empty()) {
1279     LOG(ERROR) << "Excess instructions and sizes left over "
1280                   "after decoding target window" << LOG_ENDL;
1281       return RESULT_ERROR;
1282   }
1283   if (!IsInterleaved()) {
1284     // Standard format is being used, with three separate sections for the
1285     // instructions, data, and addresses.
1286     if (!data_for_add_and_run_.Empty()) {
1287       LOG(ERROR) << "Excess ADD/RUN data left over "
1288                     "after decoding target window" << LOG_ENDL;
1289         return RESULT_ERROR;
1290     }
1291     if (!addresses_for_copy_.Empty()) {
1292       LOG(ERROR) << "Excess COPY addresses left over "
1293                     "after decoding target window" << LOG_ENDL;
1294         return RESULT_ERROR;
1295     }
1296     // Reached the end of the window.  Update the ParseableChunk to point to the
1297     // end of the addresses section, which is the last section in the window.
1298     parseable_chunk->SetPosition(addresses_for_copy_.End());
1299   } else {
1300     // Interleaved format is being used.
1301     UpdateInstructionPointer(parseable_chunk);
1302   }
1303   return RESULT_SUCCESS;
1304 }
1305 
DecodeWindows(ParseableChunk * parseable_chunk)1306 VCDiffResult VCDiffDeltaFileWindow::DecodeWindows(
1307     ParseableChunk* parseable_chunk) {
1308   if (!parent_) {
1309     LOG(DFATAL) << "Internal error: VCDiffDeltaFileWindow::DecodeWindows() "
1310                    "called before VCDiffDeltaFileWindow::Init()" << LOG_ENDL;
1311     return RESULT_ERROR;
1312   }
1313   while (!parseable_chunk->Empty()) {
1314     if (!found_header_) {
1315       switch (ReadHeader(parseable_chunk)) {
1316         case RESULT_END_OF_DATA:
1317           return RESULT_END_OF_DATA;
1318         case RESULT_ERROR:
1319           return RESULT_ERROR;
1320         default:
1321           // Reset address cache between windows (RFC section 5.1)
1322           if (!parent_->addr_cache()->Init()) {
1323             LOG(DFATAL) << "Error initializing address cache" << LOG_ENDL;
1324             return RESULT_ERROR;
1325           }
1326       }
1327     } else {
1328       // We are resuming a window that was partially decoded before a
1329       // RESULT_END_OF_DATA was returned.  This can only happen on the first
1330       // loop iteration, and only if the interleaved format is enabled and used.
1331       if (!IsInterleaved()) {
1332         LOG(DFATAL) << "Internal error: Resumed decoding of a delta file window"
1333                        " when interleaved format is not being used" << LOG_ENDL;
1334         return RESULT_ERROR;
1335       }
1336       UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(),
1337                                        parseable_chunk->End());
1338       reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(),
1339                              instructions_and_sizes_.End());
1340     }
1341     switch (DecodeBody(parseable_chunk)) {
1342       case RESULT_END_OF_DATA:
1343         if (MoreDataExpected()) {
1344           return RESULT_END_OF_DATA;
1345         } else {
1346           LOG(ERROR) << "End of data reached while decoding VCDIFF delta file"
1347                      << LOG_ENDL;
1348           // fall through to RESULT_ERROR case
1349         }
1350       case RESULT_ERROR:
1351         return RESULT_ERROR;
1352       default:
1353         break;  // DecodeBody succeeded
1354     }
1355     // Get ready to read a new delta window
1356     Reset();
1357     if (parent_->ReachedPlannedTargetFileSize()) {
1358       // Found exactly the length we expected.  Stop decoding.
1359       return RESULT_SUCCESS;
1360     }
1361   }
1362   return RESULT_SUCCESS;
1363 }
1364 
1365 // *** Methods for VCDiffStreamingDecoder
1366 
VCDiffStreamingDecoder()1367 VCDiffStreamingDecoder::VCDiffStreamingDecoder()
1368 : impl_(new VCDiffStreamingDecoderImpl) { }
1369 
~VCDiffStreamingDecoder()1370 VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; }
1371 
StartDecoding(const char * source,size_t len)1372 void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) {
1373   impl_->StartDecoding(source, len);
1374 }
1375 
DecodeChunkToInterface(const char * data,size_t len,OutputStringInterface * output_string)1376 bool VCDiffStreamingDecoder::DecodeChunkToInterface(
1377     const char* data,
1378     size_t len,
1379     OutputStringInterface* output_string) {
1380   return impl_->DecodeChunk(data, len, output_string);
1381 }
1382 
FinishDecoding()1383 bool VCDiffStreamingDecoder::FinishDecoding() {
1384   return impl_->FinishDecoding();
1385 }
1386 
SetMaximumTargetFileSize(size_t new_maximum_target_file_size)1387 bool VCDiffStreamingDecoder::SetMaximumTargetFileSize(
1388     size_t new_maximum_target_file_size) {
1389   return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size);
1390 }
1391 
SetMaximumTargetWindowSize(size_t new_maximum_target_window_size)1392 bool VCDiffStreamingDecoder::SetMaximumTargetWindowSize(
1393     size_t new_maximum_target_window_size) {
1394   return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size);
1395 }
1396 
SetAllowVcdTarget(bool allow_vcd_target)1397 void VCDiffStreamingDecoder::SetAllowVcdTarget(bool allow_vcd_target) {
1398   impl_->SetAllowVcdTarget(allow_vcd_target);
1399 }
1400 
DecodeToInterface(const char * dictionary_ptr,size_t dictionary_size,const string & encoding,OutputStringInterface * target)1401 bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr,
1402                                       size_t dictionary_size,
1403                                       const string& encoding,
1404                                       OutputStringInterface* target) {
1405   target->clear();
1406   decoder_.StartDecoding(dictionary_ptr, dictionary_size);
1407   if (!decoder_.DecodeChunkToInterface(encoding.data(),
1408                                        encoding.size(),
1409                                        target)) {
1410     return false;
1411   }
1412   return decoder_.FinishDecoding();
1413 }
1414 
1415 }  // namespace open_vcdiff
1416