1 // Copyright 2008 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // Implements a Decoder for the format described in
17 // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
18 // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
19 //
20 // The RFC describes the possibility of using a secondary compressor
21 // to further reduce the size of each section of the VCDIFF output.
22 // That feature is not supported in this implementation of the encoder
23 // and decoder.
24 // No secondary compressor types have been publicly registered with
25 // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
26 // in the more than five years since the registry was created, so there
27 // is no standard set of compressor IDs which would be generated by other
28 // encoders or accepted by other decoders.
29
30 #include <config.h>
31 #include "google/vcdecoder.h"
32 #include <stddef.h> // size_t, ptrdiff_t
33 #include <stdint.h> // int32_t
34 #include <string.h> // memcpy, memset
35 #include <memory> // auto_ptr
36 #include <string>
37 #include "addrcache.h"
38 #include "checksum.h"
39 #include "codetable.h"
40 #include "decodetable.h"
41 #include "headerparser.h"
42 #include "logging.h"
43 #include "google/output_string.h"
44 #include "varint_bigendian.h"
45 #include "vcdiff_defs.h"
46
47 namespace open_vcdiff {
48
49 // This class is used to parse delta file windows as described
50 // in RFC sections 4.2 and 4.3. Its methods are not thread-safe.
51 //
52 // Here is the window format copied from the RFC:
53 //
54 // Window1
55 // Win_Indicator - byte
56 // [Source segment size] - integer
57 // [Source segment position] - integer
58 // The delta encoding of the target window
59 // Length of the delta encoding - integer
60 // The delta encoding
61 // Size of the target window - integer
62 // Delta_Indicator - byte
63 // Length of data for ADDs and RUNs - integer
64 // Length of instructions and sizes - integer
65 // Length of addresses for COPYs - integer
66 // Data section for ADDs and RUNs - array of bytes
67 // Instructions and sizes section - array of bytes
68 // Addresses section for COPYs - array of bytes
69 // Window2
70 // ...
71 //
72 // Sample usage:
73 //
74 // VCDiffDeltaFileWindow delta_window_;
75 // delta_window_.Init(parent);
76 // ParseableChunk parseable_chunk(input_buffer,
77 // input_size,
78 // leftover_unencoded_bytes);
79 // switch (delta_window_.DecodeWindows(&parseable_chunk)) {
80 // case RESULT_END_OF_DATA:
81 // <Read more input and retry DecodeWindows later.>
82 // case RESULT_ERROR:
83 // <Handle error case. An error log message has already been generated.>
84 // }
85 //
86 // DecodeWindows consumes as many windows from the input as it can. It only
87 // needs to be placed within a loop if the loop is used to obtain more input
88 // (delta file) data.
89 //
90 class VCDiffDeltaFileWindow {
91 public:
92 VCDiffDeltaFileWindow();
93 ~VCDiffDeltaFileWindow();
94
95 // Init() should be called immediately after constructing the
96 // VCDiffDeltaFileWindow(). It must be called before DecodeWindows() can be
97 // invoked, or an error will occur.
98 void Init(VCDiffStreamingDecoderImpl* parent);
99
100 // Resets the pointers to the data sections in the current window.
101 void Reset();
102
UseCodeTable(const VCDiffCodeTableData & code_table_data,unsigned char max_mode)103 bool UseCodeTable(const VCDiffCodeTableData& code_table_data,
104 unsigned char max_mode) {
105 return reader_.UseCodeTable(code_table_data, max_mode);
106 }
107
108 // Decodes as many delta windows as possible using the input data from
109 // *parseable_chunk. Appends the decoded target windows to
110 // parent_->decoded_target(). Returns RESULT_SUCCESS on success, or
111 // RESULT_END_OF_DATA if the end of input was reached before the entire window
112 // could be decoded and more input is expected (only possible if
113 // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
114 // decoding. In the RESULT_ERROR case, the value of parseable_chunk->pointer_
115 // is undefined; otherwise, parseable_chunk->Advance() is called to point to
116 // the input data position just after the data that has been decoded.
117 //
118 // If planned_target_file_size is not set to kUnlimitedBytes, then the decoder
119 // expects *exactly* this number of target bytes to be decoded from one or
120 // more delta file windows. If this number is met exactly after finishing a
121 // delta window, this function will return RESULT_SUCCESS without processing
122 // any more bytes from data_pointer. If this number is exceeded while
123 // decoding a window, but was not met before starting that window,
124 // then RESULT_ERROR will be returned.
125 //
126 VCDiffResult DecodeWindows(ParseableChunk* parseable_chunk);
127
FoundWindowHeader() const128 bool FoundWindowHeader() const {
129 return found_header_;
130 }
131
MoreDataExpected() const132 bool MoreDataExpected() const {
133 // When parsing an interleaved-format delta file,
134 // every time DecodeBody() exits, interleaved_bytes_expected_
135 // will be decremented by the number of bytes parsed. If it
136 // reaches zero, then there is no more data expected because
137 // the size of the interleaved section (given in the window
138 // header) has been reached.
139 return IsInterleaved() && (interleaved_bytes_expected_ > 0);
140 }
141
target_window_start_pos() const142 size_t target_window_start_pos() const { return target_window_start_pos_; }
143
set_target_window_start_pos(size_t new_start_pos)144 void set_target_window_start_pos(size_t new_start_pos) {
145 target_window_start_pos_ = new_start_pos;
146 }
147
148 // Returns the number of bytes remaining to be decoded in the target window.
149 // If not in the process of decoding a window, returns 0.
150 size_t TargetBytesRemaining();
151
152 private:
153 // Reads the header of the window section as described in RFC sections 4.2 and
154 // 4.3, up to and including the value "Length of addresses for COPYs". If the
155 // entire header is found, this function sets up the DeltaWindowSections
156 // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so
157 // that the decoder can begin decoding the opcodes in these sections. Returns
158 // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of
159 // available data was reached before the entire header could be read. (The
160 // latter may be an error condition if there is no more data available.)
161 // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the
162 // parsed header.
163 //
164 VCDiffResult ReadHeader(ParseableChunk* parseable_chunk);
165
166 // After the window header has been parsed as far as the Delta_Indicator,
167 // this function is called to parse the following delta window header fields:
168 //
169 // Length of data for ADDs and RUNs - integer (VarintBE format)
170 // Length of instructions and sizes - integer (VarintBE format)
171 // Length of addresses for COPYs - integer (VarintBE format)
172 //
173 // If has_checksum_ is true, it also looks for the following element:
174 //
175 // Adler32 checksum - unsigned 32-bit integer (VarintBE format)
176 //
177 // It sets up the DeltaWindowSections instructions_and_sizes_,
178 // data_for_add_and_run_, and addresses_for_copy_. If the interleaved format
179 // is being used, all three sections will include the entire window body; if
180 // the standard format is used, three non-overlapping window sections will be
181 // defined. Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA
182 // if standard format is being used and there is not enough input data to read
183 // the entire window body. Otherwise, returns RESULT_SUCCESS.
184 VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser);
185
186 // Decodes the body of the window section as described in RFC sections 4.3,
187 // including the sections "Data section for ADDs and RUNs", "Instructions
188 // and sizes section", and "Addresses section for COPYs". These sections
189 // must already have been set up by ReadWindowHeader(). Returns a
190 // non-negative value on success, or RESULT_END_OF_DATA if the end of input
191 // was reached before the entire window could be decoded (only possible if
192 // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
193 // decoding. Appends as much of the decoded target window as possible to
194 // parent->decoded_target().
195 //
196 int DecodeBody(ParseableChunk* parseable_chunk);
197
198 // Returns the number of bytes already decoded into the target window.
199 size_t TargetBytesDecoded();
200
201 // Decodes a single ADD instruction, updating parent_->decoded_target_.
202 VCDiffResult DecodeAdd(size_t size);
203
204 // Decodes a single RUN instruction, updating parent_->decoded_target_.
205 VCDiffResult DecodeRun(size_t size);
206
207 // Decodes a single COPY instruction, updating parent_->decoded_target_.
208 VCDiffResult DecodeCopy(size_t size, unsigned char mode);
209
210 // When using the interleaved format, this function is called both on parsing
211 // the header and on resuming after a RESULT_END_OF_DATA was returned from a
212 // previous call to DecodeBody(). It sets up all three section pointers to
213 // reference the same interleaved stream of instructions, sizes, addresses,
214 // and data. These pointers must be reset every time that work resumes on a
215 // delta window, because the input data string may have been changed or
216 // resized since DecodeBody() last returned.
UpdateInterleavedSectionPointers(const char * data_pos,const char * data_end)217 void UpdateInterleavedSectionPointers(const char* data_pos,
218 const char* data_end) {
219 const ptrdiff_t available_data = data_end - data_pos;
220 // Don't read past the end of currently-available data
221 if (available_data > interleaved_bytes_expected_) {
222 instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_);
223 } else {
224 instructions_and_sizes_.Init(data_pos, available_data);
225 }
226 data_for_add_and_run_.Init(&instructions_and_sizes_);
227 addresses_for_copy_.Init(&instructions_and_sizes_);
228 }
229
230 // If true, the interleaved format described in AllowInterleaved() is used
231 // for the current delta file. Only valid after ReadWindowHeader() has been
232 // called and returned a positive number (i.e., the whole header was parsed),
233 // but before the window has finished decoding.
234 //
IsInterleaved() const235 bool IsInterleaved() const {
236 // If the sections are interleaved, both addresses_for_copy_ and
237 // data_for_add_and_run_ should point at instructions_and_sizes_.
238 return !addresses_for_copy_.IsOwned();
239 }
240
241 // Executes a single COPY or ADD instruction, appending data to
242 // parent_->decoded_target().
243 void CopyBytes(const char* data, size_t size);
244
245 // Executes a single RUN instruction, appending data to
246 // parent_->decoded_target().
247 void RunByte(unsigned char byte, size_t size);
248
249 // Advance *parseable_chunk to point to the current position in the
250 // instructions/sizes section. If interleaved format is used, then
251 // decrement the number of expected bytes in the instructions/sizes section
252 // by the number of instruction/size bytes parsed.
253 void UpdateInstructionPointer(ParseableChunk* parseable_chunk);
254
255 // The parent object which was passed to Init().
256 VCDiffStreamingDecoderImpl* parent_;
257
258 // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader()
259 // has been called and succeeded in parsing the delta window header, but the
260 // entire window has not yet been decoded.
261 bool found_header_;
262
263 // Contents and length of the current source window. source_segment_ptr_
264 // will be non-NULL if (a) the window section header for the current window
265 // has been read, but the window has not yet finished decoding; or
266 // (b) the window did not specify a source segment.
267 const char* source_segment_ptr_;
268 size_t source_segment_length_;
269
270 // The delta encoding window sections as defined in RFC section 4.3.
271 // The pointer for each section will be incremented as data is consumed and
272 // decoded from that section. If the interleaved format is used,
273 // data_for_add_and_run_ and addresses_for_copy_ will both point to
274 // instructions_and_sizes_; otherwise, they will be separate data sections.
275 //
276 DeltaWindowSection instructions_and_sizes_;
277 DeltaWindowSection data_for_add_and_run_;
278 DeltaWindowSection addresses_for_copy_;
279
280 // The expected bytes left to decode in instructions_and_sizes_. Only used
281 // for the interleaved format.
282 int interleaved_bytes_expected_;
283
284 // The expected length of the target window once it has been decoded.
285 size_t target_window_length_;
286
287 // The index in decoded_target at which the first byte of the current
288 // target window was/will be written.
289 size_t target_window_start_pos_;
290
291 // If has_checksum_ is true, then expected_checksum_ contains an Adler32
292 // checksum of the target window data. This is an extension included in the
293 // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard.
294 bool has_checksum_;
295 VCDChecksum expected_checksum_;
296
297 VCDiffCodeTableReader reader_;
298
299 // Making these private avoids implicit copy constructor & assignment operator
300 VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&); // NOLINT
301 void operator=(const VCDiffDeltaFileWindow&);
302 };
303
304 // *** Inline methods for VCDiffDeltaFileWindow
305
VCDiffDeltaFileWindow()306 inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) {
307 Reset();
308 }
309
~VCDiffDeltaFileWindow()310 inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { }
311
Init(VCDiffStreamingDecoderImpl * parent)312 inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) {
313 parent_ = parent;
314 }
315
316 class VCDiffStreamingDecoderImpl {
317 public:
318 typedef std::string string;
319
320 // The default maximum target file size (and target window size) if
321 // SetMaximumTargetFileSize() is not called.
322 static const size_t kDefaultMaximumTargetFileSize = 67108864U; // 64 MB
323
324 // The largest value that can be passed to SetMaximumTargetWindowSize().
325 // Using a larger value will result in an error.
326 static const size_t kTargetSizeLimit = 2147483647U; // INT32_MAX
327
328 // A constant that is the default value for planned_target_file_size_,
329 // indicating that the decoder does not have an expected length
330 // for the target data.
331 static const size_t kUnlimitedBytes = static_cast<size_t>(-3);
332
333 VCDiffStreamingDecoderImpl();
334 ~VCDiffStreamingDecoderImpl();
335
336 // Resets all member variables to their initial states.
337 void Reset();
338
339 // These functions are identical to their counterparts
340 // in VCDiffStreamingDecoder.
341 //
342 void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);
343
344 bool DecodeChunk(const char* data,
345 size_t len,
346 OutputStringInterface* output_string);
347
348 bool FinishDecoding();
349
350 // If true, the version of VCDIFF used in the current delta file allows
351 // for the interleaved format, in which instructions, addresses and data
352 // are all sent interleaved in the instructions section of each window
353 // rather than being sent in separate sections. This is not part of
354 // the VCDIFF draft standard, so we've defined a special version code
355 // 'S' which implies that this feature is available. Even if interleaving
356 // is supported, it is not mandatory; interleaved format will be implied
357 // if the address and data sections are both zero-length.
358 //
AllowInterleaved() const359 bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; }
360
361 // If true, the version of VCDIFF used in the current delta file allows
362 // each delta window to contain an Adler32 checksum of the target window data.
363 // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then
364 // this checksum will appear as a variable-length integer, just after the
365 // "length of addresses for COPYs" value and before the window data sections.
366 // It is possible for some windows in a delta file to use the checksum feature
367 // and for others not to use it (and leave the flag bit set to 0.)
368 // Just as with AllowInterleaved(), this extension is not part of the draft
369 // standard and is only available when the version code 'S' is specified.
370 //
AllowChecksum() const371 bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; }
372
SetMaximumTargetFileSize(size_t new_maximum_target_file_size)373 bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) {
374 maximum_target_file_size_ = new_maximum_target_file_size;
375 return true;
376 }
377
SetMaximumTargetWindowSize(size_t new_maximum_target_window_size)378 bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) {
379 if (new_maximum_target_window_size > kTargetSizeLimit) {
380 LOG(ERROR) << "Specified maximum target window size "
381 << new_maximum_target_window_size << " exceeds limit of "
382 << kTargetSizeLimit << " bytes" << LOG_ENDL;
383 return false;
384 }
385 maximum_target_window_size_ = new_maximum_target_window_size;
386 return true;
387 }
388
389 // See description of planned_target_file_size_, below.
HasPlannedTargetFileSize() const390 bool HasPlannedTargetFileSize() const {
391 return planned_target_file_size_ != kUnlimitedBytes;
392 }
393
SetPlannedTargetFileSize(size_t planned_target_file_size)394 void SetPlannedTargetFileSize(size_t planned_target_file_size) {
395 planned_target_file_size_ = planned_target_file_size;
396 }
397
AddToTotalTargetWindowSize(size_t window_size)398 void AddToTotalTargetWindowSize(size_t window_size) {
399 total_of_target_window_sizes_ += window_size;
400 }
401
402 // Checks to see whether the decoded target data has reached its planned size.
ReachedPlannedTargetFileSize() const403 bool ReachedPlannedTargetFileSize() const {
404 if (!HasPlannedTargetFileSize()) {
405 return false;
406 }
407 // The planned target file size should not have been exceeded.
408 // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of
409 // each target window would not make the target file exceed that limit, and
410 // DecodeBody() will return RESULT_ERROR if the actual decoded output ever
411 // exceeds the advertised target window size.
412 if (total_of_target_window_sizes_ > planned_target_file_size_) {
413 LOG(DFATAL) << "Internal error: Decoded data size "
414 << total_of_target_window_sizes_
415 << " exceeds planned target file size "
416 << planned_target_file_size_ << LOG_ENDL;
417 return true;
418 }
419 return total_of_target_window_sizes_ == planned_target_file_size_;
420 }
421
422 // Checks to see whether adding a new target window of the specified size
423 // would exceed the planned target file size, the maximum target file size,
424 // or the maximum target window size. If so, logs an error and returns true;
425 // otherwise, returns false.
426 bool TargetWindowWouldExceedSizeLimits(size_t window_size) const;
427
428 // Returns the amount of input data passed to the last DecodeChunk()
429 // that was not consumed by the decoder. This is essential if
430 // SetPlannedTargetFileSize() is being used, in order to preserve the
431 // remaining input data stream once the planned target file has been decoded.
GetUnconsumedDataSize() const432 size_t GetUnconsumedDataSize() const {
433 return unparsed_bytes_.size();
434 }
435
436 // This function will return true if the decoder has parsed a complete delta
437 // file header plus zero or more delta file windows, with no data left over.
438 // It will also return true if no delta data at all was decoded. If these
439 // conditions are not met, then FinishDecoding() should not be called.
IsDecodingComplete() const440 bool IsDecodingComplete() const {
441 if (!FoundFileHeader()) {
442 // No complete delta file header has been parsed yet. DecodeChunk()
443 // may have received some data that it hasn't yet parsed, in which case
444 // decoding is incomplete.
445 return unparsed_bytes_.empty();
446 } else if (custom_code_table_decoder_.get()) {
447 // The decoder is in the middle of parsing a custom code table.
448 return false;
449 } else if (delta_window_.FoundWindowHeader()) {
450 // The decoder is in the middle of parsing an interleaved format delta
451 // window.
452 return false;
453 } else if (ReachedPlannedTargetFileSize()) {
454 // The decoder found exactly the planned number of bytes. In this case
455 // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover
456 // data after the end of the delta file.
457 return true;
458 } else {
459 // No complete delta file window has been parsed yet. DecodeChunk()
460 // may have received some data that it hasn't yet parsed, in which case
461 // decoding is incomplete.
462 return unparsed_bytes_.empty();
463 }
464 }
465
dictionary_ptr() const466 const char* dictionary_ptr() const { return dictionary_ptr_; }
467
dictionary_size() const468 size_t dictionary_size() const { return dictionary_size_; }
469
addr_cache()470 VCDiffAddressCache* addr_cache() { return addr_cache_.get(); }
471
decoded_target()472 string* decoded_target() { return &decoded_target_; }
473
allow_vcd_target() const474 bool allow_vcd_target() const { return allow_vcd_target_; }
475
SetAllowVcdTarget(bool allow_vcd_target)476 void SetAllowVcdTarget(bool allow_vcd_target) {
477 if (start_decoding_was_called_) {
478 LOG(DFATAL) << "SetAllowVcdTarget() called after StartDecoding()"
479 << LOG_ENDL;
480 return;
481 }
482 allow_vcd_target_ = allow_vcd_target;
483 }
484
485 // Removes the contents of decoded_target_ that precede the beginning of the
486 // current window.
487 void TruncateToBeginningOfWindow();
488
489 private:
490 // Reads the VCDiff delta file header section as described in RFC section 4.1,
491 // except the custom code table data. Returns RESULT_ERROR if an error
492 // occurred, or RESULT_END_OF_DATA if the end of available data was reached
493 // before the entire header could be read. (The latter may be an error
494 // condition if there is no more data available.) Otherwise, advances
495 // data->position_ past the header and returns RESULT_SUCCESS.
496 //
497 VCDiffResult ReadDeltaFileHeader(ParseableChunk* data);
498
499 // Indicates whether or not the header has already been read.
FoundFileHeader() const500 bool FoundFileHeader() const { return addr_cache_.get() != NULL; }
501
502 // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta
503 // file header, this function parses the custom cache sizes and initializes
504 // a nested VCDiffStreamingDecoderImpl object that will be used to parse the
505 // custom code table in ReadCustomCodeTable(). Returns RESULT_ERROR if an
506 // error occurred, or RESULT_END_OF_DATA if the end of available data was
507 // reached before the custom cache sizes could be read. Otherwise, returns
508 // the number of bytes read.
509 //
510 int InitCustomCodeTable(const char* data_start, const char* data_end);
511
512 // If a custom code table was specified in the header section that was parsed
513 // by ReadDeltaFileHeader(), this function makes a recursive call to another
514 // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the
515 // custom code table is expected to be supplied as an embedded VCDIFF
516 // encoding that uses the standard code table. Returns RESULT_ERROR if an
517 // error occurs, or RESULT_END_OF_DATA if the end of available data was
518 // reached before the entire custom code table could be read. Otherwise,
519 // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded
520 // custom code table. If the function returns RESULT_SUCCESS or
521 // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes.
522 //
523 VCDiffResult ReadCustomCodeTable(ParseableChunk* data);
524
525 // Contents and length of the source (dictionary) data.
526 const char* dictionary_ptr_;
527 size_t dictionary_size_;
528
529 // This string will be used to store any unparsed bytes left over when
530 // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA.
531 // It will also be used to concatenate those unparsed bytes with the data
532 // supplied to the next call to DecodeChunk(), so that they appear in
533 // contiguous memory.
534 string unparsed_bytes_;
535
536 // The portion of the target file that has been decoded so far. This will be
537 // used to fill the output string for DecodeChunk(), and will also be used to
538 // execute COPY instructions that reference target data. Since the source
539 // window can come from a range of addresses in the previously decoded target
540 // data, the entire target file needs to be available to the decoder, not just
541 // the current target window.
542 string decoded_target_;
543
544 // The VCDIFF version byte (also known as "header4") from the
545 // delta file header.
546 unsigned char vcdiff_version_code_;
547
548 VCDiffDeltaFileWindow delta_window_;
549
550 std::auto_ptr<VCDiffAddressCache> addr_cache_;
551
552 // Will be NULL unless a custom code table has been defined.
553 std::auto_ptr<VCDiffCodeTableData> custom_code_table_;
554
555 // Used to receive the decoded custom code table.
556 string custom_code_table_string_;
557
558 // If a custom code table is specified, it will be expressed
559 // as an embedded VCDIFF delta file which uses the default code table
560 // as the source file (dictionary). Use a child decoder object
561 // to decode that delta file.
562 std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_;
563
564 // If set, then the decoder is expecting *exactly* this number of
565 // target bytes to be decoded from one or more delta file windows.
566 // If this number is exceeded while decoding a window, but was not met
567 // before starting on that window, an error will be reported.
568 // If FinishDecoding() is called before this number is met, an error
569 // will also be reported. This feature is used for decoding the
570 // embedded code table data within a VCDIFF delta file; we want to
571 // stop processing the embedded data once the entire code table has
572 // been decoded, and treat the rest of the available data as part
573 // of the enclosing delta file.
574 size_t planned_target_file_size_;
575
576 size_t maximum_target_file_size_;
577
578 size_t maximum_target_window_size_;
579
580 // Contains the sum of the decoded sizes of all target windows seen so far,
581 // including the expected total size of the current target window in progress
582 // (even if some of the current target window has not yet been decoded.)
583 size_t total_of_target_window_sizes_;
584
585 // This value is used to ensure the correct order of calls to the interface
586 // functions, i.e., a single call to StartDecoding(), followed by zero or
587 // more calls to DecodeChunk(), followed by a single call to
588 // FinishDecoding().
589 bool start_decoding_was_called_;
590
591 // If this value is true then the VCD_TARGET flag can be specified to allow
592 // the source segment to be chosen from the previously-decoded target data.
593 // (This is the default behavior.) If it is false, then specifying the
594 // VCD_TARGET flag is considered an error, and the decoder does not need to
595 // keep in memory any decoded target data prior to the current window.
596 bool allow_vcd_target_;
597
598 // Making these private avoids implicit copy constructor & assignment operator
599 VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&); // NOLINT
600 void operator=(const VCDiffStreamingDecoderImpl&);
601 };
602
603 // *** Methods for VCDiffStreamingDecoderImpl
604
// Namespace-scope definitions for static const members whose values are
// given inside the class body.  Before C++17 the in-class initializer is only
// a declaration, so a definition like these is needed whenever the constant
// is odr-used (for example, bound to a const reference).
const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize;
const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes;
607
VCDiffStreamingDecoderImpl()608 VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl()
609 : maximum_target_file_size_(kDefaultMaximumTargetFileSize),
610 maximum_target_window_size_(kDefaultMaximumTargetFileSize),
611 allow_vcd_target_(true) {
612 delta_window_.Init(this);
613 Reset();
614 }
615
616 // Reset() will delete the component objects without reallocating them.
~VCDiffStreamingDecoderImpl()617 VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); }
618
Reset()619 void VCDiffStreamingDecoderImpl::Reset() {
620 start_decoding_was_called_ = false;
621 dictionary_ptr_ = NULL;
622 dictionary_size_ = 0;
623 vcdiff_version_code_ = '\0';
624 planned_target_file_size_ = kUnlimitedBytes;
625 total_of_target_window_sizes_ = 0;
626 addr_cache_.reset();
627 custom_code_table_.reset();
628 custom_code_table_decoder_.reset();
629 delta_window_.Reset();
630 }
631
TruncateToBeginningOfWindow()632 void VCDiffStreamingDecoderImpl::TruncateToBeginningOfWindow() {
633 // Conserve the data for the current window that has been partially decoded.
634 decoded_target_.erase(0, delta_window_.target_window_start_pos());
635 delta_window_.set_target_window_start_pos(0);
636 }
637
StartDecoding(const char * dictionary_ptr,size_t dictionary_size)638 void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr,
639 size_t dictionary_size) {
640 if (start_decoding_was_called_) {
641 LOG(DFATAL) << "StartDecoding() called twice without FinishDecoding()"
642 << LOG_ENDL;
643 return;
644 }
645 unparsed_bytes_.clear();
646 decoded_target_.clear(); // delta_window_.Reset() depends on this
647 Reset();
648 dictionary_ptr_ = dictionary_ptr;
649 dictionary_size_ = dictionary_size;
650 start_decoding_was_called_ = true;
651 }
652
653 // Reads the VCDiff delta file header section as described in RFC section 4.1:
654 //
655 // Header1 - byte = 0xD6 (ASCII 'V' | 0x80)
656 // Header2 - byte = 0xC3 (ASCII 'C' | 0x80)
657 // Header3 - byte = 0xC4 (ASCII 'D' | 0x80)
658 // Header4 - byte
659 // Hdr_Indicator - byte
660 // [Secondary compressor ID] - byte
661 // [Length of code table data] - integer
662 // [Code table data]
663 //
664 // Initializes the code table and address cache objects. Returns RESULT_ERROR
665 // if an error occurred, and RESULT_END_OF_DATA if the end of available data was
666 // reached before the entire header could be read. (The latter may be an error
667 // condition if there is no more data available.) Otherwise, returns
668 // RESULT_SUCCESS, and removes the header bytes from the data string.
669 //
670 // It's relatively inefficient to expect this function to parse any number of
671 // input bytes available, down to 1 byte, but it is necessary in case the input
672 // is not a properly formatted VCDIFF delta file. If the entire input consists
673 // of two bytes "12", then we should recognize that it does not match the
674 // initial VCDIFF magic number "VCD" and report an error, rather than waiting
675 // indefinitely for more input that will never arrive.
676 //
ReadDeltaFileHeader(ParseableChunk * data)677 VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader(
678 ParseableChunk* data) {
679 if (FoundFileHeader()) {
680 return RESULT_SUCCESS;
681 }
682 size_t data_size = data->UnparsedSize();
683 const DeltaFileHeader* header =
684 reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData());
685 bool wrong_magic_number = false;
686 switch (data_size) {
687 // Verify only the bytes that are available.
688 default:
689 // Found header contents up to and including VCDIFF version
690 vcdiff_version_code_ = header->header4;
691 if ((vcdiff_version_code_ != 0x00) && // Draft standard VCDIFF (RFC 3284)
692 (vcdiff_version_code_ != 'S')) { // Enhancements for SDCH protocol
693 LOG(ERROR) << "Unrecognized VCDIFF format version" << LOG_ENDL;
694 return RESULT_ERROR;
695 }
696 // fall through
697 case 3:
698 if (header->header3 != 0xC4) { // magic value 'D' | 0x80
699 wrong_magic_number = true;
700 }
701 // fall through
702 case 2:
703 if (header->header2 != 0xC3) { // magic value 'C' | 0x80
704 wrong_magic_number = true;
705 }
706 // fall through
707 case 1:
708 if (header->header1 != 0xD6) { // magic value 'V' | 0x80
709 wrong_magic_number = true;
710 }
711 // fall through
712 case 0:
713 if (wrong_magic_number) {
714 LOG(ERROR) << "Did not find VCDIFF header bytes; "
715 "input is not a VCDIFF delta file" << LOG_ENDL;
716 return RESULT_ERROR;
717 }
718 if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA;
719 }
720 // Secondary compressor not supported.
721 if (header->hdr_indicator & VCD_DECOMPRESS) {
722 LOG(ERROR) << "Secondary compression is not supported" << LOG_ENDL;
723 return RESULT_ERROR;
724 }
725 if (header->hdr_indicator & VCD_CODETABLE) {
726 int bytes_parsed = InitCustomCodeTable(
727 data->UnparsedData() + sizeof(DeltaFileHeader),
728 data->End());
729 switch (bytes_parsed) {
730 case RESULT_ERROR:
731 return RESULT_ERROR;
732 case RESULT_END_OF_DATA:
733 return RESULT_END_OF_DATA;
734 default:
735 data->Advance(sizeof(DeltaFileHeader) + bytes_parsed);
736 }
737 } else {
738 addr_cache_.reset(new VCDiffAddressCache);
739 // addr_cache_->Init() will be called
740 // from VCDiffStreamingDecoderImpl::DecodeChunk()
741 data->Advance(sizeof(DeltaFileHeader));
742 }
743 return RESULT_SUCCESS;
744 }
745
InitCustomCodeTable(const char * data_start,const char * data_end)746 int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start,
747 const char* data_end) {
748 // A custom code table is being specified. Parse the variable-length
749 // cache sizes and begin parsing the encoded custom code table.
750 int32_t near_cache_size = 0, same_cache_size = 0;
751 VCDiffHeaderParser header_parser(data_start, data_end);
752 if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) {
753 return header_parser.GetResult();
754 }
755 if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) {
756 return header_parser.GetResult();
757 }
758 custom_code_table_.reset(new struct VCDiffCodeTableData);
759 memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData));
760 custom_code_table_string_.clear();
761 addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size));
762 // addr_cache_->Init() will be called
763 // from VCDiffStreamingDecoderImpl::DecodeChunk()
764
765 // If we reach this point (the start of the custom code table)
766 // without encountering a RESULT_END_OF_DATA condition, then we won't call
767 // ReadDeltaFileHeader() again for this delta file.
768 //
769 // Instantiate a recursive decoder to interpret the custom code table
770 // as a VCDIFF encoding of the default code table.
771 custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl);
772 custom_code_table_decoder_->StartDecoding(
773 reinterpret_cast<const char*>(
774 &VCDiffCodeTableData::kDefaultCodeTableData),
775 sizeof(VCDiffCodeTableData::kDefaultCodeTableData));
776 custom_code_table_decoder_->SetPlannedTargetFileSize(
777 sizeof(*custom_code_table_));
778 return static_cast<int>(header_parser.ParsedSize());
779 }
780
ReadCustomCodeTable(ParseableChunk * data)781 VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable(
782 ParseableChunk* data) {
783 if (!custom_code_table_decoder_.get()) {
784 return RESULT_SUCCESS;
785 }
786 if (!custom_code_table_.get()) {
787 LOG(DFATAL) << "Internal error: custom_code_table_decoder_ is set,"
788 " but custom_code_table_ is NULL" << LOG_ENDL;
789 return RESULT_ERROR;
790 }
791 OutputString<string> output_string(&custom_code_table_string_);
792 if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(),
793 data->UnparsedSize(),
794 &output_string)) {
795 return RESULT_ERROR;
796 }
797 if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) {
798 // Skip over the consumed data.
799 data->Finish();
800 return RESULT_END_OF_DATA;
801 }
802 if (!custom_code_table_decoder_->FinishDecoding()) {
803 return RESULT_ERROR;
804 }
805 if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) {
806 LOG(DFATAL) << "Decoded custom code table size ("
807 << custom_code_table_string_.length()
808 << ") does not match size of a code table ("
809 << sizeof(*custom_code_table_) << ")" << LOG_ENDL;
810 return RESULT_ERROR;
811 }
812 memcpy(custom_code_table_.get(),
813 custom_code_table_string_.data(),
814 sizeof(*custom_code_table_));
815 custom_code_table_string_.clear();
816 // Skip over the consumed data.
817 data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize());
818 custom_code_table_decoder_.reset();
819 delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode());
820 return RESULT_SUCCESS;
821 }
822
823 namespace {
824
825 class TrackNewOutputText {
826 public:
827 typedef std::string string;
828
TrackNewOutputText(const string & decoded_target)829 explicit TrackNewOutputText(const string& decoded_target)
830 : decoded_target_(decoded_target),
831 initial_decoded_target_size_(decoded_target.size()) { }
832
AppendNewOutputText(size_t target_bytes_remaining,OutputStringInterface * output_string)833 void AppendNewOutputText(size_t target_bytes_remaining,
834 OutputStringInterface* output_string) {
835 const size_t bytes_decoded_this_chunk =
836 decoded_target_.size() - initial_decoded_target_size_;
837 if (bytes_decoded_this_chunk > 0) {
838 if (target_bytes_remaining > 0) {
839 // The decoder is midway through decoding a target window. Resize
840 // output_string to match the expected length. The interface guarantees
841 // not to resize the output_string more than once per target window
842 // decoded.
843 output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk
844 + target_bytes_remaining);
845 }
846 output_string->append(
847 decoded_target_.data() + initial_decoded_target_size_,
848 bytes_decoded_this_chunk);
849 }
850 }
851
852 private:
853 const string& decoded_target_;
854 size_t initial_decoded_target_size_;
855 };
856
857 } // anonymous namespace
858
DecodeChunk(const char * data,size_t len,OutputStringInterface * output_string)859 bool VCDiffStreamingDecoderImpl::DecodeChunk(
860 const char* data,
861 size_t len,
862 OutputStringInterface* output_string) {
863 if (!start_decoding_was_called_) {
864 LOG(DFATAL) << "DecodeChunk() called without StartDecoding()" << LOG_ENDL;
865 Reset();
866 return false;
867 }
868 ParseableChunk parseable_chunk(data, len);
869 if (!unparsed_bytes_.empty()) {
870 unparsed_bytes_.append(data, len);
871 parseable_chunk.SetDataBuffer(unparsed_bytes_.data(),
872 unparsed_bytes_.size());
873 }
874 TrackNewOutputText output_tracker(decoded_target_);
875 VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk);
876 if (RESULT_SUCCESS == result) {
877 result = ReadCustomCodeTable(&parseable_chunk);
878 }
879 if (RESULT_SUCCESS == result) {
880 result = delta_window_.DecodeWindows(&parseable_chunk);
881 }
882 if (RESULT_ERROR == result) {
883 Reset(); // Don't allow further DecodeChunk calls
884 return false;
885 }
886 unparsed_bytes_.assign(parseable_chunk.UnparsedData(),
887 parseable_chunk.UnparsedSize());
888 output_tracker.AppendNewOutputText(delta_window_.TargetBytesRemaining(),
889 output_string);
890 if (!allow_vcd_target()) {
891 // VCD_TARGET will never be used to reference target data beyond the start
892 // of the current window, so throw away any earlier target data.
893 TruncateToBeginningOfWindow();
894 }
895 return true;
896 }
897
898 // Finishes decoding after all data has been received. Returns true
899 // if decoding of the entire stream was successful.
FinishDecoding()900 bool VCDiffStreamingDecoderImpl::FinishDecoding() {
901 bool success = true;
902 if (!start_decoding_was_called_) {
903 LOG(WARNING) << "FinishDecoding() called before StartDecoding(),"
904 " or called after DecodeChunk() returned false"
905 << LOG_ENDL;
906 success = false;
907 } else if (!IsDecodingComplete()) {
908 LOG(ERROR) << "FinishDecoding() called before parsing entire"
909 " delta file window" << LOG_ENDL;
910 success = false;
911 }
912 // Reset the object state for the next decode operation
913 Reset();
914 return success;
915 }
916
TargetWindowWouldExceedSizeLimits(size_t window_size) const917 bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits(
918 size_t window_size) const {
919 if (window_size > maximum_target_window_size_) {
920 LOG(ERROR) << "Length of target window (" << window_size
921 << ") exceeds limit of " << maximum_target_window_size_
922 << " bytes" << LOG_ENDL;
923 return true;
924 }
925 if (HasPlannedTargetFileSize()) {
926 // The logical expression to check would be:
927 //
928 // total_of_target_window_sizes_ + window_size > planned_target_file_size_
929 //
930 // but the addition might cause an integer overflow if target_bytes_to_add
931 // is very large. So it is better to check target_bytes_to_add against
932 // the remaining planned target bytes.
933 size_t remaining_planned_target_file_size =
934 planned_target_file_size_ - total_of_target_window_sizes_;
935 if (window_size > remaining_planned_target_file_size) {
936 LOG(ERROR) << "Length of target window (" << window_size
937 << " bytes) plus previous windows ("
938 << total_of_target_window_sizes_
939 << " bytes) would exceed planned size of "
940 << planned_target_file_size_ << " bytes" << LOG_ENDL;
941 return true;
942 }
943 }
944 size_t remaining_maximum_target_bytes =
945 maximum_target_file_size_ - total_of_target_window_sizes_;
946 if (window_size > remaining_maximum_target_bytes) {
947 LOG(ERROR) << "Length of target window (" << window_size
948 << " bytes) plus previous windows ("
949 << total_of_target_window_sizes_
950 << " bytes) would exceed maximum target file size of "
951 << maximum_target_file_size_ << " bytes" << LOG_ENDL;
952 return true;
953 }
954 return false;
955 }
956
957 // *** Methods for VCDiffDeltaFileWindow
958
Reset()959 void VCDiffDeltaFileWindow::Reset() {
960 found_header_ = false;
961
962 // Mark the start of the current target window.
963 target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U;
964 target_window_length_ = 0;
965
966 source_segment_ptr_ = NULL;
967 source_segment_length_ = 0;
968
969 instructions_and_sizes_.Invalidate();
970 data_for_add_and_run_.Invalidate();
971 addresses_for_copy_.Invalidate();
972
973 interleaved_bytes_expected_ = 0;
974
975 has_checksum_ = false;
976 expected_checksum_ = 0;
977 }
978
SetUpWindowSections(VCDiffHeaderParser * header_parser)979 VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections(
980 VCDiffHeaderParser* header_parser) {
981 size_t add_and_run_data_length = 0;
982 size_t instructions_and_sizes_length = 0;
983 size_t addresses_length = 0;
984 if (!header_parser->ParseSectionLengths(has_checksum_,
985 &add_and_run_data_length,
986 &instructions_and_sizes_length,
987 &addresses_length,
988 &expected_checksum_)) {
989 return header_parser->GetResult();
990 }
991 if (parent_->AllowInterleaved() &&
992 (add_and_run_data_length == 0) &&
993 (addresses_length == 0)) {
994 // The interleaved format is being used.
995 interleaved_bytes_expected_ =
996 static_cast<int>(instructions_and_sizes_length);
997 UpdateInterleavedSectionPointers(header_parser->UnparsedData(),
998 header_parser->End());
999 } else {
1000 // If interleaved format is not used, then the whole window contents
1001 // must be available before decoding can begin. If only part of
1002 // the current window is available, then report end of data
1003 // and re-parse the whole header when DecodeChunk() is called again.
1004 if (header_parser->UnparsedSize() < (add_and_run_data_length +
1005 instructions_and_sizes_length +
1006 addresses_length)) {
1007 return RESULT_END_OF_DATA;
1008 }
1009 data_for_add_and_run_.Init(header_parser->UnparsedData(),
1010 add_and_run_data_length);
1011 instructions_and_sizes_.Init(data_for_add_and_run_.End(),
1012 instructions_and_sizes_length);
1013 addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length);
1014 if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) {
1015 LOG(ERROR) << "The end of the instructions section "
1016 "does not match the end of the delta window" << LOG_ENDL;
1017 return RESULT_ERROR;
1018 }
1019 }
1020 reader_.Init(instructions_and_sizes_.UnparsedDataAddr(),
1021 instructions_and_sizes_.End());
1022 return RESULT_SUCCESS;
1023 }
1024
1025 // Here are the elements of the delta window header to be parsed,
1026 // from section 4 of the RFC:
1027 //
1028 // Window1
1029 // Win_Indicator - byte
1030 // [Source segment size] - integer
1031 // [Source segment position] - integer
1032 // The delta encoding of the target window
1033 // Length of the delta encoding - integer
1034 // The delta encoding
1035 // Size of the target window - integer
1036 // Delta_Indicator - byte
1037 // Length of data for ADDs and RUNs - integer
1038 // Length of instructions and sizes - integer
1039 // Length of addresses for COPYs - integer
1040 // Data section for ADDs and RUNs - array of bytes
1041 // Instructions and sizes section - array of bytes
1042 // Addresses section for COPYs - array of bytes
1043 //
ReadHeader(ParseableChunk * parseable_chunk)1044 VCDiffResult VCDiffDeltaFileWindow::ReadHeader(
1045 ParseableChunk* parseable_chunk) {
1046 std::string* decoded_target = parent_->decoded_target();
1047 VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(),
1048 parseable_chunk->End());
1049 size_t source_segment_position = 0;
1050 unsigned char win_indicator = 0;
1051 if (!header_parser.ParseWinIndicatorAndSourceSegment(
1052 parent_->dictionary_size(),
1053 decoded_target->size(),
1054 parent_->allow_vcd_target(),
1055 &win_indicator,
1056 &source_segment_length_,
1057 &source_segment_position)) {
1058 return header_parser.GetResult();
1059 }
1060 has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM);
1061 if (!header_parser.ParseWindowLengths(&target_window_length_)) {
1062 return header_parser.GetResult();
1063 }
1064 if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) {
1065 // An error has been logged by TargetWindowWouldExceedSizeLimits().
1066 return RESULT_ERROR;
1067 }
1068 header_parser.ParseDeltaIndicator();
1069 VCDiffResult setup_return_code = SetUpWindowSections(&header_parser);
1070 if (RESULT_SUCCESS != setup_return_code) {
1071 return setup_return_code;
1072 }
1073 // Reserve enough space in the output string for the current target window.
1074 decoded_target->reserve(target_window_start_pos_ + target_window_length_);
1075 // Get a pointer to the start of the source segment.
1076 if (win_indicator & VCD_SOURCE) {
1077 source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position;
1078 } else if (win_indicator & VCD_TARGET) {
1079 // This assignment must happen after the reserve().
1080 // decoded_target should not be resized again while processing this window,
1081 // so source_segment_ptr_ should remain valid.
1082 source_segment_ptr_ = decoded_target->data() + source_segment_position;
1083 }
1084 // The whole window header was found and parsed successfully.
1085 found_header_ = true;
1086 parseable_chunk->Advance(header_parser.ParsedSize());
1087 parent_->AddToTotalTargetWindowSize(target_window_length_);
1088 return RESULT_SUCCESS;
1089 }
1090
UpdateInstructionPointer(ParseableChunk * parseable_chunk)1091 void VCDiffDeltaFileWindow::UpdateInstructionPointer(
1092 ParseableChunk* parseable_chunk) {
1093 if (IsInterleaved()) {
1094 size_t bytes_parsed = instructions_and_sizes_.ParsedSize();
1095 // Reduce expected instruction segment length by bytes parsed
1096 interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed);
1097 parseable_chunk->Advance(bytes_parsed);
1098 }
1099 }
1100
TargetBytesDecoded()1101 inline size_t VCDiffDeltaFileWindow::TargetBytesDecoded() {
1102 return parent_->decoded_target()->size() - target_window_start_pos_;
1103 }
1104
TargetBytesRemaining()1105 size_t VCDiffDeltaFileWindow::TargetBytesRemaining() {
1106 if (target_window_length_ == 0) {
1107 // There is no window being decoded at present
1108 return 0;
1109 } else {
1110 return target_window_length_ - TargetBytesDecoded();
1111 }
1112 }
1113
CopyBytes(const char * data,size_t size)1114 inline void VCDiffDeltaFileWindow::CopyBytes(const char* data, size_t size) {
1115 parent_->decoded_target()->append(data, size);
1116 }
1117
RunByte(unsigned char byte,size_t size)1118 inline void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) {
1119 parent_->decoded_target()->append(size, byte);
1120 }
1121
DecodeAdd(size_t size)1122 VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) {
1123 if (size > data_for_add_and_run_.UnparsedSize()) {
1124 return RESULT_END_OF_DATA;
1125 }
1126 // Write the next "size" data bytes
1127 CopyBytes(data_for_add_and_run_.UnparsedData(), size);
1128 data_for_add_and_run_.Advance(size);
1129 return RESULT_SUCCESS;
1130 }
1131
DecodeRun(size_t size)1132 VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) {
1133 if (data_for_add_and_run_.Empty()) {
1134 return RESULT_END_OF_DATA;
1135 }
1136 // Write "size" copies of the next data byte
1137 RunByte(*data_for_add_and_run_.UnparsedData(), size);
1138 data_for_add_and_run_.Advance(1);
1139 return RESULT_SUCCESS;
1140 }
1141
DecodeCopy(size_t size,unsigned char mode)1142 VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size,
1143 unsigned char mode) {
1144 // Keep track of the number of target bytes decoded as a local variable
1145 // to avoid recalculating it each time it is needed.
1146 size_t target_bytes_decoded = TargetBytesDecoded();
1147 const VCDAddress here_address =
1148 static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded);
1149 const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress(
1150 here_address,
1151 mode,
1152 addresses_for_copy_.UnparsedDataAddr(),
1153 addresses_for_copy_.End());
1154 switch (decoded_address) {
1155 case RESULT_ERROR:
1156 LOG(ERROR) << "Unable to decode address for COPY" << LOG_ENDL;
1157 return RESULT_ERROR;
1158 case RESULT_END_OF_DATA:
1159 return RESULT_END_OF_DATA;
1160 default:
1161 if ((decoded_address < 0) || (decoded_address > here_address)) {
1162 LOG(DFATAL) << "Internal error: unexpected address " << decoded_address
1163 << " returned from DecodeAddress, with here_address = "
1164 << here_address << LOG_ENDL;
1165 return RESULT_ERROR;
1166 }
1167 break;
1168 }
1169 size_t address = static_cast<size_t>(decoded_address);
1170 if ((address + size) <= source_segment_length_) {
1171 // Copy all data from source segment
1172 CopyBytes(&source_segment_ptr_[address], size);
1173 return RESULT_SUCCESS;
1174 }
1175 // Copy some data from target window...
1176 if (address < source_segment_length_) {
1177 // ... plus some data from source segment
1178 const size_t partial_copy_size = source_segment_length_ - address;
1179 CopyBytes(&source_segment_ptr_[address], partial_copy_size);
1180 target_bytes_decoded += partial_copy_size;
1181 address += partial_copy_size;
1182 size -= partial_copy_size;
1183 }
1184 address -= source_segment_length_;
1185 // address is now based at start of target window
1186 const char* const target_segment_ptr = parent_->decoded_target()->data() +
1187 target_window_start_pos_;
1188 while (size > (target_bytes_decoded - address)) {
1189 // Recursive copy that extends into the yet-to-be-copied target data
1190 const size_t partial_copy_size = target_bytes_decoded - address;
1191 CopyBytes(&target_segment_ptr[address], partial_copy_size);
1192 target_bytes_decoded += partial_copy_size;
1193 address += partial_copy_size;
1194 size -= partial_copy_size;
1195 }
1196 CopyBytes(&target_segment_ptr[address], size);
1197 return RESULT_SUCCESS;
1198 }
1199
DecodeBody(ParseableChunk * parseable_chunk)1200 int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) {
1201 if (IsInterleaved() && (instructions_and_sizes_.UnparsedData()
1202 != parseable_chunk->UnparsedData())) {
1203 LOG(DFATAL) << "Internal error: interleaved format is used, but the"
1204 " input pointer does not point to the instructions section"
1205 << LOG_ENDL;
1206 return RESULT_ERROR;
1207 }
1208 while (TargetBytesDecoded() < target_window_length_) {
1209 int32_t decoded_size = VCD_INSTRUCTION_ERROR;
1210 unsigned char mode = 0;
1211 VCDiffInstructionType instruction =
1212 reader_.GetNextInstruction(&decoded_size, &mode);
1213 switch (instruction) {
1214 case VCD_INSTRUCTION_END_OF_DATA:
1215 UpdateInstructionPointer(parseable_chunk);
1216 return RESULT_END_OF_DATA;
1217 case VCD_INSTRUCTION_ERROR:
1218 return RESULT_ERROR;
1219 default:
1220 break;
1221 }
1222 const size_t size = static_cast<size_t>(decoded_size);
1223 // The value of "size" itself could be enormous (say, INT32_MAX)
1224 // so check it individually against the limit to protect against
1225 // overflow when adding it to something else.
1226 if ((size > target_window_length_) ||
1227 ((size + TargetBytesDecoded()) > target_window_length_)) {
1228 LOG(ERROR) << VCDiffInstructionName(instruction)
1229 << " with size " << size
1230 << " plus existing " << TargetBytesDecoded()
1231 << " bytes of target data exceeds length of target"
1232 " window (" << target_window_length_ << " bytes)"
1233 << LOG_ENDL;
1234 return RESULT_ERROR;
1235 }
1236 VCDiffResult result = RESULT_SUCCESS;
1237 switch (instruction) {
1238 case VCD_ADD:
1239 result = DecodeAdd(size);
1240 break;
1241 case VCD_RUN:
1242 result = DecodeRun(size);
1243 break;
1244 case VCD_COPY:
1245 result = DecodeCopy(size, mode);
1246 break;
1247 default:
1248 LOG(DFATAL) << "Unexpected instruction type " << instruction
1249 << "in opcode stream" << LOG_ENDL;
1250 return RESULT_ERROR;
1251 }
1252 switch (result) {
1253 case RESULT_END_OF_DATA:
1254 reader_.UnGetInstruction();
1255 UpdateInstructionPointer(parseable_chunk);
1256 return RESULT_END_OF_DATA;
1257 case RESULT_ERROR:
1258 return RESULT_ERROR;
1259 case RESULT_SUCCESS:
1260 break;
1261 }
1262 }
1263 if (TargetBytesDecoded() != target_window_length_) {
1264 LOG(ERROR) << "Decoded target window size (" << TargetBytesDecoded()
1265 << " bytes) does not match expected size ("
1266 << target_window_length_ << " bytes)" << LOG_ENDL;
1267 return RESULT_ERROR;
1268 }
1269 const char* const target_window_start =
1270 parent_->decoded_target()->data() + target_window_start_pos_;
1271 if (has_checksum_ &&
1272 (ComputeAdler32(target_window_start, target_window_length_)
1273 != expected_checksum_)) {
1274 LOG(ERROR) << "Target data does not match checksum; this could mean "
1275 "that the wrong dictionary was used" << LOG_ENDL;
1276 return RESULT_ERROR;
1277 }
1278 if (!instructions_and_sizes_.Empty()) {
1279 LOG(ERROR) << "Excess instructions and sizes left over "
1280 "after decoding target window" << LOG_ENDL;
1281 return RESULT_ERROR;
1282 }
1283 if (!IsInterleaved()) {
1284 // Standard format is being used, with three separate sections for the
1285 // instructions, data, and addresses.
1286 if (!data_for_add_and_run_.Empty()) {
1287 LOG(ERROR) << "Excess ADD/RUN data left over "
1288 "after decoding target window" << LOG_ENDL;
1289 return RESULT_ERROR;
1290 }
1291 if (!addresses_for_copy_.Empty()) {
1292 LOG(ERROR) << "Excess COPY addresses left over "
1293 "after decoding target window" << LOG_ENDL;
1294 return RESULT_ERROR;
1295 }
1296 // Reached the end of the window. Update the ParseableChunk to point to the
1297 // end of the addresses section, which is the last section in the window.
1298 parseable_chunk->SetPosition(addresses_for_copy_.End());
1299 } else {
1300 // Interleaved format is being used.
1301 UpdateInstructionPointer(parseable_chunk);
1302 }
1303 return RESULT_SUCCESS;
1304 }
1305
DecodeWindows(ParseableChunk * parseable_chunk)1306 VCDiffResult VCDiffDeltaFileWindow::DecodeWindows(
1307 ParseableChunk* parseable_chunk) {
1308 if (!parent_) {
1309 LOG(DFATAL) << "Internal error: VCDiffDeltaFileWindow::DecodeWindows() "
1310 "called before VCDiffDeltaFileWindow::Init()" << LOG_ENDL;
1311 return RESULT_ERROR;
1312 }
1313 while (!parseable_chunk->Empty()) {
1314 if (!found_header_) {
1315 switch (ReadHeader(parseable_chunk)) {
1316 case RESULT_END_OF_DATA:
1317 return RESULT_END_OF_DATA;
1318 case RESULT_ERROR:
1319 return RESULT_ERROR;
1320 default:
1321 // Reset address cache between windows (RFC section 5.1)
1322 if (!parent_->addr_cache()->Init()) {
1323 LOG(DFATAL) << "Error initializing address cache" << LOG_ENDL;
1324 return RESULT_ERROR;
1325 }
1326 }
1327 } else {
1328 // We are resuming a window that was partially decoded before a
1329 // RESULT_END_OF_DATA was returned. This can only happen on the first
1330 // loop iteration, and only if the interleaved format is enabled and used.
1331 if (!IsInterleaved()) {
1332 LOG(DFATAL) << "Internal error: Resumed decoding of a delta file window"
1333 " when interleaved format is not being used" << LOG_ENDL;
1334 return RESULT_ERROR;
1335 }
1336 UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(),
1337 parseable_chunk->End());
1338 reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(),
1339 instructions_and_sizes_.End());
1340 }
1341 switch (DecodeBody(parseable_chunk)) {
1342 case RESULT_END_OF_DATA:
1343 if (MoreDataExpected()) {
1344 return RESULT_END_OF_DATA;
1345 } else {
1346 LOG(ERROR) << "End of data reached while decoding VCDIFF delta file"
1347 << LOG_ENDL;
1348 // fall through to RESULT_ERROR case
1349 }
1350 case RESULT_ERROR:
1351 return RESULT_ERROR;
1352 default:
1353 break; // DecodeBody succeeded
1354 }
1355 // Get ready to read a new delta window
1356 Reset();
1357 if (parent_->ReachedPlannedTargetFileSize()) {
1358 // Found exactly the length we expected. Stop decoding.
1359 return RESULT_SUCCESS;
1360 }
1361 }
1362 return RESULT_SUCCESS;
1363 }
1364
1365 // *** Methods for VCDiffStreamingDecoder
1366
VCDiffStreamingDecoder()1367 VCDiffStreamingDecoder::VCDiffStreamingDecoder()
1368 : impl_(new VCDiffStreamingDecoderImpl) { }
1369
~VCDiffStreamingDecoder()1370 VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; }
1371
StartDecoding(const char * source,size_t len)1372 void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) {
1373 impl_->StartDecoding(source, len);
1374 }
1375
DecodeChunkToInterface(const char * data,size_t len,OutputStringInterface * output_string)1376 bool VCDiffStreamingDecoder::DecodeChunkToInterface(
1377 const char* data,
1378 size_t len,
1379 OutputStringInterface* output_string) {
1380 return impl_->DecodeChunk(data, len, output_string);
1381 }
1382
FinishDecoding()1383 bool VCDiffStreamingDecoder::FinishDecoding() {
1384 return impl_->FinishDecoding();
1385 }
1386
SetMaximumTargetFileSize(size_t new_maximum_target_file_size)1387 bool VCDiffStreamingDecoder::SetMaximumTargetFileSize(
1388 size_t new_maximum_target_file_size) {
1389 return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size);
1390 }
1391
SetMaximumTargetWindowSize(size_t new_maximum_target_window_size)1392 bool VCDiffStreamingDecoder::SetMaximumTargetWindowSize(
1393 size_t new_maximum_target_window_size) {
1394 return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size);
1395 }
1396
SetAllowVcdTarget(bool allow_vcd_target)1397 void VCDiffStreamingDecoder::SetAllowVcdTarget(bool allow_vcd_target) {
1398 impl_->SetAllowVcdTarget(allow_vcd_target);
1399 }
1400
DecodeToInterface(const char * dictionary_ptr,size_t dictionary_size,const string & encoding,OutputStringInterface * target)1401 bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr,
1402 size_t dictionary_size,
1403 const string& encoding,
1404 OutputStringInterface* target) {
1405 target->clear();
1406 decoder_.StartDecoding(dictionary_ptr, dictionary_size);
1407 if (!decoder_.DecodeChunkToInterface(encoding.data(),
1408 encoding.size(),
1409 target)) {
1410 return false;
1411 }
1412 return decoder_.FinishDecoding();
1413 }
1414
1415 } // namespace open_vcdiff
1416