• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2008 Google Inc.
// Author: Lincoln Smith
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
15 
16 #ifndef OPEN_VCDIFF_HEADERPARSER_H_
17 #define OPEN_VCDIFF_HEADERPARSER_H_
18 
19 #include <config.h>
20 #include <stddef.h>  // NULL
21 #include <stdint.h>  // int32_t, uint32_t
22 #include "checksum.h"  // VCDChecksum
23 #include "vcdiff_defs.h"  // VCDiffResult
24 
25 namespace open_vcdiff {
26 
// This class contains a contiguous memory buffer with start and end pointers,
// as well as a position pointer which shows how much of the buffer has been
// parsed and how much remains.
//
// Because no virtual destructor is defined for ParseableChunk, a pointer to
// a child class of ParseableChunk must be destroyed using its specific type,
// rather than as a ParseableChunk*.
class ParseableChunk {
 public:
  // Wraps the data_size bytes that begin at data_start.  The bytes are not
  // copied; only pointers into the caller's buffer are stored.  The parsing
  // position starts at data_start.
  ParseableChunk(const char* data_start, size_t data_size)
      : start_(data_start),
        end_(data_start + data_size),
        position_(data_start) { }

  // The position just past the last byte of the buffer.
  const char* End() const { return end_; }

  // The number of bytes remaining to be parsed.  This is not necessarily the
  // same as the initial size of the buffer; it shrinks as the parsing
  // position moves forward.
  size_t UnparsedSize() const {
    return static_cast<size_t>(end_ - position_);
  }

  // The number of bytes that have already been parsed.
  size_t ParsedSize() const {
    return static_cast<size_t>(position_ - start_);
  }

  // True if no unparsed bytes remain.
  bool Empty() const { return 0 == UnparsedSize(); }

  // The start of the data remaining to be parsed.
  const char* UnparsedData() const { return position_; }

  // Returns the address of the internal position pointer (not of the data
  // itself).  Storing a new value through the returned pointer moves the
  // parsing position directly, without going through Advance() or
  // SetPosition(), so the caller is responsible for keeping
  // start_ <= position_ <= end_.
  const char** UnparsedDataAddr() { return &position_; }

  // Moves the parsing position forward by number_of_bytes.
  void Advance(size_t number_of_bytes);

  // Jumps the parsing position to a new location.
  void SetPosition(const char* position);

  // Jumps the parsing position to the end of the data chunk.
  void Finish() {
    position_ = end_;
  }

  // Jumps the parsing position so that there are now number_of_bytes
  // bytes left to parse.  This number should be smaller than the size of data
  // to be parsed before the function was called.
  void FinishExcept(size_t number_of_bytes);

  // Points this object at a different buffer of data_size bytes beginning at
  // data_start, and resets the parsing position to the start of that buffer.
  void SetDataBuffer(const char* data_start, size_t data_size) {
    start_ = data_start;
    end_ = data_start + data_size;
    position_ = start_;
  }

 private:
  const char* start_;
  const char* end_;

  // The current parsing position within the data chunk.
  // Must always respect start_ <= position_ <= end_.
  const char* position_;

  // Making these private avoids implicit copy constructor & assignment operator
  ParseableChunk(const ParseableChunk&);
  void operator=(const ParseableChunk&);
};
96 
97 // Represents one of the three sections in the delta window, as described in
98 // RFC section 4.3:
99 //     * Data section for ADDs and RUNs
100 //     * Instructions and sizes section
101 //     * Addresses section for COPYs
102 // When using the interleaved format, data and addresses are pulled from the
103 // instructions and sizes section rather than being stored in separate sections.
104 // For that reason, this class allows one DeltaWindowSection to be based on
105 // another, such that the same position pointer is shared by both sections;
106 // i.e., UnparsedDataAddr() returns the same value for both objects.
107 // To achieve this end, one extra level of indirection (a pointer to a
108 // ParseableChunk object) is added.
109 class DeltaWindowSection {
110  public:
DeltaWindowSection()111   DeltaWindowSection() : parseable_chunk_(NULL), owned_(true) { }
112 
~DeltaWindowSection()113   ~DeltaWindowSection() {
114     FreeChunk();
115   }
116 
Init(const char * data_start,size_t data_size)117   void Init(const char* data_start, size_t data_size) {
118     if (owned_ && parseable_chunk_) {
119       // Reuse the already-allocated ParseableChunk object.
120       parseable_chunk_->SetDataBuffer(data_start, data_size);
121     } else {
122       parseable_chunk_ = new ParseableChunk(data_start, data_size);
123       owned_ = true;
124     }
125   }
126 
Init(DeltaWindowSection * original)127   void Init(DeltaWindowSection* original) {
128     FreeChunk();
129     parseable_chunk_ = original->parseable_chunk_;
130     owned_ = false;
131   }
132 
Invalidate()133   void Invalidate() { FreeChunk(); }
134 
IsOwned()135   bool IsOwned() const { return owned_; }
136 
137   // The following functions just pass their arguments to the underlying
138   // ParseableChunk object.
139 
End()140   const char* End() const {
141     return parseable_chunk_->End();
142   }
143 
UnparsedSize()144   size_t UnparsedSize() const {
145     return parseable_chunk_->UnparsedSize();
146   }
147 
ParsedSize()148   size_t ParsedSize() const {
149     return parseable_chunk_->ParsedSize();
150   }
151 
Empty()152   bool Empty() const {
153     return parseable_chunk_->Empty();
154   }
155 
UnparsedData()156   const char* UnparsedData() const {
157     return parseable_chunk_->UnparsedData();
158   }
159 
UnparsedDataAddr()160   const char** UnparsedDataAddr() {
161     return parseable_chunk_->UnparsedDataAddr();
162   }
163 
Advance(size_t number_of_bytes)164   void Advance(size_t number_of_bytes) {
165     return parseable_chunk_->Advance(number_of_bytes);
166   }
167  private:
FreeChunk()168   void FreeChunk() {
169     if (owned_) {
170       delete parseable_chunk_;
171     }
172     parseable_chunk_ = NULL;
173   }
174 
175   // Will be NULL until Init() has been called.  If owned_ is true, this will
176   // point to a ParseableChunk object that has been allocated with "new" and
177   // must be deleted by this DeltaWindowSection object.  If owned_ is false,
178   // this points at the parseable_chunk_ owned by a different DeltaWindowSection
179   // object.  In this case, it is important to free the DeltaWindowSection which
180   // does not own the ParseableChunk before (or simultaneously to) freeing the
181   // DeltaWindowSection that owns it, or else deleted memory may be accessed.
182   ParseableChunk* parseable_chunk_;
183   bool owned_;
184 
185   // Making these private avoids implicit copy constructor & assignment operator
186   DeltaWindowSection(const DeltaWindowSection&);
187   void operator=(const DeltaWindowSection&);
188 };
189 
190 // Used to parse the bytes and Varints that make up the delta file header
191 // or delta window header.
192 class VCDiffHeaderParser {
193  public:
194   // header_start should be the start of the header to be parsed;
195   // data_end is the position just after the last byte of available data
196   // (which may extend far past the end of the header.)
197   VCDiffHeaderParser(const char* header_start, const char* data_end);
198 
199   // One of these functions should be called for each element of the header.
200   // variable_description is a description of the value that we are attempting
201   // to parse, and will only be used to create descriptive error messages.
202   // If the function returns true, then the element was parsed successfully
203   // and its value has been placed in *value.  If the function returns false,
204   // then *value is unchanged, and GetResult() can be called to return the
205   // reason that the element could not be parsed, which will be either
206   // RESULT_ERROR (an error occurred), or RESULT_END_OF_DATA (the limit data_end
207   // was reached before the end of the element to be parsed.)  Once one of these
208   // functions has returned false, further calls to any of the Parse...
209   // functions will also return false without performing any additional actions.
210   // Typical usage is as follows:
211   //     int32_t segment_length = 0;
212   //     if (!header_parser.ParseInt32("segment length", &segment_length)) {
213   //       return header_parser.GetResult();
214   //     }
215   //
216   // The following example takes advantage of the fact that calling a Parse...
217   // function after an error or end-of-data condition is legal and does nothing.
218   // It can thus parse more than one element in a row and check the status
219   // afterwards.  If the first call to ParseInt32() fails, the second will have
220   // no effect:
221   //
222   //     int32_t segment_length = 0, segment_position = 0;
223   //     header_parser.ParseInt32("segment length", &segment_length));
224   //     header_parser.ParseInt32("segment position", &segment_position));
225   //     if (RESULT_SUCCESS != header_parser.GetResult()) {
226   //       return header_parser.GetResult();
227   //     }
228   //
229   bool ParseByte(unsigned char* value);
230   bool ParseInt32(const char* variable_description, int32_t* value);
231   bool ParseUInt32(const char* variable_description, uint32_t* value);
232   bool ParseChecksum(const char* variable_description, VCDChecksum* value);
233   bool ParseSize(const char* variable_description, size_t* value);
234 
235   // Parses the first three elements of the delta window header:
236   //
237   //     Win_Indicator                            - byte
238   //     [Source segment size]                    - integer (VarintBE format)
239   //     [Source segment position]                - integer (VarintBE format)
240   //
241   // Returns true if the values were parsed successfully and the values were
242   // found to be acceptable.  Returns false otherwise, in which case
243   // GetResult() can be called to return the reason that the two values
244   // could not be validated.  This will be either RESULT_ERROR (an error
245   // occurred and was logged), or RESULT_END_OF_DATA (the limit data_end was
246   // reached before the end of the values to be parsed.)  If return value is
247   // true, then *win_indicator, *source_segment_length, and
248   // *source_segment_position are populated with the parsed values.  Otherwise,
249   // the values of these output arguments are undefined.
250   //
251   // dictionary_size: The size of the dictionary (source) file.  Used to
252   //     validate the limits of source_segment_length and
253   //     source_segment_position if the source segment is taken from the
254   //     dictionary (i.e., if the parsed *win_indicator equals VCD_SOURCE.)
255   // decoded_target_size: The size of the target data that has been decoded
256   //     so far, including all target windows.  Used to validate the limits of
257   //     source_segment_length and source_segment_position if the source segment
258   //     is taken from the target (i.e., if the parsed *win_indicator equals
259   //     VCD_TARGET.)
260   // allow_vcd_target: If this argument is false, and the parsed *win_indicator
261   //     is VCD_TARGET, then an error is produced; if true, VCD_TARGET is
262   //     allowed.
263   // win_indicator (output): Points to a single unsigned char (not an array)
264   //     that will receive the parsed value of Win_Indicator.
265   // source_segment_length (output): The parsed length of the source segment.
266   // source_segment_position (output): The parsed zero-based index in the
267   //     source/target file from which the source segment is to be taken.
268   bool ParseWinIndicatorAndSourceSegment(size_t dictionary_size,
269                                          size_t decoded_target_size,
270                                          bool allow_vcd_target,
271                                          unsigned char* win_indicator,
272                                          size_t* source_segment_length,
273                                          size_t* source_segment_position);
274 
275   // Parses the following two elements of the delta window header:
276   //
277   //     Length of the delta encoding             - integer (VarintBE format)
278   //     Size of the target window                - integer (VarintBE format)
279   //
280   // Return conditions and values are the same as for
281   // ParseWinIndicatorAndSourceSegment(), above.
282   //
283   bool ParseWindowLengths(size_t* target_window_length);
284 
285   // May only be called after ParseWindowLengths() has returned RESULT_SUCCESS.
286   // Returns a pointer to the end of the delta window (which might not point to
287   // a valid memory location if there is insufficient input data.)
288   //
289   const char* EndOfDeltaWindow() const;
290 
291   // Parses the following element of the delta window header:
292   //
293   //     Delta_Indicator                          - byte
294   //
295   // Because none of the bits in Delta_Indicator are used by this implementation
296   // of VCDIFF, this function does not have an output argument to return the
297   // value of that field.  It may return RESULT_SUCCESS, RESULT_ERROR, or
298   // RESULT_END_OF_DATA as with the other Parse...() functions.
299   //
300   bool ParseDeltaIndicator();
301 
302   // Parses the following 3 elements of the delta window header:
303   //
304   //     Length of data for ADDs and RUNs - integer (VarintBE format)
305   //     Length of instructions and sizes - integer (VarintBE format)
306   //     Length of addresses for COPYs    - integer (VarintBE format)
307   //
308   // If has_checksum is true, it also looks for the following element:
309   //
310   //     Adler32 checksum            - unsigned 32-bit integer (VarintBE format)
311   //
312   // Return conditions and values are the same as for
313   // ParseWinIndicatorAndSourceSegment(), above.
314   //
315   bool ParseSectionLengths(bool has_checksum,
316                            size_t* add_and_run_data_length,
317                            size_t* instructions_and_sizes_length,
318                            size_t* addresses_length,
319                            VCDChecksum* checksum);
320 
321   // If one of the Parse... functions returned false, this function
322   // can be used to find the result code (RESULT_ERROR or RESULT_END_OF_DATA)
323   // describing the reason for the most recent parse failure.  If none of the
324   // Parse... functions has returned false, returns RESULT_SUCCESS.
GetResult()325   VCDiffResult GetResult() const {
326     return return_code_;
327   }
328 
329   // The following functions just pass their arguments to the underlying
330   // ParseableChunk object.
331 
End()332   const char* End() const {
333     return parseable_chunk_.End();
334   }
335 
UnparsedSize()336   size_t UnparsedSize() const {
337     return parseable_chunk_.UnparsedSize();
338   }
339 
ParsedSize()340   size_t ParsedSize() const {
341     return parseable_chunk_.ParsedSize();
342   }
343 
UnparsedData()344   const char* UnparsedData() const {
345     return parseable_chunk_.UnparsedData();
346   }
347 
348  private:
349   // Parses two variable-length integers representing the source segment length
350   // and source segment position (== offset.)  Checks whether the source segment
351   // length and position would cause it to exceed the size of the source file or
352   // target file.  Returns true if the values were parsed successfully and the
353   // values were found to be acceptable.  Returns false otherwise, in which case
354   // GetResult() can be called to return the reason that the two values could
355   // not be validated, which will be either RESULT_ERROR (an error occurred and
356   // was logged), or RESULT_END_OF_DATA (the limit data_end was reached before
357   // the end of the integers to be parsed.)
358   // from_size: The requested size of the source segment.
359   // from_boundary_name: A NULL-terminated string naming the end of the
360   //     source or target file, used in error messages.
361   // from_name: A NULL-terminated string naming the source or target file,
362   //     also used in error messages.
363   // source_segment_length (output): The parsed length of the source segment.
364   // source_segment_position (output): The parsed zero-based index in the
365   //     source/target file from which the source segment is to be taken.
366   //
367   bool ParseSourceSegmentLengthAndPosition(size_t from_size,
368                                            const char* from_boundary_name,
369                                            const char* from_name,
370                                            size_t* source_segment_length,
371                                            size_t* source_segment_position);
372 
373   ParseableChunk parseable_chunk_;
374 
375   // Contains the result code of the last Parse...() operation that failed
376   // (RESULT_ERROR or RESULT_END_OF_DATA).  If no Parse...() method has been
377   // called, or if all calls to Parse...() were successful, then this contains
378   // RESULT_SUCCESS.
379   VCDiffResult return_code_;
380 
381   // Will be zero until ParseWindowLengths() has been called.  After
382   // ParseWindowLengths() has been called successfully, this contains the
383   // parsed length of the delta encoding.
384   size_t delta_encoding_length_;
385 
386   // Will be NULL until ParseWindowLengths() has been called.  After
387   // ParseWindowLengths() has been called successfully, this points to the
388   // beginning of the section of the current window titled "The delta encoding"
389   // in the RFC, i.e., to the position just after the length of the delta
390   // encoding.
391   const char* delta_encoding_start_;
392 
393   // Making these private avoids implicit copy constructor & assignment operator
394   VCDiffHeaderParser(const VCDiffHeaderParser&);
395   void operator=(const VCDiffHeaderParser&);
396 };
397 
398 }  // namespace open_vcdiff
399 
400 #endif  // OPEN_VCDIFF_HEADERPARSER_H_
401