1 // Copyright 2008 Google Inc. 2 // Author: Lincoln Smith 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 #ifndef OPEN_VCDIFF_HEADERPARSER_H_ 17 #define OPEN_VCDIFF_HEADERPARSER_H_ 18 19 #include <config.h> 20 #include <stddef.h> // NULL 21 #include <stdint.h> // int32_t, uint32_t 22 #include "checksum.h" // VCDChecksum 23 #include "vcdiff_defs.h" // VCDiffResult 24 25 namespace open_vcdiff { 26 27 // This class contains a contiguous memory buffer with start and end pointers, 28 // as well as a position pointer which shows how much of the buffer has been 29 // parsed and how much remains. 30 // 31 // Because no virtual destructor is defined for ParseableChunk, a pointer to 32 // a child class of ParseableChunk must be destroyed using its specific type, 33 // rather than as a ParseableChunk*. 34 class ParseableChunk { 35 public: ParseableChunk(const char * data_start,size_t data_size)36 ParseableChunk(const char* data_start, size_t data_size) { 37 SetDataBuffer(data_start, data_size); 38 } 39 End()40 const char* End() const { return end_; } 41 42 // The number of bytes remaining to be parsed. This is not necessarily the 43 // same as the initial size of the buffer; it changes with each call to 44 // Advance(). UnparsedSize()45 size_t UnparsedSize() const { 46 return end_ - position_; 47 } 48 49 // The number of bytes that have already been parsed. ParsedSize()50 size_t ParsedSize() const { 51 return position_ - start_; 52 } 53 Empty()54 bool Empty() const { return 0 == UnparsedSize(); } 55 56 // The start of the data remaining to be parsed. UnparsedData()57 const char* UnparsedData() const { return position_; } 58 59 // Returns a pointer to the start of the data remaining to be parsed. UnparsedDataAddr()60 const char** UnparsedDataAddr() { return &position_; } 61 62 // Moves the parsing position forward by number_of_bytes. 63 void Advance(size_t number_of_bytes); 64 65 // Jumps the parsing position to a new location. 66 void SetPosition(const char* position); 67 68 // Jumps the parsing position to the end of the data chunk. Finish()69 void Finish() { 70 position_ = end_; 71 } 72 73 // Jumps the parsing position so that there are now number_of_bytes 74 // bytes left to parse. This number should be smaller than the size of data 75 // to be parsed before the function was called. 76 void FinishExcept(size_t number_of_bytes); 77 SetDataBuffer(const char * data_start,size_t data_size)78 void SetDataBuffer(const char* data_start, size_t data_size) { 79 start_ = data_start; 80 end_ = data_start + data_size; 81 position_ = start_; 82 } 83 84 private: 85 const char* start_; 86 const char* end_; 87 88 // The current parsing position within the data chunk. 89 // Must always respect start_ <= position_ <= end_. 90 const char* position_; 91 92 // Making these private avoids implicit copy constructor & assignment operator 93 ParseableChunk(const ParseableChunk&); 94 void operator=(const ParseableChunk&); 95 }; 96 97 // Represents one of the three sections in the delta window, as described in 98 // RFC section 4.3: 99 // * Data section for ADDs and RUNs 100 // * Instructions and sizes section 101 // * Addresses section for COPYs 102 // When using the interleaved format, data and addresses are pulled from the 103 // instructions and sizes section rather than being stored in separate sections. 104 // For that reason, this class allows one DeltaWindowSection to be based on 105 // another, such that the same position pointer is shared by both sections; 106 // i.e., UnparsedDataAddr() returns the same value for both objects. 107 // To achieve this end, one extra level of indirection (a pointer to a 108 // ParseableChunk object) is added. 109 class DeltaWindowSection { 110 public: DeltaWindowSection()111 DeltaWindowSection() : parseable_chunk_(NULL), owned_(true) { } 112 ~DeltaWindowSection()113 ~DeltaWindowSection() { 114 FreeChunk(); 115 } 116 Init(const char * data_start,size_t data_size)117 void Init(const char* data_start, size_t data_size) { 118 if (owned_ && parseable_chunk_) { 119 // Reuse the already-allocated ParseableChunk object. 120 parseable_chunk_->SetDataBuffer(data_start, data_size); 121 } else { 122 parseable_chunk_ = new ParseableChunk(data_start, data_size); 123 owned_ = true; 124 } 125 } 126 Init(DeltaWindowSection * original)127 void Init(DeltaWindowSection* original) { 128 FreeChunk(); 129 parseable_chunk_ = original->parseable_chunk_; 130 owned_ = false; 131 } 132 Invalidate()133 void Invalidate() { FreeChunk(); } 134 IsOwned()135 bool IsOwned() const { return owned_; } 136 137 // The following functions just pass their arguments to the underlying 138 // ParseableChunk object. 139 End()140 const char* End() const { 141 return parseable_chunk_->End(); 142 } 143 UnparsedSize()144 size_t UnparsedSize() const { 145 return parseable_chunk_->UnparsedSize(); 146 } 147 ParsedSize()148 size_t ParsedSize() const { 149 return parseable_chunk_->ParsedSize(); 150 } 151 Empty()152 bool Empty() const { 153 return parseable_chunk_->Empty(); 154 } 155 UnparsedData()156 const char* UnparsedData() const { 157 return parseable_chunk_->UnparsedData(); 158 } 159 UnparsedDataAddr()160 const char** UnparsedDataAddr() { 161 return parseable_chunk_->UnparsedDataAddr(); 162 } 163 Advance(size_t number_of_bytes)164 void Advance(size_t number_of_bytes) { 165 return parseable_chunk_->Advance(number_of_bytes); 166 } 167 private: FreeChunk()168 void FreeChunk() { 169 if (owned_) { 170 delete parseable_chunk_; 171 } 172 parseable_chunk_ = NULL; 173 } 174 175 // Will be NULL until Init() has been called. If owned_ is true, this will 176 // point to a ParseableChunk object that has been allocated with "new" and 177 // must be deleted by this DeltaWindowSection object. If owned_ is false, 178 // this points at the parseable_chunk_ owned by a different DeltaWindowSection 179 // object. In this case, it is important to free the DeltaWindowSection which 180 // does not own the ParseableChunk before (or simultaneously to) freeing the 181 // DeltaWindowSection that owns it, or else deleted memory may be accessed. 182 ParseableChunk* parseable_chunk_; 183 bool owned_; 184 185 // Making these private avoids implicit copy constructor & assignment operator 186 DeltaWindowSection(const DeltaWindowSection&); 187 void operator=(const DeltaWindowSection&); 188 }; 189 190 // Used to parse the bytes and Varints that make up the delta file header 191 // or delta window header. 192 class VCDiffHeaderParser { 193 public: 194 // header_start should be the start of the header to be parsed; 195 // data_end is the position just after the last byte of available data 196 // (which may extend far past the end of the header.) 197 VCDiffHeaderParser(const char* header_start, const char* data_end); 198 199 // One of these functions should be called for each element of the header. 200 // variable_description is a description of the value that we are attempting 201 // to parse, and will only be used to create descriptive error messages. 202 // If the function returns true, then the element was parsed successfully 203 // and its value has been placed in *value. If the function returns false, 204 // then *value is unchanged, and GetResult() can be called to return the 205 // reason that the element could not be parsed, which will be either 206 // RESULT_ERROR (an error occurred), or RESULT_END_OF_DATA (the limit data_end 207 // was reached before the end of the element to be parsed.) Once one of these 208 // functions has returned false, further calls to any of the Parse... 209 // functions will also return false without performing any additional actions. 210 // Typical usage is as follows: 211 // int32_t segment_length = 0; 212 // if (!header_parser.ParseInt32("segment length", &segment_length)) { 213 // return header_parser.GetResult(); 214 // } 215 // 216 // The following example takes advantage of the fact that calling a Parse... 217 // function after an error or end-of-data condition is legal and does nothing. 218 // It can thus parse more than one element in a row and check the status 219 // afterwards. If the first call to ParseInt32() fails, the second will have 220 // no effect: 221 // 222 // int32_t segment_length = 0, segment_position = 0; 223 // header_parser.ParseInt32("segment length", &segment_length)); 224 // header_parser.ParseInt32("segment position", &segment_position)); 225 // if (RESULT_SUCCESS != header_parser.GetResult()) { 226 // return header_parser.GetResult(); 227 // } 228 // 229 bool ParseByte(unsigned char* value); 230 bool ParseInt32(const char* variable_description, int32_t* value); 231 bool ParseUInt32(const char* variable_description, uint32_t* value); 232 bool ParseChecksum(const char* variable_description, VCDChecksum* value); 233 bool ParseSize(const char* variable_description, size_t* value); 234 235 // Parses the first three elements of the delta window header: 236 // 237 // Win_Indicator - byte 238 // [Source segment size] - integer (VarintBE format) 239 // [Source segment position] - integer (VarintBE format) 240 // 241 // Returns true if the values were parsed successfully and the values were 242 // found to be acceptable. Returns false otherwise, in which case 243 // GetResult() can be called to return the reason that the two values 244 // could not be validated. This will be either RESULT_ERROR (an error 245 // occurred and was logged), or RESULT_END_OF_DATA (the limit data_end was 246 // reached before the end of the values to be parsed.) If return value is 247 // true, then *win_indicator, *source_segment_length, and 248 // *source_segment_position are populated with the parsed values. Otherwise, 249 // the values of these output arguments are undefined. 250 // 251 // dictionary_size: The size of the dictionary (source) file. Used to 252 // validate the limits of source_segment_length and 253 // source_segment_position if the source segment is taken from the 254 // dictionary (i.e., if the parsed *win_indicator equals VCD_SOURCE.) 255 // decoded_target_size: The size of the target data that has been decoded 256 // so far, including all target windows. Used to validate the limits of 257 // source_segment_length and source_segment_position if the source segment 258 // is taken from the target (i.e., if the parsed *win_indicator equals 259 // VCD_TARGET.) 260 // allow_vcd_target: If this argument is false, and the parsed *win_indicator 261 // is VCD_TARGET, then an error is produced; if true, VCD_TARGET is 262 // allowed. 263 // win_indicator (output): Points to a single unsigned char (not an array) 264 // that will receive the parsed value of Win_Indicator. 265 // source_segment_length (output): The parsed length of the source segment. 266 // source_segment_position (output): The parsed zero-based index in the 267 // source/target file from which the source segment is to be taken. 268 bool ParseWinIndicatorAndSourceSegment(size_t dictionary_size, 269 size_t decoded_target_size, 270 bool allow_vcd_target, 271 unsigned char* win_indicator, 272 size_t* source_segment_length, 273 size_t* source_segment_position); 274 275 // Parses the following two elements of the delta window header: 276 // 277 // Length of the delta encoding - integer (VarintBE format) 278 // Size of the target window - integer (VarintBE format) 279 // 280 // Return conditions and values are the same as for 281 // ParseWinIndicatorAndSourceSegment(), above. 282 // 283 bool ParseWindowLengths(size_t* target_window_length); 284 285 // May only be called after ParseWindowLengths() has returned RESULT_SUCCESS. 286 // Returns a pointer to the end of the delta window (which might not point to 287 // a valid memory location if there is insufficient input data.) 288 // 289 const char* EndOfDeltaWindow() const; 290 291 // Parses the following element of the delta window header: 292 // 293 // Delta_Indicator - byte 294 // 295 // Because none of the bits in Delta_Indicator are used by this implementation 296 // of VCDIFF, this function does not have an output argument to return the 297 // value of that field. It may return RESULT_SUCCESS, RESULT_ERROR, or 298 // RESULT_END_OF_DATA as with the other Parse...() functions. 299 // 300 bool ParseDeltaIndicator(); 301 302 // Parses the following 3 elements of the delta window header: 303 // 304 // Length of data for ADDs and RUNs - integer (VarintBE format) 305 // Length of instructions and sizes - integer (VarintBE format) 306 // Length of addresses for COPYs - integer (VarintBE format) 307 // 308 // If has_checksum is true, it also looks for the following element: 309 // 310 // Adler32 checksum - unsigned 32-bit integer (VarintBE format) 311 // 312 // Return conditions and values are the same as for 313 // ParseWinIndicatorAndSourceSegment(), above. 314 // 315 bool ParseSectionLengths(bool has_checksum, 316 size_t* add_and_run_data_length, 317 size_t* instructions_and_sizes_length, 318 size_t* addresses_length, 319 VCDChecksum* checksum); 320 321 // If one of the Parse... functions returned false, this function 322 // can be used to find the result code (RESULT_ERROR or RESULT_END_OF_DATA) 323 // describing the reason for the most recent parse failure. If none of the 324 // Parse... functions has returned false, returns RESULT_SUCCESS. GetResult()325 VCDiffResult GetResult() const { 326 return return_code_; 327 } 328 329 // The following functions just pass their arguments to the underlying 330 // ParseableChunk object. 331 End()332 const char* End() const { 333 return parseable_chunk_.End(); 334 } 335 UnparsedSize()336 size_t UnparsedSize() const { 337 return parseable_chunk_.UnparsedSize(); 338 } 339 ParsedSize()340 size_t ParsedSize() const { 341 return parseable_chunk_.ParsedSize(); 342 } 343 UnparsedData()344 const char* UnparsedData() const { 345 return parseable_chunk_.UnparsedData(); 346 } 347 348 private: 349 // Parses two variable-length integers representing the source segment length 350 // and source segment position (== offset.) Checks whether the source segment 351 // length and position would cause it to exceed the size of the source file or 352 // target file. Returns true if the values were parsed successfully and the 353 // values were found to be acceptable. Returns false otherwise, in which case 354 // GetResult() can be called to return the reason that the two values could 355 // not be validated, which will be either RESULT_ERROR (an error occurred and 356 // was logged), or RESULT_END_OF_DATA (the limit data_end was reached before 357 // the end of the integers to be parsed.) 358 // from_size: The requested size of the source segment. 359 // from_boundary_name: A NULL-terminated string naming the end of the 360 // source or target file, used in error messages. 361 // from_name: A NULL-terminated string naming the source or target file, 362 // also used in error messages. 363 // source_segment_length (output): The parsed length of the source segment. 364 // source_segment_position (output): The parsed zero-based index in the 365 // source/target file from which the source segment is to be taken. 366 // 367 bool ParseSourceSegmentLengthAndPosition(size_t from_size, 368 const char* from_boundary_name, 369 const char* from_name, 370 size_t* source_segment_length, 371 size_t* source_segment_position); 372 373 ParseableChunk parseable_chunk_; 374 375 // Contains the result code of the last Parse...() operation that failed 376 // (RESULT_ERROR or RESULT_END_OF_DATA). If no Parse...() method has been 377 // called, or if all calls to Parse...() were successful, then this contains 378 // RESULT_SUCCESS. 379 VCDiffResult return_code_; 380 381 // Will be zero until ParseWindowLengths() has been called. After 382 // ParseWindowLengths() has been called successfully, this contains the 383 // parsed length of the delta encoding. 384 size_t delta_encoding_length_; 385 386 // Will be NULL until ParseWindowLengths() has been called. After 387 // ParseWindowLengths() has been called successfully, this points to the 388 // beginning of the section of the current window titled "The delta encoding" 389 // in the RFC, i.e., to the position just after the length of the delta 390 // encoding. 391 const char* delta_encoding_start_; 392 393 // Making these private avoids implicit copy constructor & assignment operator 394 VCDiffHeaderParser(const VCDiffHeaderParser&); 395 void operator=(const VCDiffHeaderParser&); 396 }; 397 398 } // namespace open_vcdiff 399 400 #endif // OPEN_VCDIFF_HEADERPARSER_H_ 401