1 // Copyright 2008 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15
16 #include <config.h>
17 #include "google/vcencoder.h"
18 #include <stdlib.h> // free, posix_memalign
19 #include <string.h> // memcpy
20 #include <algorithm>
21 #include <string>
22 #include <vector>
23 #include "blockhash.h"
24 #include "checksum.h"
25 #include "testing.h"
26 #include "varint_bigendian.h"
27 #include "google/vcdecoder.h"
28 #include "vcdiff_defs.h"
29
30 #ifdef HAVE_EXT_ROPE
31 #include <ext/rope>
32 #include "output_string_crope.h"
33 using __gnu_cxx::crope;
34 #endif // HAVE_EXT_ROPE
35
36 #ifdef HAVE_MALLOC_H
37 #include <malloc.h>
38 #endif // HAVE_MALLOC_H
39
40 #ifdef HAVE_SYS_MMAN_H
41 #define _XOPEN_SOURCE 600 // posix_memalign
42 #include <sys/mman.h> // mprotect
43 #endif // HAVE_SYS_MMAN_H
44
45 #ifdef HAVE_UNISTD_H
46 #include <unistd.h> // getpagesize
47 #endif // HAVE_UNISTD_H
48
49 namespace open_vcdiff {
50 namespace {
51
52 static const size_t kFileHeaderSize = sizeof(DeltaFileHeader);
53
54 // This is to check the maximum possible encoding size
55 // if using a single ADD instruction, so assume that the
56 // dictionary size, the length of the ADD data, the size
57 // of the target window, and the length of the delta window
58 // are all two-byte Varints, that is, 128 <= length < 4096.
59 // This figure includes three extra bytes for a zero-sized
60 // ADD instruction with a two-byte Varint explicit size.
61 // Any additional COPY & ADD instructions must reduce
62 // the length of the encoding from this maximum.
63 static const size_t kWindowHeaderSize = 21;
64
65 class VerifyEncodedBytesTest : public testing::Test {
66 public:
67 typedef std::string string;
68
VerifyEncodedBytesTest()69 VerifyEncodedBytesTest() : delta_index_(0) { }
~VerifyEncodedBytesTest()70 virtual ~VerifyEncodedBytesTest() { }
71
ExpectByte(unsigned char b)72 void ExpectByte(unsigned char b) {
73 EXPECT_EQ(b, static_cast<unsigned char>(delta_[delta_index_]));
74 ++delta_index_;
75 }
76
ExpectString(const char * s)77 void ExpectString(const char* s) {
78 const size_t size = strlen(s); // don't include terminating NULL char
79 EXPECT_EQ(string(s, size),
80 string(delta_data() + delta_index_, size));
81 delta_index_ += size;
82 }
83
ExpectNoMoreBytes()84 void ExpectNoMoreBytes() {
85 EXPECT_EQ(delta_index_, delta_size());
86 }
87
ExpectSize(size_t size)88 void ExpectSize(size_t size) {
89 const char* delta_size_pos = &delta_[delta_index_];
90 EXPECT_EQ(size,
91 static_cast<size_t>(
92 VarintBE<int32_t>::Parse(delta_data() + delta_size(),
93 &delta_size_pos)));
94 delta_index_ = delta_size_pos - delta_data();
95 }
96
ExpectChecksum(VCDChecksum checksum)97 void ExpectChecksum(VCDChecksum checksum) {
98 const char* delta_checksum_pos = &delta_[delta_index_];
99 EXPECT_EQ(checksum,
100 static_cast<VCDChecksum>(
101 VarintBE<int64_t>::Parse(delta_data() + delta_size(),
102 &delta_checksum_pos)));
103 delta_index_ = delta_checksum_pos - delta_data();
104 }
105
delta_as_const() const106 const string& delta_as_const() const { return delta_; }
delta()107 string* delta() { return &delta_; }
108
delta_data() const109 const char* delta_data() const { return delta_as_const().data(); }
delta_size() const110 size_t delta_size() const { return delta_as_const().size(); }
111
112 private:
113 string delta_;
114 size_t delta_index_;
115 };
116
117 class VCDiffEncoderTest : public VerifyEncodedBytesTest {
118 protected:
119 static const char kDictionary[];
120 static const char kTarget[];
121
122 VCDiffEncoderTest();
~VCDiffEncoderTest()123 virtual ~VCDiffEncoderTest() { }
124
125 void TestWithFixedChunkSize(size_t chunk_size);
126 void TestWithEncodedChunkVector(size_t chunk_size);
127
128 HashedDictionary hashed_dictionary_;
129 VCDiffStreamingEncoder encoder_;
130 VCDiffStreamingDecoder decoder_;
131 VCDiffEncoder simple_encoder_;
132 VCDiffDecoder simple_decoder_;
133
134 string result_target_;
135 };
136
137 const char VCDiffEncoderTest::kDictionary[] =
138 "\"Just the place for a Snark!\" the Bellman cried,\n"
139 "As he landed his crew with care;\n"
140 "Supporting each man on the top of the tide\n"
141 "By a finger entwined in his hair.\n";
142
143 const char VCDiffEncoderTest::kTarget[] =
144 "\"Just the place for a Snark! I have said it twice:\n"
145 "That alone should encourage the crew.\n"
146 "Just the place for a Snark! I have said it thrice:\n"
147 "What I tell you three times is true.\"\n";
148
VCDiffEncoderTest()149 VCDiffEncoderTest::VCDiffEncoderTest()
150 : hashed_dictionary_(kDictionary, sizeof(kDictionary)),
151 encoder_(&hashed_dictionary_,
152 VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
153 /* look_for_target_matches = */ true),
154 simple_encoder_(kDictionary, sizeof(kDictionary)) {
155 EXPECT_TRUE(hashed_dictionary_.Init());
156 }
157
TEST_F(VCDiffEncoderTest,EncodeBeforeStartEncoding)158 TEST_F(VCDiffEncoderTest, EncodeBeforeStartEncoding) {
159 EXPECT_FALSE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
160 }
161
TEST_F(VCDiffEncoderTest,FinishBeforeStartEncoding)162 TEST_F(VCDiffEncoderTest, FinishBeforeStartEncoding) {
163 EXPECT_FALSE(encoder_.FinishEncoding(delta()));
164 }
165
TEST_F(VCDiffEncoderTest,EncodeDecodeNothing)166 TEST_F(VCDiffEncoderTest, EncodeDecodeNothing) {
167 HashedDictionary nothing_dictionary("", 0);
168 EXPECT_TRUE(nothing_dictionary.Init());
169 VCDiffStreamingEncoder nothing_encoder(¬hing_dictionary,
170 VCD_STANDARD_FORMAT,
171 false);
172 EXPECT_TRUE(nothing_encoder.StartEncoding(delta()));
173 EXPECT_TRUE(nothing_encoder.FinishEncoding(delta()));
174 decoder_.StartDecoding("", 0);
175 EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
176 delta_size(),
177 &result_target_));
178 EXPECT_TRUE(decoder_.FinishDecoding());
179 EXPECT_TRUE(result_target_.empty());
180 }
181
182 // A NULL dictionary pointer is legal as long as the dictionary size is 0.
TEST_F(VCDiffEncoderTest,EncodeDecodeNullDictionaryPtr)183 TEST_F(VCDiffEncoderTest, EncodeDecodeNullDictionaryPtr) {
184 HashedDictionary null_dictionary(NULL, 0);
185 EXPECT_TRUE(null_dictionary.Init());
186 VCDiffStreamingEncoder null_encoder(&null_dictionary,
187 VCD_STANDARD_FORMAT,
188 false);
189 EXPECT_TRUE(null_encoder.StartEncoding(delta()));
190 EXPECT_TRUE(null_encoder.EncodeChunk(kTarget, strlen(kTarget), delta()));
191 EXPECT_TRUE(null_encoder.FinishEncoding(delta()));
192 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
193 delta_size());
194 decoder_.StartDecoding(NULL, 0);
195 EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
196 delta_size(),
197 &result_target_));
198 EXPECT_TRUE(decoder_.FinishDecoding());
199 EXPECT_EQ(kTarget, result_target_);
200 }
201
TEST_F(VCDiffEncoderTest,EncodeDecodeSimple)202 TEST_F(VCDiffEncoderTest, EncodeDecodeSimple) {
203 EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
204 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
205 delta_size());
206 EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
207 sizeof(kDictionary),
208 delta_as_const(),
209 &result_target_));
210 EXPECT_EQ(kTarget, result_target_);
211 }
212
TEST_F(VCDiffEncoderTest,EncodeDecodeInterleaved)213 TEST_F(VCDiffEncoderTest, EncodeDecodeInterleaved) {
214 simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED);
215 EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
216 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
217 delta_size());
218 EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
219 sizeof(kDictionary),
220 delta_as_const(),
221 &result_target_));
222 EXPECT_EQ(kTarget, result_target_);
223 }
224
TEST_F(VCDiffEncoderTest,EncodeDecodeInterleavedChecksum)225 TEST_F(VCDiffEncoderTest, EncodeDecodeInterleavedChecksum) {
226 simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM);
227 EXPECT_TRUE(simple_encoder_.Encode(kTarget,
228 strlen(kTarget),
229 delta()));
230 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
231 delta_size());
232 EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
233 sizeof(kDictionary),
234 delta_as_const(),
235 &result_target_));
236 EXPECT_EQ(kTarget, result_target_);
237 }
238
TEST_F(VCDiffEncoderTest,EncodeDecodeSingleChunk)239 TEST_F(VCDiffEncoderTest, EncodeDecodeSingleChunk) {
240 EXPECT_TRUE(encoder_.StartEncoding(delta()));
241 EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
242 EXPECT_TRUE(encoder_.FinishEncoding(delta()));
243 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
244 delta_size());
245 decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
246 EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
247 delta_size(),
248 &result_target_));
249 EXPECT_TRUE(decoder_.FinishDecoding());
250 EXPECT_EQ(kTarget, result_target_);
251 }
252
TEST_F(VCDiffEncoderTest,EncodeDecodeSeparate)253 TEST_F(VCDiffEncoderTest, EncodeDecodeSeparate) {
254 string delta_start, delta_encode, delta_finish;
255 EXPECT_TRUE(encoder_.StartEncoding(&delta_start));
256 EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), &delta_encode));
257 EXPECT_TRUE(encoder_.FinishEncoding(&delta_finish));
258 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
259 delta_start.size() + delta_encode.size() + delta_finish.size());
260 decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
261 EXPECT_TRUE(decoder_.DecodeChunk(delta_start.data(),
262 delta_start.size(),
263 &result_target_));
264 EXPECT_TRUE(decoder_.DecodeChunk(delta_encode.data(),
265 delta_encode.size(),
266 &result_target_));
267 EXPECT_TRUE(decoder_.DecodeChunk(delta_finish.data(),
268 delta_finish.size(),
269 &result_target_));
270 EXPECT_TRUE(decoder_.FinishDecoding());
271 EXPECT_EQ(kTarget, result_target_);
272 }
273
274 #ifdef HAVE_EXT_ROPE
275 // Test that the crope class can be used in place of a string for encoding
276 // and decoding.
TEST_F(VCDiffEncoderTest,EncodeDecodeCrope)277 TEST_F(VCDiffEncoderTest, EncodeDecodeCrope) {
278 crope delta_crope, result_crope;
279 EXPECT_TRUE(encoder_.StartEncoding(&delta_crope));
280 EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), &delta_crope));
281 EXPECT_TRUE(encoder_.FinishEncoding(&delta_crope));
282 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
283 delta_crope.size());
284 decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
285 // crope can't guarantee that its characters are contiguous, so the decoding
286 // has to be done byte-by-byte.
287 for (crope::const_iterator it = delta_crope.begin();
288 it != delta_crope.end(); it++) {
289 const char this_char = *it;
290 EXPECT_TRUE(decoder_.DecodeChunk(&this_char, 1, &result_crope));
291 }
292 EXPECT_TRUE(decoder_.FinishDecoding());
293 crope expected_target(kTarget);
294 EXPECT_EQ(expected_target, result_crope);
295 }
296 #endif // HAVE_EXT_ROPE
297
TestWithFixedChunkSize(size_t chunk_size)298 void VCDiffEncoderTest::TestWithFixedChunkSize(size_t chunk_size) {
299 delta()->clear();
300 EXPECT_TRUE(encoder_.StartEncoding(delta()));
301 for (size_t chunk_start_index = 0;
302 chunk_start_index < strlen(kTarget);
303 chunk_start_index += chunk_size) {
304 size_t this_chunk_size = chunk_size;
305 const size_t bytes_available = strlen(kTarget) - chunk_start_index;
306 if (this_chunk_size > bytes_available) {
307 this_chunk_size = bytes_available;
308 }
309 EXPECT_TRUE(encoder_.EncodeChunk(&kTarget[chunk_start_index],
310 this_chunk_size,
311 delta()));
312 }
313 EXPECT_TRUE(encoder_.FinishEncoding(delta()));
314 const size_t num_windows = (strlen(kTarget) / chunk_size) + 1;
315 const size_t size_of_windows =
316 strlen(kTarget) + (kWindowHeaderSize * num_windows);
317 EXPECT_GE(kFileHeaderSize + size_of_windows, delta_size());
318 result_target_.clear();
319 decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
320 for (size_t chunk_start_index = 0;
321 chunk_start_index < delta_size();
322 chunk_start_index += chunk_size) {
323 size_t this_chunk_size = chunk_size;
324 const size_t bytes_available = delta_size() - chunk_start_index;
325 if (this_chunk_size > bytes_available) {
326 this_chunk_size = bytes_available;
327 }
328 EXPECT_TRUE(decoder_.DecodeChunk(delta_data() + chunk_start_index,
329 this_chunk_size,
330 &result_target_));
331 }
332 EXPECT_TRUE(decoder_.FinishDecoding());
333 EXPECT_EQ(kTarget, result_target_);
334 }
335
TEST_F(VCDiffEncoderTest,EncodeDecodeFixedChunkSizes)336 TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizes) {
337 // These specific chunk sizes have failed in the past
338 TestWithFixedChunkSize(6);
339 TestWithFixedChunkSize(45);
340 TestWithFixedChunkSize(60);
341
342 // Now loop through all possible chunk sizes
343 for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) {
344 TestWithFixedChunkSize(chunk_size);
345 }
346 }
347
348 // If --allow_vcd_target=false is specified, the decoder will throw away some of
349 // the internally-stored decoded target beyond the current window. Try
350 // different numbers of encoded window sizes to make sure that this behavior
351 // does not affect the results.
TEST_F(VCDiffEncoderTest,EncodeDecodeFixedChunkSizesNoVcdTarget)352 TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizesNoVcdTarget) {
353 decoder_.SetAllowVcdTarget(false);
354 // Loop through all possible chunk sizes
355 for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) {
356 TestWithFixedChunkSize(chunk_size);
357 }
358 }
359
360 // Splits the text to be encoded into fixed-size chunks. Encodes each
361 // chunk and puts it into a vector of strings. Then decodes each string
362 // in the vector and appends the result into result_target_.
TestWithEncodedChunkVector(size_t chunk_size)363 void VCDiffEncoderTest::TestWithEncodedChunkVector(size_t chunk_size) {
364 std::vector<string> encoded_chunks;
365 string this_encoded_chunk;
366 size_t total_chunk_size = 0;
367 EXPECT_TRUE(encoder_.StartEncoding(&this_encoded_chunk));
368 encoded_chunks.push_back(this_encoded_chunk);
369 total_chunk_size += this_encoded_chunk.size();
370 for (size_t chunk_start_index = 0;
371 chunk_start_index < strlen(kTarget);
372 chunk_start_index += chunk_size) {
373 size_t this_chunk_size = chunk_size;
374 const size_t bytes_available = strlen(kTarget) - chunk_start_index;
375 if (this_chunk_size > bytes_available) {
376 this_chunk_size = bytes_available;
377 }
378 this_encoded_chunk.clear();
379 EXPECT_TRUE(encoder_.EncodeChunk(&kTarget[chunk_start_index],
380 this_chunk_size,
381 &this_encoded_chunk));
382 encoded_chunks.push_back(this_encoded_chunk);
383 total_chunk_size += this_encoded_chunk.size();
384 }
385 this_encoded_chunk.clear();
386 EXPECT_TRUE(encoder_.FinishEncoding(&this_encoded_chunk));
387 encoded_chunks.push_back(this_encoded_chunk);
388 total_chunk_size += this_encoded_chunk.size();
389 const size_t num_windows = (strlen(kTarget) / chunk_size) + 1;
390 const size_t size_of_windows =
391 strlen(kTarget) + (kWindowHeaderSize * num_windows);
392 EXPECT_GE(kFileHeaderSize + size_of_windows, total_chunk_size);
393 result_target_.clear();
394 decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
395 for (std::vector<string>::iterator it = encoded_chunks.begin();
396 it != encoded_chunks.end(); ++it) {
397 EXPECT_TRUE(decoder_.DecodeChunk(it->data(), it->size(), &result_target_));
398 }
399 EXPECT_TRUE(decoder_.FinishDecoding());
400 EXPECT_EQ(kTarget, result_target_);
401 }
402
TEST_F(VCDiffEncoderTest,EncodeDecodeStreamOfChunks)403 TEST_F(VCDiffEncoderTest, EncodeDecodeStreamOfChunks) {
404 // Loop through all possible chunk sizes
405 for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) {
406 TestWithEncodedChunkVector(chunk_size);
407 }
408 }
409
410 // Verify that HashedDictionary stores a copy of the dictionary text,
411 // rather than just storing a pointer to it. If the dictionary buffer
412 // is overwritten after creating a HashedDictionary from it, it shouldn't
413 // affect an encoder that uses that HashedDictionary.
TEST_F(VCDiffEncoderTest,DictionaryBufferOverwritten)414 TEST_F(VCDiffEncoderTest, DictionaryBufferOverwritten) {
415 string dictionary_copy(kDictionary, sizeof(kDictionary));
416 HashedDictionary hd_copy(dictionary_copy.data(), dictionary_copy.size());
417 EXPECT_TRUE(hd_copy.Init());
418 VCDiffStreamingEncoder copy_encoder(&hd_copy,
419 VCD_FORMAT_INTERLEAVED
420 | VCD_FORMAT_CHECKSUM,
421 /* look_for_target_matches = */ true);
422 // Produce a reference version of the encoded text.
423 string delta_before;
424 EXPECT_TRUE(copy_encoder.StartEncoding(&delta_before));
425 EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget,
426 strlen(kTarget),
427 &delta_before));
428 EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_before));
429 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
430 delta_before.size());
431
432 // Overwrite the dictionary text with all 'Q' characters.
433 dictionary_copy.replace(0,
434 dictionary_copy.size(),
435 dictionary_copy.size(),
436 'Q');
437 // When the encoder is used on the same target text after overwriting
438 // the dictionary, it should produce the same encoded output.
439 string delta_after;
440 EXPECT_TRUE(copy_encoder.StartEncoding(&delta_after));
441 EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget, strlen(kTarget), &delta_after));
442 EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_after));
443 EXPECT_EQ(delta_before, delta_after);
444 }
445
446 // Binary data test part 1: The dictionary and target data should not
447 // be treated as NULL-terminated. An embedded NULL should be handled like
448 // any other byte of data.
TEST_F(VCDiffEncoderTest,DictionaryHasEmbeddedNULLs)449 TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNULLs) {
450 const char embedded_null_dictionary_text[] =
451 { 0x00, 0xFF, 0xFE, 0xFD, 0x00, 0xFD, 0xFE, 0xFF, 0x00, 0x03 };
452 const char embedded_null_target[] =
453 { 0xFD, 0x00, 0xFD, 0xFE, 0x03, 0x00, 0x01, 0x00 };
454 CHECK_EQ(10, sizeof(embedded_null_dictionary_text));
455 CHECK_EQ(8, sizeof(embedded_null_target));
456 HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text,
457 sizeof(embedded_null_dictionary_text));
458 EXPECT_TRUE(embedded_null_dictionary.Init());
459 VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary,
460 VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
461 /* look_for_target_matches = */ true);
462 EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta()));
463 EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target,
464 sizeof(embedded_null_target),
465 delta()));
466 EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta()));
467 decoder_.StartDecoding(embedded_null_dictionary_text,
468 sizeof(embedded_null_dictionary_text));
469 EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
470 delta_size(),
471 &result_target_));
472 EXPECT_TRUE(decoder_.FinishDecoding());
473 EXPECT_EQ(sizeof(embedded_null_target), result_target_.size());
474 EXPECT_EQ(string(embedded_null_target,
475 sizeof(embedded_null_target)),
476 result_target_);
477 }
478
479 // Binary data test part 2: An embedded CR or LF should be handled like
480 // any other byte of data. No text-processing of the data should occur.
TEST_F(VCDiffEncoderTest,DictionaryHasEmbeddedNewlines)481 TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNewlines) {
482 const char embedded_null_dictionary_text[] =
483 { 0x0C, 0xFF, 0xFE, 0x0C, 0x00, 0x0A, 0xFE, 0xFF, 0x00, 0x0A };
484 const char embedded_null_target[] =
485 { 0x0C, 0x00, 0x0A, 0xFE, 0x03, 0x00, 0x0A, 0x00 };
486 CHECK_EQ(10, sizeof(embedded_null_dictionary_text));
487 CHECK_EQ(8, sizeof(embedded_null_target));
488 HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text,
489 sizeof(embedded_null_dictionary_text));
490 EXPECT_TRUE(embedded_null_dictionary.Init());
491 VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary,
492 VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
493 /* look_for_target_matches = */ true);
494 EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta()));
495 EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target,
496 sizeof(embedded_null_target),
497 delta()));
498 EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta()));
499 decoder_.StartDecoding(embedded_null_dictionary_text,
500 sizeof(embedded_null_dictionary_text));
501 EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
502 delta_size(),
503 &result_target_));
504 EXPECT_TRUE(decoder_.FinishDecoding());
505 EXPECT_EQ(sizeof(embedded_null_target), result_target_.size());
506 EXPECT_EQ(string(embedded_null_target,
507 sizeof(embedded_null_target)),
508 result_target_);
509 }
510
TEST_F(VCDiffEncoderTest,UsingWideCharacters)511 TEST_F(VCDiffEncoderTest, UsingWideCharacters) {
512 const wchar_t wchar_dictionary_text[] =
513 L"\"Just the place for a Snark!\" the Bellman cried,\n"
514 L"As he landed his crew with care;\n"
515 L"Supporting each man on the top of the tide\n"
516 L"By a finger entwined in his hair.\n";
517
518 const wchar_t wchar_target[] =
519 L"\"Just the place for a Snark! I have said it twice:\n"
520 L"That alone should encourage the crew.\n"
521 L"Just the place for a Snark! I have said it thrice:\n"
522 L"What I tell you three times is true.\"\n";
523
524 HashedDictionary wchar_dictionary((const char*) wchar_dictionary_text,
525 sizeof(wchar_dictionary_text));
526 EXPECT_TRUE(wchar_dictionary.Init());
527 VCDiffStreamingEncoder wchar_encoder(&wchar_dictionary,
528 VCD_FORMAT_INTERLEAVED
529 | VCD_FORMAT_CHECKSUM,
530 /* look_for_target_matches = */ false);
531 EXPECT_TRUE(wchar_encoder.StartEncoding(delta()));
532 EXPECT_TRUE(wchar_encoder.EncodeChunk((const char*) wchar_target,
533 sizeof(wchar_target),
534 delta()));
535 EXPECT_TRUE(wchar_encoder.FinishEncoding(delta()));
536 decoder_.StartDecoding((const char*) wchar_dictionary_text,
537 sizeof(wchar_dictionary_text));
538 EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
539 delta_size(),
540 &result_target_));
541 EXPECT_TRUE(decoder_.FinishDecoding());
542 const wchar_t* result_as_wchar = (const wchar_t*) result_target_.data();
543 EXPECT_EQ(wcslen(wchar_target), wcslen(result_as_wchar));
544 EXPECT_EQ(0, wcscmp(wchar_target, result_as_wchar));
545 }
546
547 #if defined(HAVE_MPROTECT) && \
548 (defined(HAVE_MEMALIGN) || defined(HAVE_POSIX_MEMALIGN))
549 // Bug 1220602: Make sure the encoder doesn't read past the end of the input
550 // buffer.
TEST_F(VCDiffEncoderTest,ShouldNotReadPastEndOfBuffer)551 TEST_F(VCDiffEncoderTest, ShouldNotReadPastEndOfBuffer) {
552 const size_t target_size = strlen(kTarget);
553
554 // Allocate two memory pages.
555 const int page_size = getpagesize();
556 void* two_pages = NULL;
557 #ifdef HAVE_POSIX_MEMALIGN
558 posix_memalign(&two_pages, page_size, 2 * page_size);
559 #else // !HAVE_POSIX_MEMALIGN
560 two_pages = memalign(page_size, 2 * page_size);
561 #endif // HAVE_POSIX_MEMALIGN
562 char* const first_page = reinterpret_cast<char*>(two_pages);
563 char* const second_page = first_page + page_size;
564
565 // Place the target string at the end of the first page.
566 char* const target_with_guard = second_page - target_size;
567 memcpy(target_with_guard, kTarget, target_size);
568
569 // Make the second page unreadable.
570 mprotect(second_page, page_size, PROT_NONE);
571
572 // Now perform the encode operation, which will cause a segmentation fault
573 // if it reads past the end of the buffer.
574 EXPECT_TRUE(encoder_.StartEncoding(delta()));
575 EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta()));
576 EXPECT_TRUE(encoder_.FinishEncoding(delta()));
577
578 // Undo the mprotect.
579 mprotect(second_page, page_size, PROT_READ|PROT_WRITE);
580 free(two_pages);
581 }
582
TEST_F(VCDiffEncoderTest,ShouldNotReadPastBeginningOfBuffer)583 TEST_F(VCDiffEncoderTest, ShouldNotReadPastBeginningOfBuffer) {
584 const size_t target_size = strlen(kTarget);
585
586 // Allocate two memory pages.
587 const int page_size = getpagesize();
588 void* two_pages = NULL;
589 #ifdef HAVE_POSIX_MEMALIGN
590 posix_memalign(&two_pages, page_size, 2 * page_size);
591 #else // !HAVE_POSIX_MEMALIGN
592 two_pages = memalign(page_size, 2 * page_size);
593 #endif // HAVE_POSIX_MEMALIGN
594 char* const first_page = reinterpret_cast<char*>(two_pages);
595 char* const second_page = first_page + page_size;
596
597 // Make the first page unreadable.
598 mprotect(first_page, page_size, PROT_NONE);
599
600 // Place the target string at the beginning of the second page.
601 char* const target_with_guard = second_page;
602 memcpy(target_with_guard, kTarget, target_size);
603
604 // Now perform the encode operation, which will cause a segmentation fault
605 // if it reads past the beginning of the buffer.
606 EXPECT_TRUE(encoder_.StartEncoding(delta()));
607 EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta()));
608 EXPECT_TRUE(encoder_.FinishEncoding(delta()));
609
610 // Undo the mprotect.
611 mprotect(first_page, page_size, PROT_READ|PROT_WRITE);
612 free(two_pages);
613 }
614 #endif // HAVE_MPROTECT && (HAVE_MEMALIGN || HAVE_POSIX_MEMALIGN)
615
616 class VCDiffMatchCountTest : public VerifyEncodedBytesTest {
617 protected:
~VCDiffMatchCountTest()618 virtual ~VCDiffMatchCountTest() { }
619
ExpectMatch(size_t match_size)620 void ExpectMatch(size_t match_size) {
621 if (match_size >= expected_match_counts_.size()) {
622 // Be generous to avoid resizing again
623 expected_match_counts_.resize(match_size * 2, 0);
624 }
625 ++expected_match_counts_[match_size];
626 }
627
VerifyMatchCounts()628 void VerifyMatchCounts() {
629 EXPECT_TRUE(std::equal(expected_match_counts_.begin(),
630 expected_match_counts_.end(),
631 actual_match_counts_.begin()));
632 }
633
634 std::vector<int> expected_match_counts_;
635 std::vector<int> actual_match_counts_;
636 };
637
638 class VCDiffHTML1Test : public VCDiffMatchCountTest {
639 protected:
640 static const char kDictionary[];
641 static const char kTarget[];
642 static const char kRedundantTarget[];
643
644 VCDiffHTML1Test();
~VCDiffHTML1Test()645 virtual ~VCDiffHTML1Test() { }
646
647 void SimpleEncode();
648 void StreamingEncode();
649
650 HashedDictionary hashed_dictionary_;
651 VCDiffStreamingEncoder encoder_;
652 VCDiffStreamingDecoder decoder_;
653 VCDiffEncoder simple_encoder_;
654 VCDiffDecoder simple_decoder_;
655
656 string result_target_;
657 };
658
659 const char VCDiffHTML1Test::kDictionary[] =
660 "<html><font color=red>This part from the dict</font><br>";
661
662 const char VCDiffHTML1Test::kTarget[] =
663 "<html><font color=red>This part from the dict</font><br>\n"
664 "And this part is not...</html>";
665
666 const char VCDiffHTML1Test::kRedundantTarget[] =
667 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
668 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
669 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
670 "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; // 256
671
VCDiffHTML1Test()672 VCDiffHTML1Test::VCDiffHTML1Test()
673 : hashed_dictionary_(kDictionary, sizeof(kDictionary)),
674 encoder_(&hashed_dictionary_,
675 VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
676 /* look_for_target_matches = */ true),
677 simple_encoder_(kDictionary, sizeof(kDictionary)) {
678 EXPECT_TRUE(hashed_dictionary_.Init());
679 }
680
SimpleEncode()681 void VCDiffHTML1Test::SimpleEncode() {
682 EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
683 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
684 delta_size());
685 EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
686 sizeof(kDictionary),
687 delta_as_const(),
688 &result_target_));
689 EXPECT_EQ(kTarget, result_target_);
690 }
691
StreamingEncode()692 void VCDiffHTML1Test::StreamingEncode() {
693 EXPECT_TRUE(encoder_.StartEncoding(delta()));
694 EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
695 EXPECT_TRUE(encoder_.FinishEncoding(delta()));
696 }
697
TEST_F(VCDiffHTML1Test,CheckOutputOfSimpleEncoder)698 TEST_F(VCDiffHTML1Test, CheckOutputOfSimpleEncoder) {
699 SimpleEncode();
700 // These values do not depend on the block size used for encoding
701 ExpectByte(0xD6); // 'V' | 0x80
702 ExpectByte(0xC3); // 'C' | 0x80
703 ExpectByte(0xC4); // 'D' | 0x80
704 ExpectByte(0x00); // Simple encoder never uses interleaved format
705 ExpectByte(0x00); // Hdr_Indicator
706 ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary)
707 ExpectByte(sizeof(kDictionary)); // Dictionary length
708 ExpectByte(0x00); // Source segment position: start of dictionary
709 if (BlockHash::kBlockSize < 16) {
710 // A medium block size will catch the "his part " match.
711 ExpectByte(0x22); // Length of the delta encoding
712 ExpectSize(strlen(kTarget)); // Size of the target window
713 ExpectByte(0x00); // Delta_indicator (no compression)
714 ExpectByte(0x16); // Length of the data section
715 ExpectByte(0x05); // Length of the instructions section
716 ExpectByte(0x02); // Length of the address section
717 // Data section
718 ExpectString("\nAnd t"); // Data for 1st ADD
719 ExpectString("is not...</html>"); // Data for 2nd ADD
720 // Instructions section
721 ExpectByte(0x73); // COPY size 0 mode VCD_SAME(0)
722 ExpectByte(0x38); // COPY size (56)
723 ExpectByte(0x07); // ADD size 6
724 ExpectByte(0x19); // COPY size 9 mode VCD_SELF
725 ExpectByte(0x11); // ADD size 16
726 // Address section
727 ExpectByte(0x00); // COPY address (0) mode VCD_SAME(0)
728 ExpectByte(0x17); // COPY address (23) mode VCD_SELF
729 } else if (BlockHash::kBlockSize <= 56) {
730 // Any block size up to 56 will catch the matching prefix string.
731 ExpectByte(0x29); // Length of the delta encoding
732 ExpectSize(strlen(kTarget)); // Size of the target window
733 ExpectByte(0x00); // Delta_indicator (no compression)
734 ExpectByte(0x1F); // Length of the data section
735 ExpectByte(0x04); // Length of the instructions section
736 ExpectByte(0x01); // Length of the address section
737 ExpectString("\nAnd this part is not...</html>"); // Data for ADD
738 // Instructions section
739 ExpectByte(0x73); // COPY size 0 mode VCD_SAME(0)
740 ExpectByte(0x38); // COPY size (56)
741 ExpectByte(0x01); // ADD size 0
742 ExpectByte(0x1F); // Size of ADD (31)
743 // Address section
744 ExpectByte(0x00); // COPY address (0) mode VCD_SAME(0)
745 } else {
746 // The matching string is 56 characters long, and the block size is
747 // 64 or greater, so no match should be found.
748 ExpectSize(strlen(kTarget) + 7); // Delta encoding len
749 ExpectSize(strlen(kTarget)); // Size of the target window
750 ExpectByte(0x00); // Delta_indicator (no compression)
751 ExpectSize(strlen(kTarget)); // Length of the data section
752 ExpectByte(0x02); // Length of the instructions section
753 ExpectByte(0x00); // Length of the address section
754 // Data section
755 ExpectString(kTarget);
756 ExpectByte(0x01); // ADD size 0
757 ExpectSize(strlen(kTarget));
758 }
759 ExpectNoMoreBytes();
760 }
761
TEST_F(VCDiffHTML1Test,MatchCounts)762 TEST_F(VCDiffHTML1Test, MatchCounts) {
763 StreamingEncode();
764 encoder_.GetMatchCounts(&actual_match_counts_);
765 if (BlockHash::kBlockSize < 16) {
766 // A medium block size will catch the "his part " match.
767 ExpectMatch(56);
768 ExpectMatch(9);
769 } else if (BlockHash::kBlockSize <= 56) {
770 // Any block size up to 56 will catch the matching prefix string.
771 ExpectMatch(56);
772 }
773 VerifyMatchCounts();
774 }
775
TEST_F(VCDiffHTML1Test,SimpleEncoderPerformsTargetMatching)776 TEST_F(VCDiffHTML1Test, SimpleEncoderPerformsTargetMatching) {
777 EXPECT_TRUE(simple_encoder_.Encode(kRedundantTarget,
778 strlen(kRedundantTarget),
779 delta()));
780 EXPECT_GE(strlen(kRedundantTarget) + kFileHeaderSize + kWindowHeaderSize,
781 delta_size());
782 EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
783 sizeof(kDictionary),
784 delta_as_const(),
785 &result_target_));
786 EXPECT_EQ(kRedundantTarget, result_target_);
787 // These values do not depend on the block size used for encoding
788 ExpectByte(0xD6); // 'V' | 0x80
789 ExpectByte(0xC3); // 'C' | 0x80
790 ExpectByte(0xC4); // 'D' | 0x80
791 ExpectByte(0x00); // Simple encoder never uses interleaved format
792 ExpectByte(0x00); // Hdr_Indicator
793 ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary)
794 ExpectByte(sizeof(kDictionary)); // Dictionary length
795 ExpectByte(0x00); // Source segment position: start of dictionary
796 ExpectByte(0x0C); // Length of the delta encoding
797 ExpectSize(strlen(kRedundantTarget)); // Size of the target window
798 ExpectByte(0x00); // Delta_indicator (no compression)
799 ExpectByte(0x01); // Length of the data section
800 ExpectByte(0x04); // Length of the instructions section
801 ExpectByte(0x01); // Length of the address section
802 // Data section
803 ExpectString("A"); // Data for ADD
804 // Instructions section
805 ExpectByte(0x02); // ADD size 1
806 ExpectByte(0x23); // COPY size 0 mode VCD_HERE
807 ExpectSize(strlen(kRedundantTarget) - 1); // COPY size 255
808 // Address section
809 ExpectByte(0x01); // COPY address (1) mode VCD_HERE
810 ExpectNoMoreBytes();
811 }
812
TEST_F(VCDiffHTML1Test,SimpleEncoderWithoutTargetMatching)813 TEST_F(VCDiffHTML1Test, SimpleEncoderWithoutTargetMatching) {
814 simple_encoder_.SetTargetMatching(false);
815 EXPECT_TRUE(simple_encoder_.Encode(kRedundantTarget,
816 strlen(kRedundantTarget),
817 delta()));
818 EXPECT_GE(strlen(kRedundantTarget) + kFileHeaderSize + kWindowHeaderSize,
819 delta_size());
820 EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
821 sizeof(kDictionary),
822 delta_as_const(),
823 &result_target_));
824 EXPECT_EQ(kRedundantTarget, result_target_);
825 // These values do not depend on the block size used for encoding
826 ExpectByte(0xD6); // 'V' | 0x80
827 ExpectByte(0xC3); // 'C' | 0x80
828 ExpectByte(0xC4); // 'D' | 0x80
829 ExpectByte(0x00); // Simple encoder never uses interleaved format
830 ExpectByte(0x00); // Hdr_Indicator
831 ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary)
832 ExpectByte(sizeof(kDictionary)); // Dictionary length
833 ExpectByte(0x00); // Source segment position: start of dictionary
834 ExpectSize(strlen(kRedundantTarget) + 0x0A); // Length of the delta encoding
835 ExpectSize(strlen(kRedundantTarget)); // Size of the target window
836 ExpectByte(0x00); // Delta_indicator (no compression)
837 ExpectSize(strlen(kRedundantTarget)); // Length of the data section
838 ExpectByte(0x03); // Length of the instructions section
839 ExpectByte(0x00); // Length of the address section
840 // Data section
841 ExpectString(kRedundantTarget); // Data for ADD
842 // Instructions section
843 ExpectByte(0x01); // ADD size 0
844 ExpectSize(strlen(kRedundantTarget)); // ADD size
845 // Address section empty
846 ExpectNoMoreBytes();
847 }
848
849 #ifdef GTEST_HAS_DEATH_TEST
850 typedef VCDiffHTML1Test VCDiffHTML1DeathTest;
851
TEST_F(VCDiffHTML1DeathTest,NullMatchCounts)852 TEST_F(VCDiffHTML1DeathTest, NullMatchCounts) {
853 EXPECT_DEBUG_DEATH(encoder_.GetMatchCounts(NULL), "GetMatchCounts");
854 }
855 #endif // GTEST_HAS_DEATH_TEST
856
857 class VCDiffHTML2Test : public VCDiffMatchCountTest {
858 protected:
859 static const char kDictionary[];
860 static const char kTarget[];
861
862 VCDiffHTML2Test();
~VCDiffHTML2Test()863 virtual ~VCDiffHTML2Test() { }
864
865 void SimpleEncode();
866 void StreamingEncode();
867
868 HashedDictionary hashed_dictionary_;
869 VCDiffStreamingEncoder encoder_;
870 VCDiffStreamingDecoder decoder_;
871 VCDiffEncoder simple_encoder_;
872 VCDiffDecoder simple_decoder_;
873
874 string result_target_;
875 };
876
877 const char VCDiffHTML2Test::kDictionary[] = "10\nThis is a test";
878
879 const char VCDiffHTML2Test::kTarget[] = "This is a test!!!\n";
880
VCDiffHTML2Test()881 VCDiffHTML2Test::VCDiffHTML2Test()
882 : hashed_dictionary_(kDictionary, sizeof(kDictionary)),
883 encoder_(&hashed_dictionary_,
884 VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
885 /* look_for_target_matches = */ true),
886 simple_encoder_(kDictionary, sizeof(kDictionary)) {
887 EXPECT_TRUE(hashed_dictionary_.Init());
888 }
889
SimpleEncode()890 void VCDiffHTML2Test::SimpleEncode() {
891 EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
892 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
893 delta_size());
894 EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
895 sizeof(kDictionary),
896 delta_as_const(),
897 &result_target_));
898 EXPECT_EQ(kTarget, result_target_);
899 }
900
StreamingEncode()901 void VCDiffHTML2Test::StreamingEncode() {
902 EXPECT_TRUE(encoder_.StartEncoding(delta()));
903 EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
904 EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
905 delta_size());
906 EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
907 sizeof(kDictionary),
908 delta_as_const(),
909 &result_target_));
910 EXPECT_EQ(kTarget, result_target_);
911 }
912
TEST_F(VCDiffHTML2Test,VerifyOutputOfSimpleEncoder)913 TEST_F(VCDiffHTML2Test, VerifyOutputOfSimpleEncoder) {
914 SimpleEncode();
915 // These values do not depend on the block size used for encoding
916 ExpectByte(0xD6); // 'V' | 0x80
917 ExpectByte(0xC3); // 'C' | 0x80
918 ExpectByte(0xC4); // 'D' | 0x80
919 ExpectByte(0x00); // Simple encoder never uses interleaved format
920 ExpectByte(0x00); // Hdr_Indicator
921 ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary)
922 ExpectByte(sizeof(kDictionary)); // Dictionary length
923 ExpectByte(0x00); // Source segment position: start of dictionary
924 if (BlockHash::kBlockSize <= 8) {
925 ExpectByte(12); // Length of the delta encoding
926 ExpectSize(strlen(kTarget)); // Size of the target window
927 ExpectByte(0x00); // Delta_indicator (no compression)
928 ExpectByte(0x04); // Length of the data section
929 ExpectByte(0x02); // Length of the instructions section
930 ExpectByte(0x01); // Length of the address section
931 ExpectByte('!');
932 ExpectByte('!');
933 ExpectByte('!');
934 ExpectByte('\n');
935 ExpectByte(0x1E); // COPY size 14 mode VCD_SELF
936 ExpectByte(0x05); // ADD size 4
937 ExpectByte(0x03); // COPY address (3) mode VCD_SELF
938 } else {
939 // Larger block sizes will not catch any matches.
940 ExpectSize(strlen(kTarget) + 7); // Delta encoding len
941 ExpectSize(strlen(kTarget)); // Size of the target window
942 ExpectByte(0x00); // Delta_indicator (no compression)
943 ExpectSize(strlen(kTarget)); // Length of the data section
944 ExpectByte(0x02); // Length of the instructions section
945 ExpectByte(0x00); // Length of the address section
946 // Data section
947 ExpectString(kTarget);
948 ExpectByte(0x01); // ADD size 0
949 ExpectSize(strlen(kTarget));
950 }
951 ExpectNoMoreBytes();
952 }
953
TEST_F(VCDiffHTML2Test,VerifyOutputWithChecksum)954 TEST_F(VCDiffHTML2Test, VerifyOutputWithChecksum) {
955 StreamingEncode();
956 const VCDChecksum html2_checksum = ComputeAdler32(kTarget, strlen(kTarget));
957 CHECK_EQ(5, VarintBE<int64_t>::Length(html2_checksum));
958 // These values do not depend on the block size used for encoding
959 ExpectByte(0xD6); // 'V' | 0x80
960 ExpectByte(0xC3); // 'C' | 0x80
961 ExpectByte(0xC4); // 'D' | 0x80
962 ExpectByte('S'); // Format extensions
963 ExpectByte(0x00); // Hdr_Indicator
964 ExpectByte(VCD_SOURCE | VCD_CHECKSUM); // Win_Indicator
965 ExpectByte(sizeof(kDictionary)); // Dictionary length
966 ExpectByte(0x00); // Source segment position: start of dictionary
967 if (BlockHash::kBlockSize <= 8) {
968 ExpectByte(17); // Length of the delta encoding
969 ExpectSize(strlen(kTarget)); // Size of the target window
970 ExpectByte(0x00); // Delta_indicator (no compression)
971 ExpectByte(0x00); // Length of the data section
972 ExpectByte(0x07); // Length of the instructions section
973 ExpectByte(0x00); // Length of the address section
974 ExpectChecksum(html2_checksum);
975 ExpectByte(0x1E); // COPY size 14 mode VCD_SELF
976 ExpectByte(0x03); // COPY address (3) mode VCD_SELF
977 ExpectByte(0x05); // ADD size 4
978 ExpectByte('!');
979 ExpectByte('!');
980 ExpectByte('!');
981 ExpectByte('\n');
982 } else {
983 // Larger block sizes will not catch any matches.
984 ExpectSize(strlen(kTarget) + 12); // Delta encoding len
985 ExpectSize(strlen(kTarget)); // Size of the target window
986 ExpectByte(0x00); // Delta_indicator (no compression)
987 ExpectByte(0x00); // Length of the data section
988 ExpectSize(0x02 + strlen(kTarget)); // Interleaved
989 ExpectByte(0x00); // Length of the address section
990 ExpectChecksum(html2_checksum);
991 // Data section
992 ExpectByte(0x01); // ADD size 0
993 ExpectSize(strlen(kTarget));
994 ExpectString(kTarget);
995 }
996 ExpectNoMoreBytes();
997 }
998
TEST_F(VCDiffHTML2Test,MatchCounts)999 TEST_F(VCDiffHTML2Test, MatchCounts) {
1000 StreamingEncode();
1001 encoder_.GetMatchCounts(&actual_match_counts_);
1002 if (BlockHash::kBlockSize <= 8) {
1003 ExpectMatch(14);
1004 }
1005 VerifyMatchCounts();
1006 }
1007
1008 } // anonymous namespace
1009 } // namespace open_vcdiff
1010