1 // Copyright 2008 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15
16 #include <config.h>
17 #include "vcdiffengine.h"
18 #include <string.h> // memset, strlen
19 #include <algorithm>
20 #include <string>
21 #include <vector>
22 #include "addrcache.h"
23 #include "blockhash.h"
24 #include "encodetable.h"
25 #include "google/output_string.h"
26 #include "rolling_hash.h"
27 #include "testing.h"
28 #include "varint_bigendian.h"
29 #include "vcdiff_defs.h"
30
31 namespace open_vcdiff {
32
33 namespace {
34
35 class VCDiffEngineTestBase : public testing::Test {
36 protected:
37 typedef std::string string;
38
39 // Some common definitions and helper functions used in the various tests
40 // for VCDiffEngine.
41 static const int kBlockSize = VCDiffEngine::kMinimumMatchSize;
42
VCDiffEngineTestBase()43 VCDiffEngineTestBase() : interleaved_(false),
44 diff_output_string_(&diff_),
45 verify_position_(0),
46 saved_total_size_position_(0),
47 saved_delta_encoding_position_(0),
48 saved_section_sizes_position_(0),
49 data_bytes_(0),
50 instruction_bytes_(0),
51 address_bytes_(0) { }
52
~VCDiffEngineTestBase()53 virtual ~VCDiffEngineTestBase() { }
54
TearDown()55 virtual void TearDown() {
56 VerifyMatchCounts();
57 }
58
59 // Copy string_without_spaces into newly allocated result buffer,
60 // but pad its contents with space characters so that every character
61 // in string_without_spaces corresponds to (block_size - 1)
62 // spaces in the result, followed by that character.
63 // For example:
64 // If string_without_spaces begins "The only thing"... and block_size is 4,
65 // then 3 space characters will be inserted
66 // between each letter in the result, as follows:
67 // " T h e o n l y t h i n g"...
68 // This makes testing simpler, because finding a block_size-byte match
69 // between the dictionary and target only depends on the
70 // trailing letter in each block.
71 // If no_initial_padding is true, then the first letter will not have
72 // spaces added in front of it.
MakeEachLetterABlock(const char * string_without_spaces,const char ** result,int block_size,bool no_initial_padding)73 static void MakeEachLetterABlock(const char* string_without_spaces,
74 const char** result,
75 int block_size,
76 bool no_initial_padding) {
77 const size_t length_without_spaces = strlen(string_without_spaces);
78 char* padded_text = new char[(block_size * length_without_spaces) + 1];
79 memset(padded_text, ' ', block_size * length_without_spaces);
80 char* padded_text_ptr = padded_text;
81 if (!no_initial_padding) {
82 padded_text_ptr += block_size - 1;
83 }
84 for (size_t i = 0; i < length_without_spaces; ++i) {
85 *padded_text_ptr = string_without_spaces[i];
86 padded_text_ptr += block_size;
87 }
88 *(padded_text_ptr - block_size + 1) = '\0';
89 *result = padded_text;
90 }
91
92 // These functions iterate through the decoded output and expect
93 // simple elements: bytes or variable-length integers.
ExpectByte(char byte)94 void ExpectByte(char byte) {
95 EXPECT_GT(diff_.size(), verify_position_);
96 EXPECT_EQ(byte, diff_[verify_position_]);
97 ++verify_position_;
98 }
99
ExpectVarint(int32_t expected_value)100 size_t ExpectVarint(int32_t expected_value) {
101 EXPECT_GT(diff_.size(), verify_position_);
102 const char* const original_position = &diff_[verify_position_];
103 const char* new_position = original_position;
104 const size_t expected_length = VarintBE<int32_t>::Length(expected_value);
105 int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(),
106 &new_position);
107 EXPECT_LE(0, parsed_value);
108 size_t parsed_length = new_position - original_position;
109 EXPECT_EQ(expected_value, parsed_value);
110 EXPECT_EQ(expected_length, parsed_length);
111 verify_position_ += parsed_length;
112 return parsed_length;
113 }
114
ExpectSize(size_t size)115 size_t ExpectSize(size_t size) {
116 return ExpectVarint(static_cast<int32_t>(size));
117 }
118
ExpectStringLength(const char * s)119 size_t ExpectStringLength(const char* s) {
120 return ExpectSize(strlen(s));
121 }
122
SkipVarint()123 void SkipVarint() {
124 EXPECT_GT(diff_.size(), verify_position_);
125 const char* const original_position = &diff_[verify_position_];
126 const char* new_position = original_position;
127 VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position);
128 size_t parsed_length = new_position - original_position;
129 verify_position_ += parsed_length;
130 }
131
ExpectDataByte(char byte)132 void ExpectDataByte(char byte) {
133 ExpectByte(byte);
134 if (interleaved_) {
135 ++instruction_bytes_;
136 } else {
137 ++data_bytes_;
138 }
139 }
140
ExpectDataString(const char * expected_string)141 void ExpectDataString(const char *expected_string) {
142 const char* ptr = expected_string;
143 while (*ptr) {
144 ExpectDataByte(*ptr);
145 ++ptr;
146 }
147 }
148
ExpectDataStringWithBlockSpacing(const char * expected_string,bool trailing_spaces)149 void ExpectDataStringWithBlockSpacing(const char *expected_string,
150 bool trailing_spaces) {
151 const char* ptr = expected_string;
152 while (*ptr) {
153 for (int i = 0; i < (kBlockSize - 1); ++i) {
154 ExpectDataByte(' ');
155 }
156 ExpectDataByte(*ptr);
157 ++ptr;
158 }
159 if (trailing_spaces) {
160 for (int i = 0; i < (kBlockSize - 1); ++i) {
161 ExpectDataByte(' ');
162 }
163 }
164 }
165
ExpectInstructionByte(char byte)166 void ExpectInstructionByte(char byte) {
167 ExpectByte(byte);
168 ++instruction_bytes_;
169 }
170
ExpectInstructionVarint(int32_t value)171 void ExpectInstructionVarint(int32_t value) {
172 instruction_bytes_ += ExpectVarint(value);
173 }
174
ExpectAddressByte(char byte)175 void ExpectAddressByte(char byte) {
176 ExpectByte(byte);
177 if (interleaved_) {
178 ++instruction_bytes_;
179 } else {
180 ++address_bytes_;
181 }
182 }
183
ExpectAddressVarint(int32_t value)184 void ExpectAddressVarint(int32_t value) {
185 if (interleaved_) {
186 instruction_bytes_ += ExpectVarint(value);
187 } else {
188 address_bytes_ += ExpectVarint(value);
189 }
190 }
191
192 // The following functions leverage the fact that the encoder uses
193 // the default code table and cache sizes. They are able to search for
194 // instructions of a particular size. The logic for mapping from
195 // instruction type, mode, and size to opcode value is very different here
196 // from the logic used in encodetable.{h,cc}, so hopefully
197 // this version will help validate that the other is correct.
198 // This version uses conditional statements, while encodetable.h
199 // looks up values in a mapping table.
ExpectAddress(int32_t address,int copy_mode)200 void ExpectAddress(int32_t address, int copy_mode) {
201 if (default_cache_.WriteAddressAsVarintForMode(copy_mode)) {
202 ExpectAddressVarint(address);
203 } else {
204 ExpectAddressByte(address);
205 }
206 }
207
ExpectAddInstruction(int size)208 void ExpectAddInstruction(int size) {
209 if (size <= 18) {
210 ExpectInstructionByte(0x01 + size);
211 } else {
212 ExpectInstructionByte(0x01);
213 ExpectInstructionVarint(size);
214 }
215 }
216
ExpectCopyInstruction(int size,int mode)217 void ExpectCopyInstruction(int size, int mode) {
218 if ((size >= 4) && (size <= 16)) {
219 ExpectInstructionByte(0x10 + (0x10 * mode) + size);
220 } else {
221 ExpectInstructionByte(0x13 + (0x10 * mode));
222 ExpectInstructionVarint(size);
223 }
224 ExpectMatch(size);
225 }
226
ExpectAddCopyInstruction(int add_size,int copy_size,int copy_mode)227 bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) {
228 if (!default_cache_.IsSameMode(copy_mode) &&
229 (add_size <= 4) &&
230 (copy_size >= 4) &&
231 (copy_size <= 6)) {
232 ExpectInstructionByte(0x9C +
233 (0x0C * copy_mode) +
234 (0x03 * add_size) +
235 copy_size);
236 ExpectMatch(copy_size);
237 return true;
238 } else if (default_cache_.IsSameMode(copy_mode) &&
239 (add_size <= 4) &&
240 (copy_size == 4)) {
241 ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size);
242 ExpectMatch(copy_size);
243 return true;
244 } else {
245 ExpectAddInstruction(add_size);
246 return false;
247 }
248 }
249
ExpectAddInstructionForStringLength(const char * s)250 void ExpectAddInstructionForStringLength(const char* s) {
251 ExpectAddInstruction(static_cast<int>(strlen(s)));
252 }
253
254 // Call this function before beginning to iterate through the diff string
255 // using the Expect... functions.
256 // text must be NULL-terminated.
VerifyHeaderForDictionaryAndTargetText(const char * dictionary,const char * target_text)257 void VerifyHeaderForDictionaryAndTargetText(const char* dictionary,
258 const char* target_text) {
259 ExpectByte(0x01); // Win_Indicator: VCD_SOURCE (dictionary)
260 ExpectStringLength(dictionary);
261 ExpectByte(0x00); // Source segment position: start of dictionary
262 saved_total_size_position_ = verify_position_;
263 SkipVarint(); // Length of the delta encoding
264 saved_delta_encoding_position_ = verify_position_;
265 ExpectStringLength(target_text);
266 ExpectByte(0x00); // Delta_indicator (no compression)
267 saved_section_sizes_position_ = verify_position_;
268 SkipVarint(); // length of data for ADDs and RUNs
269 SkipVarint(); // length of instructions section
270 SkipVarint(); // length of addresses for COPYs
271 }
272
273 // Call this function before beginning to iterating through the entire
274 // diff string using the Expect... functions. It makes sure that the
275 // size totals in the window header match the number of bytes that
276 // were parsed.
VerifySizes()277 void VerifySizes() {
278 EXPECT_EQ(verify_position_, diff_.size());
279 const size_t delta_encoding_size = verify_position_ -
280 saved_delta_encoding_position_;
281 verify_position_ = saved_total_size_position_;
282 ExpectSize(delta_encoding_size);
283 verify_position_ = saved_section_sizes_position_;
284 ExpectSize(data_bytes_);
285 ExpectSize(instruction_bytes_);
286 ExpectSize(address_bytes_);
287 }
288
ExpectMatch(size_t match_size)289 void ExpectMatch(size_t match_size) {
290 if (match_size >= expected_match_counts_.size()) {
291 // Be generous to avoid resizing again
292 expected_match_counts_.resize(match_size * 2, 0);
293 }
294 ++expected_match_counts_[match_size];
295 }
296
VerifyMatchCounts()297 void VerifyMatchCounts() {
298 EXPECT_TRUE(std::equal(expected_match_counts_.begin(),
299 expected_match_counts_.end(),
300 actual_match_counts_.begin()));
301 }
302
303 bool interleaved_;
304 string diff_;
305 OutputString<string> diff_output_string_;
306 size_t verify_position_;
307 size_t saved_total_size_position_;
308 size_t saved_delta_encoding_position_;
309 size_t saved_section_sizes_position_;
310 size_t data_bytes_;
311 size_t instruction_bytes_;
312 size_t address_bytes_;
313 VCDiffAddressCache default_cache_; // Used for finding mode values
314 std::vector<int> expected_match_counts_;
315 std::vector<int> actual_match_counts_;
316 };
317
318 class VCDiffEngineTest : public VCDiffEngineTestBase {
319 protected:
VCDiffEngineTest()320 VCDiffEngineTest() :
321 engine_(dictionary_, strlen(dictionary_)) {
322 EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
323 }
324
~VCDiffEngineTest()325 virtual ~VCDiffEngineTest() { }
326
327
SetUpTestCase()328 static void SetUpTestCase() {
329 MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_,
330 kBlockSize, false);
331 MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false);
332 }
333
TearDownTestCase()334 static void TearDownTestCase() {
335 delete[] dictionary_;
336 delete[] target_;
337 }
338
EncodeNothing(bool interleaved,bool target_matching)339 void EncodeNothing(bool interleaved, bool target_matching) {
340 interleaved_ = interleaved;
341 VCDiffCodeTableWriter coder(interleaved);
342 coder.Init(engine_.dictionary_size());
343 engine_.Encode("", 0, target_matching, &diff_output_string_, &coder);
344 EXPECT_TRUE(diff_.empty());
345 actual_match_counts_ = coder.match_counts();
346 }
347
348 // text must be NULL-terminated
EncodeText(const char * text,bool interleaved,bool target_matching)349 void EncodeText(const char* text, bool interleaved, bool target_matching) {
350 interleaved_ = interleaved;
351 VCDiffCodeTableWriter coder(interleaved);
352 coder.Init(engine_.dictionary_size());
353 engine_.Encode(text,
354 strlen(text),
355 target_matching,
356 &diff_output_string_,
357 &coder);
358 actual_match_counts_ = coder.match_counts();
359 }
360
Encode(bool interleaved,bool target_matching)361 void Encode(bool interleaved, bool target_matching) {
362 EncodeText(target_, interleaved, target_matching);
363 VerifyHeader();
364 }
365
VerifyHeader()366 void VerifyHeader() {
367 VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
368 }
369
370 static const char dictionary_without_spaces_[];
371 static const char target_without_spaces_[];
372
373 static const char* dictionary_;
374 static const char* target_;
375
376 const VCDiffEngine engine_;
377 };
378
379 #ifdef GTEST_HAS_DEATH_TEST
380 typedef VCDiffEngineTest VCDiffEngineDeathTest;
381 #endif // GTEST_HAS_DEATH_TEST
382
383 const char VCDiffEngineTest::dictionary_without_spaces_[] =
384 "The only thing we have to fear is fear itself";
385
386 const char VCDiffEngineTest::target_without_spaces_[] =
387 "What we hear is fearsome";
388
389 const char* VCDiffEngineTest::dictionary_ = NULL;
390 const char* VCDiffEngineTest::target_ = NULL;
391
392 #ifdef GTEST_HAS_DEATH_TEST
TEST_F(VCDiffEngineDeathTest,InitCalledTwice)393 TEST_F(VCDiffEngineDeathTest, InitCalledTwice) {
394 EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()),
395 "twice");
396 }
397 #endif // GTEST_HAS_DEATH_TEST
398
TEST_F(VCDiffEngineTest,EngineEncodeNothing)399 TEST_F(VCDiffEngineTest, EngineEncodeNothing) {
400 EncodeNothing(/* interleaved = */ false, /* target matching = */ false);
401 }
402
TEST_F(VCDiffEngineTest,EngineEncodeNothingInterleaved)403 TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) {
404 EncodeNothing(/* interleaved = */ true, /* target matching = */ false);
405 }
406
TEST_F(VCDiffEngineTest,EngineEncodeNothingTarget)407 TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) {
408 EncodeNothing(/* interleaved = */ false, /* target matching = */ true);
409 }
410
TEST_F(VCDiffEngineTest,EngineEncodeNothingTargetInterleaved)411 TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) {
412 EncodeNothing(/* interleaved = */ true, /* target matching = */ true);
413 }
414
TEST_F(VCDiffEngineTest,EngineEncodeSmallerThanOneBlock)415 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) {
416 const char* small_text = " ";
417 EncodeText(small_text,
418 /* interleaved = */ false,
419 /* target matching = */ false);
420 VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
421 // Data for ADDs
422 ExpectDataString(small_text);
423 // Instructions and sizes
424 ExpectAddInstructionForStringLength(small_text);
425 }
426
TEST_F(VCDiffEngineTest,EngineEncodeSmallerThanOneBlockInterleaved)427 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) {
428 const char* small_text = " ";
429 EncodeText(small_text,
430 /* interleaved = */ true,
431 /* target matching = */ false);
432 VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
433 // Interleaved section
434 ExpectAddInstructionForStringLength(small_text);
435 ExpectDataString(small_text);
436 }
437
TEST_F(VCDiffEngineTest,EngineEncodeSampleText)438 TEST_F(VCDiffEngineTest, EngineEncodeSampleText) {
439 Encode(/* interleaved = */ false, /* target matching = */ false);
440 // Data for ADDs
441 ExpectDataStringWithBlockSpacing("W", false);
442 ExpectDataByte('t');
443 ExpectDataByte('s');
444 ExpectDataByte('m');
445 // Instructions and sizes
446 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
447 VCD_SELF_MODE)) {
448 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
449 }
450 ExpectAddInstruction(1);
451 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
452 ExpectCopyInstruction(11 * kBlockSize,
453 default_cache_.FirstNearMode());
454 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
455 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
456 }
457 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
458 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
459 }
460 // Addresses for COPY
461 ExpectAddressVarint(18 * kBlockSize); // "ha"
462 ExpectAddressVarint(14 * kBlockSize); // " we h"
463 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
464 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
465 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
466 VerifySizes();
467 }
468
TEST_F(VCDiffEngineTest,EngineEncodeSampleTextInterleaved)469 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) {
470 Encode(/* interleaved = */ true, /* target matching = */ false);
471 // Interleaved section
472 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
473 VCD_SELF_MODE)) {
474 ExpectDataStringWithBlockSpacing("W", false);
475 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
476 } else {
477 ExpectDataStringWithBlockSpacing("W", false);
478 }
479 ExpectAddressVarint(18 * kBlockSize); // "ha"
480 ExpectAddInstruction(1);
481 ExpectDataByte('t');
482 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
483 ExpectAddressVarint(14 * kBlockSize); // " we h"
484 ExpectCopyInstruction(11 * kBlockSize,
485 default_cache_.FirstNearMode());
486 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
487 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
488 ExpectDataByte('s');
489 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
490 } else {
491 ExpectDataByte('s');
492 }
493 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
494 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
495 ExpectDataByte('m');
496 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
497 } else {
498 ExpectDataByte('m');
499 }
500 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
501 VerifySizes();
502 }
503
TEST_F(VCDiffEngineTest,EngineEncodeSampleTextWithTargetMatching)504 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) {
505 Encode(/* interleaved = */ false, /* target matching = */ true);
506 // Data for ADDs
507 ExpectDataStringWithBlockSpacing("W", false);
508 ExpectDataByte('t');
509 ExpectDataByte('s');
510 ExpectDataByte('m');
511 // Instructions and sizes
512 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
513 VCD_SELF_MODE)) {
514 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
515 }
516 ExpectAddInstruction(1);
517 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
518 ExpectCopyInstruction(11 * kBlockSize,
519 default_cache_.FirstNearMode());
520 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
521 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
522 }
523 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
524 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
525 }
526 // Addresses for COPY
527 ExpectAddressVarint(18 * kBlockSize); // "ha"
528 ExpectAddressVarint(14 * kBlockSize); // " we h"
529 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
530 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
531 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
532 VerifySizes();
533 }
534
TEST_F(VCDiffEngineTest,EngineEncodeSampleTextInterleavedWithTargetMatching)535 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) {
536 Encode(/* interleaved = */ true, /* target matching = */ false);
537 // Interleaved section
538 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
539 VCD_SELF_MODE)) {
540 ExpectDataStringWithBlockSpacing("W", false);
541 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
542 } else {
543 ExpectDataStringWithBlockSpacing("W", false);
544 }
545 ExpectAddressVarint(18 * kBlockSize); // "ha"
546 ExpectAddInstruction(1);
547 ExpectDataByte('t');
548 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
549 ExpectAddressVarint(14 * kBlockSize); // " we h"
550 ExpectCopyInstruction(11 * kBlockSize,
551 default_cache_.FirstNearMode());
552 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
553 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
554 ExpectDataByte('s');
555 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
556 } else {
557 ExpectDataByte('s');
558 }
559 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
560 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
561 ExpectDataByte('m');
562 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
563 } else {
564 ExpectDataByte('m');
565 }
566 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
567 VerifySizes();
568 }
569
570 // This test case takes a dictionary containing several instances of the string
571 // "weasel", and a target string which is identical to the dictionary
572 // except that all instances of "weasel" have been replaced with the string
573 // "moon-pie". It tests that COPY instructions are generated for all
574 // boilerplate text (that is, the text between the "moon-pie" instances in
575 // the target) and, if target matching is enabled, that each instance of
576 // "moon-pie" (except the first one) is encoded using a COPY instruction
577 // rather than an ADD.
578 class WeaselsToMoonpiesTest : public VCDiffEngineTestBase {
579 protected:
580 // kCompressibleTestBlockSize:
581 // The size of the block to create for each letter in the
582 // dictionary and search string for the "compressible text" test.
583 // See MakeEachLetterABlock, below.
584 // If we use kCompressibleTestBlockSize = kBlockSize, then the
585 // encoder will find one match per unique letter in the HTML text.
586 // There are too many examples of "<" in the text for the encoder
587 // to iterate through them all, and some matches are not found.
588 // If we use kCompressibleTextBlockSize = 1, then the boilerplate
589 // text between "weasel" strings in the dictionary and "moon-pie"
590 // strings in the target may not be long enough to be found by
591 // the encoder's block-hash algorithm. A good value, that will give
592 // reproducible results across all block sizes, will be somewhere
593 // in between these extremes.
594 static const int kCompressibleTestBlockSize = kBlockSize / 4;
595 static const int kTrailingSpaces = kCompressibleTestBlockSize - 1;
596
WeaselsToMoonpiesTest()597 WeaselsToMoonpiesTest() :
598 engine_(dictionary_, strlen(dictionary_)),
599 match_index_(0),
600 search_dictionary_(dictionary_, strlen(dictionary_)),
601 copied_moonpie_address_(0) {
602 EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
603 weasel_positions_[0] = 0;
604 after_weasel_[0] = 0;
605 moonpie_positions_[0] = 0;
606 after_moonpie_[0] = 0;
607 }
608
~WeaselsToMoonpiesTest()609 virtual ~WeaselsToMoonpiesTest() { }
610
SetUpTestCase()611 static void SetUpTestCase() {
612 MakeEachLetterABlock(dictionary_without_spaces_,
613 &dictionary_,
614 kCompressibleTestBlockSize,
615 false);
616 MakeEachLetterABlock(target_without_spaces_,
617 &target_,
618 kCompressibleTestBlockSize,
619 false);
620 MakeEachLetterABlock(weasel_text_without_spaces_,
621 &weasel_text_,
622 kCompressibleTestBlockSize,
623 true);
624 MakeEachLetterABlock(moonpie_text_without_spaces_,
625 &moonpie_text_,
626 kCompressibleTestBlockSize,
627 true);
628 }
629
TearDownTestCase()630 static void TearDownTestCase() {
631 delete[] dictionary_;
632 delete[] target_;
633 delete[] weasel_text_;
634 delete[] moonpie_text_;
635 }
636
637 // text must be NULL-terminated
EncodeText(const char * text,bool interleaved,bool target_matching)638 void EncodeText(const char* text, bool interleaved, bool target_matching) {
639 interleaved_ = interleaved;
640 VCDiffCodeTableWriter coder(interleaved);
641 coder.Init(engine_.dictionary_size());
642 engine_.Encode(text,
643 strlen(text),
644 target_matching,
645 &diff_output_string_,
646 &coder);
647 actual_match_counts_ = coder.match_counts();
648 }
649
Encode(bool interleaved,bool target_matching)650 void Encode(bool interleaved, bool target_matching) {
651 EncodeText(target_, interleaved, target_matching);
652 VerifyHeader();
653 }
654
VerifyHeader()655 void VerifyHeader() {
656 VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
657 }
658
ExpectCopyForSize(size_t size,int mode)659 void ExpectCopyForSize(size_t size, int mode) {
660 ExpectCopyInstruction(static_cast<int>(size), mode);
661 }
662
ExpectAddForSize(size_t size)663 void ExpectAddForSize(size_t size) {
664 ExpectAddInstruction(static_cast<int>(size));
665 }
666
ExpectAddressVarintForSize(size_t value)667 void ExpectAddressVarintForSize(size_t value) {
668 ExpectAddressVarint(static_cast<int32_t>(value));
669 }
670
FindNextMoonpie(bool include_trailing_spaces)671 void FindNextMoonpie(bool include_trailing_spaces) {
672 ++match_index_;
673 SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_,
674 AfterLastWeasel()));
675 if (CurrentWeaselPosition() == string::npos) {
676 SetCurrentMoonpiePosition(string::npos);
677 } else {
678 SetCurrentAfterWeaselPosition(CurrentWeaselPosition()
679 + strlen(weasel_text_)
680 + (include_trailing_spaces ?
681 kTrailingSpaces : 0));
682 SetCurrentMoonpiePosition(AfterLastMoonpie()
683 + CurrentBoilerplateLength());
684 SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition()
685 + strlen(moonpie_text_)
686 + (include_trailing_spaces ?
687 kTrailingSpaces : 0));
688 }
689 }
NoMoreMoonpies() const690 bool NoMoreMoonpies() const {
691 return CurrentMoonpiePosition() == string::npos;
692 }
CurrentWeaselPosition() const693 size_t CurrentWeaselPosition() const {
694 return weasel_positions_[match_index_];
695 }
LastWeaselPosition() const696 size_t LastWeaselPosition() const {
697 return weasel_positions_[match_index_ - 1];
698 }
CurrentMoonpiePosition() const699 size_t CurrentMoonpiePosition() const {
700 return moonpie_positions_[match_index_];
701 }
LastMoonpiePosition() const702 size_t LastMoonpiePosition() const {
703 return moonpie_positions_[match_index_ - 1];
704 }
AfterLastWeasel() const705 size_t AfterLastWeasel() const {
706 CHECK_GE(match_index_, 1);
707 return after_weasel_[match_index_ - 1];
708 }
AfterPreviousWeasel() const709 size_t AfterPreviousWeasel() const {
710 CHECK_GE(match_index_, 2);
711 return after_weasel_[match_index_ - 2];
712 }
AfterLastMoonpie() const713 size_t AfterLastMoonpie() const {
714 CHECK_GE(match_index_, 1);
715 return after_moonpie_[match_index_ - 1];
716 }
AfterPreviousMoonpie() const717 size_t AfterPreviousMoonpie() const {
718 CHECK_GE(match_index_, 2);
719 return after_moonpie_[match_index_ - 2];
720 }
721
SetCurrentWeaselPosition(size_t value)722 void SetCurrentWeaselPosition(size_t value) {
723 weasel_positions_[match_index_] = value;
724 }
SetCurrentAfterWeaselPosition(size_t value)725 void SetCurrentAfterWeaselPosition(size_t value) {
726 after_weasel_[match_index_] = value;
727 }
SetCurrentMoonpiePosition(size_t value)728 void SetCurrentMoonpiePosition(size_t value) {
729 moonpie_positions_[match_index_] = value;
730 }
SetCurrentAfterMoonpiePosition(size_t value)731 void SetCurrentAfterMoonpiePosition(size_t value) {
732 after_moonpie_[match_index_] = value;
733 }
734
735 // Find the length of the text in between the "weasel" strings in the
736 // compressible dictionary, which is the same as the text between
737 // the "moon-pie" strings in the compressible target.
CurrentBoilerplateLength() const738 size_t CurrentBoilerplateLength() const {
739 CHECK_GE(match_index_, 1);
740 return CurrentWeaselPosition() - AfterLastWeasel();
741 }
DistanceFromLastWeasel() const742 size_t DistanceFromLastWeasel() const {
743 CHECK_GE(match_index_, 1);
744 return CurrentWeaselPosition() - LastWeaselPosition();
745 }
DistanceFromLastMoonpie() const746 size_t DistanceFromLastMoonpie() const {
747 CHECK_GE(match_index_, 1);
748 return CurrentMoonpiePosition() - LastMoonpiePosition();
749 }
DistanceBetweenLastTwoWeasels() const750 size_t DistanceBetweenLastTwoWeasels() const {
751 CHECK_GE(match_index_, 2);
752 return AfterLastWeasel() - AfterPreviousWeasel();
753 }
DistanceBetweenLastTwoMoonpies() const754 size_t DistanceBetweenLastTwoMoonpies() const {
755 CHECK_GE(match_index_, 2);
756 return AfterLastMoonpie() - AfterPreviousMoonpie();
757 }
758
759 int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const;
760 int UpdateCopyModeForMoonpie(int copy_mode) const;
761 int32_t FindMoonpieAddressForCopyMode(int copy_mode) const;
762
763 void CopyBoilerplateAndAddMoonpie(int copy_mode);
764 void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode);
765
766 static const char dictionary_without_spaces_[];
767 static const char target_without_spaces_[];
768 static const char weasel_text_without_spaces_[];
769 static const char moonpie_text_without_spaces_[];
770
771 static const char* dictionary_;
772 static const char* target_;
773 static const char* weasel_text_;
774 static const char* moonpie_text_;
775
776 const VCDiffEngine engine_;
777 size_t weasel_positions_[128];
778 size_t after_weasel_[128];
779 size_t moonpie_positions_[128];
780 size_t after_moonpie_[128];
781 int match_index_;
782 string search_dictionary_;
783 size_t copied_moonpie_address_;
784 };
785
786 // Care is taken in the formulation of the dictionary
787 // to ensure that the surrounding letters do not match; for example,
788 // there are not two instances of the string "weasels". Otherwise,
789 // the matching behavior would not be as predictable.
790 const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] =
791 "<html>\n"
792 "<head>\n"
793 "<meta content=\"text/html; charset=ISO-8859-1\"\n"
794 "http-equiv=\"content-type\">\n"
795 "<title>All about weasels</title>\n"
796 "</head>\n"
797 "<!-- You will notice that the word \"weasel\" may be replaced"
798 " with something else -->\n"
799 "<body>\n"
800 "<h1>All about the weasel: highly compressible HTML text</h1>"
801 "<ul>\n"
802 "<li>Don\'t look a gift weasel in its mouth.</li>\n"
803 "<li>This item makes sure the next occurrence is found.</li>\n"
804 "<li>Don\'t count your weasel, before it\'s hatched.</li>\n"
805 "</ul>\n"
806 "<br>\n"
807 "</body>\n"
808 "</html>\n";
809
810 const char WeaselsToMoonpiesTest::target_without_spaces_[] =
811 "<html>\n"
812 "<head>\n"
813 "<meta content=\"text/html; charset=ISO-8859-1\"\n"
814 "http-equiv=\"content-type\">\n"
815 "<title>All about moon-pies</title>\n"
816 "</head>\n"
817 "<!-- You will notice that the word \"moon-pie\" may be replaced"
818 " with something else -->\n"
819 "<body>\n"
820 "<h1>All about the moon-pie: highly compressible HTML text</h1>"
821 "<ul>\n"
822 "<li>Don\'t look a gift moon-pie in its mouth.</li>\n"
823 "<li>This item makes sure the next occurrence is found.</li>\n"
824 "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n"
825 "</ul>\n"
826 "<br>\n"
827 "</body>\n"
828 "</html>\n";
829
830 const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel";
831 const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie";
832
833 const char* WeaselsToMoonpiesTest::dictionary_ = NULL;
834 const char* WeaselsToMoonpiesTest::target_ = NULL;
835 const char* WeaselsToMoonpiesTest::weasel_text_ = NULL;
836 const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL;
837
FindBoilerplateAddressForCopyMode(int copy_mode) const838 int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode(
839 int copy_mode) const {
840 size_t copy_address = 0;
841 if (default_cache_.IsSelfMode(copy_mode)) {
842 copy_address = AfterLastWeasel();
843 } else if (default_cache_.IsNearMode(copy_mode)) {
844 copy_address = DistanceBetweenLastTwoWeasels();
845 } else if (default_cache_.IsSameMode(copy_mode)) {
846 copy_address = AfterLastWeasel() % 256;
847 }
848 return static_cast<int32_t>(copy_address);
849 }
850
UpdateCopyModeForMoonpie(int copy_mode) const851 int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const {
852 if (copy_mode == default_cache_.FirstSameMode()) {
853 return default_cache_.FirstSameMode()
854 + static_cast<int>((copied_moonpie_address_ / 256) % 3);
855 } else {
856 return copy_mode;
857 }
858 }
859
FindMoonpieAddressForCopyMode(int copy_mode) const860 int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode(
861 int copy_mode) const {
862 size_t copy_address = 0;
863 if (default_cache_.IsHereMode(copy_mode)) {
864 copy_address = DistanceFromLastMoonpie();
865 } else if (default_cache_.IsNearMode(copy_mode)) {
866 copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces;
867 } else if (default_cache_.IsSameMode(copy_mode)) {
868 copy_address = copied_moonpie_address_ % 256;
869 }
870 return static_cast<int32_t>(copy_address);
871 }
872
873 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
874 // in the encoding.
CopyBoilerplateAndAddMoonpie(int copy_mode)875 void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) {
876 EXPECT_FALSE(NoMoreMoonpies());
877 ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
878 ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
879 ExpectAddInstructionForStringLength(moonpie_text_);
880 ExpectDataString(moonpie_text_);
881 }
882
883 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
884 // in the encoding. The "moon-pie" text will be copied from the previously
885 // encoded target.
CopyBoilerplateAndCopyMoonpie(int copy_mode,int moonpie_copy_mode)886 void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie(
887 int copy_mode,
888 int moonpie_copy_mode) {
889 EXPECT_FALSE(NoMoreMoonpies());
890 ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
891 ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
892 moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode);
893 ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode);
894 ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode),
895 moonpie_copy_mode);
896 copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition();
897 }
898
TEST_F(WeaselsToMoonpiesTest,EngineEncodeCompressibleNoTargetMatching)899 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) {
900 Encode(/* interleaved = */ true, /* target matching = */ false);
901 FindNextMoonpie(false);
902 // Expect all five "weasel"s to be replaced with "moon-pie"s
903 CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
904 FindNextMoonpie(false);
905 CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE);
906 FindNextMoonpie(false);
907 CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1);
908 FindNextMoonpie(false);
909 CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2);
910 FindNextMoonpie(false);
911 CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3);
912 FindNextMoonpie(false);
913 EXPECT_TRUE(NoMoreMoonpies());
914 ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
915 default_cache_.FirstNearMode());
916 ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
917 VerifySizes();
918 }
919
TEST_F(WeaselsToMoonpiesTest,EngineEncodeCompressibleWithTargetMatching)920 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) {
921 Encode(/* interleaved = */ true, /* target matching = */ true);
922 // Expect all five "weasel"s to be replaced with "moon-pie"s.
923 // Every "moon-pie" after the first one should be copied from the
924 // previously encoded target text.
925 FindNextMoonpie(false);
926 CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
927 FindNextMoonpie(true);
928 CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE);
929 FindNextMoonpie(true);
930 CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
931 default_cache_.FirstSameMode());
932 FindNextMoonpie(true);
933 CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3,
934 VCD_HERE_MODE);
935 FindNextMoonpie(true);
936 CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
937 default_cache_.FirstSameMode());
938 FindNextMoonpie(true);
939 EXPECT_TRUE(NoMoreMoonpies());
940 ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
941 default_cache_.FirstNearMode() + 3);
942 ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
943 VerifySizes();
944 }
945
946 } // anonymous namespace
947 } // namespace open-vcdiff
948