1 // Copyright 2008 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15
16 #include <config.h>
17 #include "vcdiffengine.h"
18 #include <string.h> // memset, strlen
19 #include <algorithm>
20 #include <string>
21 #include "addrcache.h"
22 #include "blockhash.h"
23 #include "encodetable.h"
24 #include "google/output_string.h"
25 #include "rolling_hash.h"
26 #include "testing.h"
27 #include "varint_bigendian.h"
28 #include "vcdiff_defs.h"
29
30 namespace open_vcdiff {
31
32 namespace {
33
34 class VCDiffEngineTestBase : public testing::Test {
35 protected:
36 typedef std::string string;
37
38 // Some common definitions and helper functions used in the various tests
39 // for VCDiffEngine.
40 static const int kBlockSize = VCDiffEngine::kMinimumMatchSize;
41
VCDiffEngineTestBase()42 VCDiffEngineTestBase() : interleaved_(false),
43 diff_output_string_(&diff_),
44 verify_position_(0),
45 saved_total_size_position_(0),
46 saved_delta_encoding_position_(0),
47 saved_section_sizes_position_(0),
48 data_bytes_(0),
49 instruction_bytes_(0),
50 address_bytes_(0) { }
51
~VCDiffEngineTestBase()52 virtual ~VCDiffEngineTestBase() { }
53
TearDown()54 virtual void TearDown() {
55 }
56
57 // Copy string_without_spaces into newly allocated result buffer,
58 // but pad its contents with space characters so that every character
59 // in string_without_spaces corresponds to (block_size - 1)
60 // spaces in the result, followed by that character.
61 // For example:
62 // If string_without_spaces begins "The only thing"... and block_size is 4,
63 // then 3 space characters will be inserted
64 // between each letter in the result, as follows:
65 // " T h e o n l y t h i n g"...
66 // This makes testing simpler, because finding a block_size-byte match
67 // between the dictionary and target only depends on the
68 // trailing letter in each block.
69 // If no_initial_padding is true, then the first letter will not have
70 // spaces added in front of it.
MakeEachLetterABlock(const char * string_without_spaces,const char ** result,int block_size,bool no_initial_padding)71 static void MakeEachLetterABlock(const char* string_without_spaces,
72 const char** result,
73 int block_size,
74 bool no_initial_padding) {
75 const size_t length_without_spaces = strlen(string_without_spaces);
76 char* padded_text = new char[(block_size * length_without_spaces) + 1];
77 memset(padded_text, ' ', block_size * length_without_spaces);
78 char* padded_text_ptr = padded_text;
79 if (!no_initial_padding) {
80 padded_text_ptr += block_size - 1;
81 }
82 for (size_t i = 0; i < length_without_spaces; ++i) {
83 *padded_text_ptr = string_without_spaces[i];
84 padded_text_ptr += block_size;
85 }
86 *(padded_text_ptr - block_size + 1) = '\0';
87 *result = padded_text;
88 }
89
90 // These functions iterate through the decoded output and expect
91 // simple elements: bytes or variable-length integers.
ExpectByte(char byte)92 void ExpectByte(char byte) {
93 EXPECT_GT(diff_.size(), verify_position_);
94 EXPECT_EQ(byte, diff_[verify_position_]);
95 ++verify_position_;
96 }
97
ExpectVarint(int32_t expected_value)98 size_t ExpectVarint(int32_t expected_value) {
99 EXPECT_GT(diff_.size(), verify_position_);
100 const char* const original_position = &diff_[verify_position_];
101 const char* new_position = original_position;
102 const size_t expected_length = VarintBE<int32_t>::Length(expected_value);
103 int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(),
104 &new_position);
105 EXPECT_LE(0, parsed_value);
106 size_t parsed_length = new_position - original_position;
107 EXPECT_EQ(expected_value, parsed_value);
108 EXPECT_EQ(expected_length, parsed_length);
109 verify_position_ += parsed_length;
110 return parsed_length;
111 }
112
ExpectSize(size_t size)113 size_t ExpectSize(size_t size) {
114 return ExpectVarint(static_cast<int32_t>(size));
115 }
116
ExpectStringLength(const char * s)117 size_t ExpectStringLength(const char* s) {
118 return ExpectSize(strlen(s));
119 }
120
SkipVarint()121 void SkipVarint() {
122 EXPECT_GT(diff_.size(), verify_position_);
123 const char* const original_position = &diff_[verify_position_];
124 const char* new_position = original_position;
125 VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position);
126 size_t parsed_length = new_position - original_position;
127 verify_position_ += parsed_length;
128 }
129
ExpectDataByte(char byte)130 void ExpectDataByte(char byte) {
131 ExpectByte(byte);
132 if (interleaved_) {
133 ++instruction_bytes_;
134 } else {
135 ++data_bytes_;
136 }
137 }
138
ExpectDataString(const char * expected_string)139 void ExpectDataString(const char *expected_string) {
140 const char* ptr = expected_string;
141 while (*ptr) {
142 ExpectDataByte(*ptr);
143 ++ptr;
144 }
145 }
146
ExpectDataStringWithBlockSpacing(const char * expected_string,bool trailing_spaces)147 void ExpectDataStringWithBlockSpacing(const char *expected_string,
148 bool trailing_spaces) {
149 const char* ptr = expected_string;
150 while (*ptr) {
151 for (int i = 0; i < (kBlockSize - 1); ++i) {
152 ExpectDataByte(' ');
153 }
154 ExpectDataByte(*ptr);
155 ++ptr;
156 }
157 if (trailing_spaces) {
158 for (int i = 0; i < (kBlockSize - 1); ++i) {
159 ExpectDataByte(' ');
160 }
161 }
162 }
163
ExpectInstructionByte(char byte)164 void ExpectInstructionByte(char byte) {
165 ExpectByte(byte);
166 ++instruction_bytes_;
167 }
168
ExpectInstructionVarint(int32_t value)169 void ExpectInstructionVarint(int32_t value) {
170 instruction_bytes_ += ExpectVarint(value);
171 }
172
ExpectAddressByte(char byte)173 void ExpectAddressByte(char byte) {
174 ExpectByte(byte);
175 if (interleaved_) {
176 ++instruction_bytes_;
177 } else {
178 ++address_bytes_;
179 }
180 }
181
ExpectAddressVarint(int32_t value)182 void ExpectAddressVarint(int32_t value) {
183 if (interleaved_) {
184 instruction_bytes_ += ExpectVarint(value);
185 } else {
186 address_bytes_ += ExpectVarint(value);
187 }
188 }
189
190 // The following functions leverage the fact that the encoder uses
191 // the default code table and cache sizes. They are able to search for
192 // instructions of a particular size. The logic for mapping from
193 // instruction type, mode, and size to opcode value is very different here
194 // from the logic used in encodetable.{h,cc}, so hopefully
195 // this version will help validate that the other is correct.
196 // This version uses conditional statements, while encodetable.h
197 // looks up values in a mapping table.
ExpectAddress(int32_t address,int copy_mode)198 void ExpectAddress(int32_t address, int copy_mode) {
199 if (default_cache_.WriteAddressAsVarintForMode(copy_mode)) {
200 ExpectAddressVarint(address);
201 } else {
202 ExpectAddressByte(address);
203 }
204 }
205
ExpectAddInstruction(int size)206 void ExpectAddInstruction(int size) {
207 if (size <= 18) {
208 ExpectInstructionByte(0x01 + size);
209 } else {
210 ExpectInstructionByte(0x01);
211 ExpectInstructionVarint(size);
212 }
213 }
214
ExpectCopyInstruction(int size,int mode)215 void ExpectCopyInstruction(int size, int mode) {
216 if ((size >= 4) && (size <= 16)) {
217 ExpectInstructionByte(0x10 + (0x10 * mode) + size);
218 } else {
219 ExpectInstructionByte(0x13 + (0x10 * mode));
220 ExpectInstructionVarint(size);
221 }
222 }
223
ExpectAddCopyInstruction(int add_size,int copy_size,int copy_mode)224 bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) {
225 if (!default_cache_.IsSameMode(copy_mode) &&
226 (add_size <= 4) &&
227 (copy_size >= 4) &&
228 (copy_size <= 6)) {
229 ExpectInstructionByte(0x9C +
230 (0x0C * copy_mode) +
231 (0x03 * add_size) +
232 copy_size);
233 return true;
234 } else if (default_cache_.IsSameMode(copy_mode) &&
235 (add_size <= 4) &&
236 (copy_size == 4)) {
237 ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size);
238 return true;
239 } else {
240 ExpectAddInstruction(add_size);
241 return false;
242 }
243 }
244
ExpectAddInstructionForStringLength(const char * s)245 void ExpectAddInstructionForStringLength(const char* s) {
246 ExpectAddInstruction(static_cast<int>(strlen(s)));
247 }
248
249 // Call this function before beginning to iterate through the diff string
250 // using the Expect... functions.
251 // text must be NULL-terminated.
VerifyHeaderForDictionaryAndTargetText(const char * dictionary,const char * target_text)252 void VerifyHeaderForDictionaryAndTargetText(const char* dictionary,
253 const char* target_text) {
254 ExpectByte(0x01); // Win_Indicator: VCD_SOURCE (dictionary)
255 ExpectStringLength(dictionary);
256 ExpectByte(0x00); // Source segment position: start of dictionary
257 saved_total_size_position_ = verify_position_;
258 SkipVarint(); // Length of the delta encoding
259 saved_delta_encoding_position_ = verify_position_;
260 ExpectStringLength(target_text);
261 ExpectByte(0x00); // Delta_indicator (no compression)
262 saved_section_sizes_position_ = verify_position_;
263 SkipVarint(); // length of data for ADDs and RUNs
264 SkipVarint(); // length of instructions section
265 SkipVarint(); // length of addresses for COPYs
266 }
267
268 // Call this function before beginning to iterating through the entire
269 // diff string using the Expect... functions. It makes sure that the
270 // size totals in the window header match the number of bytes that
271 // were parsed.
VerifySizes()272 void VerifySizes() {
273 EXPECT_EQ(verify_position_, diff_.size());
274 const size_t delta_encoding_size = verify_position_ -
275 saved_delta_encoding_position_;
276 verify_position_ = saved_total_size_position_;
277 ExpectSize(delta_encoding_size);
278 verify_position_ = saved_section_sizes_position_;
279 ExpectSize(data_bytes_);
280 ExpectSize(instruction_bytes_);
281 ExpectSize(address_bytes_);
282 }
283
284 bool interleaved_;
285 string diff_;
286 OutputString<string> diff_output_string_;
287 size_t verify_position_;
288 size_t saved_total_size_position_;
289 size_t saved_delta_encoding_position_;
290 size_t saved_section_sizes_position_;
291 size_t data_bytes_;
292 size_t instruction_bytes_;
293 size_t address_bytes_;
294 VCDiffAddressCache default_cache_; // Used for finding mode values
295 };
296
297 class VCDiffEngineTest : public VCDiffEngineTestBase {
298 protected:
VCDiffEngineTest()299 VCDiffEngineTest() :
300 engine_(dictionary_, strlen(dictionary_)) {
301 EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
302 }
303
~VCDiffEngineTest()304 virtual ~VCDiffEngineTest() { }
305
306
SetUpTestCase()307 static void SetUpTestCase() {
308 MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_,
309 kBlockSize, false);
310 MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false);
311 }
312
TearDownTestCase()313 static void TearDownTestCase() {
314 delete[] dictionary_;
315 delete[] target_;
316 }
317
EncodeNothing(bool interleaved,bool target_matching)318 void EncodeNothing(bool interleaved, bool target_matching) {
319 interleaved_ = interleaved;
320 VCDiffCodeTableWriter coder(interleaved);
321 coder.Init(engine_.dictionary_size());
322 engine_.Encode("", 0, target_matching, &diff_output_string_, &coder);
323 EXPECT_TRUE(diff_.empty());
324 }
325
326 // text must be NULL-terminated
EncodeText(const char * text,bool interleaved,bool target_matching)327 void EncodeText(const char* text, bool interleaved, bool target_matching) {
328 interleaved_ = interleaved;
329 VCDiffCodeTableWriter coder(interleaved);
330 coder.Init(engine_.dictionary_size());
331 engine_.Encode(text,
332 strlen(text),
333 target_matching,
334 &diff_output_string_,
335 &coder);
336 }
337
Encode(bool interleaved,bool target_matching)338 void Encode(bool interleaved, bool target_matching) {
339 EncodeText(target_, interleaved, target_matching);
340 VerifyHeader();
341 }
342
VerifyHeader()343 void VerifyHeader() {
344 VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
345 }
346
347 static const char dictionary_without_spaces_[];
348 static const char target_without_spaces_[];
349
350 static const char* dictionary_;
351 static const char* target_;
352
353 const VCDiffEngine engine_;
354 };
355
356 #ifdef GTEST_HAS_DEATH_TEST
357 typedef VCDiffEngineTest VCDiffEngineDeathTest;
358 #endif // GTEST_HAS_DEATH_TEST
359
360 const char VCDiffEngineTest::dictionary_without_spaces_[] =
361 "The only thing we have to fear is fear itself";
362
363 const char VCDiffEngineTest::target_without_spaces_[] =
364 "What we hear is fearsome";
365
366 const char* VCDiffEngineTest::dictionary_ = NULL;
367 const char* VCDiffEngineTest::target_ = NULL;
368
369 #ifdef GTEST_HAS_DEATH_TEST
TEST_F(VCDiffEngineDeathTest,InitCalledTwice)370 TEST_F(VCDiffEngineDeathTest, InitCalledTwice) {
371 EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()),
372 "twice");
373 }
374 #endif // GTEST_HAS_DEATH_TEST
375
TEST_F(VCDiffEngineTest,EngineEncodeNothing)376 TEST_F(VCDiffEngineTest, EngineEncodeNothing) {
377 EncodeNothing(/* interleaved = */ false, /* target matching = */ false);
378 }
379
TEST_F(VCDiffEngineTest,EngineEncodeNothingInterleaved)380 TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) {
381 EncodeNothing(/* interleaved = */ true, /* target matching = */ false);
382 }
383
TEST_F(VCDiffEngineTest,EngineEncodeNothingTarget)384 TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) {
385 EncodeNothing(/* interleaved = */ false, /* target matching = */ true);
386 }
387
TEST_F(VCDiffEngineTest,EngineEncodeNothingTargetInterleaved)388 TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) {
389 EncodeNothing(/* interleaved = */ true, /* target matching = */ true);
390 }
391
TEST_F(VCDiffEngineTest,EngineEncodeSmallerThanOneBlock)392 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) {
393 const char* small_text = " ";
394 EncodeText(small_text,
395 /* interleaved = */ false,
396 /* target matching = */ false);
397 VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
398 // Data for ADDs
399 ExpectDataString(small_text);
400 // Instructions and sizes
401 ExpectAddInstructionForStringLength(small_text);
402 }
403
TEST_F(VCDiffEngineTest,EngineEncodeSmallerThanOneBlockInterleaved)404 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) {
405 const char* small_text = " ";
406 EncodeText(small_text,
407 /* interleaved = */ true,
408 /* target matching = */ false);
409 VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
410 // Interleaved section
411 ExpectAddInstructionForStringLength(small_text);
412 ExpectDataString(small_text);
413 }
414
TEST_F(VCDiffEngineTest,EngineEncodeSampleText)415 TEST_F(VCDiffEngineTest, EngineEncodeSampleText) {
416 Encode(/* interleaved = */ false, /* target matching = */ false);
417 // Data for ADDs
418 ExpectDataStringWithBlockSpacing("W", false);
419 ExpectDataByte('t');
420 ExpectDataByte('s');
421 ExpectDataByte('m');
422 // Instructions and sizes
423 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
424 VCD_SELF_MODE)) {
425 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
426 }
427 ExpectAddInstruction(1);
428 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
429 ExpectCopyInstruction(11 * kBlockSize,
430 default_cache_.FirstNearMode());
431 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
432 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
433 }
434 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
435 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
436 }
437 // Addresses for COPY
438 ExpectAddressVarint(18 * kBlockSize); // "ha"
439 ExpectAddressVarint(14 * kBlockSize); // " we h"
440 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
441 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
442 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
443 VerifySizes();
444 }
445
TEST_F(VCDiffEngineTest,EngineEncodeSampleTextInterleaved)446 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) {
447 Encode(/* interleaved = */ true, /* target matching = */ false);
448 // Interleaved section
449 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
450 VCD_SELF_MODE)) {
451 ExpectDataStringWithBlockSpacing("W", false);
452 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
453 } else {
454 ExpectDataStringWithBlockSpacing("W", false);
455 }
456 ExpectAddressVarint(18 * kBlockSize); // "ha"
457 ExpectAddInstruction(1);
458 ExpectDataByte('t');
459 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
460 ExpectAddressVarint(14 * kBlockSize); // " we h"
461 ExpectCopyInstruction(11 * kBlockSize,
462 default_cache_.FirstNearMode());
463 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
464 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
465 ExpectDataByte('s');
466 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
467 } else {
468 ExpectDataByte('s');
469 }
470 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
471 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
472 ExpectDataByte('m');
473 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
474 } else {
475 ExpectDataByte('m');
476 }
477 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
478 VerifySizes();
479 }
480
TEST_F(VCDiffEngineTest,EngineEncodeSampleTextWithTargetMatching)481 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) {
482 Encode(/* interleaved = */ false, /* target matching = */ true);
483 // Data for ADDs
484 ExpectDataStringWithBlockSpacing("W", false);
485 ExpectDataByte('t');
486 ExpectDataByte('s');
487 ExpectDataByte('m');
488 // Instructions and sizes
489 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
490 VCD_SELF_MODE)) {
491 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
492 }
493 ExpectAddInstruction(1);
494 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
495 ExpectCopyInstruction(11 * kBlockSize,
496 default_cache_.FirstNearMode());
497 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
498 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
499 }
500 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
501 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
502 }
503 // Addresses for COPY
504 ExpectAddressVarint(18 * kBlockSize); // "ha"
505 ExpectAddressVarint(14 * kBlockSize); // " we h"
506 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
507 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
508 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
509 VerifySizes();
510 }
511
TEST_F(VCDiffEngineTest,EngineEncodeSampleTextInterleavedWithTargetMatching)512 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) {
513 Encode(/* interleaved = */ true, /* target matching = */ true);
514 // Interleaved section
515 if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
516 VCD_SELF_MODE)) {
517 ExpectDataStringWithBlockSpacing("W", false);
518 ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
519 } else {
520 ExpectDataStringWithBlockSpacing("W", false);
521 }
522 ExpectAddressVarint(18 * kBlockSize); // "ha"
523 ExpectAddInstruction(1);
524 ExpectDataByte('t');
525 ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
526 ExpectAddressVarint(14 * kBlockSize); // " we h"
527 ExpectCopyInstruction(11 * kBlockSize,
528 default_cache_.FirstNearMode());
529 ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
530 if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
531 ExpectDataByte('s');
532 ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
533 } else {
534 ExpectDataByte('s');
535 }
536 ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
537 if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
538 ExpectDataByte('m');
539 ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
540 } else {
541 ExpectDataByte('m');
542 }
543 ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
544 VerifySizes();
545 }
546
547 // This test case takes a dictionary containing several instances of the string
548 // "weasel", and a target string which is identical to the dictionary
549 // except that all instances of "weasel" have been replaced with the string
550 // "moon-pie". It tests that COPY instructions are generated for all
551 // boilerplate text (that is, the text between the "moon-pie" instances in
552 // the target) and, if target matching is enabled, that each instance of
553 // "moon-pie" (except the first one) is encoded using a COPY instruction
554 // rather than an ADD.
555 class WeaselsToMoonpiesTest : public VCDiffEngineTestBase {
556 protected:
557 // kCompressibleTestBlockSize:
558 // The size of the block to create for each letter in the
559 // dictionary and search string for the "compressible text" test.
560 // See MakeEachLetterABlock, below.
561 // If we use kCompressibleTestBlockSize = kBlockSize, then the
562 // encoder will find one match per unique letter in the HTML text.
563 // There are too many examples of "<" in the text for the encoder
564 // to iterate through them all, and some matches are not found.
565 // If we use kCompressibleTextBlockSize = 1, then the boilerplate
566 // text between "weasel" strings in the dictionary and "moon-pie"
567 // strings in the target may not be long enough to be found by
568 // the encoder's block-hash algorithm. A good value, that will give
569 // reproducible results across all block sizes, will be somewhere
570 // in between these extremes.
571 static const int kCompressibleTestBlockSize = kBlockSize / 4;
572 static const int kTrailingSpaces = kCompressibleTestBlockSize - 1;
573
WeaselsToMoonpiesTest()574 WeaselsToMoonpiesTest() :
575 engine_(dictionary_, strlen(dictionary_)),
576 match_index_(0),
577 search_dictionary_(dictionary_, strlen(dictionary_)),
578 copied_moonpie_address_(0) {
579 EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
580 weasel_positions_[0] = 0;
581 after_weasel_[0] = 0;
582 moonpie_positions_[0] = 0;
583 after_moonpie_[0] = 0;
584 }
585
~WeaselsToMoonpiesTest()586 virtual ~WeaselsToMoonpiesTest() { }
587
SetUpTestCase()588 static void SetUpTestCase() {
589 MakeEachLetterABlock(dictionary_without_spaces_,
590 &dictionary_,
591 kCompressibleTestBlockSize,
592 false);
593 MakeEachLetterABlock(target_without_spaces_,
594 &target_,
595 kCompressibleTestBlockSize,
596 false);
597 MakeEachLetterABlock(weasel_text_without_spaces_,
598 &weasel_text_,
599 kCompressibleTestBlockSize,
600 true);
601 MakeEachLetterABlock(moonpie_text_without_spaces_,
602 &moonpie_text_,
603 kCompressibleTestBlockSize,
604 true);
605 }
606
TearDownTestCase()607 static void TearDownTestCase() {
608 delete[] dictionary_;
609 delete[] target_;
610 delete[] weasel_text_;
611 delete[] moonpie_text_;
612 }
613
614 // text must be NULL-terminated
EncodeText(const char * text,bool interleaved,bool target_matching)615 void EncodeText(const char* text, bool interleaved, bool target_matching) {
616 interleaved_ = interleaved;
617 VCDiffCodeTableWriter coder(interleaved);
618 coder.Init(engine_.dictionary_size());
619 engine_.Encode(text,
620 strlen(text),
621 target_matching,
622 &diff_output_string_,
623 &coder);
624 }
625
Encode(bool interleaved,bool target_matching)626 void Encode(bool interleaved, bool target_matching) {
627 EncodeText(target_, interleaved, target_matching);
628 VerifyHeader();
629 }
630
VerifyHeader()631 void VerifyHeader() {
632 VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
633 }
634
ExpectCopyForSize(size_t size,int mode)635 void ExpectCopyForSize(size_t size, int mode) {
636 ExpectCopyInstruction(static_cast<int>(size), mode);
637 }
638
ExpectAddForSize(size_t size)639 void ExpectAddForSize(size_t size) {
640 ExpectAddInstruction(static_cast<int>(size));
641 }
642
ExpectAddressVarintForSize(size_t value)643 void ExpectAddressVarintForSize(size_t value) {
644 ExpectAddressVarint(static_cast<int32_t>(value));
645 }
646
FindNextMoonpie(bool include_trailing_spaces)647 void FindNextMoonpie(bool include_trailing_spaces) {
648 ++match_index_;
649 SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_,
650 AfterLastWeasel()));
651 if (CurrentWeaselPosition() == string::npos) {
652 SetCurrentMoonpiePosition(string::npos);
653 } else {
654 SetCurrentAfterWeaselPosition(CurrentWeaselPosition()
655 + strlen(weasel_text_)
656 + (include_trailing_spaces ?
657 kTrailingSpaces : 0));
658 SetCurrentMoonpiePosition(AfterLastMoonpie()
659 + CurrentBoilerplateLength());
660 SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition()
661 + strlen(moonpie_text_)
662 + (include_trailing_spaces ?
663 kTrailingSpaces : 0));
664 }
665 }
NoMoreMoonpies() const666 bool NoMoreMoonpies() const {
667 return CurrentMoonpiePosition() == string::npos;
668 }
CurrentWeaselPosition() const669 size_t CurrentWeaselPosition() const {
670 return weasel_positions_[match_index_];
671 }
LastWeaselPosition() const672 size_t LastWeaselPosition() const {
673 return weasel_positions_[match_index_ - 1];
674 }
CurrentMoonpiePosition() const675 size_t CurrentMoonpiePosition() const {
676 return moonpie_positions_[match_index_];
677 }
LastMoonpiePosition() const678 size_t LastMoonpiePosition() const {
679 return moonpie_positions_[match_index_ - 1];
680 }
AfterLastWeasel() const681 size_t AfterLastWeasel() const {
682 CHECK_GE(match_index_, 1);
683 return after_weasel_[match_index_ - 1];
684 }
AfterPreviousWeasel() const685 size_t AfterPreviousWeasel() const {
686 CHECK_GE(match_index_, 2);
687 return after_weasel_[match_index_ - 2];
688 }
AfterLastMoonpie() const689 size_t AfterLastMoonpie() const {
690 CHECK_GE(match_index_, 1);
691 return after_moonpie_[match_index_ - 1];
692 }
AfterPreviousMoonpie() const693 size_t AfterPreviousMoonpie() const {
694 CHECK_GE(match_index_, 2);
695 return after_moonpie_[match_index_ - 2];
696 }
697
SetCurrentWeaselPosition(size_t value)698 void SetCurrentWeaselPosition(size_t value) {
699 weasel_positions_[match_index_] = value;
700 }
SetCurrentAfterWeaselPosition(size_t value)701 void SetCurrentAfterWeaselPosition(size_t value) {
702 after_weasel_[match_index_] = value;
703 }
SetCurrentMoonpiePosition(size_t value)704 void SetCurrentMoonpiePosition(size_t value) {
705 moonpie_positions_[match_index_] = value;
706 }
SetCurrentAfterMoonpiePosition(size_t value)707 void SetCurrentAfterMoonpiePosition(size_t value) {
708 after_moonpie_[match_index_] = value;
709 }
710
711 // Find the length of the text in between the "weasel" strings in the
712 // compressible dictionary, which is the same as the text between
713 // the "moon-pie" strings in the compressible target.
CurrentBoilerplateLength() const714 size_t CurrentBoilerplateLength() const {
715 CHECK_GE(match_index_, 1);
716 return CurrentWeaselPosition() - AfterLastWeasel();
717 }
DistanceFromLastWeasel() const718 size_t DistanceFromLastWeasel() const {
719 CHECK_GE(match_index_, 1);
720 return CurrentWeaselPosition() - LastWeaselPosition();
721 }
DistanceFromLastMoonpie() const722 size_t DistanceFromLastMoonpie() const {
723 CHECK_GE(match_index_, 1);
724 return CurrentMoonpiePosition() - LastMoonpiePosition();
725 }
DistanceBetweenLastTwoWeasels() const726 size_t DistanceBetweenLastTwoWeasels() const {
727 CHECK_GE(match_index_, 2);
728 return AfterLastWeasel() - AfterPreviousWeasel();
729 }
DistanceBetweenLastTwoMoonpies() const730 size_t DistanceBetweenLastTwoMoonpies() const {
731 CHECK_GE(match_index_, 2);
732 return AfterLastMoonpie() - AfterPreviousMoonpie();
733 }
734
735 int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const;
736 int UpdateCopyModeForMoonpie(int copy_mode) const;
737 int32_t FindMoonpieAddressForCopyMode(int copy_mode) const;
738
739 void CopyBoilerplateAndAddMoonpie(int copy_mode);
740 void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode);
741
742 static const char dictionary_without_spaces_[];
743 static const char target_without_spaces_[];
744 static const char weasel_text_without_spaces_[];
745 static const char moonpie_text_without_spaces_[];
746
747 static const char* dictionary_;
748 static const char* target_;
749 static const char* weasel_text_;
750 static const char* moonpie_text_;
751
752 const VCDiffEngine engine_;
753 size_t weasel_positions_[128];
754 size_t after_weasel_[128];
755 size_t moonpie_positions_[128];
756 size_t after_moonpie_[128];
757 int match_index_;
758 string search_dictionary_;
759 size_t copied_moonpie_address_;
760 };
761
762 // Care is taken in the formulation of the dictionary
763 // to ensure that the surrounding letters do not match; for example,
764 // there are not two instances of the string "weasels". Otherwise,
765 // the matching behavior would not be as predictable.
766 const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] =
767 "<html>\n"
768 "<head>\n"
769 "<meta content=\"text/html; charset=ISO-8859-1\"\n"
770 "http-equiv=\"content-type\">\n"
771 "<title>All about weasels</title>\n"
772 "</head>\n"
773 "<!-- You will notice that the word \"weasel\" may be replaced"
774 " with something else -->\n"
775 "<body>\n"
776 "<h1>All about the weasel: highly compressible HTML text</h1>"
777 "<ul>\n"
778 "<li>Don\'t look a gift weasel in its mouth.</li>\n"
779 "<li>This item makes sure the next occurrence is found.</li>\n"
780 "<li>Don\'t count your weasel, before it\'s hatched.</li>\n"
781 "</ul>\n"
782 "<br>\n"
783 "</body>\n"
784 "</html>\n";
785
786 const char WeaselsToMoonpiesTest::target_without_spaces_[] =
787 "<html>\n"
788 "<head>\n"
789 "<meta content=\"text/html; charset=ISO-8859-1\"\n"
790 "http-equiv=\"content-type\">\n"
791 "<title>All about moon-pies</title>\n"
792 "</head>\n"
793 "<!-- You will notice that the word \"moon-pie\" may be replaced"
794 " with something else -->\n"
795 "<body>\n"
796 "<h1>All about the moon-pie: highly compressible HTML text</h1>"
797 "<ul>\n"
798 "<li>Don\'t look a gift moon-pie in its mouth.</li>\n"
799 "<li>This item makes sure the next occurrence is found.</li>\n"
800 "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n"
801 "</ul>\n"
802 "<br>\n"
803 "</body>\n"
804 "</html>\n";
805
806 const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel";
807 const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie";
808
809 const char* WeaselsToMoonpiesTest::dictionary_ = NULL;
810 const char* WeaselsToMoonpiesTest::target_ = NULL;
811 const char* WeaselsToMoonpiesTest::weasel_text_ = NULL;
812 const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL;
813
FindBoilerplateAddressForCopyMode(int copy_mode) const814 int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode(
815 int copy_mode) const {
816 size_t copy_address = 0;
817 if (default_cache_.IsSelfMode(copy_mode)) {
818 copy_address = AfterLastWeasel();
819 } else if (default_cache_.IsNearMode(copy_mode)) {
820 copy_address = DistanceBetweenLastTwoWeasels();
821 } else if (default_cache_.IsSameMode(copy_mode)) {
822 copy_address = AfterLastWeasel() % 256;
823 }
824 return static_cast<int32_t>(copy_address);
825 }
826
UpdateCopyModeForMoonpie(int copy_mode) const827 int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const {
828 if (copy_mode == default_cache_.FirstSameMode()) {
829 return default_cache_.FirstSameMode()
830 + static_cast<int>((copied_moonpie_address_ / 256) % 3);
831 } else {
832 return copy_mode;
833 }
834 }
835
FindMoonpieAddressForCopyMode(int copy_mode) const836 int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode(
837 int copy_mode) const {
838 size_t copy_address = 0;
839 if (default_cache_.IsHereMode(copy_mode)) {
840 copy_address = DistanceFromLastMoonpie();
841 } else if (default_cache_.IsNearMode(copy_mode)) {
842 copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces;
843 } else if (default_cache_.IsSameMode(copy_mode)) {
844 copy_address = copied_moonpie_address_ % 256;
845 }
846 return static_cast<int32_t>(copy_address);
847 }
848
849 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
850 // in the encoding.
CopyBoilerplateAndAddMoonpie(int copy_mode)851 void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) {
852 EXPECT_FALSE(NoMoreMoonpies());
853 ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
854 ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
855 ExpectAddInstructionForStringLength(moonpie_text_);
856 ExpectDataString(moonpie_text_);
857 }
858
859 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
860 // in the encoding. The "moon-pie" text will be copied from the previously
861 // encoded target.
CopyBoilerplateAndCopyMoonpie(int copy_mode,int moonpie_copy_mode)862 void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie(
863 int copy_mode,
864 int moonpie_copy_mode) {
865 EXPECT_FALSE(NoMoreMoonpies());
866 ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
867 ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
868 moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode);
869 ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode);
870 ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode),
871 moonpie_copy_mode);
872 copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition();
873 }
874
TEST_F(WeaselsToMoonpiesTest,EngineEncodeCompressibleNoTargetMatching)875 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) {
876 Encode(/* interleaved = */ true, /* target matching = */ false);
877 FindNextMoonpie(false);
878 // Expect all five "weasel"s to be replaced with "moon-pie"s
879 CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
880 FindNextMoonpie(false);
881 CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE);
882 FindNextMoonpie(false);
883 CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1);
884 FindNextMoonpie(false);
885 CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2);
886 FindNextMoonpie(false);
887 CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3);
888 FindNextMoonpie(false);
889 EXPECT_TRUE(NoMoreMoonpies());
890 ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
891 default_cache_.FirstNearMode());
892 ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
893 VerifySizes();
894 }
895
TEST_F(WeaselsToMoonpiesTest,EngineEncodeCompressibleWithTargetMatching)896 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) {
897 Encode(/* interleaved = */ true, /* target matching = */ true);
898 // Expect all five "weasel"s to be replaced with "moon-pie"s.
899 // Every "moon-pie" after the first one should be copied from the
900 // previously encoded target text.
901 FindNextMoonpie(false);
902 CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
903 FindNextMoonpie(true);
904 CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE);
905 FindNextMoonpie(true);
906 CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
907 default_cache_.FirstSameMode());
908 FindNextMoonpie(true);
909 CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3,
910 VCD_HERE_MODE);
911 FindNextMoonpie(true);
912 CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
913 default_cache_.FirstSameMode());
914 FindNextMoonpie(true);
915 EXPECT_TRUE(NoMoreMoonpies());
916 ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
917 default_cache_.FirstNearMode() + 3);
918 ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
919 VerifySizes();
920 }
921
922 } // anonymous namespace
923 } // namespace open-vcdiff
924