• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2008 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #include <config.h>
17 #include "vcdiffengine.h"
18 #include <string.h>  // memset, strlen
19 #include <algorithm>
20 #include <string>
21 #include <vector>
22 #include "addrcache.h"
23 #include "blockhash.h"
24 #include "encodetable.h"
25 #include "google/output_string.h"
26 #include "rolling_hash.h"
27 #include "testing.h"
28 #include "varint_bigendian.h"
29 #include "vcdiff_defs.h"
30 
31 namespace open_vcdiff {
32 
33 namespace {
34 
35 class VCDiffEngineTestBase : public testing::Test {
36  protected:
37   typedef std::string string;
38 
39   // Some common definitions and helper functions used in the various tests
40   // for VCDiffEngine.
41   static const int kBlockSize = VCDiffEngine::kMinimumMatchSize;
42 
VCDiffEngineTestBase()43   VCDiffEngineTestBase() : interleaved_(false),
44                            diff_output_string_(&diff_),
45                            verify_position_(0),
46                            saved_total_size_position_(0),
47                            saved_delta_encoding_position_(0),
48                            saved_section_sizes_position_(0),
49                            data_bytes_(0),
50                            instruction_bytes_(0),
51                            address_bytes_(0) { }
52 
~VCDiffEngineTestBase()53   virtual ~VCDiffEngineTestBase() { }
54 
TearDown()55   virtual void TearDown() {
56     VerifyMatchCounts();
57   }
58 
59   // Copy string_without_spaces into newly allocated result buffer,
60   // but pad its contents with space characters so that every character
61   // in string_without_spaces corresponds to (block_size - 1)
62   // spaces in the result, followed by that character.
63   // For example:
64   // If string_without_spaces begins "The only thing"... and block_size is 4,
65   // then 3 space characters will be inserted
66   // between each letter in the result, as follows:
67   // "   T   h   e       o   n   l   y       t   h   i   n   g"...
68   // This makes testing simpler, because finding a block_size-byte match
69   // between the dictionary and target only depends on the
70   // trailing letter in each block.
71   // If no_initial_padding is true, then the first letter will not have
72   // spaces added in front of it.
MakeEachLetterABlock(const char * string_without_spaces,const char ** result,int block_size,bool no_initial_padding)73   static void MakeEachLetterABlock(const char* string_without_spaces,
74                                    const char** result,
75                                    int block_size,
76                                    bool no_initial_padding) {
77     const size_t length_without_spaces = strlen(string_without_spaces);
78     char* padded_text = new char[(block_size * length_without_spaces) + 1];
79     memset(padded_text, ' ', block_size * length_without_spaces);
80     char* padded_text_ptr = padded_text;
81     if (!no_initial_padding) {
82       padded_text_ptr += block_size - 1;
83     }
84     for (size_t i = 0; i < length_without_spaces; ++i) {
85       *padded_text_ptr = string_without_spaces[i];
86       padded_text_ptr += block_size;
87     }
88     *(padded_text_ptr - block_size + 1)  = '\0';
89     *result = padded_text;
90   }
91 
92   // These functions iterate through the decoded output and expect
93   // simple elements: bytes or variable-length integers.
ExpectByte(char byte)94   void ExpectByte(char byte) {
95     EXPECT_GT(diff_.size(), verify_position_);
96     EXPECT_EQ(byte, diff_[verify_position_]);
97     ++verify_position_;
98   }
99 
ExpectVarint(int32_t expected_value)100   size_t ExpectVarint(int32_t expected_value) {
101     EXPECT_GT(diff_.size(), verify_position_);
102     const char* const original_position = &diff_[verify_position_];
103     const char* new_position = original_position;
104     const size_t expected_length = VarintBE<int32_t>::Length(expected_value);
105     int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(),
106                                                     &new_position);
107     EXPECT_LE(0, parsed_value);
108     size_t parsed_length = new_position - original_position;
109     EXPECT_EQ(expected_value, parsed_value);
110     EXPECT_EQ(expected_length, parsed_length);
111     verify_position_ += parsed_length;
112     return parsed_length;
113   }
114 
ExpectSize(size_t size)115   size_t ExpectSize(size_t size) {
116     return ExpectVarint(static_cast<int32_t>(size));
117   }
118 
ExpectStringLength(const char * s)119   size_t ExpectStringLength(const char* s) {
120     return ExpectSize(strlen(s));
121   }
122 
SkipVarint()123   void SkipVarint() {
124     EXPECT_GT(diff_.size(), verify_position_);
125     const char* const original_position = &diff_[verify_position_];
126     const char* new_position = original_position;
127     VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position);
128     size_t parsed_length = new_position - original_position;
129     verify_position_ += parsed_length;
130   }
131 
ExpectDataByte(char byte)132   void ExpectDataByte(char byte) {
133     ExpectByte(byte);
134     if (interleaved_) {
135       ++instruction_bytes_;
136     } else {
137       ++data_bytes_;
138     }
139   }
140 
ExpectDataString(const char * expected_string)141   void ExpectDataString(const char *expected_string) {
142     const char* ptr = expected_string;
143     while (*ptr) {
144       ExpectDataByte(*ptr);
145       ++ptr;
146     }
147   }
148 
ExpectDataStringWithBlockSpacing(const char * expected_string,bool trailing_spaces)149   void ExpectDataStringWithBlockSpacing(const char *expected_string,
150                                         bool trailing_spaces) {
151     const char* ptr = expected_string;
152     while (*ptr) {
153       for (int i = 0; i < (kBlockSize - 1); ++i) {
154         ExpectDataByte(' ');
155       }
156       ExpectDataByte(*ptr);
157       ++ptr;
158     }
159     if (trailing_spaces) {
160       for (int i = 0; i < (kBlockSize - 1); ++i) {
161         ExpectDataByte(' ');
162       }
163     }
164   }
165 
ExpectInstructionByte(char byte)166   void ExpectInstructionByte(char byte) {
167     ExpectByte(byte);
168     ++instruction_bytes_;
169   }
170 
ExpectInstructionVarint(int32_t value)171   void ExpectInstructionVarint(int32_t value) {
172     instruction_bytes_ += ExpectVarint(value);
173   }
174 
ExpectAddressByte(char byte)175   void ExpectAddressByte(char byte) {
176     ExpectByte(byte);
177     if (interleaved_) {
178       ++instruction_bytes_;
179     } else {
180       ++address_bytes_;
181     }
182   }
183 
ExpectAddressVarint(int32_t value)184   void ExpectAddressVarint(int32_t value) {
185     if (interleaved_) {
186       instruction_bytes_ += ExpectVarint(value);
187     } else {
188       address_bytes_ += ExpectVarint(value);
189     }
190   }
191 
192   // The following functions leverage the fact that the encoder uses
193   // the default code table and cache sizes.  They are able to search for
194   // instructions of a particular size.  The logic for mapping from
195   // instruction type, mode, and size to opcode value is very different here
196   // from the logic used in encodetable.{h,cc}, so hopefully
197   // this version will help validate that the other is correct.
198   // This version uses conditional statements, while encodetable.h
199   // looks up values in a mapping table.
ExpectAddress(int32_t address,int copy_mode)200   void ExpectAddress(int32_t address, int copy_mode) {
201     if (default_cache_.WriteAddressAsVarintForMode(copy_mode)) {
202       ExpectAddressVarint(address);
203     } else {
204       ExpectAddressByte(address);
205     }
206   }
207 
ExpectAddInstruction(int size)208   void ExpectAddInstruction(int size) {
209     if (size <= 18) {
210       ExpectInstructionByte(0x01 + size);
211     } else {
212       ExpectInstructionByte(0x01);
213       ExpectInstructionVarint(size);
214     }
215   }
216 
ExpectCopyInstruction(int size,int mode)217   void ExpectCopyInstruction(int size, int mode) {
218     if ((size >= 4) && (size <= 16)) {
219       ExpectInstructionByte(0x10 + (0x10 * mode) + size);
220     } else {
221       ExpectInstructionByte(0x13 + (0x10 * mode));
222       ExpectInstructionVarint(size);
223     }
224     ExpectMatch(size);
225   }
226 
ExpectAddCopyInstruction(int add_size,int copy_size,int copy_mode)227   bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) {
228     if (!default_cache_.IsSameMode(copy_mode) &&
229         (add_size <= 4) &&
230         (copy_size >= 4) &&
231         (copy_size <= 6)) {
232       ExpectInstructionByte(0x9C +
233                             (0x0C * copy_mode) +
234                             (0x03 * add_size) +
235                             copy_size);
236       ExpectMatch(copy_size);
237       return true;
238     } else if (default_cache_.IsSameMode(copy_mode) &&
239                (add_size <= 4) &&
240                (copy_size == 4)) {
241       ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size);
242       ExpectMatch(copy_size);
243       return true;
244     } else {
245       ExpectAddInstruction(add_size);
246       return false;
247     }
248   }
249 
ExpectAddInstructionForStringLength(const char * s)250   void ExpectAddInstructionForStringLength(const char* s) {
251     ExpectAddInstruction(static_cast<int>(strlen(s)));
252   }
253 
254   // Call this function before beginning to iterate through the diff string
255   // using the Expect... functions.
256   // text must be NULL-terminated.
VerifyHeaderForDictionaryAndTargetText(const char * dictionary,const char * target_text)257   void VerifyHeaderForDictionaryAndTargetText(const char* dictionary,
258                                               const char* target_text) {
259     ExpectByte(0x01);  // Win_Indicator: VCD_SOURCE (dictionary)
260     ExpectStringLength(dictionary);
261     ExpectByte(0x00);  // Source segment position: start of dictionary
262     saved_total_size_position_ = verify_position_;
263     SkipVarint();  // Length of the delta encoding
264     saved_delta_encoding_position_ = verify_position_;
265     ExpectStringLength(target_text);
266     ExpectByte(0x00);  // Delta_indicator (no compression)
267     saved_section_sizes_position_ = verify_position_;
268     SkipVarint();  // length of data for ADDs and RUNs
269     SkipVarint();  // length of instructions section
270     SkipVarint();  // length of addresses for COPYs
271   }
272 
273   // Call this function before beginning to iterating through the entire
274   // diff string using the Expect... functions.  It makes sure that the
275   // size totals in the window header match the number of bytes that
276   // were parsed.
VerifySizes()277   void VerifySizes() {
278     EXPECT_EQ(verify_position_, diff_.size());
279     const size_t delta_encoding_size = verify_position_ -
280                                        saved_delta_encoding_position_;
281     verify_position_ = saved_total_size_position_;
282     ExpectSize(delta_encoding_size);
283     verify_position_ = saved_section_sizes_position_;
284     ExpectSize(data_bytes_);
285     ExpectSize(instruction_bytes_);
286     ExpectSize(address_bytes_);
287   }
288 
ExpectMatch(size_t match_size)289   void ExpectMatch(size_t match_size) {
290     if (match_size >= expected_match_counts_.size()) {
291       // Be generous to avoid resizing again
292       expected_match_counts_.resize(match_size * 2, 0);
293     }
294     ++expected_match_counts_[match_size];
295   }
296 
VerifyMatchCounts()297   void VerifyMatchCounts() {
298     EXPECT_TRUE(std::equal(expected_match_counts_.begin(),
299                            expected_match_counts_.end(),
300                            actual_match_counts_.begin()));
301   }
302 
303   bool interleaved_;
304   string diff_;
305   OutputString<string> diff_output_string_;
306   size_t verify_position_;
307   size_t saved_total_size_position_;
308   size_t saved_delta_encoding_position_;
309   size_t saved_section_sizes_position_;
310   size_t data_bytes_;
311   size_t instruction_bytes_;
312   size_t address_bytes_;
313   VCDiffAddressCache default_cache_;  // Used for finding mode values
314   std::vector<int> expected_match_counts_;
315   std::vector<int> actual_match_counts_;
316 };
317 
318 class VCDiffEngineTest : public VCDiffEngineTestBase {
319  protected:
VCDiffEngineTest()320   VCDiffEngineTest() :
321       engine_(dictionary_, strlen(dictionary_)) {
322     EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
323   }
324 
~VCDiffEngineTest()325   virtual ~VCDiffEngineTest() { }
326 
327 
SetUpTestCase()328   static void SetUpTestCase() {
329     MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_,
330                          kBlockSize, false);
331     MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false);
332   }
333 
TearDownTestCase()334   static void TearDownTestCase() {
335     delete[] dictionary_;
336     delete[] target_;
337   }
338 
EncodeNothing(bool interleaved,bool target_matching)339   void EncodeNothing(bool interleaved, bool target_matching) {
340     interleaved_ = interleaved;
341     VCDiffCodeTableWriter coder(interleaved);
342     coder.Init(engine_.dictionary_size());
343     engine_.Encode("", 0, target_matching, &diff_output_string_, &coder);
344     EXPECT_TRUE(diff_.empty());
345     actual_match_counts_ = coder.match_counts();
346   }
347 
348   // text must be NULL-terminated
EncodeText(const char * text,bool interleaved,bool target_matching)349   void EncodeText(const char* text, bool interleaved, bool target_matching) {
350     interleaved_ = interleaved;
351     VCDiffCodeTableWriter coder(interleaved);
352     coder.Init(engine_.dictionary_size());
353     engine_.Encode(text,
354                    strlen(text),
355                    target_matching,
356                    &diff_output_string_,
357                    &coder);
358     actual_match_counts_ = coder.match_counts();
359   }
360 
Encode(bool interleaved,bool target_matching)361   void Encode(bool interleaved, bool target_matching) {
362     EncodeText(target_, interleaved, target_matching);
363     VerifyHeader();
364   }
365 
VerifyHeader()366   void VerifyHeader() {
367     VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
368   }
369 
370   static const char dictionary_without_spaces_[];
371   static const char target_without_spaces_[];
372 
373   static const char* dictionary_;
374   static const char* target_;
375 
376   const VCDiffEngine engine_;
377 };
378 
379 #ifdef GTEST_HAS_DEATH_TEST
380 typedef VCDiffEngineTest VCDiffEngineDeathTest;
381 #endif  // GTEST_HAS_DEATH_TEST
382 
383 const char VCDiffEngineTest::dictionary_without_spaces_[] =
384     "The only thing we have to fear is fear itself";
385 
386 const char VCDiffEngineTest::target_without_spaces_[] =
387     "What we hear is fearsome";
388 
389 const char* VCDiffEngineTest::dictionary_ = NULL;
390 const char* VCDiffEngineTest::target_ = NULL;
391 
392 #ifdef GTEST_HAS_DEATH_TEST
TEST_F(VCDiffEngineDeathTest,InitCalledTwice)393 TEST_F(VCDiffEngineDeathTest, InitCalledTwice) {
394   EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()),
395                      "twice");
396 }
397 #endif  // GTEST_HAS_DEATH_TEST
398 
TEST_F(VCDiffEngineTest,EngineEncodeNothing)399 TEST_F(VCDiffEngineTest, EngineEncodeNothing) {
400   EncodeNothing(/* interleaved = */ false, /* target matching = */ false);
401 }
402 
TEST_F(VCDiffEngineTest,EngineEncodeNothingInterleaved)403 TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) {
404   EncodeNothing(/* interleaved = */ true, /* target matching = */ false);
405 }
406 
TEST_F(VCDiffEngineTest,EngineEncodeNothingTarget)407 TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) {
408   EncodeNothing(/* interleaved = */ false, /* target matching = */ true);
409 }
410 
TEST_F(VCDiffEngineTest,EngineEncodeNothingTargetInterleaved)411 TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) {
412   EncodeNothing(/* interleaved = */ true, /* target matching = */ true);
413 }
414 
TEST_F(VCDiffEngineTest,EngineEncodeSmallerThanOneBlock)415 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) {
416   const char* small_text = "  ";
417   EncodeText(small_text,
418              /* interleaved = */ false,
419              /* target matching = */ false);
420   VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
421   // Data for ADDs
422   ExpectDataString(small_text);
423   // Instructions and sizes
424   ExpectAddInstructionForStringLength(small_text);
425 }
426 
TEST_F(VCDiffEngineTest,EngineEncodeSmallerThanOneBlockInterleaved)427 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) {
428   const char* small_text = "  ";
429   EncodeText(small_text,
430              /* interleaved = */ true,
431              /* target matching = */ false);
432   VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
433   // Interleaved section
434   ExpectAddInstructionForStringLength(small_text);
435   ExpectDataString(small_text);
436 }
437 
TEST_F(VCDiffEngineTest,EngineEncodeSampleText)438 TEST_F(VCDiffEngineTest, EngineEncodeSampleText) {
439   Encode(/* interleaved = */ false, /* target matching = */ false);
440   // Data for ADDs
441   ExpectDataStringWithBlockSpacing("W", false);
442   ExpectDataByte('t');
443   ExpectDataByte('s');
444   ExpectDataByte('m');
445   // Instructions and sizes
446   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
447                                 VCD_SELF_MODE)) {
448     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
449   }
450   ExpectAddInstruction(1);
451   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
452   ExpectCopyInstruction(11 * kBlockSize,
453                         default_cache_.FirstNearMode());
454   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
455     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
456   }
457   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
458     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
459   }
460   // Addresses for COPY
461   ExpectAddressVarint(18 * kBlockSize);  // "ha"
462   ExpectAddressVarint(14 * kBlockSize);  // " we h"
463   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
464   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
465   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
466   VerifySizes();
467 }
468 
TEST_F(VCDiffEngineTest,EngineEncodeSampleTextInterleaved)469 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) {
470   Encode(/* interleaved = */ true, /* target matching = */ false);
471   // Interleaved section
472   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
473                                 VCD_SELF_MODE)) {
474     ExpectDataStringWithBlockSpacing("W", false);
475     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
476   } else {
477     ExpectDataStringWithBlockSpacing("W", false);
478   }
479   ExpectAddressVarint(18 * kBlockSize);  // "ha"
480   ExpectAddInstruction(1);
481   ExpectDataByte('t');
482   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
483   ExpectAddressVarint(14 * kBlockSize);  // " we h"
484   ExpectCopyInstruction(11 * kBlockSize,
485                         default_cache_.FirstNearMode());
486   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
487   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
488     ExpectDataByte('s');
489     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
490   } else {
491     ExpectDataByte('s');
492   }
493   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
494   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
495     ExpectDataByte('m');
496     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
497   } else {
498     ExpectDataByte('m');
499   }
500   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
501   VerifySizes();
502 }
503 
TEST_F(VCDiffEngineTest,EngineEncodeSampleTextWithTargetMatching)504 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) {
505   Encode(/* interleaved = */ false, /* target matching = */ true);
506   // Data for ADDs
507   ExpectDataStringWithBlockSpacing("W", false);
508   ExpectDataByte('t');
509   ExpectDataByte('s');
510   ExpectDataByte('m');
511   // Instructions and sizes
512   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
513                                 VCD_SELF_MODE)) {
514     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
515   }
516   ExpectAddInstruction(1);
517   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
518   ExpectCopyInstruction(11 * kBlockSize,
519                         default_cache_.FirstNearMode());
520   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
521     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
522   }
523   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
524     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
525   }
526   // Addresses for COPY
527   ExpectAddressVarint(18 * kBlockSize);  // "ha"
528   ExpectAddressVarint(14 * kBlockSize);  // " we h"
529   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
530   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
531   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
532   VerifySizes();
533 }
534 
TEST_F(VCDiffEngineTest,EngineEncodeSampleTextInterleavedWithTargetMatching)535 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) {
536   Encode(/* interleaved = */ true, /* target matching = */ false);
537   // Interleaved section
538   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
539                                 VCD_SELF_MODE)) {
540     ExpectDataStringWithBlockSpacing("W", false);
541     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
542   } else {
543     ExpectDataStringWithBlockSpacing("W", false);
544   }
545   ExpectAddressVarint(18 * kBlockSize);  // "ha"
546   ExpectAddInstruction(1);
547   ExpectDataByte('t');
548   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
549   ExpectAddressVarint(14 * kBlockSize);  // " we h"
550   ExpectCopyInstruction(11 * kBlockSize,
551                         default_cache_.FirstNearMode());
552   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
553   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
554     ExpectDataByte('s');
555     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
556   } else {
557     ExpectDataByte('s');
558   }
559   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
560   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
561     ExpectDataByte('m');
562     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
563   } else {
564     ExpectDataByte('m');
565   }
566   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
567   VerifySizes();
568 }
569 
570 // This test case takes a dictionary containing several instances of the string
571 // "weasel", and a target string which is identical to the dictionary
572 // except that all instances of "weasel" have been replaced with the string
573 // "moon-pie".  It tests that COPY instructions are generated for all
574 // boilerplate text (that is, the text between the "moon-pie" instances in
575 // the target) and, if target matching is enabled, that each instance of
576 // "moon-pie" (except the first one) is encoded using a COPY instruction
577 // rather than an ADD.
578 class WeaselsToMoonpiesTest : public VCDiffEngineTestBase {
579  protected:
580   // kCompressibleTestBlockSize:
581   // The size of the block to create for each letter in the
582   // dictionary and search string for the "compressible text" test.
583   // See MakeEachLetterABlock, below.
584   // If we use kCompressibleTestBlockSize = kBlockSize, then the
585   // encoder will find one match per unique letter in the HTML text.
586   // There are too many examples of "<" in the text for the encoder
587   // to iterate through them all, and some matches are not found.
588   // If we use kCompressibleTextBlockSize = 1, then the boilerplate
589   // text between "weasel" strings in the dictionary and "moon-pie"
590   // strings in the target may not be long enough to be found by
591   // the encoder's block-hash algorithm.  A good value, that will give
592   // reproducible results across all block sizes, will be somewhere
593   // in between these extremes.
594   static const int kCompressibleTestBlockSize = kBlockSize / 4;
595   static const int kTrailingSpaces = kCompressibleTestBlockSize - 1;
596 
WeaselsToMoonpiesTest()597   WeaselsToMoonpiesTest() :
598       engine_(dictionary_, strlen(dictionary_)),
599       match_index_(0),
600       search_dictionary_(dictionary_, strlen(dictionary_)),
601       copied_moonpie_address_(0) {
602     EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
603     weasel_positions_[0] = 0;
604     after_weasel_[0] = 0;
605     moonpie_positions_[0] = 0;
606     after_moonpie_[0] = 0;
607   }
608 
~WeaselsToMoonpiesTest()609   virtual ~WeaselsToMoonpiesTest() { }
610 
SetUpTestCase()611   static void SetUpTestCase() {
612     MakeEachLetterABlock(dictionary_without_spaces_,
613                          &dictionary_,
614                          kCompressibleTestBlockSize,
615                          false);
616     MakeEachLetterABlock(target_without_spaces_,
617                          &target_,
618                          kCompressibleTestBlockSize,
619                          false);
620     MakeEachLetterABlock(weasel_text_without_spaces_,
621                          &weasel_text_,
622                          kCompressibleTestBlockSize,
623                          true);
624     MakeEachLetterABlock(moonpie_text_without_spaces_,
625                          &moonpie_text_,
626                          kCompressibleTestBlockSize,
627                          true);
628   }
629 
TearDownTestCase()630   static void TearDownTestCase() {
631     delete[] dictionary_;
632     delete[] target_;
633     delete[] weasel_text_;
634     delete[] moonpie_text_;
635   }
636 
637   // text must be NULL-terminated
EncodeText(const char * text,bool interleaved,bool target_matching)638   void EncodeText(const char* text, bool interleaved, bool target_matching) {
639     interleaved_ = interleaved;
640     VCDiffCodeTableWriter coder(interleaved);
641     coder.Init(engine_.dictionary_size());
642     engine_.Encode(text,
643                    strlen(text),
644                    target_matching,
645                    &diff_output_string_,
646                    &coder);
647     actual_match_counts_ = coder.match_counts();
648   }
649 
Encode(bool interleaved,bool target_matching)650   void Encode(bool interleaved, bool target_matching) {
651     EncodeText(target_, interleaved, target_matching);
652     VerifyHeader();
653   }
654 
VerifyHeader()655   void VerifyHeader() {
656     VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
657   }
658 
ExpectCopyForSize(size_t size,int mode)659   void ExpectCopyForSize(size_t size, int mode) {
660     ExpectCopyInstruction(static_cast<int>(size), mode);
661   }
662 
ExpectAddForSize(size_t size)663   void ExpectAddForSize(size_t size) {
664     ExpectAddInstruction(static_cast<int>(size));
665   }
666 
ExpectAddressVarintForSize(size_t value)667   void ExpectAddressVarintForSize(size_t value) {
668     ExpectAddressVarint(static_cast<int32_t>(value));
669   }
670 
FindNextMoonpie(bool include_trailing_spaces)671   void FindNextMoonpie(bool include_trailing_spaces) {
672     ++match_index_;
673     SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_,
674                                                      AfterLastWeasel()));
675     if (CurrentWeaselPosition() == string::npos) {
676       SetCurrentMoonpiePosition(string::npos);
677     } else {
678       SetCurrentAfterWeaselPosition(CurrentWeaselPosition()
679                                         + strlen(weasel_text_)
680                                         + (include_trailing_spaces ?
681                                             kTrailingSpaces : 0));
682       SetCurrentMoonpiePosition(AfterLastMoonpie()
683                                     + CurrentBoilerplateLength());
684       SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition()
685                                         + strlen(moonpie_text_)
686                                         + (include_trailing_spaces ?
687                                             kTrailingSpaces : 0));
688     }
689   }
NoMoreMoonpies() const690   bool NoMoreMoonpies() const {
691     return CurrentMoonpiePosition() == string::npos;
692   }
CurrentWeaselPosition() const693   size_t CurrentWeaselPosition() const {
694     return weasel_positions_[match_index_];
695   }
LastWeaselPosition() const696   size_t LastWeaselPosition() const {
697     return weasel_positions_[match_index_ - 1];
698   }
CurrentMoonpiePosition() const699   size_t CurrentMoonpiePosition() const {
700     return moonpie_positions_[match_index_];
701   }
LastMoonpiePosition() const702   size_t LastMoonpiePosition() const {
703     return moonpie_positions_[match_index_ - 1];
704   }
AfterLastWeasel() const705   size_t AfterLastWeasel() const {
706     CHECK_GE(match_index_, 1);
707     return after_weasel_[match_index_ - 1];
708   }
AfterPreviousWeasel() const709   size_t AfterPreviousWeasel() const {
710     CHECK_GE(match_index_, 2);
711     return after_weasel_[match_index_ - 2];
712   }
AfterLastMoonpie() const713   size_t AfterLastMoonpie() const {
714     CHECK_GE(match_index_, 1);
715     return after_moonpie_[match_index_ - 1];
716   }
AfterPreviousMoonpie() const717   size_t AfterPreviousMoonpie() const {
718     CHECK_GE(match_index_, 2);
719     return after_moonpie_[match_index_ - 2];
720   }
721 
SetCurrentWeaselPosition(size_t value)722   void SetCurrentWeaselPosition(size_t value) {
723     weasel_positions_[match_index_] = value;
724   }
SetCurrentAfterWeaselPosition(size_t value)725   void SetCurrentAfterWeaselPosition(size_t value) {
726     after_weasel_[match_index_] = value;
727   }
SetCurrentMoonpiePosition(size_t value)728   void SetCurrentMoonpiePosition(size_t value) {
729     moonpie_positions_[match_index_] = value;
730   }
SetCurrentAfterMoonpiePosition(size_t value)731   void SetCurrentAfterMoonpiePosition(size_t value) {
732     after_moonpie_[match_index_] = value;
733   }
734 
735   // Find the length of the text in between the "weasel" strings in the
736   // compressible dictionary, which is the same as the text between
737   // the "moon-pie" strings in the compressible target.
CurrentBoilerplateLength() const738   size_t CurrentBoilerplateLength() const {
739     CHECK_GE(match_index_, 1);
740     return CurrentWeaselPosition() - AfterLastWeasel();
741   }
DistanceFromLastWeasel() const742   size_t DistanceFromLastWeasel() const {
743     CHECK_GE(match_index_, 1);
744     return CurrentWeaselPosition() - LastWeaselPosition();
745   }
DistanceFromLastMoonpie() const746   size_t DistanceFromLastMoonpie() const {
747     CHECK_GE(match_index_, 1);
748     return CurrentMoonpiePosition() - LastMoonpiePosition();
749   }
DistanceBetweenLastTwoWeasels() const750   size_t DistanceBetweenLastTwoWeasels() const {
751     CHECK_GE(match_index_, 2);
752     return AfterLastWeasel() - AfterPreviousWeasel();
753   }
DistanceBetweenLastTwoMoonpies() const754   size_t DistanceBetweenLastTwoMoonpies() const {
755     CHECK_GE(match_index_, 2);
756     return AfterLastMoonpie() - AfterPreviousMoonpie();
757   }
758 
759   int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const;
760   int UpdateCopyModeForMoonpie(int copy_mode) const;
761   int32_t FindMoonpieAddressForCopyMode(int copy_mode) const;
762 
763   void CopyBoilerplateAndAddMoonpie(int copy_mode);
764   void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode);
765 
766   static const char dictionary_without_spaces_[];
767   static const char target_without_spaces_[];
768   static const char weasel_text_without_spaces_[];
769   static const char moonpie_text_without_spaces_[];
770 
771   static const char* dictionary_;
772   static const char* target_;
773   static const char* weasel_text_;
774   static const char* moonpie_text_;
775 
776   const VCDiffEngine engine_;
777   size_t weasel_positions_[128];
778   size_t after_weasel_[128];
779   size_t moonpie_positions_[128];
780   size_t after_moonpie_[128];
781   int match_index_;
782   string search_dictionary_;
783   size_t copied_moonpie_address_;
784 };
785 
786 // Care is taken in the formulation of the dictionary
787 // to ensure that the surrounding letters do not match; for example,
788 // there are not two instances of the string "weasels".  Otherwise,
789 // the matching behavior would not be as predictable.
790 const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] =
791   "<html>\n"
792   "<head>\n"
793   "<meta content=\"text/html; charset=ISO-8859-1\"\n"
794   "http-equiv=\"content-type\">\n"
795   "<title>All about weasels</title>\n"
796   "</head>\n"
797   "<!-- You will notice that the word \"weasel\" may be replaced"
798   " with something else -->\n"
799   "<body>\n"
800   "<h1>All about the weasel: highly compressible HTML text</h1>"
801   "<ul>\n"
802   "<li>Don\'t look a gift weasel in its mouth.</li>\n"
803   "<li>This item makes sure the next occurrence is found.</li>\n"
804   "<li>Don\'t count your weasel, before it\'s hatched.</li>\n"
805   "</ul>\n"
806   "<br>\n"
807   "</body>\n"
808   "</html>\n";
809 
810 const char WeaselsToMoonpiesTest::target_without_spaces_[] =
811   "<html>\n"
812   "<head>\n"
813   "<meta content=\"text/html; charset=ISO-8859-1\"\n"
814   "http-equiv=\"content-type\">\n"
815   "<title>All about moon-pies</title>\n"
816   "</head>\n"
817   "<!-- You will notice that the word \"moon-pie\" may be replaced"
818   " with something else -->\n"
819   "<body>\n"
820   "<h1>All about the moon-pie: highly compressible HTML text</h1>"
821   "<ul>\n"
822   "<li>Don\'t look a gift moon-pie in its mouth.</li>\n"
823   "<li>This item makes sure the next occurrence is found.</li>\n"
824   "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n"
825   "</ul>\n"
826   "<br>\n"
827   "</body>\n"
828   "</html>\n";
829 
830 const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel";
831 const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie";
832 
833 const char* WeaselsToMoonpiesTest::dictionary_ = NULL;
834 const char* WeaselsToMoonpiesTest::target_ = NULL;
835 const char* WeaselsToMoonpiesTest::weasel_text_ = NULL;
836 const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL;
837 
FindBoilerplateAddressForCopyMode(int copy_mode) const838 int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode(
839     int copy_mode) const {
840   size_t copy_address = 0;
841   if (default_cache_.IsSelfMode(copy_mode)) {
842     copy_address = AfterLastWeasel();
843   } else if (default_cache_.IsNearMode(copy_mode)) {
844     copy_address = DistanceBetweenLastTwoWeasels();
845   } else if (default_cache_.IsSameMode(copy_mode)) {
846     copy_address = AfterLastWeasel() % 256;
847   }
848   return static_cast<int32_t>(copy_address);
849 }
850 
UpdateCopyModeForMoonpie(int copy_mode) const851 int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const {
852   if (copy_mode == default_cache_.FirstSameMode()) {
853     return default_cache_.FirstSameMode()
854         + static_cast<int>((copied_moonpie_address_ / 256) % 3);
855   } else {
856     return copy_mode;
857   }
858 }
859 
FindMoonpieAddressForCopyMode(int copy_mode) const860 int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode(
861     int copy_mode) const {
862   size_t copy_address = 0;
863   if (default_cache_.IsHereMode(copy_mode)) {
864     copy_address = DistanceFromLastMoonpie();
865   } else if (default_cache_.IsNearMode(copy_mode)) {
866     copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces;
867   } else if (default_cache_.IsSameMode(copy_mode)) {
868     copy_address = copied_moonpie_address_ % 256;
869   }
870   return static_cast<int32_t>(copy_address);
871 }
872 
873 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
874 // in the encoding.
CopyBoilerplateAndAddMoonpie(int copy_mode)875 void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) {
876   EXPECT_FALSE(NoMoreMoonpies());
877   ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
878   ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
879   ExpectAddInstructionForStringLength(moonpie_text_);
880   ExpectDataString(moonpie_text_);
881 }
882 
883 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
884 // in the encoding.  The "moon-pie" text will be copied from the previously
885 // encoded target.
CopyBoilerplateAndCopyMoonpie(int copy_mode,int moonpie_copy_mode)886 void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie(
887     int copy_mode,
888     int moonpie_copy_mode) {
889   EXPECT_FALSE(NoMoreMoonpies());
890   ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
891   ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
892   moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode);
893   ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode);
894   ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode),
895                 moonpie_copy_mode);
896   copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition();
897 }
898 
TEST_F(WeaselsToMoonpiesTest,EngineEncodeCompressibleNoTargetMatching)899 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) {
900   Encode(/* interleaved = */ true, /* target matching = */ false);
901   FindNextMoonpie(false);
902   // Expect all five "weasel"s to be replaced with "moon-pie"s
903   CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
904   FindNextMoonpie(false);
905   CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE);
906   FindNextMoonpie(false);
907   CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1);
908   FindNextMoonpie(false);
909   CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2);
910   FindNextMoonpie(false);
911   CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3);
912   FindNextMoonpie(false);
913   EXPECT_TRUE(NoMoreMoonpies());
914   ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
915                     default_cache_.FirstNearMode());
916   ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
917   VerifySizes();
918 }
919 
TEST_F(WeaselsToMoonpiesTest,EngineEncodeCompressibleWithTargetMatching)920 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) {
921   Encode(/* interleaved = */ true, /* target matching = */ true);
922   // Expect all five "weasel"s to be replaced with "moon-pie"s.
923   // Every "moon-pie" after the first one should be copied from the
924   // previously encoded target text.
925   FindNextMoonpie(false);
926   CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
927   FindNextMoonpie(true);
928   CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE);
929   FindNextMoonpie(true);
930   CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
931                                 default_cache_.FirstSameMode());
932   FindNextMoonpie(true);
933   CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3,
934                                 VCD_HERE_MODE);
935   FindNextMoonpie(true);
936   CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
937                                 default_cache_.FirstSameMode());
938   FindNextMoonpie(true);
939   EXPECT_TRUE(NoMoreMoonpies());
940   ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
941                     default_cache_.FirstNearMode() + 3);
942   ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
943   VerifySizes();
944 }
945 
946 }  //  anonymous namespace
947 }  //  namespace open-vcdiff
948