• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2008 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #include <config.h>
17 #include "vcdiffengine.h"
18 #include <string.h>  // memset, strlen
19 #include <algorithm>
20 #include <string>
21 #include "addrcache.h"
22 #include "blockhash.h"
23 #include "encodetable.h"
24 #include "google/output_string.h"
25 #include "rolling_hash.h"
26 #include "testing.h"
27 #include "varint_bigendian.h"
28 #include "vcdiff_defs.h"
29 
30 namespace open_vcdiff {
31 
32 namespace {
33 
34 class VCDiffEngineTestBase : public testing::Test {
35  protected:
36   typedef std::string string;
37 
38   // Some common definitions and helper functions used in the various tests
39   // for VCDiffEngine.
40   static const int kBlockSize = VCDiffEngine::kMinimumMatchSize;
41 
VCDiffEngineTestBase()42   VCDiffEngineTestBase() : interleaved_(false),
43                            diff_output_string_(&diff_),
44                            verify_position_(0),
45                            saved_total_size_position_(0),
46                            saved_delta_encoding_position_(0),
47                            saved_section_sizes_position_(0),
48                            data_bytes_(0),
49                            instruction_bytes_(0),
50                            address_bytes_(0) { }
51 
~VCDiffEngineTestBase()52   virtual ~VCDiffEngineTestBase() { }
53 
TearDown()54   virtual void TearDown() {
55   }
56 
57   // Copy string_without_spaces into newly allocated result buffer,
58   // but pad its contents with space characters so that every character
59   // in string_without_spaces corresponds to (block_size - 1)
60   // spaces in the result, followed by that character.
61   // For example:
62   // If string_without_spaces begins "The only thing"... and block_size is 4,
63   // then 3 space characters will be inserted
64   // between each letter in the result, as follows:
65   // "   T   h   e       o   n   l   y       t   h   i   n   g"...
66   // This makes testing simpler, because finding a block_size-byte match
67   // between the dictionary and target only depends on the
68   // trailing letter in each block.
69   // If no_initial_padding is true, then the first letter will not have
70   // spaces added in front of it.
MakeEachLetterABlock(const char * string_without_spaces,const char ** result,int block_size,bool no_initial_padding)71   static void MakeEachLetterABlock(const char* string_without_spaces,
72                                    const char** result,
73                                    int block_size,
74                                    bool no_initial_padding) {
75     const size_t length_without_spaces = strlen(string_without_spaces);
76     char* padded_text = new char[(block_size * length_without_spaces) + 1];
77     memset(padded_text, ' ', block_size * length_without_spaces);
78     char* padded_text_ptr = padded_text;
79     if (!no_initial_padding) {
80       padded_text_ptr += block_size - 1;
81     }
82     for (size_t i = 0; i < length_without_spaces; ++i) {
83       *padded_text_ptr = string_without_spaces[i];
84       padded_text_ptr += block_size;
85     }
86     *(padded_text_ptr - block_size + 1)  = '\0';
87     *result = padded_text;
88   }
89 
90   // These functions iterate through the decoded output and expect
91   // simple elements: bytes or variable-length integers.
ExpectByte(char byte)92   void ExpectByte(char byte) {
93     EXPECT_GT(diff_.size(), verify_position_);
94     EXPECT_EQ(byte, diff_[verify_position_]);
95     ++verify_position_;
96   }
97 
ExpectVarint(int32_t expected_value)98   size_t ExpectVarint(int32_t expected_value) {
99     EXPECT_GT(diff_.size(), verify_position_);
100     const char* const original_position = &diff_[verify_position_];
101     const char* new_position = original_position;
102     const size_t expected_length = VarintBE<int32_t>::Length(expected_value);
103     int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(),
104                                                     &new_position);
105     EXPECT_LE(0, parsed_value);
106     size_t parsed_length = new_position - original_position;
107     EXPECT_EQ(expected_value, parsed_value);
108     EXPECT_EQ(expected_length, parsed_length);
109     verify_position_ += parsed_length;
110     return parsed_length;
111   }
112 
ExpectSize(size_t size)113   size_t ExpectSize(size_t size) {
114     return ExpectVarint(static_cast<int32_t>(size));
115   }
116 
ExpectStringLength(const char * s)117   size_t ExpectStringLength(const char* s) {
118     return ExpectSize(strlen(s));
119   }
120 
SkipVarint()121   void SkipVarint() {
122     EXPECT_GT(diff_.size(), verify_position_);
123     const char* const original_position = &diff_[verify_position_];
124     const char* new_position = original_position;
125     VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position);
126     size_t parsed_length = new_position - original_position;
127     verify_position_ += parsed_length;
128   }
129 
ExpectDataByte(char byte)130   void ExpectDataByte(char byte) {
131     ExpectByte(byte);
132     if (interleaved_) {
133       ++instruction_bytes_;
134     } else {
135       ++data_bytes_;
136     }
137   }
138 
ExpectDataString(const char * expected_string)139   void ExpectDataString(const char *expected_string) {
140     const char* ptr = expected_string;
141     while (*ptr) {
142       ExpectDataByte(*ptr);
143       ++ptr;
144     }
145   }
146 
ExpectDataStringWithBlockSpacing(const char * expected_string,bool trailing_spaces)147   void ExpectDataStringWithBlockSpacing(const char *expected_string,
148                                         bool trailing_spaces) {
149     const char* ptr = expected_string;
150     while (*ptr) {
151       for (int i = 0; i < (kBlockSize - 1); ++i) {
152         ExpectDataByte(' ');
153       }
154       ExpectDataByte(*ptr);
155       ++ptr;
156     }
157     if (trailing_spaces) {
158       for (int i = 0; i < (kBlockSize - 1); ++i) {
159         ExpectDataByte(' ');
160       }
161     }
162   }
163 
ExpectInstructionByte(char byte)164   void ExpectInstructionByte(char byte) {
165     ExpectByte(byte);
166     ++instruction_bytes_;
167   }
168 
ExpectInstructionVarint(int32_t value)169   void ExpectInstructionVarint(int32_t value) {
170     instruction_bytes_ += ExpectVarint(value);
171   }
172 
ExpectAddressByte(char byte)173   void ExpectAddressByte(char byte) {
174     ExpectByte(byte);
175     if (interleaved_) {
176       ++instruction_bytes_;
177     } else {
178       ++address_bytes_;
179     }
180   }
181 
ExpectAddressVarint(int32_t value)182   void ExpectAddressVarint(int32_t value) {
183     if (interleaved_) {
184       instruction_bytes_ += ExpectVarint(value);
185     } else {
186       address_bytes_ += ExpectVarint(value);
187     }
188   }
189 
190   // The following functions leverage the fact that the encoder uses
191   // the default code table and cache sizes.  They are able to search for
192   // instructions of a particular size.  The logic for mapping from
193   // instruction type, mode, and size to opcode value is very different here
194   // from the logic used in encodetable.{h,cc}, so hopefully
195   // this version will help validate that the other is correct.
196   // This version uses conditional statements, while encodetable.h
197   // looks up values in a mapping table.
ExpectAddress(int32_t address,int copy_mode)198   void ExpectAddress(int32_t address, int copy_mode) {
199     if (default_cache_.WriteAddressAsVarintForMode(copy_mode)) {
200       ExpectAddressVarint(address);
201     } else {
202       ExpectAddressByte(address);
203     }
204   }
205 
ExpectAddInstruction(int size)206   void ExpectAddInstruction(int size) {
207     if (size <= 18) {
208       ExpectInstructionByte(0x01 + size);
209     } else {
210       ExpectInstructionByte(0x01);
211       ExpectInstructionVarint(size);
212     }
213   }
214 
ExpectCopyInstruction(int size,int mode)215   void ExpectCopyInstruction(int size, int mode) {
216     if ((size >= 4) && (size <= 16)) {
217       ExpectInstructionByte(0x10 + (0x10 * mode) + size);
218     } else {
219       ExpectInstructionByte(0x13 + (0x10 * mode));
220       ExpectInstructionVarint(size);
221     }
222   }
223 
ExpectAddCopyInstruction(int add_size,int copy_size,int copy_mode)224   bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) {
225     if (!default_cache_.IsSameMode(copy_mode) &&
226         (add_size <= 4) &&
227         (copy_size >= 4) &&
228         (copy_size <= 6)) {
229       ExpectInstructionByte(0x9C +
230                             (0x0C * copy_mode) +
231                             (0x03 * add_size) +
232                             copy_size);
233       return true;
234     } else if (default_cache_.IsSameMode(copy_mode) &&
235                (add_size <= 4) &&
236                (copy_size == 4)) {
237       ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size);
238       return true;
239     } else {
240       ExpectAddInstruction(add_size);
241       return false;
242     }
243   }
244 
ExpectAddInstructionForStringLength(const char * s)245   void ExpectAddInstructionForStringLength(const char* s) {
246     ExpectAddInstruction(static_cast<int>(strlen(s)));
247   }
248 
249   // Call this function before beginning to iterate through the diff string
250   // using the Expect... functions.
251   // text must be NULL-terminated.
VerifyHeaderForDictionaryAndTargetText(const char * dictionary,const char * target_text)252   void VerifyHeaderForDictionaryAndTargetText(const char* dictionary,
253                                               const char* target_text) {
254     ExpectByte(0x01);  // Win_Indicator: VCD_SOURCE (dictionary)
255     ExpectStringLength(dictionary);
256     ExpectByte(0x00);  // Source segment position: start of dictionary
257     saved_total_size_position_ = verify_position_;
258     SkipVarint();  // Length of the delta encoding
259     saved_delta_encoding_position_ = verify_position_;
260     ExpectStringLength(target_text);
261     ExpectByte(0x00);  // Delta_indicator (no compression)
262     saved_section_sizes_position_ = verify_position_;
263     SkipVarint();  // length of data for ADDs and RUNs
264     SkipVarint();  // length of instructions section
265     SkipVarint();  // length of addresses for COPYs
266   }
267 
268   // Call this function before beginning to iterating through the entire
269   // diff string using the Expect... functions.  It makes sure that the
270   // size totals in the window header match the number of bytes that
271   // were parsed.
VerifySizes()272   void VerifySizes() {
273     EXPECT_EQ(verify_position_, diff_.size());
274     const size_t delta_encoding_size = verify_position_ -
275                                        saved_delta_encoding_position_;
276     verify_position_ = saved_total_size_position_;
277     ExpectSize(delta_encoding_size);
278     verify_position_ = saved_section_sizes_position_;
279     ExpectSize(data_bytes_);
280     ExpectSize(instruction_bytes_);
281     ExpectSize(address_bytes_);
282   }
283 
284   bool interleaved_;
285   string diff_;
286   OutputString<string> diff_output_string_;
287   size_t verify_position_;
288   size_t saved_total_size_position_;
289   size_t saved_delta_encoding_position_;
290   size_t saved_section_sizes_position_;
291   size_t data_bytes_;
292   size_t instruction_bytes_;
293   size_t address_bytes_;
294   VCDiffAddressCache default_cache_;  // Used for finding mode values
295 };
296 
297 class VCDiffEngineTest : public VCDiffEngineTestBase {
298  protected:
VCDiffEngineTest()299   VCDiffEngineTest() :
300       engine_(dictionary_, strlen(dictionary_)) {
301     EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
302   }
303 
~VCDiffEngineTest()304   virtual ~VCDiffEngineTest() { }
305 
306 
SetUpTestCase()307   static void SetUpTestCase() {
308     MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_,
309                          kBlockSize, false);
310     MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false);
311   }
312 
TearDownTestCase()313   static void TearDownTestCase() {
314     delete[] dictionary_;
315     delete[] target_;
316   }
317 
EncodeNothing(bool interleaved,bool target_matching)318   void EncodeNothing(bool interleaved, bool target_matching) {
319     interleaved_ = interleaved;
320     VCDiffCodeTableWriter coder(interleaved);
321     coder.Init(engine_.dictionary_size());
322     engine_.Encode("", 0, target_matching, &diff_output_string_, &coder);
323     EXPECT_TRUE(diff_.empty());
324   }
325 
326   // text must be NULL-terminated
EncodeText(const char * text,bool interleaved,bool target_matching)327   void EncodeText(const char* text, bool interleaved, bool target_matching) {
328     interleaved_ = interleaved;
329     VCDiffCodeTableWriter coder(interleaved);
330     coder.Init(engine_.dictionary_size());
331     engine_.Encode(text,
332                    strlen(text),
333                    target_matching,
334                    &diff_output_string_,
335                    &coder);
336   }
337 
Encode(bool interleaved,bool target_matching)338   void Encode(bool interleaved, bool target_matching) {
339     EncodeText(target_, interleaved, target_matching);
340     VerifyHeader();
341   }
342 
VerifyHeader()343   void VerifyHeader() {
344     VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
345   }
346 
347   static const char dictionary_without_spaces_[];
348   static const char target_without_spaces_[];
349 
350   static const char* dictionary_;
351   static const char* target_;
352 
353   const VCDiffEngine engine_;
354 };
355 
356 #ifdef GTEST_HAS_DEATH_TEST
357 typedef VCDiffEngineTest VCDiffEngineDeathTest;
358 #endif  // GTEST_HAS_DEATH_TEST
359 
360 const char VCDiffEngineTest::dictionary_without_spaces_[] =
361     "The only thing we have to fear is fear itself";
362 
363 const char VCDiffEngineTest::target_without_spaces_[] =
364     "What we hear is fearsome";
365 
366 const char* VCDiffEngineTest::dictionary_ = NULL;
367 const char* VCDiffEngineTest::target_ = NULL;
368 
369 #ifdef GTEST_HAS_DEATH_TEST
TEST_F(VCDiffEngineDeathTest,InitCalledTwice)370 TEST_F(VCDiffEngineDeathTest, InitCalledTwice) {
371   EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()),
372                      "twice");
373 }
374 #endif  // GTEST_HAS_DEATH_TEST
375 
TEST_F(VCDiffEngineTest,EngineEncodeNothing)376 TEST_F(VCDiffEngineTest, EngineEncodeNothing) {
377   EncodeNothing(/* interleaved = */ false, /* target matching = */ false);
378 }
379 
TEST_F(VCDiffEngineTest,EngineEncodeNothingInterleaved)380 TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) {
381   EncodeNothing(/* interleaved = */ true, /* target matching = */ false);
382 }
383 
TEST_F(VCDiffEngineTest,EngineEncodeNothingTarget)384 TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) {
385   EncodeNothing(/* interleaved = */ false, /* target matching = */ true);
386 }
387 
TEST_F(VCDiffEngineTest,EngineEncodeNothingTargetInterleaved)388 TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) {
389   EncodeNothing(/* interleaved = */ true, /* target matching = */ true);
390 }
391 
TEST_F(VCDiffEngineTest,EngineEncodeSmallerThanOneBlock)392 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) {
393   const char* small_text = "  ";
394   EncodeText(small_text,
395              /* interleaved = */ false,
396              /* target matching = */ false);
397   VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
398   // Data for ADDs
399   ExpectDataString(small_text);
400   // Instructions and sizes
401   ExpectAddInstructionForStringLength(small_text);
402 }
403 
TEST_F(VCDiffEngineTest,EngineEncodeSmallerThanOneBlockInterleaved)404 TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) {
405   const char* small_text = "  ";
406   EncodeText(small_text,
407              /* interleaved = */ true,
408              /* target matching = */ false);
409   VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
410   // Interleaved section
411   ExpectAddInstructionForStringLength(small_text);
412   ExpectDataString(small_text);
413 }
414 
TEST_F(VCDiffEngineTest,EngineEncodeSampleText)415 TEST_F(VCDiffEngineTest, EngineEncodeSampleText) {
416   Encode(/* interleaved = */ false, /* target matching = */ false);
417   // Data for ADDs
418   ExpectDataStringWithBlockSpacing("W", false);
419   ExpectDataByte('t');
420   ExpectDataByte('s');
421   ExpectDataByte('m');
422   // Instructions and sizes
423   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
424                                 VCD_SELF_MODE)) {
425     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
426   }
427   ExpectAddInstruction(1);
428   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
429   ExpectCopyInstruction(11 * kBlockSize,
430                         default_cache_.FirstNearMode());
431   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
432     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
433   }
434   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
435     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
436   }
437   // Addresses for COPY
438   ExpectAddressVarint(18 * kBlockSize);  // "ha"
439   ExpectAddressVarint(14 * kBlockSize);  // " we h"
440   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
441   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
442   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
443   VerifySizes();
444 }
445 
TEST_F(VCDiffEngineTest,EngineEncodeSampleTextInterleaved)446 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) {
447   Encode(/* interleaved = */ true, /* target matching = */ false);
448   // Interleaved section
449   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
450                                 VCD_SELF_MODE)) {
451     ExpectDataStringWithBlockSpacing("W", false);
452     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
453   } else {
454     ExpectDataStringWithBlockSpacing("W", false);
455   }
456   ExpectAddressVarint(18 * kBlockSize);  // "ha"
457   ExpectAddInstruction(1);
458   ExpectDataByte('t');
459   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
460   ExpectAddressVarint(14 * kBlockSize);  // " we h"
461   ExpectCopyInstruction(11 * kBlockSize,
462                         default_cache_.FirstNearMode());
463   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
464   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
465     ExpectDataByte('s');
466     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
467   } else {
468     ExpectDataByte('s');
469   }
470   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
471   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
472     ExpectDataByte('m');
473     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
474   } else {
475     ExpectDataByte('m');
476   }
477   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
478   VerifySizes();
479 }
480 
TEST_F(VCDiffEngineTest,EngineEncodeSampleTextWithTargetMatching)481 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) {
482   Encode(/* interleaved = */ false, /* target matching = */ true);
483   // Data for ADDs
484   ExpectDataStringWithBlockSpacing("W", false);
485   ExpectDataByte('t');
486   ExpectDataByte('s');
487   ExpectDataByte('m');
488   // Instructions and sizes
489   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
490                                 VCD_SELF_MODE)) {
491     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
492   }
493   ExpectAddInstruction(1);
494   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
495   ExpectCopyInstruction(11 * kBlockSize,
496                         default_cache_.FirstNearMode());
497   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
498     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
499   }
500   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
501     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
502   }
503   // Addresses for COPY
504   ExpectAddressVarint(18 * kBlockSize);  // "ha"
505   ExpectAddressVarint(14 * kBlockSize);  // " we h"
506   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
507   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
508   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
509   VerifySizes();
510 }
511 
TEST_F(VCDiffEngineTest,EngineEncodeSampleTextInterleavedWithTargetMatching)512 TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) {
513   Encode(/* interleaved = */ true, /* target matching = */ true);
514   // Interleaved section
515   if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
516                                 VCD_SELF_MODE)) {
517     ExpectDataStringWithBlockSpacing("W", false);
518     ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
519   } else {
520     ExpectDataStringWithBlockSpacing("W", false);
521   }
522   ExpectAddressVarint(18 * kBlockSize);  // "ha"
523   ExpectAddInstruction(1);
524   ExpectDataByte('t');
525   ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
526   ExpectAddressVarint(14 * kBlockSize);  // " we h"
527   ExpectCopyInstruction(11 * kBlockSize,
528                         default_cache_.FirstNearMode());
529   ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1));  // "ear is fear"
530   if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
531     ExpectDataByte('s');
532     ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
533   } else {
534     ExpectDataByte('s');
535   }
536   ExpectAddressVarint(4 * kBlockSize);  // "o" from "The only"
537   if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
538     ExpectDataByte('m');
539     ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
540   } else {
541     ExpectDataByte('m');
542   }
543   ExpectAddressVarint(2 * kBlockSize);  // "e" from "The only"
544   VerifySizes();
545 }
546 
547 // This test case takes a dictionary containing several instances of the string
548 // "weasel", and a target string which is identical to the dictionary
549 // except that all instances of "weasel" have been replaced with the string
550 // "moon-pie".  It tests that COPY instructions are generated for all
551 // boilerplate text (that is, the text between the "moon-pie" instances in
552 // the target) and, if target matching is enabled, that each instance of
553 // "moon-pie" (except the first one) is encoded using a COPY instruction
554 // rather than an ADD.
555 class WeaselsToMoonpiesTest : public VCDiffEngineTestBase {
556  protected:
557   // kCompressibleTestBlockSize:
558   // The size of the block to create for each letter in the
559   // dictionary and search string for the "compressible text" test.
560   // See MakeEachLetterABlock, below.
561   // If we use kCompressibleTestBlockSize = kBlockSize, then the
562   // encoder will find one match per unique letter in the HTML text.
563   // There are too many examples of "<" in the text for the encoder
564   // to iterate through them all, and some matches are not found.
565   // If we use kCompressibleTextBlockSize = 1, then the boilerplate
566   // text between "weasel" strings in the dictionary and "moon-pie"
567   // strings in the target may not be long enough to be found by
568   // the encoder's block-hash algorithm.  A good value, that will give
569   // reproducible results across all block sizes, will be somewhere
570   // in between these extremes.
571   static const int kCompressibleTestBlockSize = kBlockSize / 4;
572   static const int kTrailingSpaces = kCompressibleTestBlockSize - 1;
573 
WeaselsToMoonpiesTest()574   WeaselsToMoonpiesTest() :
575       engine_(dictionary_, strlen(dictionary_)),
576       match_index_(0),
577       search_dictionary_(dictionary_, strlen(dictionary_)),
578       copied_moonpie_address_(0) {
579     EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
580     weasel_positions_[0] = 0;
581     after_weasel_[0] = 0;
582     moonpie_positions_[0] = 0;
583     after_moonpie_[0] = 0;
584   }
585 
~WeaselsToMoonpiesTest()586   virtual ~WeaselsToMoonpiesTest() { }
587 
SetUpTestCase()588   static void SetUpTestCase() {
589     MakeEachLetterABlock(dictionary_without_spaces_,
590                          &dictionary_,
591                          kCompressibleTestBlockSize,
592                          false);
593     MakeEachLetterABlock(target_without_spaces_,
594                          &target_,
595                          kCompressibleTestBlockSize,
596                          false);
597     MakeEachLetterABlock(weasel_text_without_spaces_,
598                          &weasel_text_,
599                          kCompressibleTestBlockSize,
600                          true);
601     MakeEachLetterABlock(moonpie_text_without_spaces_,
602                          &moonpie_text_,
603                          kCompressibleTestBlockSize,
604                          true);
605   }
606 
TearDownTestCase()607   static void TearDownTestCase() {
608     delete[] dictionary_;
609     delete[] target_;
610     delete[] weasel_text_;
611     delete[] moonpie_text_;
612   }
613 
614   // text must be NULL-terminated
EncodeText(const char * text,bool interleaved,bool target_matching)615   void EncodeText(const char* text, bool interleaved, bool target_matching) {
616     interleaved_ = interleaved;
617     VCDiffCodeTableWriter coder(interleaved);
618     coder.Init(engine_.dictionary_size());
619     engine_.Encode(text,
620                    strlen(text),
621                    target_matching,
622                    &diff_output_string_,
623                    &coder);
624   }
625 
Encode(bool interleaved,bool target_matching)626   void Encode(bool interleaved, bool target_matching) {
627     EncodeText(target_, interleaved, target_matching);
628     VerifyHeader();
629   }
630 
VerifyHeader()631   void VerifyHeader() {
632     VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
633   }
634 
ExpectCopyForSize(size_t size,int mode)635   void ExpectCopyForSize(size_t size, int mode) {
636     ExpectCopyInstruction(static_cast<int>(size), mode);
637   }
638 
ExpectAddForSize(size_t size)639   void ExpectAddForSize(size_t size) {
640     ExpectAddInstruction(static_cast<int>(size));
641   }
642 
ExpectAddressVarintForSize(size_t value)643   void ExpectAddressVarintForSize(size_t value) {
644     ExpectAddressVarint(static_cast<int32_t>(value));
645   }
646 
FindNextMoonpie(bool include_trailing_spaces)647   void FindNextMoonpie(bool include_trailing_spaces) {
648     ++match_index_;
649     SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_,
650                                                      AfterLastWeasel()));
651     if (CurrentWeaselPosition() == string::npos) {
652       SetCurrentMoonpiePosition(string::npos);
653     } else {
654       SetCurrentAfterWeaselPosition(CurrentWeaselPosition()
655                                         + strlen(weasel_text_)
656                                         + (include_trailing_spaces ?
657                                             kTrailingSpaces : 0));
658       SetCurrentMoonpiePosition(AfterLastMoonpie()
659                                     + CurrentBoilerplateLength());
660       SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition()
661                                         + strlen(moonpie_text_)
662                                         + (include_trailing_spaces ?
663                                             kTrailingSpaces : 0));
664     }
665   }
NoMoreMoonpies() const666   bool NoMoreMoonpies() const {
667     return CurrentMoonpiePosition() == string::npos;
668   }
CurrentWeaselPosition() const669   size_t CurrentWeaselPosition() const {
670     return weasel_positions_[match_index_];
671   }
LastWeaselPosition() const672   size_t LastWeaselPosition() const {
673     return weasel_positions_[match_index_ - 1];
674   }
CurrentMoonpiePosition() const675   size_t CurrentMoonpiePosition() const {
676     return moonpie_positions_[match_index_];
677   }
LastMoonpiePosition() const678   size_t LastMoonpiePosition() const {
679     return moonpie_positions_[match_index_ - 1];
680   }
AfterLastWeasel() const681   size_t AfterLastWeasel() const {
682     CHECK_GE(match_index_, 1);
683     return after_weasel_[match_index_ - 1];
684   }
AfterPreviousWeasel() const685   size_t AfterPreviousWeasel() const {
686     CHECK_GE(match_index_, 2);
687     return after_weasel_[match_index_ - 2];
688   }
AfterLastMoonpie() const689   size_t AfterLastMoonpie() const {
690     CHECK_GE(match_index_, 1);
691     return after_moonpie_[match_index_ - 1];
692   }
AfterPreviousMoonpie() const693   size_t AfterPreviousMoonpie() const {
694     CHECK_GE(match_index_, 2);
695     return after_moonpie_[match_index_ - 2];
696   }
697 
SetCurrentWeaselPosition(size_t value)698   void SetCurrentWeaselPosition(size_t value) {
699     weasel_positions_[match_index_] = value;
700   }
SetCurrentAfterWeaselPosition(size_t value)701   void SetCurrentAfterWeaselPosition(size_t value) {
702     after_weasel_[match_index_] = value;
703   }
SetCurrentMoonpiePosition(size_t value)704   void SetCurrentMoonpiePosition(size_t value) {
705     moonpie_positions_[match_index_] = value;
706   }
SetCurrentAfterMoonpiePosition(size_t value)707   void SetCurrentAfterMoonpiePosition(size_t value) {
708     after_moonpie_[match_index_] = value;
709   }
710 
711   // Find the length of the text in between the "weasel" strings in the
712   // compressible dictionary, which is the same as the text between
713   // the "moon-pie" strings in the compressible target.
CurrentBoilerplateLength() const714   size_t CurrentBoilerplateLength() const {
715     CHECK_GE(match_index_, 1);
716     return CurrentWeaselPosition() - AfterLastWeasel();
717   }
DistanceFromLastWeasel() const718   size_t DistanceFromLastWeasel() const {
719     CHECK_GE(match_index_, 1);
720     return CurrentWeaselPosition() - LastWeaselPosition();
721   }
DistanceFromLastMoonpie() const722   size_t DistanceFromLastMoonpie() const {
723     CHECK_GE(match_index_, 1);
724     return CurrentMoonpiePosition() - LastMoonpiePosition();
725   }
DistanceBetweenLastTwoWeasels() const726   size_t DistanceBetweenLastTwoWeasels() const {
727     CHECK_GE(match_index_, 2);
728     return AfterLastWeasel() - AfterPreviousWeasel();
729   }
DistanceBetweenLastTwoMoonpies() const730   size_t DistanceBetweenLastTwoMoonpies() const {
731     CHECK_GE(match_index_, 2);
732     return AfterLastMoonpie() - AfterPreviousMoonpie();
733   }
734 
735   int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const;
736   int UpdateCopyModeForMoonpie(int copy_mode) const;
737   int32_t FindMoonpieAddressForCopyMode(int copy_mode) const;
738 
739   void CopyBoilerplateAndAddMoonpie(int copy_mode);
740   void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode);
741 
742   static const char dictionary_without_spaces_[];
743   static const char target_without_spaces_[];
744   static const char weasel_text_without_spaces_[];
745   static const char moonpie_text_without_spaces_[];
746 
747   static const char* dictionary_;
748   static const char* target_;
749   static const char* weasel_text_;
750   static const char* moonpie_text_;
751 
752   const VCDiffEngine engine_;
753   size_t weasel_positions_[128];
754   size_t after_weasel_[128];
755   size_t moonpie_positions_[128];
756   size_t after_moonpie_[128];
757   int match_index_;
758   string search_dictionary_;
759   size_t copied_moonpie_address_;
760 };
761 
762 // Care is taken in the formulation of the dictionary
763 // to ensure that the surrounding letters do not match; for example,
764 // there are not two instances of the string "weasels".  Otherwise,
765 // the matching behavior would not be as predictable.
766 const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] =
767   "<html>\n"
768   "<head>\n"
769   "<meta content=\"text/html; charset=ISO-8859-1\"\n"
770   "http-equiv=\"content-type\">\n"
771   "<title>All about weasels</title>\n"
772   "</head>\n"
773   "<!-- You will notice that the word \"weasel\" may be replaced"
774   " with something else -->\n"
775   "<body>\n"
776   "<h1>All about the weasel: highly compressible HTML text</h1>"
777   "<ul>\n"
778   "<li>Don\'t look a gift weasel in its mouth.</li>\n"
779   "<li>This item makes sure the next occurrence is found.</li>\n"
780   "<li>Don\'t count your weasel, before it\'s hatched.</li>\n"
781   "</ul>\n"
782   "<br>\n"
783   "</body>\n"
784   "</html>\n";
785 
786 const char WeaselsToMoonpiesTest::target_without_spaces_[] =
787   "<html>\n"
788   "<head>\n"
789   "<meta content=\"text/html; charset=ISO-8859-1\"\n"
790   "http-equiv=\"content-type\">\n"
791   "<title>All about moon-pies</title>\n"
792   "</head>\n"
793   "<!-- You will notice that the word \"moon-pie\" may be replaced"
794   " with something else -->\n"
795   "<body>\n"
796   "<h1>All about the moon-pie: highly compressible HTML text</h1>"
797   "<ul>\n"
798   "<li>Don\'t look a gift moon-pie in its mouth.</li>\n"
799   "<li>This item makes sure the next occurrence is found.</li>\n"
800   "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n"
801   "</ul>\n"
802   "<br>\n"
803   "</body>\n"
804   "</html>\n";
805 
806 const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel";
807 const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie";
808 
809 const char* WeaselsToMoonpiesTest::dictionary_ = NULL;
810 const char* WeaselsToMoonpiesTest::target_ = NULL;
811 const char* WeaselsToMoonpiesTest::weasel_text_ = NULL;
812 const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL;
813 
FindBoilerplateAddressForCopyMode(int copy_mode) const814 int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode(
815     int copy_mode) const {
816   size_t copy_address = 0;
817   if (default_cache_.IsSelfMode(copy_mode)) {
818     copy_address = AfterLastWeasel();
819   } else if (default_cache_.IsNearMode(copy_mode)) {
820     copy_address = DistanceBetweenLastTwoWeasels();
821   } else if (default_cache_.IsSameMode(copy_mode)) {
822     copy_address = AfterLastWeasel() % 256;
823   }
824   return static_cast<int32_t>(copy_address);
825 }
826 
UpdateCopyModeForMoonpie(int copy_mode) const827 int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const {
828   if (copy_mode == default_cache_.FirstSameMode()) {
829     return default_cache_.FirstSameMode()
830         + static_cast<int>((copied_moonpie_address_ / 256) % 3);
831   } else {
832     return copy_mode;
833   }
834 }
835 
FindMoonpieAddressForCopyMode(int copy_mode) const836 int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode(
837     int copy_mode) const {
838   size_t copy_address = 0;
839   if (default_cache_.IsHereMode(copy_mode)) {
840     copy_address = DistanceFromLastMoonpie();
841   } else if (default_cache_.IsNearMode(copy_mode)) {
842     copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces;
843   } else if (default_cache_.IsSameMode(copy_mode)) {
844     copy_address = copied_moonpie_address_ % 256;
845   }
846   return static_cast<int32_t>(copy_address);
847 }
848 
849 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
850 // in the encoding.
CopyBoilerplateAndAddMoonpie(int copy_mode)851 void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) {
852   EXPECT_FALSE(NoMoreMoonpies());
853   ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
854   ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
855   ExpectAddInstructionForStringLength(moonpie_text_);
856   ExpectDataString(moonpie_text_);
857 }
858 
859 // Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
860 // in the encoding.  The "moon-pie" text will be copied from the previously
861 // encoded target.
CopyBoilerplateAndCopyMoonpie(int copy_mode,int moonpie_copy_mode)862 void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie(
863     int copy_mode,
864     int moonpie_copy_mode) {
865   EXPECT_FALSE(NoMoreMoonpies());
866   ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
867   ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
868   moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode);
869   ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode);
870   ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode),
871                 moonpie_copy_mode);
872   copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition();
873 }
874 
TEST_F(WeaselsToMoonpiesTest,EngineEncodeCompressibleNoTargetMatching)875 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) {
876   Encode(/* interleaved = */ true, /* target matching = */ false);
877   FindNextMoonpie(false);
878   // Expect all five "weasel"s to be replaced with "moon-pie"s
879   CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
880   FindNextMoonpie(false);
881   CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE);
882   FindNextMoonpie(false);
883   CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1);
884   FindNextMoonpie(false);
885   CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2);
886   FindNextMoonpie(false);
887   CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3);
888   FindNextMoonpie(false);
889   EXPECT_TRUE(NoMoreMoonpies());
890   ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
891                     default_cache_.FirstNearMode());
892   ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
893   VerifySizes();
894 }
895 
TEST_F(WeaselsToMoonpiesTest,EngineEncodeCompressibleWithTargetMatching)896 TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) {
897   Encode(/* interleaved = */ true, /* target matching = */ true);
898   // Expect all five "weasel"s to be replaced with "moon-pie"s.
899   // Every "moon-pie" after the first one should be copied from the
900   // previously encoded target text.
901   FindNextMoonpie(false);
902   CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
903   FindNextMoonpie(true);
904   CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE);
905   FindNextMoonpie(true);
906   CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
907                                 default_cache_.FirstSameMode());
908   FindNextMoonpie(true);
909   CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3,
910                                 VCD_HERE_MODE);
911   FindNextMoonpie(true);
912   CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
913                                 default_cache_.FirstSameMode());
914   FindNextMoonpie(true);
915   EXPECT_TRUE(NoMoreMoonpies());
916   ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
917                     default_cache_.FirstNearMode() + 3);
918   ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
919   VerifySizes();
920 }
921 
922 }  //  anonymous namespace
923 }  //  namespace open-vcdiff
924