• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2008 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #include <config.h>
17 #include "blockhash.h"
18 #include <limits.h>  // INT_MIN
19 #include <string.h>  // memcpy, memcmp, strlen
20 #include <iostream>
21 #include <memory>  // auto_ptr
22 #include "encodetable.h"
23 #include "rolling_hash.h"
24 #include "testing.h"
25 
26 namespace open_vcdiff {
27 
28 const int kBlockSize = BlockHash::kBlockSize;
29 
30 class BlockHashTest : public testing::Test {
31  protected:
32   static const int kTimingTestSize = 1 << 21;  // 2M
33   static const int kTimingTestIterations = 32;
34 
BlockHashTest()35   BlockHashTest() {
36     dh_.reset(BlockHash::CreateDictionaryHash(sample_text,
37                                               strlen(sample_text)));
38     th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0));
39     EXPECT_TRUE(dh_.get() != NULL);
40     EXPECT_TRUE(th_.get() != NULL);
41   }
42 
43   // BlockHashTest is a friend to BlockHash.  Expose the protected functions
44   // that will be tested by the children of BlockHashTest.
BlockContentsMatch(const char * block1,const char * block2)45   static bool BlockContentsMatch(const char* block1, const char* block2) {
46     return BlockHash::BlockContentsMatch(block1, block2);
47   }
48 
FirstMatchingBlock(const BlockHash & block_hash,uint32_t hash_value,const char * block_ptr) const49   int FirstMatchingBlock(const BlockHash& block_hash,
50                          uint32_t hash_value,
51                          const char* block_ptr) const {
52     return block_hash.FirstMatchingBlock(hash_value, block_ptr);
53   }
54 
NextMatchingBlock(const BlockHash & block_hash,int block_number,const char * block_ptr) const55   int NextMatchingBlock(const BlockHash& block_hash,
56                         int block_number,
57                         const char* block_ptr) const {
58     return block_hash.NextMatchingBlock(block_number, block_ptr);
59   }
60 
MatchingBytesToLeft(const char * source_match_start,const char * target_match_start,int max_bytes)61   static int MatchingBytesToLeft(const char* source_match_start,
62                                  const char* target_match_start,
63                                  int max_bytes) {
64     return BlockHash::MatchingBytesToLeft(source_match_start,
65                                           target_match_start,
66                                           max_bytes);
67   }
68 
MatchingBytesToRight(const char * source_match_end,const char * target_match_end,int max_bytes)69   static int MatchingBytesToRight(const char* source_match_end,
70                                   const char* target_match_end,
71                                   int max_bytes) {
72     return BlockHash::MatchingBytesToRight(source_match_end,
73                                            target_match_end,
74                                            max_bytes);
75   }
76 
StringLengthAsInt(const char * s)77   static int StringLengthAsInt(const char* s) {
78     return static_cast<int>(strlen(s));
79   }
80 
InitBlocksToDifferAtNthByte(int n)81   void InitBlocksToDifferAtNthByte(int n) {
82     CHECK(n < kBlockSize);
83     memset(compare_buffer_1_, 0xBE, kTimingTestSize);
84     memset(compare_buffer_2_, 0xBE, kTimingTestSize);
85     for (int index = n; index < kTimingTestSize; index += kBlockSize) {
86       compare_buffer_1_[index] = 0x00;
87       compare_buffer_2_[index] = 0x01;
88     }
89   }
90 
91   void TestAndPrintTimesForCompareFunctions(bool should_be_identical);
92 
TimingTestForBlocksThatDifferAtByte(int n)93   void TimingTestForBlocksThatDifferAtByte(int n) {
94     InitBlocksToDifferAtNthByte(n);
95     std::cout << "Comparing blocks that differ at byte " << n << std::endl;
96     TestAndPrintTimesForCompareFunctions(false);
97   }
98 
99   // Copy sample_text_without_spaces and search_string_without_spaces
100   // into newly allocated sample_text and search_string buffers,
101   // but pad them with space characters so that every character
102   // in sample_text_without_spaces matches (kBlockSize - 1)
103   // space characters in sample_text, followed by that character.
104   // For example:
105   // Since sample_text_without_spaces begins "The only thing"...,
106   // if kBlockSize is 4, then 3 space characters will be inserted
107   // between each letter of sample_text, as follows:
108   // "   T   h   e       o   n   l   y       t   h   i   n   g"...
109   // This makes testing simpler, because finding a kBlockSize-byte match
110   // between the sample text and search string only depends on the
111   // trailing letter in each block.
MakeEachLetterABlock(const char * string_without_spaces,const char ** result)112   static void MakeEachLetterABlock(const char* string_without_spaces,
113                                    const char** result) {
114     const size_t length_without_spaces = strlen(string_without_spaces);
115     char* padded_text = new char[(kBlockSize * length_without_spaces) + 1];
116     memset(padded_text, ' ', kBlockSize * length_without_spaces);
117     char* padded_text_ptr = padded_text + (kBlockSize - 1);
118     for (size_t i = 0; i < length_without_spaces; ++i) {
119       *padded_text_ptr = string_without_spaces[i];
120       padded_text_ptr += kBlockSize;
121     }
122     padded_text[kBlockSize * length_without_spaces] = '\0';
123     *result = padded_text;
124   }
125 
SetUpTestCase()126   static void SetUpTestCase() {
127     MakeEachLetterABlock(sample_text_without_spaces, &sample_text);
128     MakeEachLetterABlock(search_string_without_spaces, &search_string);
129     MakeEachLetterABlock(search_string_altered_without_spaces,
130                          &search_string_altered);
131     MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string);
132     MakeEachLetterABlock(search_to_beginning_without_spaces,
133                          &search_to_beginning_string);
134     MakeEachLetterABlock(sample_text_many_matches_without_spaces,
135                          &sample_text_many_matches);
136     MakeEachLetterABlock(search_string_many_matches_without_spaces,
137                          &search_string_many_matches);
138     MakeEachLetterABlock("y", &test_string_y);
139     MakeEachLetterABlock("e", &test_string_e);
140     char* new_test_string_unaligned_e = new char[kBlockSize];
141     memset(new_test_string_unaligned_e, ' ', kBlockSize);
142     new_test_string_unaligned_e[kBlockSize - 2] = 'e';
143     test_string_unaligned_e = new_test_string_unaligned_e;
144     char* new_test_string_all_Qs = new char[kBlockSize];
145     memset(new_test_string_all_Qs, 'Q', kBlockSize);
146     test_string_all_Qs = new_test_string_all_Qs;
147     hashed_y = RollingHash<kBlockSize>::Hash(test_string_y);
148     hashed_e = RollingHash<kBlockSize>::Hash(test_string_e);
149     hashed_f =
150         RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]);
151     hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e);
152     hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs);
153   }
154 
TearDownTestCase()155   static void TearDownTestCase() {
156     delete[] sample_text;
157     delete[] search_string;
158     delete[] search_string_altered;
159     delete[] search_to_end_string;
160     delete[] search_to_beginning_string;
161     delete[] sample_text_many_matches;
162     delete[] search_string_many_matches;
163     delete[] test_string_y;
164     delete[] test_string_e;
165     delete[] test_string_unaligned_e;
166     delete[] test_string_all_Qs;
167   }
168 
169   // Each block in the sample text and search string is kBlockSize bytes long,
170   // and consists of (kBlockSize - 1) space characters
171   // followed by a single letter of text.
172 
173   // Block numbers of certain characters within the sample text:
174   // All six occurrences of "e", in order.
175   static const int block_of_first_e = 2;
176   static const int block_of_second_e = 16;
177   static const int block_of_third_e = 21;
178   static const int block_of_fourth_e = 27;
179   static const int block_of_fifth_e = 35;
180   static const int block_of_sixth_e = 42;
181 
182   static const int block_of_y_in_only = 7;
183   // The block number is multiplied by kBlockSize to arrive at the
184   // index, which points to the (kBlockSize - 1) space characters before
185   // the letter specified.
186   // Indices of certain characters within the sample text.
187   static const int index_of_first_e = block_of_first_e * kBlockSize;
188   static const int index_of_fourth_e = block_of_fourth_e * kBlockSize;
189   static const int index_of_sixth_e = block_of_sixth_e * kBlockSize;
190   static const int index_of_y_in_only = block_of_y_in_only * kBlockSize;
191   static const int index_of_space_before_fear_is_fear = 25 * kBlockSize;
192   static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize;
193   static const int index_of_i_in_fear_is_fear = 31 * kBlockSize;
194   static const int index_of_space_before_fear_itself = 33 * kBlockSize;
195   static const int index_of_space_before_itself = 38 * kBlockSize;
196   static const int index_of_ababc = 4 * kBlockSize;
197 
198   // Indices of certain characters within the search strings.
199   static const int index_of_second_w_in_what_we = 5 * kBlockSize;
200   static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize;
201   static const int index_of_f_in_fearsome = 16 * kBlockSize;
202   static const int index_of_space_in_eat_itself =  12 * kBlockSize;
203   static const int index_of_i_in_itself = 13 * kBlockSize;
204   static const int index_of_t_in_use_the = 4 * kBlockSize;
205   static const int index_of_o_in_online = 8 * kBlockSize;
206 
207   static const char sample_text_without_spaces[];
208   static const char search_string_without_spaces[];
209   static const char search_string_altered_without_spaces[];
210   static const char search_to_end_without_spaces[];
211   static const char search_to_beginning_without_spaces[];
212   static const char sample_text_many_matches_without_spaces[];
213   static const char search_string_many_matches_without_spaces[];
214 
215   static const char* sample_text;
216   static const char* search_string;
217   static const char* search_string_altered;
218   static const char* search_to_end_string;
219   static const char* search_to_beginning_string;
220   static const char* sample_text_many_matches;
221   static const char* search_string_many_matches;
222 
223   static const char* test_string_y;
224   static const char* test_string_e;
225   static const char* test_string_all_Qs;
226   static const char* test_string_unaligned_e;
227 
228   static uint32_t hashed_y;
229   static uint32_t hashed_e;
230   static uint32_t hashed_f;
231   static uint32_t hashed_unaligned_e;
232   static uint32_t hashed_all_Qs;
233 
234   // Boost scoped_ptr, if available, could be used instead of std::auto_ptr.
235   std::auto_ptr<const BlockHash> dh_;  // hash table is populated at startup
236   std::auto_ptr<BlockHash> th_;  // hash table not populated;
237                                 // used to test incremental adds
238 
239   BlockHash::Match best_match_;
240   char* compare_buffer_1_;
241   char* compare_buffer_2_;
242   int prime_result_;
243 };
244 
245 #ifdef GTEST_HAS_DEATH_TEST
246 typedef BlockHashTest BlockHashDeathTest;
247 #endif  // GTEST_HAS_DEATH_TEST
248 
249 // The C++ standard requires a separate definition of these static const values,
250 // even though their initializers are given within the class definition.
251 const int BlockHashTest::block_of_first_e;
252 const int BlockHashTest::block_of_second_e;
253 const int BlockHashTest::block_of_third_e;
254 const int BlockHashTest::block_of_fourth_e;
255 const int BlockHashTest::block_of_fifth_e;
256 const int BlockHashTest::block_of_sixth_e;
257 const int BlockHashTest::block_of_y_in_only;
258 const int BlockHashTest::index_of_first_e;
259 const int BlockHashTest::index_of_fourth_e;
260 const int BlockHashTest::index_of_sixth_e;
261 const int BlockHashTest::index_of_y_in_only;
262 const int BlockHashTest::index_of_space_before_fear_is_fear;
263 const int BlockHashTest::index_of_longest_match_ear_is_fear;
264 const int BlockHashTest::index_of_i_in_fear_is_fear;
265 const int BlockHashTest::index_of_space_before_fear_itself;
266 const int BlockHashTest::index_of_space_before_itself;
267 const int BlockHashTest::index_of_ababc;
268 const int BlockHashTest::index_of_second_w_in_what_we;
269 const int BlockHashTest::index_of_second_e_in_what_we_hear;
270 const int BlockHashTest::index_of_f_in_fearsome;
271 const int BlockHashTest::index_of_space_in_eat_itself;
272 const int BlockHashTest::index_of_i_in_itself;
273 const int BlockHashTest::index_of_t_in_use_the;
274 const int BlockHashTest::index_of_o_in_online;
275 
276 const char BlockHashTest::sample_text_without_spaces[] =
277     "The only thing we have to fear is fear itself";
278 
279 const char BlockHashTest::search_string_without_spaces[] =
280     "What we hear is fearsome";
281 
282 const char BlockHashTest::search_string_altered_without_spaces[] =
283     "Vhat ve hear is fearsomm";
284 
285 const char BlockHashTest::search_to_end_without_spaces[] =
286     "Pop will eat itself, eventually";
287 
288 const char BlockHashTest::search_to_beginning_without_spaces[] =
289     "Use The online dictionary";
290 
291 const char BlockHashTest::sample_text_many_matches_without_spaces[] =
292     "ababababcab";
293 
294 const char BlockHashTest::search_string_many_matches_without_spaces[] =
295     "ababc";
296 
297 const char* BlockHashTest::sample_text = NULL;
298 const char* BlockHashTest::search_string = NULL;
299 const char* BlockHashTest::search_string_altered = NULL;
300 const char* BlockHashTest::search_to_end_string = NULL;
301 const char* BlockHashTest::search_to_beginning_string = NULL;
302 const char* BlockHashTest::sample_text_many_matches = NULL;
303 const char* BlockHashTest::search_string_many_matches = NULL;
304 
305 const char* BlockHashTest::test_string_y = NULL;
306 const char* BlockHashTest::test_string_e = NULL;
307 const char* BlockHashTest::test_string_unaligned_e = NULL;
308 const char* BlockHashTest::test_string_all_Qs = NULL;
309 
310 uint32_t BlockHashTest::hashed_y = 0;
311 uint32_t BlockHashTest::hashed_e = 0;
312 uint32_t BlockHashTest::hashed_f = 0;
313 uint32_t BlockHashTest::hashed_unaligned_e = 0;
314 uint32_t BlockHashTest::hashed_all_Qs = 0;
315 
TestAndPrintTimesForCompareFunctions(bool should_be_identical)316 void BlockHashTest::TestAndPrintTimesForCompareFunctions(
317     bool should_be_identical) {
318   CHECK(compare_buffer_1_ != NULL);
319   CHECK(compare_buffer_2_ != NULL);
320   // Prime the memory cache.
321   prime_result_ =
322       memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize);
323   const char* const block1_limit =
324       &compare_buffer_1_[kTimingTestSize - kBlockSize];
325   int block_compare_words_result = 0;
326   CycleTimer block_compare_words_timer;
327   block_compare_words_timer.Start();
328   for (int i = 0; i < kTimingTestIterations; ++i) {
329     const char* block1 = compare_buffer_1_;
330     const char* block2 = compare_buffer_2_;
331     while (block1 < block1_limit) {
332       if (!BlockHash::BlockCompareWords(block1, block2)) {
333         ++block_compare_words_result;
334       }
335       block1 += kBlockSize;
336       block2 += kBlockSize;
337     }
338   }
339   block_compare_words_timer.Stop();
340   double time_for_block_compare_words =
341       static_cast<double>(block_compare_words_timer.GetInUsec())
342       / ((kTimingTestSize / kBlockSize) * kTimingTestIterations);
343   int block_contents_match_result = 0;
344   CycleTimer block_contents_match_timer;
345   block_contents_match_timer.Start();
346   for (int i = 0; i < kTimingTestIterations; ++i) {
347     const char* block1 = compare_buffer_1_;
348     const char* block2 = compare_buffer_2_;
349     while (block1 < block1_limit) {
350       if (!BlockHash::BlockContentsMatch(block1, block2)) {
351         ++block_contents_match_result;
352       }
353       block1 += kBlockSize;
354       block2 += kBlockSize;
355     }
356   }
357   block_contents_match_timer.Stop();
358   double time_for_block_contents_match =
359       static_cast<double>(block_contents_match_timer.GetInUsec())
360       / ((kTimingTestSize / kBlockSize) * kTimingTestIterations);
361   EXPECT_EQ(block_contents_match_result, block_compare_words_result);
362   if (should_be_identical) {
363     CHECK_EQ(0, block_compare_words_result);
364   } else {
365     CHECK_GT(block_compare_words_result, 0);
366   }
367   std::cout << "BlockHash::BlockCompareWords: "
368             << time_for_block_compare_words << " us per operation" << std::endl;
369   std::cout << "BlockHash::BlockContentsMatch: "
370             << time_for_block_contents_match << " us per operation"
371             << std::endl;
372   if (time_for_block_compare_words > 0) {
373     double percent_change =
374         (((time_for_block_contents_match - time_for_block_compare_words)
375           / time_for_block_compare_words) * 100.0);
376     if (percent_change >= 0.0) {
377       std::cout << "BlockContentsMatch is " << percent_change << "%"
378                 << " SLOWER than BlockCompareWords" << std::endl;
379     } else {
380       std::cout << "BlockContentsMatch is " << (-percent_change) << "%"
381                 << " FASTER than BlockCompareWords" << std::endl;
382     }
383   }
384 #if defined(NDEBUG) && !defined(VCDIFF_USE_BLOCK_COMPARE_WORDS)
385   // Only check timings for optimized build.  There's plenty of margin: this
386   // check will fail only if BlockContentsMatch is at least twice as slow as
387   // BlockCompareWords.
388   EXPECT_GT(time_for_block_compare_words * 2.0, time_for_block_contents_match);
389 #endif  // NDEBUG && !VCDIFF_USE_BLOCK_COMPARE_WORDS
390 }
391 
392 // The two strings passed to BlockHash::MatchingBytesToLeft do have matching
393 // characters -- in fact, they're the same string -- but since max_bytes is zero
394 // or negative, BlockHash::MatchingBytesToLeft should not read from the strings
395 // and should return 0.
TEST_F(BlockHashTest,MaxBytesZeroDoesNothing)396 TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) {
397   EXPECT_EQ(0, MatchingBytesToLeft(
398       &search_string[index_of_f_in_fearsome],
399       &search_string[index_of_f_in_fearsome],
400       0));
401   EXPECT_EQ(0, MatchingBytesToRight(
402       &search_string[index_of_f_in_fearsome],
403       &search_string[index_of_f_in_fearsome],
404       0));
405 }
406 
TEST_F(BlockHashTest,MaxBytesNegativeDoesNothing)407 TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) {
408   EXPECT_EQ(0, MatchingBytesToLeft(
409       &search_string[index_of_f_in_fearsome],
410       &search_string[index_of_f_in_fearsome],
411       -1));
412   EXPECT_EQ(0, MatchingBytesToLeft(
413       &search_string[index_of_f_in_fearsome],
414       &search_string[index_of_f_in_fearsome],
415       INT_MIN));
416   EXPECT_EQ(0, MatchingBytesToRight(
417       &search_string[index_of_f_in_fearsome],
418       &search_string[index_of_f_in_fearsome],
419       -1));
420   EXPECT_EQ(0, MatchingBytesToRight(
421       &search_string[index_of_f_in_fearsome],
422       &search_string[index_of_f_in_fearsome],
423       INT_MIN));
424 }
425 
TEST_F(BlockHashTest,MaxBytesOneMatch)426 TEST_F(BlockHashTest, MaxBytesOneMatch) {
427   EXPECT_EQ(1, MatchingBytesToLeft(
428       &search_string[index_of_f_in_fearsome],
429       &search_string[index_of_f_in_fearsome],
430       1));
431   EXPECT_EQ(1, MatchingBytesToRight(
432       &search_string[index_of_f_in_fearsome],
433       &search_string[index_of_f_in_fearsome],
434       1));
435 }
436 
TEST_F(BlockHashTest,MaxBytesOneNoMatch)437 TEST_F(BlockHashTest, MaxBytesOneNoMatch) {
438   EXPECT_EQ(0, MatchingBytesToLeft(
439       &search_string[index_of_f_in_fearsome],
440       &search_string[index_of_second_e_in_what_we_hear],
441       1));
442   EXPECT_EQ(0, MatchingBytesToRight(
443       &search_string[index_of_f_in_fearsome],
444       &search_string[index_of_second_e_in_what_we_hear - 1],
445       1));
446 }
447 
TEST_F(BlockHashTest,LeftLimitedByMaxBytes)448 TEST_F(BlockHashTest, LeftLimitedByMaxBytes) {
449   // The number of bytes that match between the original "we hear is fearsome"
450   // and the altered "ve hear is fearsome".
451   const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
452   const int max_bytes = expected_length - 1;
453   EXPECT_EQ(max_bytes, MatchingBytesToLeft(
454       &search_string[index_of_f_in_fearsome],
455       &search_string_altered[index_of_f_in_fearsome],
456       max_bytes));
457 }
458 
TEST_F(BlockHashTest,LeftNotLimited)459 TEST_F(BlockHashTest, LeftNotLimited) {
460   // The number of bytes that match between the original "we hear is fearsome"
461   // and the altered "ve hear is fearsome".
462   const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
463   const int max_bytes = expected_length + 1;
464   EXPECT_EQ(expected_length, MatchingBytesToLeft(
465       &search_string[index_of_f_in_fearsome],
466       &search_string_altered[index_of_f_in_fearsome],
467       max_bytes));
468   EXPECT_EQ(expected_length, MatchingBytesToLeft(
469       &search_string[index_of_f_in_fearsome],
470       &search_string_altered[index_of_f_in_fearsome],
471       INT_MAX));
472 }
473 
TEST_F(BlockHashTest,RightLimitedByMaxBytes)474 TEST_F(BlockHashTest, RightLimitedByMaxBytes) {
475   // The number of bytes that match between the original "fearsome"
476   // and the altered "fearsomm".
477   const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
478                               + (kBlockSize - 1);  // spacing between letters
479   const int max_bytes = expected_length - 1;
480   EXPECT_EQ(max_bytes, MatchingBytesToRight(
481       &search_string[index_of_f_in_fearsome],
482       &search_string_altered[index_of_f_in_fearsome],
483       max_bytes));
484 }
485 
TEST_F(BlockHashTest,RightNotLimited)486 TEST_F(BlockHashTest, RightNotLimited) {
487   // The number of bytes that match between the original "we hear is fearsome"
488   // and the altered "ve hear is fearsome".
489   const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
490                               + (kBlockSize - 1);  // spacing between letters
491   const int max_bytes = expected_length + 1;
492   EXPECT_EQ(expected_length, MatchingBytesToRight(
493       &search_string[index_of_f_in_fearsome],
494       &search_string_altered[index_of_f_in_fearsome],
495       max_bytes));
496   EXPECT_EQ(expected_length, MatchingBytesToRight(
497       &search_string[index_of_f_in_fearsome],
498       &search_string_altered[index_of_f_in_fearsome],
499       INT_MAX));
500 }
501 
502 // If this test fails in a non-x86 or non-gcc environment, consider adding
503 // -DVCDIFF_USE_BLOCK_COMPARE_WORDS to AM_CXXFLAGS in Makefile.am and
504 // Makefile.in, and reconstructing the Makefile.  That will cause blockhash.cc
505 // to use a special implementation (BlockCompareWords) to compare blocks
506 // rather than using standard memcmp.
TEST_F(BlockHashTest,BlockContentsMatchIsAsFastAsBlockCompareWords)507 TEST_F(BlockHashTest, BlockContentsMatchIsAsFastAsBlockCompareWords) {
508   compare_buffer_1_ = new char[kTimingTestSize];
509   compare_buffer_2_ = new char[kTimingTestSize];
510 
511   // The value 0xBE is arbitrarily chosen.  First test with identical contents
512   // in the buffers, so that the comparison functions cannot short-circuit
513   // and will return true.
514   memset(compare_buffer_1_, 0xBE, kTimingTestSize);
515   memset(compare_buffer_2_, 0xBE, kTimingTestSize);
516   std::cout << "Comparing "
517             << (kTimingTestSize / kBlockSize) << " identical values:"
518             << std::endl;
519   TestAndPrintTimesForCompareFunctions(true);
520 
521   // Now change one value in the middle of one buffer, so that the contents
522   // are no longer the same.
523   compare_buffer_1_[kTimingTestSize / 2] = 0x00;
524   std::cout << "Comparing "
525             << ((kTimingTestSize / kBlockSize) - 1) << " identical values"
526             << " and one mismatch:" << std::endl;
527   TestAndPrintTimesForCompareFunctions(false);
528 
529   // Set one of the bytes of each block to differ so that
530   // none of the compare operations will return true, and run timing tests.
531   // In practice, BlockHash::BlockContentsMatch will only be called
532   // for two blocks whose hash values match, and the two most important
533   // cases are: (1) the blocks are identical, or (2) none of their bytes match.
534   TimingTestForBlocksThatDifferAtByte(0);
535   TimingTestForBlocksThatDifferAtByte(1);
536   TimingTestForBlocksThatDifferAtByte(kBlockSize / 2);
537   TimingTestForBlocksThatDifferAtByte(kBlockSize - 1);
538 
539   delete[] compare_buffer_1_;
540   delete[] compare_buffer_2_;
541 }
542 
TEST_F(BlockHashTest,FindFailsBeforeHashing)543 TEST_F(BlockHashTest, FindFailsBeforeHashing) {
544   EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
545 }
546 
TEST_F(BlockHashTest,HashOneFindOne)547 TEST_F(BlockHashTest, HashOneFindOne) {
548   for (int i = 0; i <= index_of_y_in_only; ++i) {
549     th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i]));
550   }
551   EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y,
552                                                    test_string_y));
553   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y));
554 }
555 
TEST_F(BlockHashTest,HashAllFindOne)556 TEST_F(BlockHashTest, HashAllFindOne) {
557   EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y,
558                                                    test_string_y));
559   EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y));
560 }
561 
TEST_F(BlockHashTest,NonMatchingTextNotFound)562 TEST_F(BlockHashTest, NonMatchingTextNotFound) {
563   EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, test_string_all_Qs));
564 }
565 
566 // Search for unaligned text.  The test string is contained in the
567 // sample text (unlike the non-matching string in NonMatchingTextNotFound,
568 // above), but it is not aligned on a block boundary.  FindMatchingBlock
569 // will only work if the test string is aligned on a block boundary.
570 //
571 //    "   T   h   e       o   n   l   y"
572 //              ^^^^ Here is the test string
573 //
TEST_F(BlockHashTest,UnalignedTextNotFound)574 TEST_F(BlockHashTest, UnalignedTextNotFound) {
575   EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e,
576                                    test_string_unaligned_e));
577 }
578 
TEST_F(BlockHashTest,FindSixMatches)579 TEST_F(BlockHashTest, FindSixMatches) {
580   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
581                                                  test_string_e));
582   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e,
583                                                  test_string_e));
584   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e,
585                                                 test_string_e));
586   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e,
587                                                  test_string_e));
588   EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e,
589                                                 test_string_e));
590   EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e,
591                                                 test_string_e));
592   EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e));
593 
594   // Starting over gives same result
595   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
596                                                  test_string_e));
597 }
598 
TEST_F(BlockHashTest,AddRangeFindThreeMatches)599 TEST_F(BlockHashTest, AddRangeFindThreeMatches) {
600   // Add hash values only for those characters before the fourth instance
601   // of "e" in the sample text.  Tests that the ending index
602   // of AddAllBlocksThroughIndex() is not inclusive: only three matches
603   // for "e" should be found.
604   th_->AddAllBlocksThroughIndex(index_of_fourth_e);
605   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
606                                                  test_string_e));
607   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
608                                                  test_string_e));
609   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
610                                                 test_string_e));
611   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));
612 
613   // Starting over gives same result
614   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
615                                                  test_string_e));
616 }
617 
618 // Try indices that are not even multiples of the block size.
619 // Add three ranges and verify the results after each
620 // call to AddAllBlocksThroughIndex().
TEST_F(BlockHashTest,AddRangeWithUnalignedIndices)621 TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) {
622   th_->AddAllBlocksThroughIndex(index_of_first_e + 1);
623   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
624                                                  test_string_e));
625   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e));
626 
627   // Starting over gives same result
628   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
629                                                  test_string_e));
630 
631   // Add the second range to expand the result set
632   th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3);
633   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
634                                                  test_string_e));
635   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
636                                                  test_string_e));
637   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
638                                                 test_string_e));
639   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));
640 
641   // Starting over gives same result
642   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
643                                                  test_string_e));
644 
645   // Add the third range to expand the result set
646   th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);
647 
648   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
649                                                  test_string_e));
650   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
651                                                  test_string_e));
652   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
653                                                 test_string_e));
654   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
655                                                  test_string_e));
656   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
657 
658   // Starting over gives same result
659   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
660                                                  test_string_e));
661 }
662 
663 #ifdef GTEST_HAS_DEATH_TEST
TEST_F(BlockHashDeathTest,AddingRangesInDescendingOrderNoEffect)664 TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) {
665   th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);
666 
667   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
668                                                  test_string_e));
669   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
670                                                  test_string_e));
671   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
672                                                 test_string_e));
673   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
674                                                  test_string_e));
675   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
676 
677   // Starting over gives same result
678   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
679                                                  test_string_e));
680 
681   // These calls will produce DFATAL error messages, and will do nothing,
682   // since the ranges have already been added.
683   EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3),
684                      "<");
685   EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1),
686                      "<");
687 
688   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
689                                                  test_string_e));
690   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
691                                                  test_string_e));
692   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
693                                                 test_string_e));
694   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
695                                                  test_string_e));
696   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
697 
698   // Starting over gives same result
699   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
700                                                  test_string_e));
701 }
702 #endif  // GTEST_HAS_DEATH_TEST
703 
TEST_F(BlockHashTest,AddEntireRangeFindSixMatches)704 TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) {
705   th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text));
706   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
707                                                  test_string_e));
708   EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
709                                                  test_string_e));
710   EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
711                                                 test_string_e));
712   EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
713                                                  test_string_e));
714   EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e,
715                                                 test_string_e));
716   EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e,
717                                                 test_string_e));
718   EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e));
719 
720   // Starting over gives same result
721   EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
722                                                  test_string_e));
723 }
724 
TEST_F(BlockHashTest,ZeroSizeSourceAccepted)725 TEST_F(BlockHashTest, ZeroSizeSourceAccepted) {
726   BlockHash zero_sized_hash(sample_text, 0, 0);
727   EXPECT_EQ(true, zero_sized_hash.Init(true));
728   EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
729 }
730 
731 #ifdef GTEST_HAS_DEATH_TEST
TEST_F(BlockHashDeathTest,BadNextMatchingBlockReturnsNoMatch)732 TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) {
733   EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 0xFFFFFFFE, "    ")),
734                      "invalid");
735 }
736 
TEST_F(BlockHashDeathTest,CallingInitTwiceIsIllegal)737 TEST_F(BlockHashDeathTest, CallingInitTwiceIsIllegal) {
738   BlockHash bh(sample_text, strlen(sample_text), 0);
739   EXPECT_TRUE(bh.Init(false));
740   EXPECT_DEBUG_DEATH(EXPECT_FALSE(bh.Init(false)), "twice");
741 }
742 
TEST_F(BlockHashDeathTest,CallingAddBlockBeforeInitIsIllegal)743 TEST_F(BlockHashDeathTest, CallingAddBlockBeforeInitIsIllegal) {
744   BlockHash bh(sample_text, strlen(sample_text), 0);
745   EXPECT_DEBUG_DEATH(bh.AddAllBlocksThroughIndex(index_of_first_e),
746                      "called before");
747 }
748 
TEST_F(BlockHashDeathTest,AddAllBlocksThroughIndexOutOfRange)749 TEST_F(BlockHashDeathTest, AddAllBlocksThroughIndexOutOfRange) {
750   EXPECT_DEBUG_DEATH(
751       th_->AddAllBlocksThroughIndex(static_cast<int>(strlen(sample_text) + 1)),
752       "higher than end");
753 }
754 #endif  // GTEST_HAS_DEATH_TEST
755 
TEST_F(BlockHashTest,UnknownFingerprintReturnsNoMatch)756 TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) {
757   EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA"));
758 }
759 
TEST_F(BlockHashTest,FindBestMatch)760 TEST_F(BlockHashTest, FindBestMatch) {
761   dh_->FindBestMatch(hashed_f,
762                      &search_string[index_of_f_in_fearsome],
763                      search_string,
764                      strlen(search_string),
765                      &best_match_);
766   EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset());
767   EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
768   // The match includes the spaces after the final character,
769   // which is why (kBlockSize - 1) is added to the expected best size.
770   EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
771             best_match_.size());
772 }
773 
TEST_F(BlockHashTest,FindBestMatchWithStartingOffset)774 TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) {
775   BlockHash th2(sample_text, strlen(sample_text), 0x10000);
776   th2.Init(true);  // hash all blocks
777   th2.FindBestMatch(hashed_f,
778                     &search_string[index_of_f_in_fearsome],
779                     search_string,
780                     strlen(search_string),
781                     &best_match_);
782   // Offset should begin with dictionary_size
783   EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear),
784             best_match_.source_offset());
785   EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
786   // The match includes the spaces after the final character,
787   // which is why (kBlockSize - 1) is added to the expected best size.
788   EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
789             best_match_.size());
790 }
791 
TEST_F(BlockHashTest,BestMatchReachesEndOfDictionary)792 TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) {
793   // Hash the "i" in "fear itself"
794   uint32_t hash_value = RollingHash<kBlockSize>::Hash(
795       &search_to_end_string[index_of_i_in_itself]);
796   dh_->FindBestMatch(hash_value,
797                      &search_to_end_string[index_of_i_in_itself],
798                      search_to_end_string,
799                      strlen(search_to_end_string),
800                      &best_match_);
801   EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset());
802   EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset());
803   EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size());
804 }
805 
TEST_F(BlockHashTest,BestMatchReachesStartOfDictionary)806 TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) {
807   // Hash the "i" in "fear itself"
808   uint32_t hash_value = RollingHash<kBlockSize>::Hash(
809       &search_to_beginning_string[index_of_o_in_online]);
810   dh_->FindBestMatch(hash_value,
811                      &search_to_beginning_string[index_of_o_in_online],
812                      search_to_beginning_string,
813                      strlen(search_to_beginning_string),
814                      &best_match_);
815   EXPECT_EQ(0, best_match_.source_offset());  // beginning of dictionary
816   EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset());
817   // The match includes the spaces after the final character,
818   // which is why (kBlockSize - 1) is added to the expected best size.
819   EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1),
820             best_match_.size());
821 }
822 
TEST_F(BlockHashTest,BestMatchWithManyMatches)823 TEST_F(BlockHashTest, BestMatchWithManyMatches) {
824   BlockHash many_matches_hash(sample_text_many_matches,
825                               strlen(sample_text_many_matches),
826                               0);
827   EXPECT_TRUE(many_matches_hash.Init(true));
828   // Hash the "   a" at the beginning of the search string "ababc"
829   uint32_t hash_value =
830       RollingHash<kBlockSize>::Hash(search_string_many_matches);
831   many_matches_hash.FindBestMatch(hash_value,
832                                   search_string_many_matches,
833                                   search_string_many_matches,
834                                   strlen(search_string_many_matches),
835                                   &best_match_);
836   EXPECT_EQ(index_of_ababc, best_match_.source_offset());
837   EXPECT_EQ(0, best_match_.target_offset());
838   EXPECT_EQ(strlen(search_string_many_matches), best_match_.size());
839 }
840 
TEST_F(BlockHashTest,HashCollisionFindsNoMatch)841 TEST_F(BlockHashTest, HashCollisionFindsNoMatch) {
842   char* collision_search_string = new char[strlen(search_string) + 1];
843   memcpy(collision_search_string, search_string, strlen(search_string) + 1);
844   char* fearsome_location = &collision_search_string[index_of_f_in_fearsome];
845 
846   // Tweak the collision string so that it has the same hash value
847   // but different text.  The last four characters of the search string
848   // should be "   f", and the bytes given below have the same hash value
849   // as those characters.
850   CHECK_GE(kBlockSize, 4);
851   fearsome_location[kBlockSize - 4] = 0x84;
852   fearsome_location[kBlockSize - 3] = 0xF1;
853   fearsome_location[kBlockSize - 2] = 0x51;
854   fearsome_location[kBlockSize - 1] = 0x00;
855   EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location));
856   EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome],
857                       fearsome_location,
858                       kBlockSize));
859   // No match should be found this time.
860   dh_->FindBestMatch(hashed_f,
861       fearsome_location,
862       collision_search_string,
863       strlen(search_string),  // since collision_search_string has embedded \0
864       &best_match_);
865   EXPECT_EQ(-1, best_match_.source_offset());
866   EXPECT_EQ(-1, best_match_.target_offset());
867   EXPECT_EQ(0U, best_match_.size());
868   delete[] collision_search_string;
869 }
870 
871 // If the footprint passed to FindBestMatch does not actually match
872 // the search string, it should not find any matches.
TEST_F(BlockHashTest,WrongFootprintFindsNoMatch)873 TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) {
874   dh_->FindBestMatch(hashed_e,  // Using hashed value of "e" instead of "f"!
875                      &search_string[index_of_f_in_fearsome],
876                      search_string,
877                      strlen(search_string),
878                      &best_match_);
879   EXPECT_EQ(-1, best_match_.source_offset());
880   EXPECT_EQ(-1, best_match_.target_offset());
881   EXPECT_EQ(0U, best_match_.size());
882 }
883 
884 // Use a dictionary containing 1M copies of the letter 'Q',
885 // and target data that also contains 1M Qs.  If FindBestMatch
886 // is not throttled to find a maximum number of matches, this
887 // will take a very long time -- several seconds at least.
888 // If this test appears to hang, it is because the throttling code
889 // (see BlockHash::kMaxMatchesToCheck for details) is not working.
TEST_F(BlockHashTest,SearchStringFindsTooManyMatches)890 TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) {
891   const int kTestSize = 1 << 20;  // 1M
892   char* huge_dictionary = new char[kTestSize];
893   memset(huge_dictionary, 'Q', kTestSize);
894   BlockHash huge_bh(huge_dictionary, kTestSize, 0);
895   EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true));
896   char* huge_target = new char[kTestSize];
897   memset(huge_target, 'Q', kTestSize);
898   CycleTimer timer;
899   timer.Start();
900   huge_bh.FindBestMatch(hashed_all_Qs,
901                         huge_target + (kTestSize / 2),  // middle of target
902                         huge_target,
903                         kTestSize,
904                         &best_match_);
905   timer.Stop();
906   double elapsed_time_in_us = static_cast<double>(timer.GetInUsec());
907   std::cout << "Time to search for best match with 1M matches: "
908             << elapsed_time_in_us << " us" << std::endl;
909   // All blocks match the candidate block.  FindBestMatch should have checked
910   // a certain number of matches before giving up.  The best match
911   // should include at least half the source and target, since the candidate
912   // block was in the middle of the target data.
913   EXPECT_GT((kTestSize / 2), best_match_.source_offset());
914   EXPECT_GT((kTestSize / 2), best_match_.target_offset());
915   EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size());
916   EXPECT_GT(5000000, elapsed_time_in_us);  // < 5 seconds
917 #ifdef NDEBUG
918   EXPECT_GT(1000000, elapsed_time_in_us);  // < 1 second
919 #endif  // NDEBUG
920   delete[] huge_target;
921   delete[] huge_dictionary;
922 }
923 
924 #ifdef GTEST_HAS_DEATH_TEST
TEST_F(BlockHashDeathTest,AddTooManyBlocks)925 TEST_F(BlockHashDeathTest, AddTooManyBlocks) {
926   for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) {
927     th_->AddOneIndexHash(i * kBlockSize, hashed_e);
928   }
929   // Didn't expect another block to be added
930   EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text),
931                                           hashed_e),
932                      "AddBlock");
933 }
934 #endif  // GTEST_HAS_DEATH_TEST
935 
936 }  //  namespace open_vcdiff
937