1 // Copyright 2008 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15
16 #include <config.h>
17 #include "blockhash.h"
18 #include <limits.h> // INT_MIN
19 #include <string.h> // memcpy, memcmp, strlen
20 #include <iostream>
21 #include <memory> // auto_ptr
22 #include "encodetable.h"
23 #include "rolling_hash.h"
24 #include "testing.h"
25
26 namespace open_vcdiff {
27
28 const int kBlockSize = BlockHash::kBlockSize;
29
30 class BlockHashTest : public testing::Test {
31 protected:
32 static const int kTimingTestSize = 1 << 21; // 2M
33 static const int kTimingTestIterations = 32;
34
BlockHashTest()35 BlockHashTest() {
36 dh_.reset(BlockHash::CreateDictionaryHash(sample_text,
37 strlen(sample_text)));
38 th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0));
39 EXPECT_TRUE(dh_.get() != NULL);
40 EXPECT_TRUE(th_.get() != NULL);
41 }
42
43 // BlockHashTest is a friend to BlockHash. Expose the protected functions
44 // that will be tested by the children of BlockHashTest.
BlockContentsMatch(const char * block1,const char * block2)45 static bool BlockContentsMatch(const char* block1, const char* block2) {
46 return BlockHash::BlockContentsMatch(block1, block2);
47 }
48
FirstMatchingBlock(const BlockHash & block_hash,uint32_t hash_value,const char * block_ptr) const49 int FirstMatchingBlock(const BlockHash& block_hash,
50 uint32_t hash_value,
51 const char* block_ptr) const {
52 return block_hash.FirstMatchingBlock(hash_value, block_ptr);
53 }
54
NextMatchingBlock(const BlockHash & block_hash,int block_number,const char * block_ptr) const55 int NextMatchingBlock(const BlockHash& block_hash,
56 int block_number,
57 const char* block_ptr) const {
58 return block_hash.NextMatchingBlock(block_number, block_ptr);
59 }
60
MatchingBytesToLeft(const char * source_match_start,const char * target_match_start,int max_bytes)61 static int MatchingBytesToLeft(const char* source_match_start,
62 const char* target_match_start,
63 int max_bytes) {
64 return BlockHash::MatchingBytesToLeft(source_match_start,
65 target_match_start,
66 max_bytes);
67 }
68
MatchingBytesToRight(const char * source_match_end,const char * target_match_end,int max_bytes)69 static int MatchingBytesToRight(const char* source_match_end,
70 const char* target_match_end,
71 int max_bytes) {
72 return BlockHash::MatchingBytesToRight(source_match_end,
73 target_match_end,
74 max_bytes);
75 }
76
StringLengthAsInt(const char * s)77 static int StringLengthAsInt(const char* s) {
78 return static_cast<int>(strlen(s));
79 }
80
InitBlocksToDifferAtNthByte(int n)81 void InitBlocksToDifferAtNthByte(int n) {
82 CHECK(n < kBlockSize);
83 memset(compare_buffer_1_, 0xBE, kTimingTestSize);
84 memset(compare_buffer_2_, 0xBE, kTimingTestSize);
85 for (int index = n; index < kTimingTestSize; index += kBlockSize) {
86 compare_buffer_1_[index] = 0x00;
87 compare_buffer_2_[index] = 0x01;
88 }
89 }
90
91 void TestAndPrintTimesForCompareFunctions(bool should_be_identical);
92
TimingTestForBlocksThatDifferAtByte(int n)93 void TimingTestForBlocksThatDifferAtByte(int n) {
94 InitBlocksToDifferAtNthByte(n);
95 std::cout << "Comparing blocks that differ at byte " << n << std::endl;
96 TestAndPrintTimesForCompareFunctions(false);
97 }
98
99 // Copy sample_text_without_spaces and search_string_without_spaces
100 // into newly allocated sample_text and search_string buffers,
101 // but pad them with space characters so that every character
102 // in sample_text_without_spaces matches (kBlockSize - 1)
103 // space characters in sample_text, followed by that character.
104 // For example:
105 // Since sample_text_without_spaces begins "The only thing"...,
106 // if kBlockSize is 4, then 3 space characters will be inserted
107 // between each letter of sample_text, as follows:
108 // " T h e o n l y t h i n g"...
109 // This makes testing simpler, because finding a kBlockSize-byte match
110 // between the sample text and search string only depends on the
111 // trailing letter in each block.
MakeEachLetterABlock(const char * string_without_spaces,const char ** result)112 static void MakeEachLetterABlock(const char* string_without_spaces,
113 const char** result) {
114 const size_t length_without_spaces = strlen(string_without_spaces);
115 char* padded_text = new char[(kBlockSize * length_without_spaces) + 1];
116 memset(padded_text, ' ', kBlockSize * length_without_spaces);
117 char* padded_text_ptr = padded_text + (kBlockSize - 1);
118 for (size_t i = 0; i < length_without_spaces; ++i) {
119 *padded_text_ptr = string_without_spaces[i];
120 padded_text_ptr += kBlockSize;
121 }
122 padded_text[kBlockSize * length_without_spaces] = '\0';
123 *result = padded_text;
124 }
125
SetUpTestCase()126 static void SetUpTestCase() {
127 MakeEachLetterABlock(sample_text_without_spaces, &sample_text);
128 MakeEachLetterABlock(search_string_without_spaces, &search_string);
129 MakeEachLetterABlock(search_string_altered_without_spaces,
130 &search_string_altered);
131 MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string);
132 MakeEachLetterABlock(search_to_beginning_without_spaces,
133 &search_to_beginning_string);
134 MakeEachLetterABlock(sample_text_many_matches_without_spaces,
135 &sample_text_many_matches);
136 MakeEachLetterABlock(search_string_many_matches_without_spaces,
137 &search_string_many_matches);
138 MakeEachLetterABlock("y", &test_string_y);
139 MakeEachLetterABlock("e", &test_string_e);
140 char* new_test_string_unaligned_e = new char[kBlockSize];
141 memset(new_test_string_unaligned_e, ' ', kBlockSize);
142 new_test_string_unaligned_e[kBlockSize - 2] = 'e';
143 test_string_unaligned_e = new_test_string_unaligned_e;
144 char* new_test_string_all_Qs = new char[kBlockSize];
145 memset(new_test_string_all_Qs, 'Q', kBlockSize);
146 test_string_all_Qs = new_test_string_all_Qs;
147 hashed_y = RollingHash<kBlockSize>::Hash(test_string_y);
148 hashed_e = RollingHash<kBlockSize>::Hash(test_string_e);
149 hashed_f =
150 RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]);
151 hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e);
152 hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs);
153 }
154
TearDownTestCase()155 static void TearDownTestCase() {
156 delete[] sample_text;
157 delete[] search_string;
158 delete[] search_string_altered;
159 delete[] search_to_end_string;
160 delete[] search_to_beginning_string;
161 delete[] sample_text_many_matches;
162 delete[] search_string_many_matches;
163 delete[] test_string_y;
164 delete[] test_string_e;
165 delete[] test_string_unaligned_e;
166 delete[] test_string_all_Qs;
167 }
168
169 // Each block in the sample text and search string is kBlockSize bytes long,
170 // and consists of (kBlockSize - 1) space characters
171 // followed by a single letter of text.
172
173 // Block numbers of certain characters within the sample text:
174 // All six occurrences of "e", in order.
175 static const int block_of_first_e = 2;
176 static const int block_of_second_e = 16;
177 static const int block_of_third_e = 21;
178 static const int block_of_fourth_e = 27;
179 static const int block_of_fifth_e = 35;
180 static const int block_of_sixth_e = 42;
181
182 static const int block_of_y_in_only = 7;
183 // The block number is multiplied by kBlockSize to arrive at the
184 // index, which points to the (kBlockSize - 1) space characters before
185 // the letter specified.
186 // Indices of certain characters within the sample text.
187 static const int index_of_first_e = block_of_first_e * kBlockSize;
188 static const int index_of_fourth_e = block_of_fourth_e * kBlockSize;
189 static const int index_of_sixth_e = block_of_sixth_e * kBlockSize;
190 static const int index_of_y_in_only = block_of_y_in_only * kBlockSize;
191 static const int index_of_space_before_fear_is_fear = 25 * kBlockSize;
192 static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize;
193 static const int index_of_i_in_fear_is_fear = 31 * kBlockSize;
194 static const int index_of_space_before_fear_itself = 33 * kBlockSize;
195 static const int index_of_space_before_itself = 38 * kBlockSize;
196 static const int index_of_ababc = 4 * kBlockSize;
197
198 // Indices of certain characters within the search strings.
199 static const int index_of_second_w_in_what_we = 5 * kBlockSize;
200 static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize;
201 static const int index_of_f_in_fearsome = 16 * kBlockSize;
202 static const int index_of_space_in_eat_itself = 12 * kBlockSize;
203 static const int index_of_i_in_itself = 13 * kBlockSize;
204 static const int index_of_t_in_use_the = 4 * kBlockSize;
205 static const int index_of_o_in_online = 8 * kBlockSize;
206
207 static const char sample_text_without_spaces[];
208 static const char search_string_without_spaces[];
209 static const char search_string_altered_without_spaces[];
210 static const char search_to_end_without_spaces[];
211 static const char search_to_beginning_without_spaces[];
212 static const char sample_text_many_matches_without_spaces[];
213 static const char search_string_many_matches_without_spaces[];
214
215 static const char* sample_text;
216 static const char* search_string;
217 static const char* search_string_altered;
218 static const char* search_to_end_string;
219 static const char* search_to_beginning_string;
220 static const char* sample_text_many_matches;
221 static const char* search_string_many_matches;
222
223 static const char* test_string_y;
224 static const char* test_string_e;
225 static const char* test_string_all_Qs;
226 static const char* test_string_unaligned_e;
227
228 static uint32_t hashed_y;
229 static uint32_t hashed_e;
230 static uint32_t hashed_f;
231 static uint32_t hashed_unaligned_e;
232 static uint32_t hashed_all_Qs;
233
234 // Boost scoped_ptr, if available, could be used instead of std::auto_ptr.
235 std::auto_ptr<const BlockHash> dh_; // hash table is populated at startup
236 std::auto_ptr<BlockHash> th_; // hash table not populated;
237 // used to test incremental adds
238
239 BlockHash::Match best_match_;
240 char* compare_buffer_1_;
241 char* compare_buffer_2_;
242 int prime_result_;
243 };
244
245 #ifdef GTEST_HAS_DEATH_TEST
246 typedef BlockHashTest BlockHashDeathTest;
247 #endif // GTEST_HAS_DEATH_TEST
248
249 // The C++ standard requires a separate definition of these static const values,
250 // even though their initializers are given within the class definition.
251 const int BlockHashTest::block_of_first_e;
252 const int BlockHashTest::block_of_second_e;
253 const int BlockHashTest::block_of_third_e;
254 const int BlockHashTest::block_of_fourth_e;
255 const int BlockHashTest::block_of_fifth_e;
256 const int BlockHashTest::block_of_sixth_e;
257 const int BlockHashTest::block_of_y_in_only;
258 const int BlockHashTest::index_of_first_e;
259 const int BlockHashTest::index_of_fourth_e;
260 const int BlockHashTest::index_of_sixth_e;
261 const int BlockHashTest::index_of_y_in_only;
262 const int BlockHashTest::index_of_space_before_fear_is_fear;
263 const int BlockHashTest::index_of_longest_match_ear_is_fear;
264 const int BlockHashTest::index_of_i_in_fear_is_fear;
265 const int BlockHashTest::index_of_space_before_fear_itself;
266 const int BlockHashTest::index_of_space_before_itself;
267 const int BlockHashTest::index_of_ababc;
268 const int BlockHashTest::index_of_second_w_in_what_we;
269 const int BlockHashTest::index_of_second_e_in_what_we_hear;
270 const int BlockHashTest::index_of_f_in_fearsome;
271 const int BlockHashTest::index_of_space_in_eat_itself;
272 const int BlockHashTest::index_of_i_in_itself;
273 const int BlockHashTest::index_of_t_in_use_the;
274 const int BlockHashTest::index_of_o_in_online;
275
276 const char BlockHashTest::sample_text_without_spaces[] =
277 "The only thing we have to fear is fear itself";
278
279 const char BlockHashTest::search_string_without_spaces[] =
280 "What we hear is fearsome";
281
282 const char BlockHashTest::search_string_altered_without_spaces[] =
283 "Vhat ve hear is fearsomm";
284
285 const char BlockHashTest::search_to_end_without_spaces[] =
286 "Pop will eat itself, eventually";
287
288 const char BlockHashTest::search_to_beginning_without_spaces[] =
289 "Use The online dictionary";
290
291 const char BlockHashTest::sample_text_many_matches_without_spaces[] =
292 "ababababcab";
293
294 const char BlockHashTest::search_string_many_matches_without_spaces[] =
295 "ababc";
296
297 const char* BlockHashTest::sample_text = NULL;
298 const char* BlockHashTest::search_string = NULL;
299 const char* BlockHashTest::search_string_altered = NULL;
300 const char* BlockHashTest::search_to_end_string = NULL;
301 const char* BlockHashTest::search_to_beginning_string = NULL;
302 const char* BlockHashTest::sample_text_many_matches = NULL;
303 const char* BlockHashTest::search_string_many_matches = NULL;
304
305 const char* BlockHashTest::test_string_y = NULL;
306 const char* BlockHashTest::test_string_e = NULL;
307 const char* BlockHashTest::test_string_unaligned_e = NULL;
308 const char* BlockHashTest::test_string_all_Qs = NULL;
309
310 uint32_t BlockHashTest::hashed_y = 0;
311 uint32_t BlockHashTest::hashed_e = 0;
312 uint32_t BlockHashTest::hashed_f = 0;
313 uint32_t BlockHashTest::hashed_unaligned_e = 0;
314 uint32_t BlockHashTest::hashed_all_Qs = 0;
315
TestAndPrintTimesForCompareFunctions(bool should_be_identical)316 void BlockHashTest::TestAndPrintTimesForCompareFunctions(
317 bool should_be_identical) {
318 CHECK(compare_buffer_1_ != NULL);
319 CHECK(compare_buffer_2_ != NULL);
320 // Prime the memory cache.
321 prime_result_ =
322 memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize);
323 const char* const block1_limit =
324 &compare_buffer_1_[kTimingTestSize - kBlockSize];
325 int block_compare_words_result = 0;
326 CycleTimer block_compare_words_timer;
327 block_compare_words_timer.Start();
328 for (int i = 0; i < kTimingTestIterations; ++i) {
329 const char* block1 = compare_buffer_1_;
330 const char* block2 = compare_buffer_2_;
331 while (block1 < block1_limit) {
332 if (!BlockHash::BlockCompareWords(block1, block2)) {
333 ++block_compare_words_result;
334 }
335 block1 += kBlockSize;
336 block2 += kBlockSize;
337 }
338 }
339 block_compare_words_timer.Stop();
340 double time_for_block_compare_words =
341 static_cast<double>(block_compare_words_timer.GetInUsec())
342 / ((kTimingTestSize / kBlockSize) * kTimingTestIterations);
343 int block_contents_match_result = 0;
344 CycleTimer block_contents_match_timer;
345 block_contents_match_timer.Start();
346 for (int i = 0; i < kTimingTestIterations; ++i) {
347 const char* block1 = compare_buffer_1_;
348 const char* block2 = compare_buffer_2_;
349 while (block1 < block1_limit) {
350 if (!BlockHash::BlockContentsMatch(block1, block2)) {
351 ++block_contents_match_result;
352 }
353 block1 += kBlockSize;
354 block2 += kBlockSize;
355 }
356 }
357 block_contents_match_timer.Stop();
358 double time_for_block_contents_match =
359 static_cast<double>(block_contents_match_timer.GetInUsec())
360 / ((kTimingTestSize / kBlockSize) * kTimingTestIterations);
361 EXPECT_EQ(block_contents_match_result, block_compare_words_result);
362 if (should_be_identical) {
363 CHECK_EQ(0, block_compare_words_result);
364 } else {
365 CHECK_GT(block_compare_words_result, 0);
366 }
367 std::cout << "BlockHash::BlockCompareWords: "
368 << time_for_block_compare_words << " us per operation" << std::endl;
369 std::cout << "BlockHash::BlockContentsMatch: "
370 << time_for_block_contents_match << " us per operation"
371 << std::endl;
372 if (time_for_block_compare_words > 0) {
373 double percent_change =
374 (((time_for_block_contents_match - time_for_block_compare_words)
375 / time_for_block_compare_words) * 100.0);
376 if (percent_change >= 0.0) {
377 std::cout << "BlockContentsMatch is " << percent_change << "%"
378 << " SLOWER than BlockCompareWords" << std::endl;
379 } else {
380 std::cout << "BlockContentsMatch is " << (-percent_change) << "%"
381 << " FASTER than BlockCompareWords" << std::endl;
382 }
383 }
384 #if defined(NDEBUG) && !defined(VCDIFF_USE_BLOCK_COMPARE_WORDS)
385 // Only check timings for optimized build. There's plenty of margin: this
386 // check will fail only if BlockContentsMatch is at least twice as slow as
387 // BlockCompareWords.
388 EXPECT_GT(time_for_block_compare_words * 2.0, time_for_block_contents_match);
389 #endif // NDEBUG && !VCDIFF_USE_BLOCK_COMPARE_WORDS
390 }
391
392 // The two strings passed to BlockHash::MatchingBytesToLeft do have matching
393 // characters -- in fact, they're the same string -- but since max_bytes is zero
394 // or negative, BlockHash::MatchingBytesToLeft should not read from the strings
395 // and should return 0.
TEST_F(BlockHashTest,MaxBytesZeroDoesNothing)396 TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) {
397 EXPECT_EQ(0, MatchingBytesToLeft(
398 &search_string[index_of_f_in_fearsome],
399 &search_string[index_of_f_in_fearsome],
400 0));
401 EXPECT_EQ(0, MatchingBytesToRight(
402 &search_string[index_of_f_in_fearsome],
403 &search_string[index_of_f_in_fearsome],
404 0));
405 }
406
TEST_F(BlockHashTest,MaxBytesNegativeDoesNothing)407 TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) {
408 EXPECT_EQ(0, MatchingBytesToLeft(
409 &search_string[index_of_f_in_fearsome],
410 &search_string[index_of_f_in_fearsome],
411 -1));
412 EXPECT_EQ(0, MatchingBytesToLeft(
413 &search_string[index_of_f_in_fearsome],
414 &search_string[index_of_f_in_fearsome],
415 INT_MIN));
416 EXPECT_EQ(0, MatchingBytesToRight(
417 &search_string[index_of_f_in_fearsome],
418 &search_string[index_of_f_in_fearsome],
419 -1));
420 EXPECT_EQ(0, MatchingBytesToRight(
421 &search_string[index_of_f_in_fearsome],
422 &search_string[index_of_f_in_fearsome],
423 INT_MIN));
424 }
425
TEST_F(BlockHashTest,MaxBytesOneMatch)426 TEST_F(BlockHashTest, MaxBytesOneMatch) {
427 EXPECT_EQ(1, MatchingBytesToLeft(
428 &search_string[index_of_f_in_fearsome],
429 &search_string[index_of_f_in_fearsome],
430 1));
431 EXPECT_EQ(1, MatchingBytesToRight(
432 &search_string[index_of_f_in_fearsome],
433 &search_string[index_of_f_in_fearsome],
434 1));
435 }
436
TEST_F(BlockHashTest,MaxBytesOneNoMatch)437 TEST_F(BlockHashTest, MaxBytesOneNoMatch) {
438 EXPECT_EQ(0, MatchingBytesToLeft(
439 &search_string[index_of_f_in_fearsome],
440 &search_string[index_of_second_e_in_what_we_hear],
441 1));
442 EXPECT_EQ(0, MatchingBytesToRight(
443 &search_string[index_of_f_in_fearsome],
444 &search_string[index_of_second_e_in_what_we_hear - 1],
445 1));
446 }
447
TEST_F(BlockHashTest,LeftLimitedByMaxBytes)448 TEST_F(BlockHashTest, LeftLimitedByMaxBytes) {
449 // The number of bytes that match between the original "we hear is fearsome"
450 // and the altered "ve hear is fearsome".
451 const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
452 const int max_bytes = expected_length - 1;
453 EXPECT_EQ(max_bytes, MatchingBytesToLeft(
454 &search_string[index_of_f_in_fearsome],
455 &search_string_altered[index_of_f_in_fearsome],
456 max_bytes));
457 }
458
TEST_F(BlockHashTest,LeftNotLimited)459 TEST_F(BlockHashTest, LeftNotLimited) {
460 // The number of bytes that match between the original "we hear is fearsome"
461 // and the altered "ve hear is fearsome".
462 const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
463 const int max_bytes = expected_length + 1;
464 EXPECT_EQ(expected_length, MatchingBytesToLeft(
465 &search_string[index_of_f_in_fearsome],
466 &search_string_altered[index_of_f_in_fearsome],
467 max_bytes));
468 EXPECT_EQ(expected_length, MatchingBytesToLeft(
469 &search_string[index_of_f_in_fearsome],
470 &search_string_altered[index_of_f_in_fearsome],
471 INT_MAX));
472 }
473
TEST_F(BlockHashTest,RightLimitedByMaxBytes)474 TEST_F(BlockHashTest, RightLimitedByMaxBytes) {
475 // The number of bytes that match between the original "fearsome"
476 // and the altered "fearsomm".
477 const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
478 + (kBlockSize - 1); // spacing between letters
479 const int max_bytes = expected_length - 1;
480 EXPECT_EQ(max_bytes, MatchingBytesToRight(
481 &search_string[index_of_f_in_fearsome],
482 &search_string_altered[index_of_f_in_fearsome],
483 max_bytes));
484 }
485
TEST_F(BlockHashTest,RightNotLimited)486 TEST_F(BlockHashTest, RightNotLimited) {
487 // The number of bytes that match between the original "we hear is fearsome"
488 // and the altered "ve hear is fearsome".
489 const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
490 + (kBlockSize - 1); // spacing between letters
491 const int max_bytes = expected_length + 1;
492 EXPECT_EQ(expected_length, MatchingBytesToRight(
493 &search_string[index_of_f_in_fearsome],
494 &search_string_altered[index_of_f_in_fearsome],
495 max_bytes));
496 EXPECT_EQ(expected_length, MatchingBytesToRight(
497 &search_string[index_of_f_in_fearsome],
498 &search_string_altered[index_of_f_in_fearsome],
499 INT_MAX));
500 }
501
502 // If this test fails in a non-x86 or non-gcc environment, consider adding
503 // -DVCDIFF_USE_BLOCK_COMPARE_WORDS to AM_CXXFLAGS in Makefile.am and
504 // Makefile.in, and reconstructing the Makefile. That will cause blockhash.cc
505 // to use a special implementation (BlockCompareWords) to compare blocks
506 // rather than using standard memcmp.
TEST_F(BlockHashTest,BlockContentsMatchIsAsFastAsBlockCompareWords)507 TEST_F(BlockHashTest, BlockContentsMatchIsAsFastAsBlockCompareWords) {
508 compare_buffer_1_ = new char[kTimingTestSize];
509 compare_buffer_2_ = new char[kTimingTestSize];
510
511 // The value 0xBE is arbitrarily chosen. First test with identical contents
512 // in the buffers, so that the comparison functions cannot short-circuit
513 // and will return true.
514 memset(compare_buffer_1_, 0xBE, kTimingTestSize);
515 memset(compare_buffer_2_, 0xBE, kTimingTestSize);
516 std::cout << "Comparing "
517 << (kTimingTestSize / kBlockSize) << " identical values:"
518 << std::endl;
519 TestAndPrintTimesForCompareFunctions(true);
520
521 // Now change one value in the middle of one buffer, so that the contents
522 // are no longer the same.
523 compare_buffer_1_[kTimingTestSize / 2] = 0x00;
524 std::cout << "Comparing "
525 << ((kTimingTestSize / kBlockSize) - 1) << " identical values"
526 << " and one mismatch:" << std::endl;
527 TestAndPrintTimesForCompareFunctions(false);
528
529 // Set one of the bytes of each block to differ so that
530 // none of the compare operations will return true, and run timing tests.
531 // In practice, BlockHash::BlockContentsMatch will only be called
532 // for two blocks whose hash values match, and the two most important
533 // cases are: (1) the blocks are identical, or (2) none of their bytes match.
534 TimingTestForBlocksThatDifferAtByte(0);
535 TimingTestForBlocksThatDifferAtByte(1);
536 TimingTestForBlocksThatDifferAtByte(kBlockSize / 2);
537 TimingTestForBlocksThatDifferAtByte(kBlockSize - 1);
538
539 delete[] compare_buffer_1_;
540 delete[] compare_buffer_2_;
541 }
542
TEST_F(BlockHashTest,FindFailsBeforeHashing)543 TEST_F(BlockHashTest, FindFailsBeforeHashing) {
544 EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
545 }
546
TEST_F(BlockHashTest,HashOneFindOne)547 TEST_F(BlockHashTest, HashOneFindOne) {
548 for (int i = 0; i <= index_of_y_in_only; ++i) {
549 th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i]));
550 }
551 EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y,
552 test_string_y));
553 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y));
554 }
555
TEST_F(BlockHashTest,HashAllFindOne)556 TEST_F(BlockHashTest, HashAllFindOne) {
557 EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y,
558 test_string_y));
559 EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y));
560 }
561
TEST_F(BlockHashTest,NonMatchingTextNotFound)562 TEST_F(BlockHashTest, NonMatchingTextNotFound) {
563 EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, test_string_all_Qs));
564 }
565
566 // Search for unaligned text. The test string is contained in the
567 // sample text (unlike the non-matching string in NonMatchingTextNotFound,
568 // above), but it is not aligned on a block boundary. FindMatchingBlock
569 // will only work if the test string is aligned on a block boundary.
570 //
571 // " T h e o n l y"
572 // ^^^^ Here is the test string
573 //
TEST_F(BlockHashTest,UnalignedTextNotFound)574 TEST_F(BlockHashTest, UnalignedTextNotFound) {
575 EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e,
576 test_string_unaligned_e));
577 }
578
TEST_F(BlockHashTest,FindSixMatches)579 TEST_F(BlockHashTest, FindSixMatches) {
580 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
581 test_string_e));
582 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e,
583 test_string_e));
584 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e,
585 test_string_e));
586 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e,
587 test_string_e));
588 EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e,
589 test_string_e));
590 EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e,
591 test_string_e));
592 EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e));
593
594 // Starting over gives same result
595 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
596 test_string_e));
597 }
598
TEST_F(BlockHashTest,AddRangeFindThreeMatches)599 TEST_F(BlockHashTest, AddRangeFindThreeMatches) {
600 // Add hash values only for those characters before the fourth instance
601 // of "e" in the sample text. Tests that the ending index
602 // of AddAllBlocksThroughIndex() is not inclusive: only three matches
603 // for "e" should be found.
604 th_->AddAllBlocksThroughIndex(index_of_fourth_e);
605 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
606 test_string_e));
607 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
608 test_string_e));
609 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
610 test_string_e));
611 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));
612
613 // Starting over gives same result
614 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
615 test_string_e));
616 }
617
618 // Try indices that are not even multiples of the block size.
619 // Add three ranges and verify the results after each
620 // call to AddAllBlocksThroughIndex().
TEST_F(BlockHashTest,AddRangeWithUnalignedIndices)621 TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) {
622 th_->AddAllBlocksThroughIndex(index_of_first_e + 1);
623 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
624 test_string_e));
625 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e));
626
627 // Starting over gives same result
628 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
629 test_string_e));
630
631 // Add the second range to expand the result set
632 th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3);
633 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
634 test_string_e));
635 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
636 test_string_e));
637 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
638 test_string_e));
639 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));
640
641 // Starting over gives same result
642 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
643 test_string_e));
644
645 // Add the third range to expand the result set
646 th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);
647
648 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
649 test_string_e));
650 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
651 test_string_e));
652 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
653 test_string_e));
654 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
655 test_string_e));
656 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
657
658 // Starting over gives same result
659 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
660 test_string_e));
661 }
662
663 #ifdef GTEST_HAS_DEATH_TEST
TEST_F(BlockHashDeathTest,AddingRangesInDescendingOrderNoEffect)664 TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) {
665 th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);
666
667 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
668 test_string_e));
669 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
670 test_string_e));
671 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
672 test_string_e));
673 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
674 test_string_e));
675 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
676
677 // Starting over gives same result
678 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
679 test_string_e));
680
681 // These calls will produce DFATAL error messages, and will do nothing,
682 // since the ranges have already been added.
683 EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3),
684 "<");
685 EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1),
686 "<");
687
688 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
689 test_string_e));
690 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
691 test_string_e));
692 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
693 test_string_e));
694 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
695 test_string_e));
696 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
697
698 // Starting over gives same result
699 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
700 test_string_e));
701 }
702 #endif // GTEST_HAS_DEATH_TEST
703
TEST_F(BlockHashTest,AddEntireRangeFindSixMatches)704 TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) {
705 th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text));
706 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
707 test_string_e));
708 EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
709 test_string_e));
710 EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
711 test_string_e));
712 EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
713 test_string_e));
714 EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e,
715 test_string_e));
716 EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e,
717 test_string_e));
718 EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e));
719
720 // Starting over gives same result
721 EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
722 test_string_e));
723 }
724
TEST_F(BlockHashTest,ZeroSizeSourceAccepted)725 TEST_F(BlockHashTest, ZeroSizeSourceAccepted) {
726 BlockHash zero_sized_hash(sample_text, 0, 0);
727 EXPECT_EQ(true, zero_sized_hash.Init(true));
728 EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
729 }
730
731 #ifdef GTEST_HAS_DEATH_TEST
TEST_F(BlockHashDeathTest,BadNextMatchingBlockReturnsNoMatch)732 TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) {
733 EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 0xFFFFFFFE, " ")),
734 "invalid");
735 }
736
TEST_F(BlockHashDeathTest,CallingInitTwiceIsIllegal)737 TEST_F(BlockHashDeathTest, CallingInitTwiceIsIllegal) {
738 BlockHash bh(sample_text, strlen(sample_text), 0);
739 EXPECT_TRUE(bh.Init(false));
740 EXPECT_DEBUG_DEATH(EXPECT_FALSE(bh.Init(false)), "twice");
741 }
742
TEST_F(BlockHashDeathTest,CallingAddBlockBeforeInitIsIllegal)743 TEST_F(BlockHashDeathTest, CallingAddBlockBeforeInitIsIllegal) {
744 BlockHash bh(sample_text, strlen(sample_text), 0);
745 EXPECT_DEBUG_DEATH(bh.AddAllBlocksThroughIndex(index_of_first_e),
746 "called before");
747 }
748
TEST_F(BlockHashDeathTest,AddAllBlocksThroughIndexOutOfRange)749 TEST_F(BlockHashDeathTest, AddAllBlocksThroughIndexOutOfRange) {
750 EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(strlen(sample_text) + 1),
751 "higher than end");
752 }
753 #endif // GTEST_HAS_DEATH_TEST
754
TEST_F(BlockHashTest,UnknownFingerprintReturnsNoMatch)755 TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) {
756 EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA"));
757 }
758
TEST_F(BlockHashTest,FindBestMatch)759 TEST_F(BlockHashTest, FindBestMatch) {
760 dh_->FindBestMatch(hashed_f,
761 &search_string[index_of_f_in_fearsome],
762 search_string,
763 strlen(search_string),
764 &best_match_);
765 EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset());
766 EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
767 // The match includes the spaces after the final character,
768 // which is why (kBlockSize - 1) is added to the expected best size.
769 EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
770 best_match_.size());
771 }
772
TEST_F(BlockHashTest,FindBestMatchWithStartingOffset)773 TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) {
774 BlockHash th2(sample_text, strlen(sample_text), 0x10000);
775 th2.Init(true); // hash all blocks
776 th2.FindBestMatch(hashed_f,
777 &search_string[index_of_f_in_fearsome],
778 search_string,
779 strlen(search_string),
780 &best_match_);
781 // Offset should begin with dictionary_size
782 EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear),
783 best_match_.source_offset());
784 EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
785 // The match includes the spaces after the final character,
786 // which is why (kBlockSize - 1) is added to the expected best size.
787 EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
788 best_match_.size());
789 }
790
TEST_F(BlockHashTest,BestMatchReachesEndOfDictionary)791 TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) {
792 // Hash the "i" in "fear itself"
793 uint32_t hash_value = RollingHash<kBlockSize>::Hash(
794 &search_to_end_string[index_of_i_in_itself]);
795 dh_->FindBestMatch(hash_value,
796 &search_to_end_string[index_of_i_in_itself],
797 search_to_end_string,
798 strlen(search_to_end_string),
799 &best_match_);
800 EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset());
801 EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset());
802 EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size());
803 }
804
TEST_F(BlockHashTest,BestMatchReachesStartOfDictionary)805 TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) {
806 // Hash the "i" in "fear itself"
807 uint32_t hash_value = RollingHash<kBlockSize>::Hash(
808 &search_to_beginning_string[index_of_o_in_online]);
809 dh_->FindBestMatch(hash_value,
810 &search_to_beginning_string[index_of_o_in_online],
811 search_to_beginning_string,
812 strlen(search_to_beginning_string),
813 &best_match_);
814 EXPECT_EQ(0, best_match_.source_offset()); // beginning of dictionary
815 EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset());
816 // The match includes the spaces after the final character,
817 // which is why (kBlockSize - 1) is added to the expected best size.
818 EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1),
819 best_match_.size());
820 }
821
TEST_F(BlockHashTest,BestMatchWithManyMatches)822 TEST_F(BlockHashTest, BestMatchWithManyMatches) {
823 BlockHash many_matches_hash(sample_text_many_matches,
824 strlen(sample_text_many_matches),
825 0);
826 EXPECT_TRUE(many_matches_hash.Init(true));
827 // Hash the " a" at the beginning of the search string "ababc"
828 uint32_t hash_value =
829 RollingHash<kBlockSize>::Hash(search_string_many_matches);
830 many_matches_hash.FindBestMatch(hash_value,
831 search_string_many_matches,
832 search_string_many_matches,
833 strlen(search_string_many_matches),
834 &best_match_);
835 EXPECT_EQ(index_of_ababc, best_match_.source_offset());
836 EXPECT_EQ(0, best_match_.target_offset());
837 EXPECT_EQ(strlen(search_string_many_matches), best_match_.size());
838 }
839
TEST_F(BlockHashTest,HashCollisionFindsNoMatch)840 TEST_F(BlockHashTest, HashCollisionFindsNoMatch) {
841 char* collision_search_string = new char[strlen(search_string) + 1];
842 memcpy(collision_search_string, search_string, strlen(search_string) + 1);
843 char* fearsome_location = &collision_search_string[index_of_f_in_fearsome];
844
845 // Tweak the collision string so that it has the same hash value
846 // but different text. The last four characters of the search string
847 // should be " f", and the bytes given below have the same hash value
848 // as those characters.
849 CHECK_GE(kBlockSize, 4);
850 fearsome_location[kBlockSize - 4] = 0x84;
851 fearsome_location[kBlockSize - 3] = 0xF1;
852 fearsome_location[kBlockSize - 2] = 0x51;
853 fearsome_location[kBlockSize - 1] = 0x00;
854 EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location));
855 EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome],
856 fearsome_location,
857 kBlockSize));
858 // No match should be found this time.
859 dh_->FindBestMatch(hashed_f,
860 fearsome_location,
861 collision_search_string,
862 strlen(search_string), // since collision_search_string has embedded \0
863 &best_match_);
864 EXPECT_EQ(-1, best_match_.source_offset());
865 EXPECT_EQ(-1, best_match_.target_offset());
866 EXPECT_EQ(0U, best_match_.size());
867 delete[] collision_search_string;
868 }
869
870 // If the footprint passed to FindBestMatch does not actually match
871 // the search string, it should not find any matches.
TEST_F(BlockHashTest,WrongFootprintFindsNoMatch)872 TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) {
873 dh_->FindBestMatch(hashed_e, // Using hashed value of "e" instead of "f"!
874 &search_string[index_of_f_in_fearsome],
875 search_string,
876 strlen(search_string),
877 &best_match_);
878 EXPECT_EQ(-1, best_match_.source_offset());
879 EXPECT_EQ(-1, best_match_.target_offset());
880 EXPECT_EQ(0U, best_match_.size());
881 }
882
883 // Use a dictionary containing 1M copies of the letter 'Q',
884 // and target data that also contains 1M Qs. If FindBestMatch
885 // is not throttled to find a maximum number of matches, this
886 // will take a very long time -- several seconds at least.
887 // If this test appears to hang, it is because the throttling code
888 // (see BlockHash::kMaxMatchesToCheck for details) is not working.
TEST_F(BlockHashTest,SearchStringFindsTooManyMatches)889 TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) {
890 const int kTestSize = 1 << 20; // 1M
891 char* huge_dictionary = new char[kTestSize];
892 memset(huge_dictionary, 'Q', kTestSize);
893 BlockHash huge_bh(huge_dictionary, kTestSize, 0);
894 EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true));
895 char* huge_target = new char[kTestSize];
896 memset(huge_target, 'Q', kTestSize);
897 CycleTimer timer;
898 timer.Start();
899 huge_bh.FindBestMatch(hashed_all_Qs,
900 huge_target + (kTestSize / 2), // middle of target
901 huge_target,
902 kTestSize,
903 &best_match_);
904 timer.Stop();
905 double elapsed_time_in_us = static_cast<double>(timer.GetInUsec());
906 std::cout << "Time to search for best match with 1M matches: "
907 << elapsed_time_in_us << " us" << std::endl;
908 // All blocks match the candidate block. FindBestMatch should have checked
909 // a certain number of matches before giving up. The best match
910 // should include at least half the source and target, since the candidate
911 // block was in the middle of the target data.
912 EXPECT_GT((kTestSize / 2), best_match_.source_offset());
913 EXPECT_GT((kTestSize / 2), best_match_.target_offset());
914 EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size());
915 EXPECT_GT(5000000, elapsed_time_in_us); // < 5 seconds
916 #ifdef NDEBUG
917 EXPECT_GT(1000000, elapsed_time_in_us); // < 1 second
918 #endif // NDEBUG
919 delete[] huge_target;
920 delete[] huge_dictionary;
921 }
922
923 #ifdef GTEST_HAS_DEATH_TEST
TEST_F(BlockHashDeathTest,AddTooManyBlocks)924 TEST_F(BlockHashDeathTest, AddTooManyBlocks) {
925 for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) {
926 th_->AddOneIndexHash(i * kBlockSize, hashed_e);
927 }
928 // Didn't expect another block to be added
929 EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text),
930 hashed_e),
931 "AddBlock");
932 }
933 #endif // GTEST_HAS_DEATH_TEST
934
935 } // namespace open_vcdiff
936