• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2008 Google Inc.
2 // Author: Lincoln Smith
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #include <config.h>
17 #include "google/vcencoder.h"
18 #include <stdlib.h>  // free, posix_memalign
19 #include <string.h>  // memcpy
20 #include <algorithm>
21 #include <string>
22 #include <vector>
23 #include "blockhash.h"
24 #include "checksum.h"
25 #include "testing.h"
26 #include "varint_bigendian.h"
27 #include "google/vcdecoder.h"
28 #include "vcdiff_defs.h"
29 
30 #ifdef HAVE_EXT_ROPE
31 #include <ext/rope>
32 #include "output_string_crope.h"
33 using __gnu_cxx::crope;
34 #endif  // HAVE_EXT_ROPE
35 
36 #ifdef HAVE_MALLOC_H
37 #include <malloc.h>
38 #endif  // HAVE_MALLOC_H
39 
40 #ifdef HAVE_SYS_MMAN_H
41 #define _XOPEN_SOURCE 600  // posix_memalign
42 #include <sys/mman.h>  // mprotect
43 #endif  // HAVE_SYS_MMAN_H
44 
45 #ifdef HAVE_UNISTD_H
46 #include <unistd.h>  // getpagesize
47 #endif  // HAVE_UNISTD_H
48 
49 namespace open_vcdiff {
50 namespace {
51 
52 static const size_t kFileHeaderSize = sizeof(DeltaFileHeader);
53 
// Worst-case per-window overhead, used to bound the size of an encoding that
// consists of a single ADD instruction.  The figure assumes that the
// dictionary size, the length of the ADD data, the size of the target window,
// and the length of the delta window each occupy a two-byte Varint.  It also
// includes three extra bytes for a zero-sized ADD instruction with a two-byte
// Varint explicit size.  Any additional COPY & ADD instructions must make the
// encoding shorter than this maximum.
// NOTE(review): the previous wording gave the two-byte Varint range as
// 128 <= length < 4096; with 7 payload bits per byte the upper limit would be
// 16384 -- confirm against the Varint implementation.
const size_t kWindowHeaderSize = 21;
64 
65 class VerifyEncodedBytesTest : public testing::Test {
66  public:
67   typedef std::string string;
68 
VerifyEncodedBytesTest()69   VerifyEncodedBytesTest() : delta_index_(0) { }
~VerifyEncodedBytesTest()70   virtual ~VerifyEncodedBytesTest() { }
71 
ExpectByte(unsigned char b)72   void ExpectByte(unsigned char b) {
73     EXPECT_EQ(b, static_cast<unsigned char>(delta_[delta_index_]));
74     ++delta_index_;
75   }
76 
ExpectString(const char * s)77   void ExpectString(const char* s) {
78     const size_t size = strlen(s);  // don't include terminating NULL char
79     EXPECT_EQ(string(s, size),
80               string(delta_data() + delta_index_, size));
81     delta_index_ += size;
82   }
83 
ExpectNoMoreBytes()84   void ExpectNoMoreBytes() {
85     EXPECT_EQ(delta_index_, delta_size());
86   }
87 
ExpectSize(size_t size)88   void ExpectSize(size_t size) {
89     const char* delta_size_pos = &delta_[delta_index_];
90     EXPECT_EQ(size,
91               static_cast<size_t>(
92                   VarintBE<int32_t>::Parse(delta_data() + delta_size(),
93                                            &delta_size_pos)));
94     delta_index_ = delta_size_pos - delta_data();
95   }
96 
ExpectChecksum(VCDChecksum checksum)97   void ExpectChecksum(VCDChecksum checksum) {
98     const char* delta_checksum_pos = &delta_[delta_index_];
99     EXPECT_EQ(checksum,
100               static_cast<VCDChecksum>(
101                   VarintBE<int64_t>::Parse(delta_data() + delta_size(),
102                                            &delta_checksum_pos)));
103     delta_index_ = delta_checksum_pos - delta_data();
104   }
105 
delta_as_const() const106   const string& delta_as_const() const { return delta_; }
delta()107   string* delta() { return &delta_; }
108 
delta_data() const109   const char* delta_data() const { return delta_as_const().data(); }
delta_size() const110   size_t delta_size() const { return delta_as_const().size(); }
111 
112  private:
113   string delta_;
114   size_t delta_index_;
115 };
116 
117 class VCDiffEncoderTest : public VerifyEncodedBytesTest {
118  protected:
119   static const char kDictionary[];
120   static const char kTarget[];
121 
122   VCDiffEncoderTest();
~VCDiffEncoderTest()123   virtual ~VCDiffEncoderTest() { }
124 
125   void TestWithFixedChunkSize(size_t chunk_size);
126   void TestWithEncodedChunkVector(size_t chunk_size);
127 
128   HashedDictionary hashed_dictionary_;
129   VCDiffStreamingEncoder encoder_;
130   VCDiffStreamingDecoder decoder_;
131   VCDiffEncoder simple_encoder_;
132   VCDiffDecoder simple_decoder_;
133 
134   string result_target_;
135 };
136 
137 const char VCDiffEncoderTest::kDictionary[] =
138     "\"Just the place for a Snark!\" the Bellman cried,\n"
139     "As he landed his crew with care;\n"
140     "Supporting each man on the top of the tide\n"
141     "By a finger entwined in his hair.\n";
142 
143 const char VCDiffEncoderTest::kTarget[] =
144     "\"Just the place for a Snark! I have said it twice:\n"
145     "That alone should encourage the crew.\n"
146     "Just the place for a Snark! I have said it thrice:\n"
147     "What I tell you three times is true.\"\n";
148 
VCDiffEncoderTest()149 VCDiffEncoderTest::VCDiffEncoderTest()
150     : hashed_dictionary_(kDictionary, sizeof(kDictionary)),
151       encoder_(&hashed_dictionary_,
152                VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
153                /* look_for_target_matches = */ true),
154       simple_encoder_(kDictionary, sizeof(kDictionary)) {
155   EXPECT_TRUE(hashed_dictionary_.Init());
156 }
157 
// EncodeChunk() is expected to fail when StartEncoding() was never called.
TEST_F(VCDiffEncoderTest, EncodeBeforeStartEncoding) {
  const bool chunk_accepted =
      encoder_.EncodeChunk(kTarget, strlen(kTarget), delta());
  EXPECT_FALSE(chunk_accepted);
}
161 
// FinishEncoding() is expected to fail when StartEncoding() was never called.
TEST_F(VCDiffEncoderTest, FinishBeforeStartEncoding) {
  const bool finished = encoder_.FinishEncoding(delta());
  EXPECT_FALSE(finished);
}
165 
// Round trip with an empty dictionary and no target data at all: the encoding
// must decode successfully to an empty target.
TEST_F(VCDiffEncoderTest, EncodeDecodeNothing) {
  HashedDictionary empty_dictionary("", 0);
  EXPECT_TRUE(empty_dictionary.Init());
  VCDiffStreamingEncoder empty_encoder(&empty_dictionary,
                                       VCD_STANDARD_FORMAT,
                                       /* look_for_target_matches = */ false);

  // Encode: start and finish immediately, without any chunks in between.
  EXPECT_TRUE(empty_encoder.StartEncoding(delta()));
  EXPECT_TRUE(empty_encoder.FinishEncoding(delta()));

  // Decode whatever the encoder produced.
  decoder_.StartDecoding("", 0);
  const bool chunk_decoded =
      decoder_.DecodeChunk(delta_data(), delta_size(), &result_target_);
  EXPECT_TRUE(chunk_decoded);
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_TRUE(result_target_.empty());
}
181 
182 // A NULL dictionary pointer is legal as long as the dictionary size is 0.
TEST_F(VCDiffEncoderTest,EncodeDecodeNullDictionaryPtr)183 TEST_F(VCDiffEncoderTest, EncodeDecodeNullDictionaryPtr) {
184   HashedDictionary null_dictionary(NULL, 0);
185   EXPECT_TRUE(null_dictionary.Init());
186   VCDiffStreamingEncoder null_encoder(&null_dictionary,
187                                       VCD_STANDARD_FORMAT,
188                                       false);
189   EXPECT_TRUE(null_encoder.StartEncoding(delta()));
190   EXPECT_TRUE(null_encoder.EncodeChunk(kTarget, strlen(kTarget), delta()));
191   EXPECT_TRUE(null_encoder.FinishEncoding(delta()));
192   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
193             delta_size());
194   decoder_.StartDecoding(NULL, 0);
195   EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
196                                    delta_size(),
197                                    &result_target_));
198   EXPECT_TRUE(decoder_.FinishDecoding());
199   EXPECT_EQ(kTarget, result_target_);
200 }
201 
// Round trip through the one-shot encoder and decoder with default format
// flags.
TEST_F(VCDiffEncoderTest, EncodeDecodeSimple) {
  const size_t target_size = strlen(kTarget);
  EXPECT_TRUE(simple_encoder_.Encode(kTarget, target_size, delta()));

  // The encoding may not exceed the single-ADD worst case.
  const size_t max_delta_size =
      target_size + kFileHeaderSize + kWindowHeaderSize;
  EXPECT_GE(max_delta_size, delta_size());

  const bool decoded = simple_decoder_.Decode(kDictionary,
                                              sizeof(kDictionary),
                                              delta_as_const(),
                                              &result_target_);
  EXPECT_TRUE(decoded);
  EXPECT_EQ(kTarget, result_target_);
}
212 
// Round trip through the one-shot encoder and decoder using the interleaved
// format.
TEST_F(VCDiffEncoderTest, EncodeDecodeInterleaved) {
  const size_t target_size = strlen(kTarget);
  simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED);
  EXPECT_TRUE(simple_encoder_.Encode(kTarget, target_size, delta()));

  // The encoding may not exceed the single-ADD worst case.
  const size_t max_delta_size =
      target_size + kFileHeaderSize + kWindowHeaderSize;
  EXPECT_GE(max_delta_size, delta_size());

  const bool decoded = simple_decoder_.Decode(kDictionary,
                                              sizeof(kDictionary),
                                              delta_as_const(),
                                              &result_target_);
  EXPECT_TRUE(decoded);
  EXPECT_EQ(kTarget, result_target_);
}
224 
// Round trip through the one-shot encoder and decoder using the interleaved
// format with checksums enabled.
TEST_F(VCDiffEncoderTest, EncodeDecodeInterleavedChecksum) {
  const size_t target_size = strlen(kTarget);
  simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM);
  EXPECT_TRUE(simple_encoder_.Encode(kTarget, target_size, delta()));

  // The encoding may not exceed the single-ADD worst case.
  const size_t max_delta_size =
      target_size + kFileHeaderSize + kWindowHeaderSize;
  EXPECT_GE(max_delta_size, delta_size());

  const bool decoded = simple_decoder_.Decode(kDictionary,
                                              sizeof(kDictionary),
                                              delta_as_const(),
                                              &result_target_);
  EXPECT_TRUE(decoded);
  EXPECT_EQ(kTarget, result_target_);
}
238 
// Streaming round trip in which the whole target is passed as one chunk and
// the whole delta is decoded as one chunk.
TEST_F(VCDiffEncoderTest, EncodeDecodeSingleChunk) {
  const size_t target_size = strlen(kTarget);

  EXPECT_TRUE(encoder_.StartEncoding(delta()));
  EXPECT_TRUE(encoder_.EncodeChunk(kTarget, target_size, delta()));
  EXPECT_TRUE(encoder_.FinishEncoding(delta()));

  // The encoding may not exceed the single-ADD worst case.
  const size_t max_delta_size =
      target_size + kFileHeaderSize + kWindowHeaderSize;
  EXPECT_GE(max_delta_size, delta_size());

  decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
  const bool chunk_decoded =
      decoder_.DecodeChunk(delta_data(), delta_size(), &result_target_);
  EXPECT_TRUE(chunk_decoded);
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(kTarget, result_target_);
}
252 
// Streaming round trip in which the output of StartEncoding, EncodeChunk and
// FinishEncoding goes into three separate strings, which are then passed to
// the decoder one after another.
TEST_F(VCDiffEncoderTest, EncodeDecodeSeparate) {
  const size_t target_size = strlen(kTarget);

  string delta_start;
  string delta_encode;
  string delta_finish;
  EXPECT_TRUE(encoder_.StartEncoding(&delta_start));
  EXPECT_TRUE(encoder_.EncodeChunk(kTarget, target_size, &delta_encode));
  EXPECT_TRUE(encoder_.FinishEncoding(&delta_finish));

  // Taken together, the three pieces may not exceed the single-ADD worst case.
  const size_t total_delta_size =
      delta_start.size() + delta_encode.size() + delta_finish.size();
  EXPECT_GE(target_size + kFileHeaderSize + kWindowHeaderSize,
            total_delta_size);

  decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
  const string* const pieces[] = { &delta_start, &delta_encode, &delta_finish };
  for (size_t i = 0; i < sizeof(pieces) / sizeof(pieces[0]); ++i) {
    EXPECT_TRUE(decoder_.DecodeChunk(pieces[i]->data(),
                                     pieces[i]->size(),
                                     &result_target_));
  }
  EXPECT_TRUE(decoder_.FinishDecoding());
  EXPECT_EQ(kTarget, result_target_);
}
273 
#ifdef HAVE_EXT_ROPE
// Checks that a crope can stand in for a string as both the encoder's and the
// decoder's output container.
TEST_F(VCDiffEncoderTest, EncodeDecodeCrope) {
  const size_t target_size = strlen(kTarget);
  crope delta_crope;
  crope result_crope;

  EXPECT_TRUE(encoder_.StartEncoding(&delta_crope));
  EXPECT_TRUE(encoder_.EncodeChunk(kTarget, target_size, &delta_crope));
  EXPECT_TRUE(encoder_.FinishEncoding(&delta_crope));
  EXPECT_GE(target_size + kFileHeaderSize + kWindowHeaderSize,
            delta_crope.size());

  decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
  // A crope does not promise contiguous storage, so the delta is handed to
  // the decoder one byte at a time.
  const crope::const_iterator delta_end = delta_crope.end();
  for (crope::const_iterator it = delta_crope.begin(); it != delta_end; ++it) {
    const char this_char = *it;
    EXPECT_TRUE(decoder_.DecodeChunk(&this_char, 1, &result_crope));
  }
  EXPECT_TRUE(decoder_.FinishDecoding());

  const crope expected_target(kTarget);
  EXPECT_EQ(expected_target, result_crope);
}
#endif  // HAVE_EXT_ROPE
297 
TestWithFixedChunkSize(size_t chunk_size)298 void VCDiffEncoderTest::TestWithFixedChunkSize(size_t chunk_size) {
299   delta()->clear();
300   EXPECT_TRUE(encoder_.StartEncoding(delta()));
301   for (size_t chunk_start_index = 0;
302        chunk_start_index < strlen(kTarget);
303        chunk_start_index += chunk_size) {
304     size_t this_chunk_size = chunk_size;
305     const size_t bytes_available = strlen(kTarget) - chunk_start_index;
306     if (this_chunk_size > bytes_available) {
307       this_chunk_size = bytes_available;
308     }
309     EXPECT_TRUE(encoder_.EncodeChunk(&kTarget[chunk_start_index],
310                                      this_chunk_size,
311                                      delta()));
312   }
313   EXPECT_TRUE(encoder_.FinishEncoding(delta()));
314   const size_t num_windows = (strlen(kTarget) / chunk_size) + 1;
315   const size_t size_of_windows =
316       strlen(kTarget) + (kWindowHeaderSize * num_windows);
317   EXPECT_GE(kFileHeaderSize + size_of_windows, delta_size());
318   result_target_.clear();
319   decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
320   for (size_t chunk_start_index = 0;
321        chunk_start_index < delta_size();
322        chunk_start_index += chunk_size) {
323     size_t this_chunk_size = chunk_size;
324     const size_t bytes_available = delta_size() - chunk_start_index;
325     if (this_chunk_size > bytes_available) {
326       this_chunk_size = bytes_available;
327     }
328     EXPECT_TRUE(decoder_.DecodeChunk(delta_data() + chunk_start_index,
329                                      this_chunk_size,
330                                      &result_target_));
331   }
332   EXPECT_TRUE(decoder_.FinishDecoding());
333   EXPECT_EQ(kTarget, result_target_);
334 }
335 
// Runs the fixed-chunk round trip for a few known-problematic sizes, and then
// for every chunk size from 1 up to (but not including) the target length.
TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizes) {
  // Chunk sizes that have failed in the past.
  static const size_t kRegressionChunkSizes[] = { 6, 45, 60 };
  const size_t regression_count =
      sizeof(kRegressionChunkSizes) / sizeof(kRegressionChunkSizes[0]);
  for (size_t i = 0; i < regression_count; ++i) {
    TestWithFixedChunkSize(kRegressionChunkSizes[i]);
  }

  // Exhaustive sweep over the chunk sizes.
  const size_t target_size = strlen(kTarget);
  size_t chunk_size = 1;
  while (chunk_size < target_size) {
    TestWithFixedChunkSize(chunk_size);
    ++chunk_size;
  }
}
347 
348 // If --allow_vcd_target=false is specified, the decoder will throw away some of
349 // the internally-stored decoded target beyond the current window.  Try
350 // different numbers of encoded window sizes to make sure that this behavior
351 // does not affect the results.
TEST_F(VCDiffEncoderTest,EncodeDecodeFixedChunkSizesNoVcdTarget)352 TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizesNoVcdTarget) {
353   decoder_.SetAllowVcdTarget(false);
354   // Loop through all possible chunk sizes
355   for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) {
356     TestWithFixedChunkSize(chunk_size);
357   }
358 }
359 
360 // Splits the text to be encoded into fixed-size chunks.  Encodes each
361 // chunk and puts it into a vector of strings.  Then decodes each string
362 // in the vector and appends the result into result_target_.
TestWithEncodedChunkVector(size_t chunk_size)363 void VCDiffEncoderTest::TestWithEncodedChunkVector(size_t chunk_size) {
364   std::vector<string> encoded_chunks;
365   string this_encoded_chunk;
366   size_t total_chunk_size = 0;
367   EXPECT_TRUE(encoder_.StartEncoding(&this_encoded_chunk));
368   encoded_chunks.push_back(this_encoded_chunk);
369   total_chunk_size += this_encoded_chunk.size();
370   for (size_t chunk_start_index = 0;
371        chunk_start_index < strlen(kTarget);
372        chunk_start_index += chunk_size) {
373     size_t this_chunk_size = chunk_size;
374     const size_t bytes_available = strlen(kTarget) - chunk_start_index;
375     if (this_chunk_size > bytes_available) {
376       this_chunk_size = bytes_available;
377     }
378     this_encoded_chunk.clear();
379     EXPECT_TRUE(encoder_.EncodeChunk(&kTarget[chunk_start_index],
380                                      this_chunk_size,
381                                      &this_encoded_chunk));
382     encoded_chunks.push_back(this_encoded_chunk);
383     total_chunk_size += this_encoded_chunk.size();
384   }
385   this_encoded_chunk.clear();
386   EXPECT_TRUE(encoder_.FinishEncoding(&this_encoded_chunk));
387   encoded_chunks.push_back(this_encoded_chunk);
388   total_chunk_size += this_encoded_chunk.size();
389   const size_t num_windows = (strlen(kTarget) / chunk_size) + 1;
390   const size_t size_of_windows =
391       strlen(kTarget) + (kWindowHeaderSize * num_windows);
392   EXPECT_GE(kFileHeaderSize + size_of_windows, total_chunk_size);
393   result_target_.clear();
394   decoder_.StartDecoding(kDictionary, sizeof(kDictionary));
395   for (std::vector<string>::iterator it = encoded_chunks.begin();
396        it != encoded_chunks.end(); ++it) {
397     EXPECT_TRUE(decoder_.DecodeChunk(it->data(), it->size(), &result_target_));
398   }
399   EXPECT_TRUE(decoder_.FinishDecoding());
400   EXPECT_EQ(kTarget, result_target_);
401 }
402 
// Runs the chunk-vector round trip for every chunk size from 1 up to (but not
// including) the target length.
TEST_F(VCDiffEncoderTest, EncodeDecodeStreamOfChunks) {
  const size_t target_size = strlen(kTarget);
  size_t chunk_size = 1;
  while (chunk_size < target_size) {
    TestWithEncodedChunkVector(chunk_size);
    ++chunk_size;
  }
}
409 
410 // Verify that HashedDictionary stores a copy of the dictionary text,
411 // rather than just storing a pointer to it.  If the dictionary buffer
412 // is overwritten after creating a HashedDictionary from it, it shouldn't
413 // affect an encoder that uses that HashedDictionary.
TEST_F(VCDiffEncoderTest,DictionaryBufferOverwritten)414 TEST_F(VCDiffEncoderTest, DictionaryBufferOverwritten) {
415   string dictionary_copy(kDictionary, sizeof(kDictionary));
416   HashedDictionary hd_copy(dictionary_copy.data(), dictionary_copy.size());
417   EXPECT_TRUE(hd_copy.Init());
418   VCDiffStreamingEncoder copy_encoder(&hd_copy,
419                                       VCD_FORMAT_INTERLEAVED
420                                           | VCD_FORMAT_CHECKSUM,
421                                       /* look_for_target_matches = */ true);
422   // Produce a reference version of the encoded text.
423   string delta_before;
424   EXPECT_TRUE(copy_encoder.StartEncoding(&delta_before));
425   EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget,
426                                        strlen(kTarget),
427                                        &delta_before));
428   EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_before));
429   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
430             delta_before.size());
431 
432   // Overwrite the dictionary text with all 'Q' characters.
433   dictionary_copy.replace(0,
434                           dictionary_copy.size(),
435                           dictionary_copy.size(),
436                           'Q');
437   // When the encoder is used on the same target text after overwriting
438   // the dictionary, it should produce the same encoded output.
439   string delta_after;
440   EXPECT_TRUE(copy_encoder.StartEncoding(&delta_after));
441   EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget, strlen(kTarget), &delta_after));
442   EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_after));
443   EXPECT_EQ(delta_before, delta_after);
444 }
445 
446 // Binary data test part 1: The dictionary and target data should not
447 // be treated as NULL-terminated.  An embedded NULL should be handled like
448 // any other byte of data.
TEST_F(VCDiffEncoderTest,DictionaryHasEmbeddedNULLs)449 TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNULLs) {
450   const char embedded_null_dictionary_text[] =
451       { 0x00, 0xFF, 0xFE, 0xFD, 0x00, 0xFD, 0xFE, 0xFF, 0x00, 0x03 };
452   const char embedded_null_target[] =
453       { 0xFD, 0x00, 0xFD, 0xFE, 0x03, 0x00, 0x01, 0x00 };
454   CHECK_EQ(10, sizeof(embedded_null_dictionary_text));
455   CHECK_EQ(8, sizeof(embedded_null_target));
456   HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text,
457       sizeof(embedded_null_dictionary_text));
458   EXPECT_TRUE(embedded_null_dictionary.Init());
459   VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary,
460       VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
461       /* look_for_target_matches = */ true);
462   EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta()));
463   EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target,
464                                                 sizeof(embedded_null_target),
465                                                 delta()));
466   EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta()));
467   decoder_.StartDecoding(embedded_null_dictionary_text,
468                          sizeof(embedded_null_dictionary_text));
469   EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
470                                    delta_size(),
471                                    &result_target_));
472   EXPECT_TRUE(decoder_.FinishDecoding());
473   EXPECT_EQ(sizeof(embedded_null_target), result_target_.size());
474   EXPECT_EQ(string(embedded_null_target,
475                    sizeof(embedded_null_target)),
476             result_target_);
477 }
478 
479 // Binary data test part 2: An embedded CR or LF should be handled like
480 // any other byte of data.  No text-processing of the data should occur.
TEST_F(VCDiffEncoderTest,DictionaryHasEmbeddedNewlines)481 TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNewlines) {
482   const char embedded_null_dictionary_text[] =
483       { 0x0C, 0xFF, 0xFE, 0x0C, 0x00, 0x0A, 0xFE, 0xFF, 0x00, 0x0A };
484   const char embedded_null_target[] =
485       { 0x0C, 0x00, 0x0A, 0xFE, 0x03, 0x00, 0x0A, 0x00 };
486   CHECK_EQ(10, sizeof(embedded_null_dictionary_text));
487   CHECK_EQ(8, sizeof(embedded_null_target));
488   HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text,
489       sizeof(embedded_null_dictionary_text));
490   EXPECT_TRUE(embedded_null_dictionary.Init());
491   VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary,
492       VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
493       /* look_for_target_matches = */ true);
494   EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta()));
495   EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target,
496                                                 sizeof(embedded_null_target),
497                                                 delta()));
498   EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta()));
499   decoder_.StartDecoding(embedded_null_dictionary_text,
500                          sizeof(embedded_null_dictionary_text));
501   EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
502                                    delta_size(),
503                                    &result_target_));
504   EXPECT_TRUE(decoder_.FinishDecoding());
505   EXPECT_EQ(sizeof(embedded_null_target), result_target_.size());
506   EXPECT_EQ(string(embedded_null_target,
507                    sizeof(embedded_null_target)),
508             result_target_);
509 }
510 
// Round trip in which the dictionary and the target are wide-character
// strings, passed to the encoder and decoder as raw bytes.  The sizes come
// from sizeof(), so each buffer includes its terminating wide NUL.
TEST_F(VCDiffEncoderTest, UsingWideCharacters) {
  const wchar_t wchar_dictionary_text[] =
      L"\"Just the place for a Snark!\" the Bellman cried,\n"
      L"As he landed his crew with care;\n"
      L"Supporting each man on the top of the tide\n"
      L"By a finger entwined in his hair.\n";

  const wchar_t wchar_target[] =
      L"\"Just the place for a Snark! I have said it twice:\n"
      L"That alone should encourage the crew.\n"
      L"Just the place for a Snark! I have said it thrice:\n"
      L"What I tell you three times is true.\"\n";

  const char* const dictionary_bytes =
      reinterpret_cast<const char*>(wchar_dictionary_text);
  const char* const target_bytes =
      reinterpret_cast<const char*>(wchar_target);

  HashedDictionary wchar_dictionary(dictionary_bytes,
                                    sizeof(wchar_dictionary_text));
  EXPECT_TRUE(wchar_dictionary.Init());
  VCDiffStreamingEncoder wchar_encoder(&wchar_dictionary,
                                       VCD_FORMAT_INTERLEAVED
                                           | VCD_FORMAT_CHECKSUM,
                                       /* look_for_target_matches = */ false);
  EXPECT_TRUE(wchar_encoder.StartEncoding(delta()));
  EXPECT_TRUE(wchar_encoder.EncodeChunk(target_bytes,
                                        sizeof(wchar_target),
                                        delta()));
  EXPECT_TRUE(wchar_encoder.FinishEncoding(delta()));
  decoder_.StartDecoding(dictionary_bytes, sizeof(wchar_dictionary_text));
  EXPECT_TRUE(decoder_.DecodeChunk(delta_data(),
                                   delta_size(),
                                   &result_target_));
  EXPECT_TRUE(decoder_.FinishDecoding());

  // Check the decoded size first, then compare the raw bytes.  Treating the
  // result as a wide string and scanning for its terminator would read past
  // the end of the buffer if decoding had produced too little data.
  EXPECT_EQ(sizeof(wchar_target), result_target_.size());
  if (result_target_.size() == sizeof(wchar_target)) {
    EXPECT_EQ(0, memcmp(wchar_target,
                        result_target_.data(),
                        sizeof(wchar_target)));
  }
}
546 
547 #if defined(HAVE_MPROTECT) && \
548    (defined(HAVE_MEMALIGN) || defined(HAVE_POSIX_MEMALIGN))
549 // Bug 1220602: Make sure the encoder doesn't read past the end of the input
550 // buffer.
TEST_F(VCDiffEncoderTest,ShouldNotReadPastEndOfBuffer)551 TEST_F(VCDiffEncoderTest, ShouldNotReadPastEndOfBuffer) {
552   const size_t target_size = strlen(kTarget);
553 
554   // Allocate two memory pages.
555   const int page_size = getpagesize();
556   void* two_pages = NULL;
557 #ifdef HAVE_POSIX_MEMALIGN
558   posix_memalign(&two_pages, page_size, 2 * page_size);
559 #else  // !HAVE_POSIX_MEMALIGN
560   two_pages = memalign(page_size, 2 * page_size);
561 #endif  // HAVE_POSIX_MEMALIGN
562   char* const first_page = reinterpret_cast<char*>(two_pages);
563   char* const second_page = first_page + page_size;
564 
565   // Place the target string at the end of the first page.
566   char* const target_with_guard = second_page - target_size;
567   memcpy(target_with_guard, kTarget, target_size);
568 
569   // Make the second page unreadable.
570   mprotect(second_page, page_size, PROT_NONE);
571 
572   // Now perform the encode operation, which will cause a segmentation fault
573   // if it reads past the end of the buffer.
574   EXPECT_TRUE(encoder_.StartEncoding(delta()));
575   EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta()));
576   EXPECT_TRUE(encoder_.FinishEncoding(delta()));
577 
578   // Undo the mprotect.
579   mprotect(second_page, page_size, PROT_READ|PROT_WRITE);
580   free(two_pages);
581 }
582 
// Make sure the encoder doesn't read before the start of the input buffer.
// The target is placed at the very start of a page, and the page before it
// is made inaccessible.
TEST_F(VCDiffEncoderTest, ShouldNotReadPastBeginningOfBuffer) {
  const size_t target_size = strlen(kTarget);

  // Allocate two memory pages.
  const int page_size = getpagesize();
  void* two_pages = NULL;
#ifdef HAVE_POSIX_MEMALIGN
  const int alloc_result = posix_memalign(&two_pages, page_size, 2 * page_size);
  EXPECT_EQ(0, alloc_result);
  if (alloc_result != 0) {
    two_pages = NULL;  // do not rely on the output argument after a failure
  }
#else  // !HAVE_POSIX_MEMALIGN
  two_pages = memalign(page_size, 2 * page_size);
#endif  // HAVE_POSIX_MEMALIGN
  EXPECT_TRUE(two_pages != NULL);
  if (two_pages == NULL) {
    return;  // nothing was allocated, so the rest of the test cannot run
  }
  char* const first_page = reinterpret_cast<char*>(two_pages);
  char* const second_page = first_page + page_size;

  // Make the first page unreadable.  If this fails the guard page is absent
  // and the test proves nothing, so the result is checked.
  EXPECT_EQ(0, mprotect(first_page, page_size, PROT_NONE));

  // Place the target string at the beginning of the second page.
  EXPECT_LE(target_size, static_cast<size_t>(page_size));
  char* const target_with_guard = second_page;
  memcpy(target_with_guard, kTarget, target_size);

  // Now perform the encode operation, which will cause a segmentation fault
  // if it reads past the beginning of the buffer.
  EXPECT_TRUE(encoder_.StartEncoding(delta()));
  EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta()));
  EXPECT_TRUE(encoder_.FinishEncoding(delta()));

  // Undo the mprotect.  The block is released only if the page is accessible
  // again; leaking it is preferable to freeing memory that is still protected.
  const int unprotect_result =
      mprotect(first_page, page_size, PROT_READ|PROT_WRITE);
  EXPECT_EQ(0, unprotect_result);
  if (unprotect_result == 0) {
    free(two_pages);
  }
}
614 #endif  // HAVE_MPROTECT && (HAVE_MEMALIGN || HAVE_POSIX_MEMALIGN)
615 
616 class VCDiffMatchCountTest : public VerifyEncodedBytesTest {
617  protected:
~VCDiffMatchCountTest()618   virtual ~VCDiffMatchCountTest() { }
619 
ExpectMatch(size_t match_size)620   void ExpectMatch(size_t match_size) {
621     if (match_size >= expected_match_counts_.size()) {
622       // Be generous to avoid resizing again
623       expected_match_counts_.resize(match_size * 2, 0);
624     }
625     ++expected_match_counts_[match_size];
626   }
627 
VerifyMatchCounts()628   void VerifyMatchCounts() {
629     EXPECT_TRUE(std::equal(expected_match_counts_.begin(),
630                            expected_match_counts_.end(),
631                            actual_match_counts_.begin()));
632   }
633 
634   std::vector<int> expected_match_counts_;
635   std::vector<int> actual_match_counts_;
636 };
637 
638 class VCDiffHTML1Test : public VCDiffMatchCountTest {
639  protected:
640   static const char kDictionary[];
641   static const char kTarget[];
642   static const char kRedundantTarget[];
643 
644   VCDiffHTML1Test();
~VCDiffHTML1Test()645   virtual ~VCDiffHTML1Test() { }
646 
647   void SimpleEncode();
648   void StreamingEncode();
649 
650   HashedDictionary hashed_dictionary_;
651   VCDiffStreamingEncoder encoder_;
652   VCDiffStreamingDecoder decoder_;
653   VCDiffEncoder simple_encoder_;
654   VCDiffDecoder simple_decoder_;
655 
656   string result_target_;
657 };
658 
659 const char VCDiffHTML1Test::kDictionary[] =
660     "<html><font color=red>This part from the dict</font><br>";
661 
662 const char VCDiffHTML1Test::kTarget[] =
663     "<html><font color=red>This part from the dict</font><br>\n"
664     "And this part is not...</html>";
665 
666 const char VCDiffHTML1Test::kRedundantTarget[] =
667     "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
668     "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
669     "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
670     "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";  // 256
671 
VCDiffHTML1Test()672 VCDiffHTML1Test::VCDiffHTML1Test()
673     : hashed_dictionary_(kDictionary, sizeof(kDictionary)),
674       encoder_(&hashed_dictionary_,
675                VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
676                /* look_for_target_matches = */ true),
677       simple_encoder_(kDictionary, sizeof(kDictionary)) {
678   EXPECT_TRUE(hashed_dictionary_.Init());
679 }
680 
SimpleEncode()681 void VCDiffHTML1Test::SimpleEncode() {
682   EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
683   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
684             delta_size());
685   EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
686                                      sizeof(kDictionary),
687                                      delta_as_const(),
688                                      &result_target_));
689   EXPECT_EQ(kTarget, result_target_);
690 }
691 
StreamingEncode()692 void VCDiffHTML1Test::StreamingEncode() {
693   EXPECT_TRUE(encoder_.StartEncoding(delta()));
694   EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
695   EXPECT_TRUE(encoder_.FinishEncoding(delta()));
696 }
697 
TEST_F(VCDiffHTML1Test,CheckOutputOfSimpleEncoder)698 TEST_F(VCDiffHTML1Test, CheckOutputOfSimpleEncoder) {
699   SimpleEncode();
700   // These values do not depend on the block size used for encoding
701   ExpectByte(0xD6);  // 'V' | 0x80
702   ExpectByte(0xC3);  // 'C' | 0x80
703   ExpectByte(0xC4);  // 'D' | 0x80
704   ExpectByte(0x00);  // Simple encoder never uses interleaved format
705   ExpectByte(0x00);  // Hdr_Indicator
706   ExpectByte(VCD_SOURCE);  // Win_Indicator: VCD_SOURCE (dictionary)
707   ExpectByte(sizeof(kDictionary));  // Dictionary length
708   ExpectByte(0x00);  // Source segment position: start of dictionary
709   if (BlockHash::kBlockSize < 16) {
710     // A medium block size will catch the "his part " match.
711     ExpectByte(0x22);  // Length of the delta encoding
712     ExpectSize(strlen(kTarget));  // Size of the target window
713     ExpectByte(0x00);  // Delta_indicator (no compression)
714     ExpectByte(0x16);  // Length of the data section
715     ExpectByte(0x05);  // Length of the instructions section
716     ExpectByte(0x02);  // Length of the address section
717     // Data section
718     ExpectString("\nAnd t");      // Data for 1st ADD
719     ExpectString("is not...</html>");  // Data for 2nd ADD
720     // Instructions section
721     ExpectByte(0x73);  // COPY size 0 mode VCD_SAME(0)
722     ExpectByte(0x38);  // COPY size (56)
723     ExpectByte(0x07);  // ADD size 6
724     ExpectByte(0x19);  // COPY size 9 mode VCD_SELF
725     ExpectByte(0x11);  // ADD size 16
726     // Address section
727     ExpectByte(0x00);  // COPY address (0) mode VCD_SAME(0)
728     ExpectByte(0x17);  // COPY address (23) mode VCD_SELF
729   } else if (BlockHash::kBlockSize <= 56) {
730     // Any block size up to 56 will catch the matching prefix string.
731     ExpectByte(0x29);  // Length of the delta encoding
732     ExpectSize(strlen(kTarget));  // Size of the target window
733     ExpectByte(0x00);  // Delta_indicator (no compression)
734     ExpectByte(0x1F);  // Length of the data section
735     ExpectByte(0x04);  // Length of the instructions section
736     ExpectByte(0x01);  // Length of the address section
737     ExpectString("\nAnd this part is not...</html>");  // Data for ADD
738     // Instructions section
739     ExpectByte(0x73);  // COPY size 0 mode VCD_SAME(0)
740     ExpectByte(0x38);  // COPY size (56)
741     ExpectByte(0x01);  // ADD size 0
742     ExpectByte(0x1F);  // Size of ADD (31)
743     // Address section
744     ExpectByte(0x00);  // COPY address (0) mode VCD_SAME(0)
745   } else {
746     // The matching string is 56 characters long, and the block size is
747     // 64 or greater, so no match should be found.
748     ExpectSize(strlen(kTarget) + 7);  // Delta encoding len
749     ExpectSize(strlen(kTarget));  // Size of the target window
750     ExpectByte(0x00);  // Delta_indicator (no compression)
751     ExpectSize(strlen(kTarget));  // Length of the data section
752     ExpectByte(0x02);  // Length of the instructions section
753     ExpectByte(0x00);  // Length of the address section
754     // Data section
755     ExpectString(kTarget);
756     ExpectByte(0x01);  // ADD size 0
757     ExpectSize(strlen(kTarget));
758   }
759   ExpectNoMoreBytes();
760 }
761 
// Checks the match-length statistics reported by the streaming encoder
// after encoding kTarget.
TEST_F(VCDiffHTML1Test, MatchCounts) {
  StreamingEncode();
  encoder_.GetMatchCounts(&actual_match_counts_);
  if (BlockHash::kBlockSize <= 56) {
    // Any block size up to 56 will catch the matching prefix string.
    ExpectMatch(56);
    if (BlockHash::kBlockSize < 16) {
      // A medium block size will also catch the "his part " match.
      ExpectMatch(9);
    }
  }
  // With larger block sizes no match is expected at all.
  VerifyMatchCounts();
}
775 
// With its default settings the simple encoder matches against the target
// itself: the 256-byte run of 'A' is expected to be encoded as a one-byte
// ADD followed by a single COPY of the remaining 255 bytes.
TEST_F(VCDiffHTML1Test, SimpleEncoderPerformsTargetMatching) {
  const size_t redundant_length = strlen(kRedundantTarget);
  EXPECT_TRUE(simple_encoder_.Encode(kRedundantTarget,
                                     redundant_length,
                                     delta()));
  EXPECT_GE(redundant_length + kFileHeaderSize + kWindowHeaderSize,
            delta_size());
  EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
                                     sizeof(kDictionary),
                                     delta_as_const(),
                                     &result_target_));
  EXPECT_EQ(kRedundantTarget, result_target_);

  // These values do not depend on the block size used for encoding.
  ExpectByte(0xD6);  // 'V' | 0x80
  ExpectByte(0xC3);  // 'C' | 0x80
  ExpectByte(0xC4);  // 'D' | 0x80
  ExpectByte(0x00);  // Simple encoder never uses interleaved format
  ExpectByte(0x00);  // Hdr_Indicator
  ExpectByte(VCD_SOURCE);  // Win_Indicator: VCD_SOURCE (dictionary)
  ExpectByte(sizeof(kDictionary));  // Dictionary length
  ExpectByte(0x00);  // Source segment position: start of dictionary
  ExpectByte(0x0C);  // Length of the delta encoding
  ExpectSize(redundant_length);  // Size of the target window
  ExpectByte(0x00);  // Delta_indicator (no compression)
  ExpectByte(0x01);  // Length of the data section
  ExpectByte(0x04);  // Length of the instructions section
  ExpectByte(0x01);  // Length of the address section
  // Data section
  ExpectString("A");      // Data for ADD
  // Instructions section
  ExpectByte(0x02);  // ADD size 1
  ExpectByte(0x23);  // COPY size 0 mode VCD_HERE
  ExpectSize(redundant_length - 1);  // COPY size 255
  // Address section
  ExpectByte(0x01);  // COPY address (1) mode VCD_HERE
  ExpectNoMoreBytes();
}
812 
// With target matching switched off, the run of 'A' cannot be copied from
// earlier target data, so the whole target is expected as one literal ADD.
TEST_F(VCDiffHTML1Test, SimpleEncoderWithoutTargetMatching) {
  simple_encoder_.SetTargetMatching(false);
  const size_t redundant_length = strlen(kRedundantTarget);
  EXPECT_TRUE(simple_encoder_.Encode(kRedundantTarget,
                                     redundant_length,
                                     delta()));
  EXPECT_GE(redundant_length + kFileHeaderSize + kWindowHeaderSize,
            delta_size());
  EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
                                     sizeof(kDictionary),
                                     delta_as_const(),
                                     &result_target_));
  EXPECT_EQ(kRedundantTarget, result_target_);

  // These values do not depend on the block size used for encoding.
  ExpectByte(0xD6);  // 'V' | 0x80
  ExpectByte(0xC3);  // 'C' | 0x80
  ExpectByte(0xC4);  // 'D' | 0x80
  ExpectByte(0x00);  // Simple encoder never uses interleaved format
  ExpectByte(0x00);  // Hdr_Indicator
  ExpectByte(VCD_SOURCE);  // Win_Indicator: VCD_SOURCE (dictionary)
  ExpectByte(sizeof(kDictionary));  // Dictionary length
  ExpectByte(0x00);  // Source segment position: start of dictionary
  ExpectSize(redundant_length + 0x0A);  // Length of the delta encoding
  ExpectSize(redundant_length);  // Size of the target window
  ExpectByte(0x00);  // Delta_indicator (no compression)
  ExpectSize(redundant_length);  // Length of the data section
  ExpectByte(0x03);  // Length of the instructions section
  ExpectByte(0x00);  // Length of the address section
  // Data section
  ExpectString(kRedundantTarget);      // Data for ADD
  // Instructions section
  ExpectByte(0x01);  // ADD size 0
  ExpectSize(redundant_length);  // ADD size
  // The address section is empty.
  ExpectNoMoreBytes();
}
848 
#ifdef GTEST_HAS_DEATH_TEST
// Alias of the fixture with a "DeathTest" suffix, used for the death test
// below.
typedef VCDiffHTML1Test VCDiffHTML1DeathTest;

// Passing a NULL output pointer to GetMatchCounts is a caller error; the
// death expectation looks for "GetMatchCounts" in the failure output.
TEST_F(VCDiffHTML1DeathTest, NullMatchCounts) {
  EXPECT_DEBUG_DEATH(encoder_.GetMatchCounts(NULL), "GetMatchCounts");
}
#endif  // GTEST_HAS_DEATH_TEST
856 
857 class VCDiffHTML2Test : public VCDiffMatchCountTest {
858  protected:
859   static const char kDictionary[];
860   static const char kTarget[];
861 
862   VCDiffHTML2Test();
~VCDiffHTML2Test()863   virtual ~VCDiffHTML2Test() { }
864 
865   void SimpleEncode();
866   void StreamingEncode();
867 
868   HashedDictionary hashed_dictionary_;
869   VCDiffStreamingEncoder encoder_;
870   VCDiffStreamingDecoder decoder_;
871   VCDiffEncoder simple_encoder_;
872   VCDiffDecoder simple_decoder_;
873 
874   string result_target_;
875 };
876 
877 const char VCDiffHTML2Test::kDictionary[] = "10\nThis is a test";
878 
879 const char VCDiffHTML2Test::kTarget[] = "This is a test!!!\n";
880 
VCDiffHTML2Test()881 VCDiffHTML2Test::VCDiffHTML2Test()
882     : hashed_dictionary_(kDictionary, sizeof(kDictionary)),
883       encoder_(&hashed_dictionary_,
884                VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM,
885                /* look_for_target_matches = */ true),
886       simple_encoder_(kDictionary, sizeof(kDictionary)) {
887   EXPECT_TRUE(hashed_dictionary_.Init());
888 }
889 
SimpleEncode()890 void VCDiffHTML2Test::SimpleEncode() {
891   EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta()));
892   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
893             delta_size());
894   EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
895                                      sizeof(kDictionary),
896                                      delta_as_const(),
897                                      &result_target_));
898   EXPECT_EQ(kTarget, result_target_);
899 }
900 
StreamingEncode()901 void VCDiffHTML2Test::StreamingEncode() {
902   EXPECT_TRUE(encoder_.StartEncoding(delta()));
903   EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta()));
904   EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize,
905             delta_size());
906   EXPECT_TRUE(simple_decoder_.Decode(kDictionary,
907                                      sizeof(kDictionary),
908                                      delta_as_const(),
909                                      &result_target_));
910   EXPECT_EQ(kTarget, result_target_);
911 }
912 
// Checks, byte by byte, the delta file the simple encoder produces for
// kTarget.  Only block sizes of 8 or less are expected to find the 14-byte
// match that starts at dictionary offset 3.
TEST_F(VCDiffHTML2Test, VerifyOutputOfSimpleEncoder) {
  SimpleEncode();
  const size_t target_length = strlen(kTarget);

  // File header and start of the window header: independent of block size.
  ExpectByte(0xD6);  // 'V' | 0x80
  ExpectByte(0xC3);  // 'C' | 0x80
  ExpectByte(0xC4);  // 'D' | 0x80
  ExpectByte(0x00);  // Simple encoder never uses interleaved format
  ExpectByte(0x00);  // Hdr_Indicator
  ExpectByte(VCD_SOURCE);  // Win_Indicator: VCD_SOURCE (dictionary)
  ExpectByte(sizeof(kDictionary));  // Dictionary length
  ExpectByte(0x00);  // Source segment position: start of dictionary

  if (BlockHash::kBlockSize > 8) {
    // Larger block sizes will not catch any matches, so the whole target
    // is emitted as a single ADD.
    ExpectSize(target_length + 7);  // Delta encoding len
    ExpectSize(target_length);  // Size of the target window
    ExpectByte(0x00);  // Delta_indicator (no compression)
    ExpectSize(target_length);  // Length of the data section
    ExpectByte(0x02);  // Length of the instructions section
    ExpectByte(0x00);  // Length of the address section
    // Data section
    ExpectString(kTarget);
    // Instructions section
    ExpectByte(0x01);  // ADD size 0
    ExpectSize(target_length);  // Size of ADD
  } else {
    ExpectByte(12);  // Length of the delta encoding
    ExpectSize(target_length);  // Size of the target window
    ExpectByte(0x00);  // Delta_indicator (no compression)
    ExpectByte(0x04);  // Length of the data section
    ExpectByte(0x02);  // Length of the instructions section
    ExpectByte(0x01);  // Length of the address section
    // Data section: the four bytes that follow the copied text
    for (int i = 0; i < 3; ++i) {
      ExpectByte('!');
    }
    ExpectByte('\n');
    // Instructions section
    ExpectByte(0x1E);  // COPY size 14 mode VCD_SELF
    ExpectByte(0x05);  // ADD size 4
    // Address section
    ExpectByte(0x03);  // COPY address (3) mode VCD_SELF
  }
  ExpectNoMoreBytes();
}
953 
// Checks, byte by byte, the output of the streaming encoder, which this
// fixture configures for the interleaved format with a checksum.  In this
// format the data and address sections are empty and their contents appear
// inline in the instructions section.
TEST_F(VCDiffHTML2Test, VerifyOutputWithChecksum) {
  StreamingEncode();
  const size_t target_length = strlen(kTarget);
  const VCDChecksum html2_checksum = ComputeAdler32(kTarget, target_length);
  // The delta lengths expected below assume a five-byte encoded checksum.
  CHECK_EQ(5, VarintBE<int64_t>::Length(html2_checksum));

  // File header and start of the window header: independent of block size.
  ExpectByte(0xD6);  // 'V' | 0x80
  ExpectByte(0xC3);  // 'C' | 0x80
  ExpectByte(0xC4);  // 'D' | 0x80
  ExpectByte('S');  // Format extensions
  ExpectByte(0x00);  // Hdr_Indicator
  ExpectByte(VCD_SOURCE | VCD_CHECKSUM);  // Win_Indicator
  ExpectByte(sizeof(kDictionary));  // Dictionary length
  ExpectByte(0x00);  // Source segment position: start of dictionary

  if (BlockHash::kBlockSize > 8) {
    // Larger block sizes will not catch any matches.
    ExpectSize(target_length + 12);  // Delta encoding len
    ExpectSize(target_length);  // Size of the target window
    ExpectByte(0x00);  // Delta_indicator (no compression)
    ExpectByte(0x00);  // Length of the data section
    ExpectSize(0x02 + target_length);  // Interleaved
    ExpectByte(0x00);  // Length of the address section
    ExpectChecksum(html2_checksum);
    // Interleaved section: the ADD instruction, then its data
    ExpectByte(0x01);  // ADD size 0
    ExpectSize(target_length);  // Size of ADD
    ExpectString(kTarget);
  } else {
    ExpectByte(17);  // Length of the delta encoding
    ExpectSize(target_length);  // Size of the target window
    ExpectByte(0x00);  // Delta_indicator (no compression)
    ExpectByte(0x00);  // Length of the data section
    ExpectByte(0x07);  // Length of the instructions section
    ExpectByte(0x00);  // Length of the address section
    ExpectChecksum(html2_checksum);
    // Interleaved section: each instruction is followed by its address
    // or data
    ExpectByte(0x1E);  // COPY size 14 mode VCD_SELF
    ExpectByte(0x03);  // COPY address (3) mode VCD_SELF
    ExpectByte(0x05);  // ADD size 4
    for (int i = 0; i < 3; ++i) {
      ExpectByte('!');
    }
    ExpectByte('\n');
  }
  ExpectNoMoreBytes();
}
998 
// Checks the match-length statistics reported by the streaming encoder
// after encoding kTarget.
TEST_F(VCDiffHTML2Test, MatchCounts) {
  StreamingEncode();
  encoder_.GetMatchCounts(&actual_match_counts_);
  // Only block sizes of 8 or less find the 14-byte match; with larger
  // block sizes no match is expected.
  const bool expect_dictionary_match = (BlockHash::kBlockSize <= 8);
  if (expect_dictionary_match) {
    ExpectMatch(14);
  }
  VerifyMatchCounts();
}
1007 
1008 }  // anonymous namespace
1009 }  // namespace open_vcdiff
1010