// Copyright 2008 Google Inc. // Author: Lincoln Smith // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #include "google/vcencoder.h" #include // free, posix_memalign #include // memcpy #include #include #include #include "blockhash.h" #include "checksum.h" #include "testing.h" #include "varint_bigendian.h" #include "google/vcdecoder.h" #include "vcdiff_defs.h" #ifdef HAVE_EXT_ROPE #include #include "output_string_crope.h" using __gnu_cxx::crope; #endif // HAVE_EXT_ROPE #ifdef HAVE_MALLOC_H #include #endif // HAVE_MALLOC_H #ifdef HAVE_SYS_MMAN_H #if !defined(_XOPEN_SOURCE) || _XOPEN_SOURCE < 600 #undef _XOPEN_SOURCE #define _XOPEN_SOURCE 600 // posix_memalign #endif #include // mprotect #endif // HAVE_SYS_MMAN_H #ifdef HAVE_UNISTD_H #include // getpagesize #endif // HAVE_UNISTD_H namespace open_vcdiff { namespace { static const size_t kFileHeaderSize = sizeof(DeltaFileHeader); // This is to check the maximum possible encoding size // if using a single ADD instruction, so assume that the // dictionary size, the length of the ADD data, the size // of the target window, and the length of the delta window // are all two-byte Varints, that is, 128 <= length < 4096. // This figure includes three extra bytes for a zero-sized // ADD instruction with a two-byte Varint explicit size. // Any additional COPY & ADD instructions must reduce // the length of the encoding from this maximum. static const size_t kWindowHeaderSize = 21; class VerifyEncodedBytesTest : public testing::Test { public: typedef std::string string; VerifyEncodedBytesTest() : delta_index_(0) { } virtual ~VerifyEncodedBytesTest() { } void ExpectByte(unsigned char b) { EXPECT_EQ(b, static_cast(delta_[delta_index_])); ++delta_index_; } void ExpectString(const char* s) { const size_t size = strlen(s); // don't include terminating NULL char EXPECT_EQ(s, string(delta_data() + delta_index_, size)); delta_index_ += size; } void ExpectNoMoreBytes() { EXPECT_EQ(delta_index_, delta_size()); } void ExpectSize(size_t size) { const char* delta_size_pos = &delta_[delta_index_]; EXPECT_EQ(size, static_cast( VarintBE::Parse(delta_data() + delta_size(), &delta_size_pos))); delta_index_ = delta_size_pos - delta_data(); } void ExpectChecksum(VCDChecksum checksum) { const char* delta_checksum_pos = &delta_[delta_index_]; EXPECT_EQ(checksum, static_cast( VarintBE::Parse(delta_data() + delta_size(), &delta_checksum_pos))); delta_index_ = delta_checksum_pos - delta_data(); } const string& delta_as_const() const { return delta_; } string* delta() { return &delta_; } const char* delta_data() const { return delta_as_const().data(); } size_t delta_size() const { return delta_as_const().size(); } private: string delta_; size_t delta_index_; }; class VCDiffEncoderTest : public VerifyEncodedBytesTest { protected: static const char kDictionary[]; static const char kTarget[]; static const char kJSONDiff[]; VCDiffEncoderTest(); virtual ~VCDiffEncoderTest() { } void TestWithFixedChunkSize(VCDiffStreamingEncoder *encoder, VCDiffStreamingDecoder *decoder, size_t chunk_size); void TestWithEncodedChunkVector(size_t chunk_size); HashedDictionary hashed_dictionary_; VCDiffStreamingEncoder encoder_; VCDiffStreamingDecoder decoder_; VCDiffEncoder simple_encoder_; VCDiffDecoder simple_decoder_; VCDiffStreamingEncoder json_encoder_; string result_target_; }; const char VCDiffEncoderTest::kDictionary[] = "\"Just the place for a Snark!\" the Bellman cried,\n" "As he landed his crew with care;\n" "Supporting each man on the top of the tide\n" "By a finger entwined in his hair.\n"; const char VCDiffEncoderTest::kTarget[] = "\"Just the place for a Snark! I have said it twice:\n" "That alone should encourage the crew.\n" "Just the place for a Snark! I have said it thrice:\n" "What I tell you three times is true.\"\n"; const char VCDiffEncoderTest::kJSONDiff[] = "[\"\\\"Just the place for a Snark! I have said it twice:\\n" "That alone should encourage the crew.\\n\"," "161,44," "\"hrice:\\nWhat I tell you three times is true.\\\"\\n\",]"; VCDiffEncoderTest::VCDiffEncoderTest() : hashed_dictionary_(kDictionary, sizeof(kDictionary)), encoder_(&hashed_dictionary_, VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, /* look_for_target_matches = */ true), simple_encoder_(kDictionary, sizeof(kDictionary)), json_encoder_(&hashed_dictionary_, VCD_FORMAT_JSON, /* look_for_target_matches = */ true) { EXPECT_TRUE(hashed_dictionary_.Init()); } TEST_F(VCDiffEncoderTest, EncodeBeforeStartEncoding) { EXPECT_FALSE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); } TEST_F(VCDiffEncoderTest, FinishBeforeStartEncoding) { EXPECT_FALSE(encoder_.FinishEncoding(delta())); } TEST_F(VCDiffEncoderTest, EncodeDecodeNothing) { HashedDictionary nothing_dictionary("", 0); EXPECT_TRUE(nothing_dictionary.Init()); VCDiffStreamingEncoder nothing_encoder(¬hing_dictionary, VCD_STANDARD_FORMAT, false); EXPECT_TRUE(nothing_encoder.StartEncoding(delta())); EXPECT_TRUE(nothing_encoder.FinishEncoding(delta())); decoder_.StartDecoding("", 0); EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), delta_size(), &result_target_)); EXPECT_TRUE(decoder_.FinishDecoding()); EXPECT_TRUE(result_target_.empty()); } TEST_F(VCDiffEncoderTest, EncodeNothingJSON) { HashedDictionary nothing_dictionary("", 0); EXPECT_TRUE(nothing_dictionary.Init()); VCDiffStreamingEncoder nothing_encoder(¬hing_dictionary, VCD_FORMAT_JSON, false); EXPECT_TRUE(nothing_encoder.StartEncoding(delta())); EXPECT_TRUE(nothing_encoder.FinishEncoding(delta())); EXPECT_EQ("", delta_as_const()); } // A NULL dictionary pointer is legal as long as the dictionary size is 0. TEST_F(VCDiffEncoderTest, EncodeDecodeNullDictionaryPtr) { HashedDictionary null_dictionary(NULL, 0); EXPECT_TRUE(null_dictionary.Init()); VCDiffStreamingEncoder null_encoder(&null_dictionary, VCD_STANDARD_FORMAT, false); EXPECT_TRUE(null_encoder.StartEncoding(delta())); EXPECT_TRUE(null_encoder.EncodeChunk(kTarget, strlen(kTarget), delta())); EXPECT_TRUE(null_encoder.FinishEncoding(delta())); EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, delta_size()); decoder_.StartDecoding(NULL, 0); EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), delta_size(), &result_target_)); EXPECT_TRUE(decoder_.FinishDecoding()); EXPECT_EQ(kTarget, result_target_); } TEST_F(VCDiffEncoderTest, EncodeDecodeSimple) { EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, delta_size()); EXPECT_TRUE(simple_decoder_.Decode(kDictionary, sizeof(kDictionary), delta_as_const(), &result_target_)); EXPECT_EQ(kTarget, result_target_); } TEST_F(VCDiffEncoderTest, EncodeDecodeInterleaved) { simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED); EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, delta_size()); EXPECT_TRUE(simple_decoder_.Decode(kDictionary, sizeof(kDictionary), delta_as_const(), &result_target_)); EXPECT_EQ(kTarget, result_target_); } TEST_F(VCDiffEncoderTest, EncodeDecodeInterleavedChecksum) { simple_encoder_.SetFormatFlags(VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM); EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, delta_size()); EXPECT_TRUE(simple_decoder_.Decode(kDictionary, sizeof(kDictionary), delta_as_const(), &result_target_)); EXPECT_EQ(kTarget, result_target_); } TEST_F(VCDiffEncoderTest, EncodeDecodeSingleChunk) { EXPECT_TRUE(encoder_.StartEncoding(delta())); EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); EXPECT_TRUE(encoder_.FinishEncoding(delta())); EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, delta_size()); decoder_.StartDecoding(kDictionary, sizeof(kDictionary)); EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), delta_size(), &result_target_)); EXPECT_TRUE(decoder_.FinishDecoding()); EXPECT_EQ(kTarget, result_target_); } TEST_F(VCDiffEncoderTest, EncodeSimpleJSON) { EXPECT_TRUE(json_encoder_.StartEncoding(delta())); EXPECT_TRUE(json_encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); EXPECT_TRUE(json_encoder_.FinishEncoding(delta())); EXPECT_EQ(kJSONDiff, delta_as_const()); } TEST_F(VCDiffEncoderTest, EncodeDecodeSeparate) { string delta_start, delta_encode, delta_finish; EXPECT_TRUE(encoder_.StartEncoding(&delta_start)); EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), &delta_encode)); EXPECT_TRUE(encoder_.FinishEncoding(&delta_finish)); EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, delta_start.size() + delta_encode.size() + delta_finish.size()); decoder_.StartDecoding(kDictionary, sizeof(kDictionary)); EXPECT_TRUE(decoder_.DecodeChunk(delta_start.data(), delta_start.size(), &result_target_)); EXPECT_TRUE(decoder_.DecodeChunk(delta_encode.data(), delta_encode.size(), &result_target_)); EXPECT_TRUE(decoder_.DecodeChunk(delta_finish.data(), delta_finish.size(), &result_target_)); EXPECT_TRUE(decoder_.FinishDecoding()); EXPECT_EQ(kTarget, result_target_); } #ifdef HAVE_EXT_ROPE // Test that the crope class can be used in place of a string for encoding // and decoding. TEST_F(VCDiffEncoderTest, EncodeDecodeCrope) { crope delta_crope, result_crope; EXPECT_TRUE(encoder_.StartEncoding(&delta_crope)); EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), &delta_crope)); EXPECT_TRUE(encoder_.FinishEncoding(&delta_crope)); EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, delta_crope.size()); decoder_.StartDecoding(kDictionary, sizeof(kDictionary)); // crope can't guarantee that its characters are contiguous, so the decoding // has to be done byte-by-byte. for (crope::const_iterator it = delta_crope.begin(); it != delta_crope.end(); it++) { const char this_char = *it; EXPECT_TRUE(decoder_.DecodeChunk(&this_char, 1, &result_crope)); } EXPECT_TRUE(decoder_.FinishDecoding()); crope expected_target(kTarget); EXPECT_EQ(expected_target, result_crope); } #endif // HAVE_EXT_ROPE // Test the encoding and decoding with a fixed chunk size. // If decoder is null, only test the encoding. void VCDiffEncoderTest::TestWithFixedChunkSize(VCDiffStreamingEncoder *encoder, VCDiffStreamingDecoder *decoder, size_t chunk_size) { delta()->clear(); EXPECT_TRUE(encoder->StartEncoding(delta())); for (size_t chunk_start_index = 0; chunk_start_index < strlen(kTarget); chunk_start_index += chunk_size) { size_t this_chunk_size = chunk_size; const size_t bytes_available = strlen(kTarget) - chunk_start_index; if (this_chunk_size > bytes_available) { this_chunk_size = bytes_available; } EXPECT_TRUE(encoder->EncodeChunk(&kTarget[chunk_start_index], this_chunk_size, delta())); } EXPECT_TRUE(encoder->FinishEncoding(delta())); const size_t num_windows = (strlen(kTarget) / chunk_size) + 1; const size_t size_of_windows = strlen(kTarget) + (kWindowHeaderSize * num_windows); EXPECT_GE(kFileHeaderSize + size_of_windows, delta_size()); result_target_.clear(); if (!decoder) return; decoder->StartDecoding(kDictionary, sizeof(kDictionary)); for (size_t chunk_start_index = 0; chunk_start_index < delta_size(); chunk_start_index += chunk_size) { size_t this_chunk_size = chunk_size; const size_t bytes_available = delta_size() - chunk_start_index; if (this_chunk_size > bytes_available) { this_chunk_size = bytes_available; } EXPECT_TRUE(decoder->DecodeChunk(delta_data() + chunk_start_index, this_chunk_size, &result_target_)); } EXPECT_TRUE(decoder->FinishDecoding()); EXPECT_EQ(kTarget, result_target_); } TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizes) { // These specific chunk sizes have failed in the past TestWithFixedChunkSize(&encoder_, &decoder_, 6); TestWithFixedChunkSize(&encoder_, &decoder_, 45); TestWithFixedChunkSize(&encoder_, &decoder_, 60); // Now loop through all possible chunk sizes for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) { TestWithFixedChunkSize(&encoder_, &decoder_, chunk_size); } } TEST_F(VCDiffEncoderTest, EncodeFixedChunkSizesJSON) { // There is no JSON decoder; these diffs are created by hand. TestWithFixedChunkSize(&json_encoder_, NULL, 6); EXPECT_EQ("[\"\\\"Just \",\"the pl\",\"ace fo\",\"r a Sn\",\"ark! I\"," "\" have \",\"said i\",\"t twic\",\"e:\\nTha\",\"t alon\"," "\"e shou\",\"ld enc\",\"ourage\",\" the c\",\"rew.\\nJ\"," "\"ust th\",\"e plac\",\"e for \",\"a Snar\",\"k! I h\"," "\"ave sa\",\"id it \",\"thrice\",\":\\nWhat\",\" I tel\"," "\"l you \",\"three \",\"times \",\"is tru\",\"e.\\\"\\n\",]", delta_as_const()); TestWithFixedChunkSize(&json_encoder_, NULL, 45); EXPECT_EQ("[\"\\\"Just the place for a Snark! I have said it t\"," "\"wice:\\nThat alone should encourage the crew.\\nJ\"," "\"ust the place for a Snark! I have said it thr\",\"ice:\\n" "What I tell you three times is true.\\\"\\n\",]", delta_as_const()); TestWithFixedChunkSize(&json_encoder_, NULL, 60); EXPECT_EQ("[\"\\\"Just the place for a Snark! I have said it twice:\\n" "That alon\",\"e should encourage the crew.\\n" "Just the place for a Snark! I h\",\"ave said it thrice:\\n" "What I tell you three times is true.\\\"\\n\",]", delta_as_const()); } // If --allow_vcd_target=false is specified, the decoder will throw away some of // the internally-stored decoded target beyond the current window. Try // different numbers of encoded window sizes to make sure that this behavior // does not affect the results. TEST_F(VCDiffEncoderTest, EncodeDecodeFixedChunkSizesNoVcdTarget) { decoder_.SetAllowVcdTarget(false); // Loop through all possible chunk sizes for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) { TestWithFixedChunkSize(&encoder_, &decoder_, chunk_size); } } // Splits the text to be encoded into fixed-size chunks. Encodes each // chunk and puts it into a vector of strings. Then decodes each string // in the vector and appends the result into result_target_. void VCDiffEncoderTest::TestWithEncodedChunkVector(size_t chunk_size) { std::vector encoded_chunks; string this_encoded_chunk; size_t total_chunk_size = 0; EXPECT_TRUE(encoder_.StartEncoding(&this_encoded_chunk)); encoded_chunks.push_back(this_encoded_chunk); total_chunk_size += this_encoded_chunk.size(); for (size_t chunk_start_index = 0; chunk_start_index < strlen(kTarget); chunk_start_index += chunk_size) { size_t this_chunk_size = chunk_size; const size_t bytes_available = strlen(kTarget) - chunk_start_index; if (this_chunk_size > bytes_available) { this_chunk_size = bytes_available; } this_encoded_chunk.clear(); EXPECT_TRUE(encoder_.EncodeChunk(&kTarget[chunk_start_index], this_chunk_size, &this_encoded_chunk)); encoded_chunks.push_back(this_encoded_chunk); total_chunk_size += this_encoded_chunk.size(); } this_encoded_chunk.clear(); EXPECT_TRUE(encoder_.FinishEncoding(&this_encoded_chunk)); encoded_chunks.push_back(this_encoded_chunk); total_chunk_size += this_encoded_chunk.size(); const size_t num_windows = (strlen(kTarget) / chunk_size) + 1; const size_t size_of_windows = strlen(kTarget) + (kWindowHeaderSize * num_windows); EXPECT_GE(kFileHeaderSize + size_of_windows, total_chunk_size); result_target_.clear(); decoder_.StartDecoding(kDictionary, sizeof(kDictionary)); for (std::vector::iterator it = encoded_chunks.begin(); it != encoded_chunks.end(); ++it) { EXPECT_TRUE(decoder_.DecodeChunk(it->data(), it->size(), &result_target_)); } EXPECT_TRUE(decoder_.FinishDecoding()); EXPECT_EQ(kTarget, result_target_); } TEST_F(VCDiffEncoderTest, EncodeDecodeStreamOfChunks) { // Loop through all possible chunk sizes for (size_t chunk_size = 1; chunk_size < strlen(kTarget); ++chunk_size) { TestWithEncodedChunkVector(chunk_size); } } // Verify that HashedDictionary stores a copy of the dictionary text, // rather than just storing a pointer to it. If the dictionary buffer // is overwritten after creating a HashedDictionary from it, it shouldn't // affect an encoder that uses that HashedDictionary. TEST_F(VCDiffEncoderTest, DictionaryBufferOverwritten) { string dictionary_copy(kDictionary, sizeof(kDictionary)); HashedDictionary hd_copy(dictionary_copy.data(), dictionary_copy.size()); EXPECT_TRUE(hd_copy.Init()); VCDiffStreamingEncoder copy_encoder(&hd_copy, VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, /* look_for_target_matches = */ true); // Produce a reference version of the encoded text. string delta_before; EXPECT_TRUE(copy_encoder.StartEncoding(&delta_before)); EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget, strlen(kTarget), &delta_before)); EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_before)); EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, delta_before.size()); // Overwrite the dictionary text with all 'Q' characters. dictionary_copy.replace(0, dictionary_copy.size(), dictionary_copy.size(), 'Q'); // When the encoder is used on the same target text after overwriting // the dictionary, it should produce the same encoded output. string delta_after; EXPECT_TRUE(copy_encoder.StartEncoding(&delta_after)); EXPECT_TRUE(copy_encoder.EncodeChunk(kTarget, strlen(kTarget), &delta_after)); EXPECT_TRUE(copy_encoder.FinishEncoding(&delta_after)); EXPECT_EQ(delta_before, delta_after); } // Binary data test part 1: The dictionary and target data should not // be treated as NULL-terminated. An embedded NULL should be handled like // any other byte of data. TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNULLs) { const char embedded_null_dictionary_text[] = { 0x00, 0xFF, 0xFE, 0xFD, 0x00, 0xFD, 0xFE, 0xFF, 0x00, 0x03 }; const char embedded_null_target[] = { 0xFD, 0x00, 0xFD, 0xFE, 0x03, 0x00, 0x01, 0x00 }; CHECK_EQ(10, sizeof(embedded_null_dictionary_text)); CHECK_EQ(8, sizeof(embedded_null_target)); HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text, sizeof(embedded_null_dictionary_text)); EXPECT_TRUE(embedded_null_dictionary.Init()); VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary, VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, /* look_for_target_matches = */ true); EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta())); EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target, sizeof(embedded_null_target), delta())); EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta())); decoder_.StartDecoding(embedded_null_dictionary_text, sizeof(embedded_null_dictionary_text)); EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), delta_size(), &result_target_)); EXPECT_TRUE(decoder_.FinishDecoding()); EXPECT_EQ(sizeof(embedded_null_target), result_target_.size()); EXPECT_EQ(string(embedded_null_target, sizeof(embedded_null_target)), result_target_); } // Binary data test part 2: An embedded CR or LF should be handled like // any other byte of data. No text-processing of the data should occur. TEST_F(VCDiffEncoderTest, DictionaryHasEmbeddedNewlines) { const char embedded_null_dictionary_text[] = { 0x0C, 0xFF, 0xFE, 0x0C, 0x00, 0x0A, 0xFE, 0xFF, 0x00, 0x0A }; const char embedded_null_target[] = { 0x0C, 0x00, 0x0A, 0xFE, 0x03, 0x00, 0x0A, 0x00 }; CHECK_EQ(10, sizeof(embedded_null_dictionary_text)); CHECK_EQ(8, sizeof(embedded_null_target)); HashedDictionary embedded_null_dictionary(embedded_null_dictionary_text, sizeof(embedded_null_dictionary_text)); EXPECT_TRUE(embedded_null_dictionary.Init()); VCDiffStreamingEncoder embedded_null_encoder(&embedded_null_dictionary, VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, /* look_for_target_matches = */ true); EXPECT_TRUE(embedded_null_encoder.StartEncoding(delta())); EXPECT_TRUE(embedded_null_encoder.EncodeChunk(embedded_null_target, sizeof(embedded_null_target), delta())); EXPECT_TRUE(embedded_null_encoder.FinishEncoding(delta())); decoder_.StartDecoding(embedded_null_dictionary_text, sizeof(embedded_null_dictionary_text)); EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), delta_size(), &result_target_)); EXPECT_TRUE(decoder_.FinishDecoding()); EXPECT_EQ(sizeof(embedded_null_target), result_target_.size()); EXPECT_EQ(string(embedded_null_target, sizeof(embedded_null_target)), result_target_); } TEST_F(VCDiffEncoderTest, UsingWideCharacters) { const wchar_t wchar_dictionary_text[] = L"\"Just the place for a Snark!\" the Bellman cried,\n" L"As he landed his crew with care;\n" L"Supporting each man on the top of the tide\n" L"By a finger entwined in his hair.\n"; const wchar_t wchar_target[] = L"\"Just the place for a Snark! I have said it twice:\n" L"That alone should encourage the crew.\n" L"Just the place for a Snark! I have said it thrice:\n" L"What I tell you three times is true.\"\n"; HashedDictionary wchar_dictionary((const char*) wchar_dictionary_text, sizeof(wchar_dictionary_text)); EXPECT_TRUE(wchar_dictionary.Init()); VCDiffStreamingEncoder wchar_encoder(&wchar_dictionary, VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, /* look_for_target_matches = */ false); EXPECT_TRUE(wchar_encoder.StartEncoding(delta())); EXPECT_TRUE(wchar_encoder.EncodeChunk((const char*) wchar_target, sizeof(wchar_target), delta())); EXPECT_TRUE(wchar_encoder.FinishEncoding(delta())); decoder_.StartDecoding((const char*) wchar_dictionary_text, sizeof(wchar_dictionary_text)); EXPECT_TRUE(decoder_.DecodeChunk(delta_data(), delta_size(), &result_target_)); EXPECT_TRUE(decoder_.FinishDecoding()); const wchar_t* result_as_wchar = (const wchar_t*) result_target_.data(); EXPECT_EQ(wcslen(wchar_target), wcslen(result_as_wchar)); EXPECT_EQ(0, wcscmp(wchar_target, result_as_wchar)); } #if defined(HAVE_MPROTECT) && \ (defined(HAVE_MEMALIGN) || defined(HAVE_POSIX_MEMALIGN)) // Bug 1220602: Make sure the encoder doesn't read past the end of the input // buffer. TEST_F(VCDiffEncoderTest, ShouldNotReadPastEndOfBuffer) { const size_t target_size = strlen(kTarget); // Allocate two memory pages. const int page_size = getpagesize(); void* two_pages = NULL; #ifdef HAVE_POSIX_MEMALIGN posix_memalign(&two_pages, page_size, 2 * page_size); #else // !HAVE_POSIX_MEMALIGN two_pages = memalign(page_size, 2 * page_size); #endif // HAVE_POSIX_MEMALIGN char* const first_page = reinterpret_cast(two_pages); char* const second_page = first_page + page_size; // Place the target string at the end of the first page. char* const target_with_guard = second_page - target_size; memcpy(target_with_guard, kTarget, target_size); // Make the second page unreadable. mprotect(second_page, page_size, PROT_NONE); // Now perform the encode operation, which will cause a segmentation fault // if it reads past the end of the buffer. EXPECT_TRUE(encoder_.StartEncoding(delta())); EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta())); EXPECT_TRUE(encoder_.FinishEncoding(delta())); // Undo the mprotect. mprotect(second_page, page_size, PROT_READ|PROT_WRITE); free(two_pages); } TEST_F(VCDiffEncoderTest, ShouldNotReadPastBeginningOfBuffer) { const size_t target_size = strlen(kTarget); // Allocate two memory pages. const int page_size = getpagesize(); void* two_pages = NULL; #ifdef HAVE_POSIX_MEMALIGN posix_memalign(&two_pages, page_size, 2 * page_size); #else // !HAVE_POSIX_MEMALIGN two_pages = memalign(page_size, 2 * page_size); #endif // HAVE_POSIX_MEMALIGN char* const first_page = reinterpret_cast(two_pages); char* const second_page = first_page + page_size; // Make the first page unreadable. mprotect(first_page, page_size, PROT_NONE); // Place the target string at the beginning of the second page. char* const target_with_guard = second_page; memcpy(target_with_guard, kTarget, target_size); // Now perform the encode operation, which will cause a segmentation fault // if it reads past the beginning of the buffer. EXPECT_TRUE(encoder_.StartEncoding(delta())); EXPECT_TRUE(encoder_.EncodeChunk(target_with_guard, target_size, delta())); EXPECT_TRUE(encoder_.FinishEncoding(delta())); // Undo the mprotect. mprotect(first_page, page_size, PROT_READ|PROT_WRITE); free(two_pages); } #endif // HAVE_MPROTECT && (HAVE_MEMALIGN || HAVE_POSIX_MEMALIGN) class VCDiffHTML1Test : public VerifyEncodedBytesTest { protected: static const char kDictionary[]; static const char kTarget[]; static const char kRedundantTarget[]; VCDiffHTML1Test(); virtual ~VCDiffHTML1Test() { } void SimpleEncode(); void StreamingEncode(); HashedDictionary hashed_dictionary_; VCDiffStreamingEncoder encoder_; VCDiffStreamingDecoder decoder_; VCDiffEncoder simple_encoder_; VCDiffDecoder simple_decoder_; string result_target_; }; const char VCDiffHTML1Test::kDictionary[] = "This part from the dict
"; const char VCDiffHTML1Test::kTarget[] = "This part from the dict
\n" "And this part is not..."; const char VCDiffHTML1Test::kRedundantTarget[] = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; // 256 VCDiffHTML1Test::VCDiffHTML1Test() : hashed_dictionary_(kDictionary, sizeof(kDictionary)), encoder_(&hashed_dictionary_, VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, /* look_for_target_matches = */ true), simple_encoder_(kDictionary, sizeof(kDictionary)) { EXPECT_TRUE(hashed_dictionary_.Init()); } void VCDiffHTML1Test::SimpleEncode() { EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, delta_size()); EXPECT_TRUE(simple_decoder_.Decode(kDictionary, sizeof(kDictionary), delta_as_const(), &result_target_)); EXPECT_EQ(kTarget, result_target_); } void VCDiffHTML1Test::StreamingEncode() { EXPECT_TRUE(encoder_.StartEncoding(delta())); EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); EXPECT_TRUE(encoder_.FinishEncoding(delta())); } TEST_F(VCDiffHTML1Test, CheckOutputOfSimpleEncoder) { SimpleEncode(); // These values do not depend on the block size used for encoding ExpectByte(0xD6); // 'V' | 0x80 ExpectByte(0xC3); // 'C' | 0x80 ExpectByte(0xC4); // 'D' | 0x80 ExpectByte(0x00); // Simple encoder never uses interleaved format ExpectByte(0x00); // Hdr_Indicator ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) ExpectByte(sizeof(kDictionary)); // Dictionary length ExpectByte(0x00); // Source segment position: start of dictionary if (BlockHash::kBlockSize < 16) { // A medium block size will catch the "his part " match. ExpectByte(0x22); // Length of the delta encoding ExpectSize(strlen(kTarget)); // Size of the target window ExpectByte(0x00); // Delta_indicator (no compression) ExpectByte(0x16); // Length of the data section ExpectByte(0x05); // Length of the instructions section ExpectByte(0x02); // Length of the address section // Data section ExpectString("\nAnd t"); // Data for 1st ADD ExpectString("is not..."); // Data for 2nd ADD // Instructions section ExpectByte(0x73); // COPY size 0 mode VCD_SAME(0) ExpectByte(0x38); // COPY size (56) ExpectByte(0x07); // ADD size 6 ExpectByte(0x19); // COPY size 9 mode VCD_SELF ExpectByte(0x11); // ADD size 16 // Address section ExpectByte(0x00); // COPY address (0) mode VCD_SAME(0) ExpectByte(0x17); // COPY address (23) mode VCD_SELF } else if (BlockHash::kBlockSize <= 56) { // Any block size up to 56 will catch the matching prefix string. ExpectByte(0x29); // Length of the delta encoding ExpectSize(strlen(kTarget)); // Size of the target window ExpectByte(0x00); // Delta_indicator (no compression) ExpectByte(0x1F); // Length of the data section ExpectByte(0x04); // Length of the instructions section ExpectByte(0x01); // Length of the address section ExpectString("\nAnd this part is not..."); // Data for ADD // Instructions section ExpectByte(0x73); // COPY size 0 mode VCD_SAME(0) ExpectByte(0x38); // COPY size (56) ExpectByte(0x01); // ADD size 0 ExpectByte(0x1F); // Size of ADD (31) // Address section ExpectByte(0x00); // COPY address (0) mode VCD_SAME(0) } else { // The matching string is 56 characters long, and the block size is // 64 or greater, so no match should be found. ExpectSize(strlen(kTarget) + 7); // Delta encoding len ExpectSize(strlen(kTarget)); // Size of the target window ExpectByte(0x00); // Delta_indicator (no compression) ExpectSize(strlen(kTarget)); // Length of the data section ExpectByte(0x02); // Length of the instructions section ExpectByte(0x00); // Length of the address section // Data section ExpectString(kTarget); ExpectByte(0x01); // ADD size 0 ExpectSize(strlen(kTarget)); } ExpectNoMoreBytes(); } TEST_F(VCDiffHTML1Test, SimpleEncoderPerformsTargetMatching) { EXPECT_TRUE(simple_encoder_.Encode(kRedundantTarget, strlen(kRedundantTarget), delta())); EXPECT_GE(strlen(kRedundantTarget) + kFileHeaderSize + kWindowHeaderSize, delta_size()); EXPECT_TRUE(simple_decoder_.Decode(kDictionary, sizeof(kDictionary), delta_as_const(), &result_target_)); EXPECT_EQ(kRedundantTarget, result_target_); // These values do not depend on the block size used for encoding ExpectByte(0xD6); // 'V' | 0x80 ExpectByte(0xC3); // 'C' | 0x80 ExpectByte(0xC4); // 'D' | 0x80 ExpectByte(0x00); // Simple encoder never uses interleaved format ExpectByte(0x00); // Hdr_Indicator ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) ExpectByte(sizeof(kDictionary)); // Dictionary length ExpectByte(0x00); // Source segment position: start of dictionary ExpectByte(0x0C); // Length of the delta encoding ExpectSize(strlen(kRedundantTarget)); // Size of the target window ExpectByte(0x00); // Delta_indicator (no compression) ExpectByte(0x01); // Length of the data section ExpectByte(0x04); // Length of the instructions section ExpectByte(0x01); // Length of the address section // Data section ExpectString("A"); // Data for ADD // Instructions section ExpectByte(0x02); // ADD size 1 ExpectByte(0x23); // COPY size 0 mode VCD_HERE ExpectSize(strlen(kRedundantTarget) - 1); // COPY size 255 // Address section ExpectByte(0x01); // COPY address (1) mode VCD_HERE ExpectNoMoreBytes(); } TEST_F(VCDiffHTML1Test, SimpleEncoderWithoutTargetMatching) { simple_encoder_.SetTargetMatching(false); EXPECT_TRUE(simple_encoder_.Encode(kRedundantTarget, strlen(kRedundantTarget), delta())); EXPECT_GE(strlen(kRedundantTarget) + kFileHeaderSize + kWindowHeaderSize, delta_size()); EXPECT_TRUE(simple_decoder_.Decode(kDictionary, sizeof(kDictionary), delta_as_const(), &result_target_)); EXPECT_EQ(kRedundantTarget, result_target_); // These values do not depend on the block size used for encoding ExpectByte(0xD6); // 'V' | 0x80 ExpectByte(0xC3); // 'C' | 0x80 ExpectByte(0xC4); // 'D' | 0x80 ExpectByte(0x00); // Simple encoder never uses interleaved format ExpectByte(0x00); // Hdr_Indicator ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) ExpectByte(sizeof(kDictionary)); // Dictionary length ExpectByte(0x00); // Source segment position: start of dictionary ExpectSize(strlen(kRedundantTarget) + 0x0A); // Length of the delta encoding ExpectSize(strlen(kRedundantTarget)); // Size of the target window ExpectByte(0x00); // Delta_indicator (no compression) ExpectSize(strlen(kRedundantTarget)); // Length of the data section ExpectByte(0x03); // Length of the instructions section ExpectByte(0x00); // Length of the address section // Data section ExpectString(kRedundantTarget); // Data for ADD // Instructions section ExpectByte(0x01); // ADD size 0 ExpectSize(strlen(kRedundantTarget)); // ADD size // Address section empty ExpectNoMoreBytes(); } class VCDiffHTML2Test : public VerifyEncodedBytesTest { protected: static const char kDictionary[]; static const char kTarget[]; VCDiffHTML2Test(); virtual ~VCDiffHTML2Test() { } void SimpleEncode(); void StreamingEncode(); HashedDictionary hashed_dictionary_; VCDiffStreamingEncoder encoder_; VCDiffStreamingDecoder decoder_; VCDiffEncoder simple_encoder_; VCDiffDecoder simple_decoder_; string result_target_; }; const char VCDiffHTML2Test::kDictionary[] = "10\nThis is a test"; const char VCDiffHTML2Test::kTarget[] = "This is a test!!!\n"; VCDiffHTML2Test::VCDiffHTML2Test() : hashed_dictionary_(kDictionary, sizeof(kDictionary)), encoder_(&hashed_dictionary_, VCD_FORMAT_INTERLEAVED | VCD_FORMAT_CHECKSUM, /* look_for_target_matches = */ true), simple_encoder_(kDictionary, sizeof(kDictionary)) { EXPECT_TRUE(hashed_dictionary_.Init()); } void VCDiffHTML2Test::SimpleEncode() { EXPECT_TRUE(simple_encoder_.Encode(kTarget, strlen(kTarget), delta())); EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, delta_size()); EXPECT_TRUE(simple_decoder_.Decode(kDictionary, sizeof(kDictionary), delta_as_const(), &result_target_)); EXPECT_EQ(kTarget, result_target_); } void VCDiffHTML2Test::StreamingEncode() { EXPECT_TRUE(encoder_.StartEncoding(delta())); EXPECT_TRUE(encoder_.EncodeChunk(kTarget, strlen(kTarget), delta())); EXPECT_GE(strlen(kTarget) + kFileHeaderSize + kWindowHeaderSize, delta_size()); EXPECT_TRUE(simple_decoder_.Decode(kDictionary, sizeof(kDictionary), delta_as_const(), &result_target_)); EXPECT_EQ(kTarget, result_target_); } TEST_F(VCDiffHTML2Test, VerifyOutputOfSimpleEncoder) { SimpleEncode(); // These values do not depend on the block size used for encoding ExpectByte(0xD6); // 'V' | 0x80 ExpectByte(0xC3); // 'C' | 0x80 ExpectByte(0xC4); // 'D' | 0x80 ExpectByte(0x00); // Simple encoder never uses interleaved format ExpectByte(0x00); // Hdr_Indicator ExpectByte(VCD_SOURCE); // Win_Indicator: VCD_SOURCE (dictionary) ExpectByte(sizeof(kDictionary)); // Dictionary length ExpectByte(0x00); // Source segment position: start of dictionary if (BlockHash::kBlockSize <= 8) { ExpectByte(12); // Length of the delta encoding ExpectSize(strlen(kTarget)); // Size of the target window ExpectByte(0x00); // Delta_indicator (no compression) ExpectByte(0x04); // Length of the data section ExpectByte(0x02); // Length of the instructions section ExpectByte(0x01); // Length of the address section ExpectByte('!'); ExpectByte('!'); ExpectByte('!'); ExpectByte('\n'); ExpectByte(0x1E); // COPY size 14 mode VCD_SELF ExpectByte(0x05); // ADD size 4 ExpectByte(0x03); // COPY address (3) mode VCD_SELF } else { // Larger block sizes will not catch any matches. ExpectSize(strlen(kTarget) + 7); // Delta encoding len ExpectSize(strlen(kTarget)); // Size of the target window ExpectByte(0x00); // Delta_indicator (no compression) ExpectSize(strlen(kTarget)); // Length of the data section ExpectByte(0x02); // Length of the instructions section ExpectByte(0x00); // Length of the address section // Data section ExpectString(kTarget); ExpectByte(0x01); // ADD size 0 ExpectSize(strlen(kTarget)); } ExpectNoMoreBytes(); } TEST_F(VCDiffHTML2Test, VerifyOutputWithChecksum) { StreamingEncode(); const VCDChecksum html2_checksum = ComputeAdler32(kTarget, strlen(kTarget)); CHECK_EQ(5, VarintBE::Length(html2_checksum)); // These values do not depend on the block size used for encoding ExpectByte(0xD6); // 'V' | 0x80 ExpectByte(0xC3); // 'C' | 0x80 ExpectByte(0xC4); // 'D' | 0x80 ExpectByte('S'); // Format extensions ExpectByte(0x00); // Hdr_Indicator ExpectByte(VCD_SOURCE | VCD_CHECKSUM); // Win_Indicator ExpectByte(sizeof(kDictionary)); // Dictionary length ExpectByte(0x00); // Source segment position: start of dictionary if (BlockHash::kBlockSize <= 8) { ExpectByte(17); // Length of the delta encoding ExpectSize(strlen(kTarget)); // Size of the target window ExpectByte(0x00); // Delta_indicator (no compression) ExpectByte(0x00); // Length of the data section ExpectByte(0x07); // Length of the instructions section ExpectByte(0x00); // Length of the address section ExpectChecksum(html2_checksum); ExpectByte(0x1E); // COPY size 14 mode VCD_SELF ExpectByte(0x03); // COPY address (3) mode VCD_SELF ExpectByte(0x05); // ADD size 4 ExpectByte('!'); ExpectByte('!'); ExpectByte('!'); ExpectByte('\n'); } else { // Larger block sizes will not catch any matches. ExpectSize(strlen(kTarget) + 12); // Delta encoding len ExpectSize(strlen(kTarget)); // Size of the target window ExpectByte(0x00); // Delta_indicator (no compression) ExpectByte(0x00); // Length of the data section ExpectSize(0x02 + strlen(kTarget)); // Interleaved ExpectByte(0x00); // Length of the address section ExpectChecksum(html2_checksum); // Data section ExpectByte(0x01); // ADD size 0 ExpectSize(strlen(kTarget)); ExpectString(kTarget); } ExpectNoMoreBytes(); } } // anonymous namespace } // namespace open_vcdiff