1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_UTIL_CRC32_H_ 16 #define ICING_UTIL_CRC32_H_ 17 18 #include <cstdint> 19 #include <string_view> 20 21 #include "icing/text_classifier/lib3/utils/base/statusor.h" 22 23 namespace icing { 24 namespace lib { 25 26 // Efficient mechanism to incrementally compute checksum of a file and keep it 27 // updated when its content changes. Internally uses zlib based crc32() 28 // implementation. 29 // 30 // See https://www.zlib.net/manual.html#Checksum for more details. 31 class Crc32 { 32 public: 33 // Default to the checksum of an empty string, that is "0". Crc32()34 Crc32() : crc_(0) {} 35 Crc32(uint32_t init_crc)36 explicit Crc32(uint32_t init_crc) : crc_(init_crc) {} 37 Crc32(std::string_view str)38 explicit Crc32(std::string_view str) : crc_(0) { Append(str); } 39 40 inline bool operator==(const Crc32& other) const { 41 return crc_ == other.Get(); 42 } 43 44 // Returns the checksum of all the data that has been processed till now. 45 uint32_t Get() const; 46 47 // Incrementally update the current checksum to reflect the fact that the 48 // underlying data has been appended with 'str'. It calculates a new crc32 49 // based on the current crc value and the newly appended string. 50 // 51 // NOTE: As this method accepts incremental appends, all these 3 will lead to 52 // the same checksum: 53 // 1) crc32.Append("AAA"); crc32.Append("BBB"); 54 // 2) crc32.Append("AAABBB"); 55 // 3) crc32.Append("AA"); crc32.Append("AB"); crc32.Append("BB"); 56 // 57 // NOTE: While this class internally uses zlib's crc32(), 58 // Crc32(base_crc).Append(str) is not the same as zlib::crc32(base_crc, str); 59 uint32_t Append(std::string_view str); 60 61 // Update a string's rolling crc when some content is modified in the middle 62 // at an offset. We need the xored_str, which is the new value xored with the 63 // original value. 64 // 65 // Original string: 66 // string(original_start | original_mid | original_end) 67 // -------------------------------------------> full_data_size 68 // ^ offset position 69 // 70 // Modified string: 71 // string(original_start | changed_mid | original_end) 72 // ^ offset position 73 // 74 // And where 75 // xored_str = changed_mid ^ original_mid 76 // xored_len = length(xored_str) 77 // full_data_size = the length of all the strings that have been Appended to 78 // generate the current checksum 79 // 80 // REQUIRES: offset position + xored_len <= full_data_size. 81 // 82 // E.g. 83 // Old data: ABCDEF; New data: ABXYZF 84 // 85 // Crc32 crc32; crc32.Append("ABCDEF"); 86 // crc32.UpdateWithXor("CDE" xor "XYZ", 6, 2); 87 // 88 // This is the same as 89 // Crc32 crc32; crc32.Append("ABXYZF"); 90 // 91 // See .cc file for implementation notes. 92 // 93 // Returns: 94 // Updated crc on success 95 // INVALID_ARGUMENT if offset position + xored_len > full_data_size 96 libtextclassifier3::StatusOr<uint32_t> UpdateWithXor( 97 std::string_view xored_str, int full_data_size, int position); 98 99 private: 100 uint32_t crc_; 101 }; 102 103 } // namespace lib 104 } // namespace icing 105 106 #endif // ICING_UTIL_CRC32_H_ 107