1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/file/file-backed-bitmap.h"
16
17 #include <cstdint>
18
19 #include "icing/text_classifier/lib3/utils/base/status.h"
20 #include "icing/text_classifier/lib3/utils/base/statusor.h"
21 #include "icing/absl_ports/canonical_errors.h"
22 #include "icing/absl_ports/str_cat.h"
23 #include "icing/file/filesystem.h"
24 #include "icing/file/memory-mapped-file.h"
25 #include "icing/legacy/core/icing-string-util.h"
26 #include "icing/util/crc32.h"
27 #include "icing/util/logging.h"
28 #include "icing/util/math-util.h"
29 #include "icing/util/status-macros.h"
30
31 namespace icing {
32 namespace lib {
33
GetBlockCapacity(int num_blocks)34 int FileBackedBitmap::GetBlockCapacity(int num_blocks) {
35 // The first block has a lower capacity due to the Header.
36 const int capacity_bytes = kBlockByteSize * num_blocks - kHeaderByteSize;
37 return capacity_bytes * 8;
38 }
39
40 libtextclassifier3::StatusOr<std::unique_ptr<FileBackedBitmap>>
Create(const Filesystem * filesystem,std::string_view file_path,MemoryMappedFile::Strategy mmap_strategy)41 FileBackedBitmap::Create(const Filesystem* filesystem,
42 std::string_view file_path,
43 MemoryMappedFile::Strategy mmap_strategy) {
44 if (mmap_strategy == MemoryMappedFile::Strategy::READ_WRITE_MANUAL_SYNC) {
45 return absl_ports::UnimplementedError(
46 "FileBackedBitmap currently doesn't support READ_WRITE_MANUAL_SYNC "
47 "mmap strategy.");
48 }
49
50 ICING_ASSIGN_OR_RETURN(
51 MemoryMappedFile mmapper,
52 MemoryMappedFile::Create(*filesystem, file_path, mmap_strategy));
53
54 auto bitmap = std::unique_ptr<FileBackedBitmap>(
55 new FileBackedBitmap(filesystem, file_path, std::move(mmapper)));
56
57 // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
58 // that can support error logging.
59 libtextclassifier3::Status status = bitmap->Initialize();
60 if (!status.ok()) {
61 ICING_LOG(ERROR) << status.error_message();
62 return status;
63 }
64 return bitmap;
65 }
66
FileBackedBitmap(const Filesystem * filesystem,std::string_view file_path,MemoryMappedFile && mmapper)67 FileBackedBitmap::FileBackedBitmap(const Filesystem* filesystem,
68 std::string_view file_path,
69 MemoryMappedFile&& mmapper)
70 : filesystem_(filesystem),
71 file_path_(file_path),
72 mmapper_(std::make_unique<MemoryMappedFile>(std::move(mmapper))) {}
73
~FileBackedBitmap()74 FileBackedBitmap::~FileBackedBitmap() {
75 // Only update if we have auto_sync setup, otherwise the checksum will be
76 // updated when the client calls PersistToDisk
77 if (mmapper_->strategy() ==
78 MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC) {
79 // Any valid, initialized file should at least have 1 block.
80 if (mmapper_->region_size() >= kBlockByteSize &&
81 header().version == kCurrentVersion &&
82 header().state == Header::ChecksumState::kStale) {
83 if (!PersistToDisk().ok()) {
84 ICING_LOG(WARNING)
85 << "Failed to persist bitmap to disk while destructing "
86 << file_path_;
87 }
88 }
89 }
90 }
91
header() const92 const FileBackedBitmap::Header& FileBackedBitmap::header() const {
93 return reinterpret_cast<const Header&>(*mmapper_->region());
94 }
95
mutable_header()96 FileBackedBitmap::Header* FileBackedBitmap::mutable_header() {
97 return reinterpret_cast<Header*>(mmapper_->mutable_region());
98 }
99
Initialize()100 libtextclassifier3::Status FileBackedBitmap::FileBackedBitmap::Initialize() {
101 ICING_VLOG(1) << "Initialize bitmap file: " << file_path_;
102
103 const bool is_new_bitmap = !filesystem_->FileExists(file_path_.c_str());
104
105 int64_t file_size = 0;
106 if (is_new_bitmap) {
107 file_size = kBlockByteSize;
108 if (!filesystem_->Grow(file_path_.c_str(), file_size)) {
109 return absl_ports::InternalError(IcingStringUtil::StringPrintf(
110 "Unable to create a minimal bitmap; "
111 "filename: %s; target size: %lld",
112 file_path_.c_str(), static_cast<long long>(file_size)));
113 }
114
115 ICING_VLOG(1) << "Creating new bitmap in file: " << file_path_
116 << " of size: " << file_size;
117 } else {
118 file_size = filesystem_->GetFileSize(file_path_.c_str());
119 if (file_size == Filesystem::kBadFileSize) {
120 return absl_ports::InternalError(IcingStringUtil::StringPrintf(
121 "File corrupted; filename: %s; size: %lld.", file_path_.c_str(),
122 static_cast<long long>(file_size)));
123 }
124
125 ICING_VLOG(1) << "Loading bitmap from file: " << file_path_
126 << " of size: " << file_size;
127 }
128
129 // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
130 // that can support error logging.
131 libtextclassifier3::Status status = mmapper_->Remap(0, file_size);
132 if (!status.ok()) {
133 ICING_LOG(ERROR) << status.error_message();
134 return status;
135 }
136
137 if (is_new_bitmap) {
138 mutable_header()->version = kCurrentVersion;
139 mutable_header()->state = Header::ChecksumState::kStale;
140 mutable_header()->checksum = 0;
141
142 return mmapper_->PersistToDisk();
143 }
144
145 if (header().state == Header::ChecksumState::kStale) {
146 return absl_ports::InternalError(absl_ports::StrCat(
147 "File corrupted, has partially flushed data; filename: ", file_path_));
148 }
149
150 if (header().checksum != ComputeChecksum()) {
151 return absl_ports::InternalError(absl_ports::StrCat(
152 "File corrupted, checksum doesn't match; filename: ", file_path_));
153 }
154
155 if (header().version != kCurrentVersion) {
156 return UpgradeToCurrentVersion();
157 }
158
159 return libtextclassifier3::Status::OK;
160 }
161
UpgradeToCurrentVersion()162 libtextclassifier3::Status FileBackedBitmap::UpgradeToCurrentVersion() {
163 // Currently, only 1 format is supported.
164 return absl_ports::InternalError(IcingStringUtil::StringPrintf(
165 "File corrupted, mismatched version; filename: %s; %d vs %d.",
166 file_path_.c_str(), header().version, kCurrentVersion));
167 }
168
SetWord(int word_index,Word word)169 libtextclassifier3::Status FileBackedBitmap::SetWord(int word_index,
170 Word word) {
171 if (word_index >= NumBits() / kNumWordBits) {
172 ICING_LOG(ERROR) << "word_index: " << word_index
173 << ", number of words: " << NumBits() / kNumWordBits;
174 return absl_ports::InternalError("Trying to access invalid memory");
175 }
176
177 Word* bitmap_data =
178 reinterpret_cast<Word*>(mmapper_->mutable_region() + kHeaderByteSize);
179
180 bitmap_data[word_index] = word;
181
182 return libtextclassifier3::Status::OK;
183 }
184
GetWord(int word_index) const185 libtextclassifier3::StatusOr<FileBackedBitmap::Word> FileBackedBitmap::GetWord(
186 int word_index) const {
187 if (word_index >= NumBits() / kNumWordBits) {
188 ICING_LOG(ERROR) << "word_index: " << word_index
189 << ", number of words: " << NumBits() / kNumWordBits;
190 return absl_ports::InternalError("Trying to access invalid memory");
191 }
192
193 const Word* bitmap_data = reinterpret_cast<const Word*>(
194 mmapper_->mutable_region() + kHeaderByteSize);
195 return bitmap_data[word_index];
196 }
197
NumBits() const198 int FileBackedBitmap::NumBits() const {
199 return (mmapper_->region_size() - kHeaderByteSize) * 8;
200 }
201
Set(int bit_index,bool bit_value)202 libtextclassifier3::Status FileBackedBitmap::Set(int bit_index,
203 bool bit_value) {
204 if (bit_index >= NumBits()) {
205 // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
206 // that can support error logging.
207 libtextclassifier3::Status status = GrowTo(bit_index);
208 if (!status.ok()) {
209 ICING_LOG(ERROR) << status.error_message();
210 return status;
211 }
212
213 if (!bit_value) {
214 // All newly added bits are set to false.
215 return libtextclassifier3::Status::OK;
216 }
217 }
218
219 // Figure out which word needs to be modified.
220 const int word_index = bit_index / kNumWordBits;
221 const int word_mask = 1u << (bit_index % kNumWordBits);
222
223 ICING_ASSIGN_OR_RETURN(Word old_word, GetWord(word_index));
224 Word new_word = bit_value ? (old_word | word_mask) : old_word & ~word_mask;
225 if (new_word != old_word) {
226 ICING_RETURN_IF_ERROR(SetWord(word_index, new_word));
227 mutable_header()->state = Header::ChecksumState::kStale;
228 }
229
230 return libtextclassifier3::Status::OK;
231 }
232
Get(int bit_index) const233 libtextclassifier3::StatusOr<bool> FileBackedBitmap::Get(int bit_index) const {
234 if (bit_index >= NumBits()) {
235 return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
236 "Bitmap file %s is of size %d and can't read bit_index %d.",
237 file_path_.c_str(), NumBits(), bit_index));
238 }
239
240 const Word word_index = bit_index / kNumWordBits;
241 const Word word_mask = 1u << (bit_index % kNumWordBits);
242
243 ICING_ASSIGN_OR_RETURN(Word word, GetWord(word_index));
244 return word & word_mask;
245 }
246
FileSizeForBits(int num_bits)247 size_t FileBackedBitmap::FileSizeForBits(int num_bits) {
248 const int word_index = num_bits / kNumWordBits;
249 size_t new_file_size = kHeaderByteSize + (word_index + 1) * sizeof(Word);
250 return math_util::RoundUpTo(new_file_size,
251 static_cast<size_t>(kBlockByteSize));
252 }
253
GrowTo(int new_num_bits)254 libtextclassifier3::Status FileBackedBitmap::GrowTo(int new_num_bits) {
255 if (new_num_bits > kMaxNumBits) {
256 return absl_ports::ResourceExhaustedError(IcingStringUtil::StringPrintf(
257 "Bitmap file %s has a max-capacity of %d bits and cannot fit %d bits",
258 file_path_.c_str(), kMaxNumBits, new_num_bits));
259 }
260
261 const size_t new_file_size = FileSizeForBits(new_num_bits);
262 if (!filesystem_->Grow(file_path_.c_str(), new_file_size)) {
263 return absl_ports::InternalError(
264 IcingStringUtil::StringPrintf("Growing file %s to new size %zd failed",
265 file_path_.c_str(), new_file_size));
266 }
267
268 // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
269 // that can support error logging.
270 libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size);
271 if (!status.ok()) {
272 ICING_LOG(ERROR) << status.error_message();
273 return status;
274 }
275
276 ICING_VLOG(1) << "Grew file " << file_path_ << " to new size "
277 << new_file_size;
278 mutable_header()->state = Header::ChecksumState::kStale;
279 return libtextclassifier3::Status::OK;
280 }
281
TruncateTo(int new_num_bits)282 libtextclassifier3::Status FileBackedBitmap::TruncateTo(int new_num_bits) {
283 if (new_num_bits > NumBits()) {
284 return libtextclassifier3::Status::OK;
285 }
286
287 const size_t new_file_size = FileSizeForBits(new_num_bits);
288 // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
289 // that can support error logging.
290 libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size);
291 if (!status.ok()) {
292 ICING_LOG(ERROR) << status.error_message();
293 return status;
294 }
295 if (!filesystem_->Truncate(file_path_.c_str(), new_file_size)) {
296 return absl_ports::InternalError(IcingStringUtil::StringPrintf(
297 "Truncating file %s to new size %zd failed", file_path_.c_str(),
298 new_file_size));
299 }
300
301 const int word_index = new_num_bits / kNumWordBits;
302 // Mask to only keep bits <= new_num_bits and clear everything else.
303 const Word word_mask = (1u << (new_num_bits % kNumWordBits)) - 1;
304
305 ICING_ASSIGN_OR_RETURN(Word old_word, GetWord(word_index));
306 Word new_word = old_word & word_mask;
307 ICING_RETURN_IF_ERROR(SetWord(word_index, new_word));
308
309 // TODO(cassiewang) It might be worth replacing this with memset().
310 const int num_words = NumBits() / kNumWordBits;
311 for (int i = word_index + 1; i < num_words; ++i) {
312 ICING_RETURN_IF_ERROR(SetWord(i, 0));
313 }
314
315 mutable_header()->state = Header::ChecksumState::kStale;
316 return libtextclassifier3::Status::OK;
317 }
318
PersistToDisk()319 libtextclassifier3::Status FileBackedBitmap::PersistToDisk() {
320 mutable_header()->checksum = ComputeChecksum();
321 mutable_header()->state = Header::ChecksumState::kFresh;
322 return mmapper_->PersistToDisk();
323 }
324
ComputeChecksum() const325 uint32_t FileBackedBitmap::ComputeChecksum() const {
326 std::string_view bitmap_bytes(mmapper_->region() + kHeaderByteSize,
327 NumBits() / 8);
328 return Crc32().Append(bitmap_bytes);
329 }
330
331 } // namespace lib
332 } // namespace icing
333