• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/file/file-backed-bitmap.h"
16 
17 #include <cstdint>
18 
19 #include "icing/text_classifier/lib3/utils/base/status.h"
20 #include "icing/text_classifier/lib3/utils/base/statusor.h"
21 #include "icing/absl_ports/canonical_errors.h"
22 #include "icing/absl_ports/str_cat.h"
23 #include "icing/file/filesystem.h"
24 #include "icing/file/memory-mapped-file.h"
25 #include "icing/legacy/core/icing-string-util.h"
26 #include "icing/util/crc32.h"
27 #include "icing/util/logging.h"
28 #include "icing/util/math-util.h"
29 #include "icing/util/status-macros.h"
30 
31 namespace icing {
32 namespace lib {
33 
GetBlockCapacity(int num_blocks)34 int FileBackedBitmap::GetBlockCapacity(int num_blocks) {
35   // The first block has a lower capacity due to the Header.
36   const int capacity_bytes = kBlockByteSize * num_blocks - kHeaderByteSize;
37   return capacity_bytes * 8;
38 }
39 
40 libtextclassifier3::StatusOr<std::unique_ptr<FileBackedBitmap>>
Create(const Filesystem * filesystem,std::string_view file_path,MemoryMappedFile::Strategy mmap_strategy)41 FileBackedBitmap::Create(const Filesystem* filesystem,
42                          std::string_view file_path,
43                          MemoryMappedFile::Strategy mmap_strategy) {
44   if (mmap_strategy == MemoryMappedFile::Strategy::READ_WRITE_MANUAL_SYNC) {
45     return absl_ports::UnimplementedError(
46         "FileBackedBitmap currently doesn't support READ_WRITE_MANUAL_SYNC "
47         "mmap strategy.");
48   }
49 
50   ICING_ASSIGN_OR_RETURN(
51       MemoryMappedFile mmapper,
52       MemoryMappedFile::Create(*filesystem, file_path, mmap_strategy));
53 
54   auto bitmap = std::unique_ptr<FileBackedBitmap>(
55       new FileBackedBitmap(filesystem, file_path, std::move(mmapper)));
56 
57   // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
58   // that can support error logging.
59   libtextclassifier3::Status status = bitmap->Initialize();
60   if (!status.ok()) {
61     ICING_LOG(ERROR) << status.error_message();
62     return status;
63   }
64   return bitmap;
65 }
66 
FileBackedBitmap(const Filesystem * filesystem,std::string_view file_path,MemoryMappedFile && mmapper)67 FileBackedBitmap::FileBackedBitmap(const Filesystem* filesystem,
68                                    std::string_view file_path,
69                                    MemoryMappedFile&& mmapper)
70     : filesystem_(filesystem),
71       file_path_(file_path),
72       mmapper_(std::make_unique<MemoryMappedFile>(std::move(mmapper))) {}
73 
~FileBackedBitmap()74 FileBackedBitmap::~FileBackedBitmap() {
75   // Only update if we have auto_sync setup, otherwise the checksum will be
76   // updated when the client calls PersistToDisk
77   if (mmapper_->strategy() ==
78       MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC) {
79     // Any valid, initialized file should at least have 1 block.
80     if (mmapper_->region_size() >= kBlockByteSize &&
81         header().version == kCurrentVersion &&
82         header().state == Header::ChecksumState::kStale) {
83       if (!PersistToDisk().ok()) {
84         ICING_LOG(WARNING)
85             << "Failed to persist bitmap to disk while destructing "
86             << file_path_;
87       }
88     }
89   }
90 }
91 
header() const92 const FileBackedBitmap::Header& FileBackedBitmap::header() const {
93   return reinterpret_cast<const Header&>(*mmapper_->region());
94 }
95 
mutable_header()96 FileBackedBitmap::Header* FileBackedBitmap::mutable_header() {
97   return reinterpret_cast<Header*>(mmapper_->mutable_region());
98 }
99 
Initialize()100 libtextclassifier3::Status FileBackedBitmap::FileBackedBitmap::Initialize() {
101   ICING_VLOG(1) << "Initialize bitmap file: " << file_path_;
102 
103   const bool is_new_bitmap = !filesystem_->FileExists(file_path_.c_str());
104 
105   int64_t file_size = 0;
106   if (is_new_bitmap) {
107     file_size = kBlockByteSize;
108     if (!filesystem_->Grow(file_path_.c_str(), file_size)) {
109       return absl_ports::InternalError(IcingStringUtil::StringPrintf(
110           "Unable to create a minimal bitmap; "
111           "filename: %s; target size: %lld",
112           file_path_.c_str(), static_cast<long long>(file_size)));
113     }
114 
115     ICING_VLOG(1) << "Creating new bitmap in file: " << file_path_
116                   << " of size: " << file_size;
117   } else {
118     file_size = filesystem_->GetFileSize(file_path_.c_str());
119     if (file_size == Filesystem::kBadFileSize) {
120       return absl_ports::InternalError(IcingStringUtil::StringPrintf(
121           "File corrupted; filename: %s; size: %lld.", file_path_.c_str(),
122           static_cast<long long>(file_size)));
123     }
124 
125     ICING_VLOG(1) << "Loading bitmap from  file: " << file_path_
126                   << " of size: " << file_size;
127   }
128 
129   // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
130   // that can support error logging.
131   libtextclassifier3::Status status = mmapper_->Remap(0, file_size);
132   if (!status.ok()) {
133     ICING_LOG(ERROR) << status.error_message();
134     return status;
135   }
136 
137   if (is_new_bitmap) {
138     mutable_header()->version = kCurrentVersion;
139     mutable_header()->state = Header::ChecksumState::kStale;
140     mutable_header()->checksum = 0;
141 
142     return mmapper_->PersistToDisk();
143   }
144 
145   if (header().state == Header::ChecksumState::kStale) {
146     return absl_ports::InternalError(absl_ports::StrCat(
147         "File corrupted, has partially flushed data; filename: ", file_path_));
148   }
149 
150   if (header().checksum != ComputeChecksum()) {
151     return absl_ports::InternalError(absl_ports::StrCat(
152         "File corrupted, checksum doesn't match; filename: ", file_path_));
153   }
154 
155   if (header().version != kCurrentVersion) {
156     return UpgradeToCurrentVersion();
157   }
158 
159   return libtextclassifier3::Status::OK;
160 }
161 
UpgradeToCurrentVersion()162 libtextclassifier3::Status FileBackedBitmap::UpgradeToCurrentVersion() {
163   // Currently, only 1 format is supported.
164   return absl_ports::InternalError(IcingStringUtil::StringPrintf(
165       "File corrupted, mismatched version; filename: %s; %d vs %d.",
166       file_path_.c_str(), header().version, kCurrentVersion));
167 }
168 
SetWord(int word_index,Word word)169 libtextclassifier3::Status FileBackedBitmap::SetWord(int word_index,
170                                                      Word word) {
171   if (word_index >= NumBits() / kNumWordBits) {
172     ICING_LOG(ERROR) << "word_index: " << word_index
173                      << ", number of words: " << NumBits() / kNumWordBits;
174     return absl_ports::InternalError("Trying to access invalid memory");
175   }
176 
177   Word* bitmap_data =
178       reinterpret_cast<Word*>(mmapper_->mutable_region() + kHeaderByteSize);
179 
180   bitmap_data[word_index] = word;
181 
182   return libtextclassifier3::Status::OK;
183 }
184 
GetWord(int word_index) const185 libtextclassifier3::StatusOr<FileBackedBitmap::Word> FileBackedBitmap::GetWord(
186     int word_index) const {
187   if (word_index >= NumBits() / kNumWordBits) {
188     ICING_LOG(ERROR) << "word_index: " << word_index
189                      << ", number of words: " << NumBits() / kNumWordBits;
190     return absl_ports::InternalError("Trying to access invalid memory");
191   }
192 
193   const Word* bitmap_data = reinterpret_cast<const Word*>(
194       mmapper_->mutable_region() + kHeaderByteSize);
195   return bitmap_data[word_index];
196 }
197 
NumBits() const198 int FileBackedBitmap::NumBits() const {
199   return (mmapper_->region_size() - kHeaderByteSize) * 8;
200 }
201 
Set(int bit_index,bool bit_value)202 libtextclassifier3::Status FileBackedBitmap::Set(int bit_index,
203                                                  bool bit_value) {
204   if (bit_index >= NumBits()) {
205     // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
206     // that can support error logging.
207     libtextclassifier3::Status status = GrowTo(bit_index);
208     if (!status.ok()) {
209       ICING_LOG(ERROR) << status.error_message();
210       return status;
211     }
212 
213     if (!bit_value) {
214       // All newly added bits are set to false.
215       return libtextclassifier3::Status::OK;
216     }
217   }
218 
219   // Figure out which word needs to be modified.
220   const int word_index = bit_index / kNumWordBits;
221   const int word_mask = 1u << (bit_index % kNumWordBits);
222 
223   ICING_ASSIGN_OR_RETURN(Word old_word, GetWord(word_index));
224   Word new_word = bit_value ? (old_word | word_mask) : old_word & ~word_mask;
225   if (new_word != old_word) {
226     ICING_RETURN_IF_ERROR(SetWord(word_index, new_word));
227     mutable_header()->state = Header::ChecksumState::kStale;
228   }
229 
230   return libtextclassifier3::Status::OK;
231 }
232 
Get(int bit_index) const233 libtextclassifier3::StatusOr<bool> FileBackedBitmap::Get(int bit_index) const {
234   if (bit_index >= NumBits()) {
235     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
236         "Bitmap file %s is of size %d and can't read bit_index %d.",
237         file_path_.c_str(), NumBits(), bit_index));
238   }
239 
240   const Word word_index = bit_index / kNumWordBits;
241   const Word word_mask = 1u << (bit_index % kNumWordBits);
242 
243   ICING_ASSIGN_OR_RETURN(Word word, GetWord(word_index));
244   return word & word_mask;
245 }
246 
FileSizeForBits(int num_bits)247 size_t FileBackedBitmap::FileSizeForBits(int num_bits) {
248   const int word_index = num_bits / kNumWordBits;
249   size_t new_file_size = kHeaderByteSize + (word_index + 1) * sizeof(Word);
250   return math_util::RoundUpTo(new_file_size,
251                               static_cast<size_t>(kBlockByteSize));
252 }
253 
GrowTo(int new_num_bits)254 libtextclassifier3::Status FileBackedBitmap::GrowTo(int new_num_bits) {
255   if (new_num_bits > kMaxNumBits) {
256     return absl_ports::ResourceExhaustedError(IcingStringUtil::StringPrintf(
257         "Bitmap file %s has a max-capacity of %d bits and cannot fit %d bits",
258         file_path_.c_str(), kMaxNumBits, new_num_bits));
259   }
260 
261   const size_t new_file_size = FileSizeForBits(new_num_bits);
262   if (!filesystem_->Grow(file_path_.c_str(), new_file_size)) {
263     return absl_ports::InternalError(
264         IcingStringUtil::StringPrintf("Growing file %s to new size %zd failed",
265                                       file_path_.c_str(), new_file_size));
266   }
267 
268   // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
269   // that can support error logging.
270   libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size);
271   if (!status.ok()) {
272     ICING_LOG(ERROR) << status.error_message();
273     return status;
274   }
275 
276   ICING_VLOG(1) << "Grew file " << file_path_ << " to new size "
277                 << new_file_size;
278   mutable_header()->state = Header::ChecksumState::kStale;
279   return libtextclassifier3::Status::OK;
280 }
281 
TruncateTo(int new_num_bits)282 libtextclassifier3::Status FileBackedBitmap::TruncateTo(int new_num_bits) {
283   if (new_num_bits > NumBits()) {
284     return libtextclassifier3::Status::OK;
285   }
286 
287   const size_t new_file_size = FileSizeForBits(new_num_bits);
288   // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
289   // that can support error logging.
290   libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size);
291   if (!status.ok()) {
292     ICING_LOG(ERROR) << status.error_message();
293     return status;
294   }
295   if (!filesystem_->Truncate(file_path_.c_str(), new_file_size)) {
296     return absl_ports::InternalError(IcingStringUtil::StringPrintf(
297         "Truncating file %s to new size %zd failed", file_path_.c_str(),
298         new_file_size));
299   }
300 
301   const int word_index = new_num_bits / kNumWordBits;
302   // Mask to only keep bits <= new_num_bits and clear everything else.
303   const Word word_mask = (1u << (new_num_bits % kNumWordBits)) - 1;
304 
305   ICING_ASSIGN_OR_RETURN(Word old_word, GetWord(word_index));
306   Word new_word = old_word & word_mask;
307   ICING_RETURN_IF_ERROR(SetWord(word_index, new_word));
308 
309   // TODO(cassiewang) It might be worth replacing this with memset().
310   const int num_words = NumBits() / kNumWordBits;
311   for (int i = word_index + 1; i < num_words; ++i) {
312     ICING_RETURN_IF_ERROR(SetWord(i, 0));
313   }
314 
315   mutable_header()->state = Header::ChecksumState::kStale;
316   return libtextclassifier3::Status::OK;
317 }
318 
PersistToDisk()319 libtextclassifier3::Status FileBackedBitmap::PersistToDisk() {
320   mutable_header()->checksum = ComputeChecksum();
321   mutable_header()->state = Header::ChecksumState::kFresh;
322   return mmapper_->PersistToDisk();
323 }
324 
ComputeChecksum() const325 uint32_t FileBackedBitmap::ComputeChecksum() const {
326   std::string_view bitmap_bytes(mmapper_->region() + kHeaderByteSize,
327                                 NumBits() / 8);
328   return Crc32().Append(bitmap_bytes);
329 }
330 
331 }  // namespace lib
332 }  // namespace icing
333