1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/index/numeric/integer-index.h"
16
17 #include <algorithm>
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <string_view>
22 #include <utility>
23 #include <vector>
24
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/absl_ports/canonical_errors.h"
28 #include "icing/absl_ports/str_cat.h"
29 #include "icing/file/destructible-directory.h"
30 #include "icing/file/filesystem.h"
31 #include "icing/file/memory-mapped-file.h"
32 #include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
33 #include "icing/index/numeric/doc-hit-info-iterator-numeric.h"
34 #include "icing/index/numeric/integer-index-storage.h"
35 #include "icing/index/numeric/posting-list-integer-index-serializer.h"
36 #include "icing/store/document-id.h"
37 #include "icing/util/crc32.h"
38 #include "icing/util/status-macros.h"
39
40 namespace icing {
41 namespace lib {
42
43 namespace {
44
45 // Helper function to get the file name of metadata.
GetMetadataFileName()46 std::string GetMetadataFileName() {
47 return absl_ports::StrCat(IntegerIndex::kFilePrefix, ".m");
48 }
49
50 // Helper function to get the file path of metadata according to the given
51 // working directory.
GetMetadataFilePath(std::string_view working_path)52 std::string GetMetadataFilePath(std::string_view working_path) {
53 return absl_ports::StrCat(working_path, "/", GetMetadataFileName());
54 }
55
// Entry name, under the working path, used as the working path of the
// IntegerIndexStorage that backs the wildcard index.
constexpr std::string_view kWildcardPropertyIndexFileName{
    "wildcard_property_index"};

// File name, under the working path, of the FileBackedProto that records
// which property paths are stored in the wildcard index.
constexpr std::string_view kWildcardPropertyStorageFileName{
    "wildcard_property_storage"};
61
GetWildcardPropertyStorageFilePath(std::string_view working_path)62 std::string GetWildcardPropertyStorageFilePath(std::string_view working_path) {
63 return absl_ports::StrCat(working_path, "/",
64 kWildcardPropertyStorageFileName);
65 }
66
67 // Helper function to get the sub working (directory) path of
68 // IntegerIndexStorage according to the given working directory and property
69 // path.
GetPropertyIndexStoragePath(std::string_view working_path,std::string_view property_path)70 std::string GetPropertyIndexStoragePath(std::string_view working_path,
71 std::string_view property_path) {
72 return absl_ports::StrCat(working_path, "/", property_path);
73 }
74
75 // Helper function to get all existing property paths by listing all
76 // directories.
77 libtextclassifier3::StatusOr<std::vector<std::string>>
GetAllExistingPropertyPaths(const Filesystem & filesystem,const std::string & working_path)78 GetAllExistingPropertyPaths(const Filesystem& filesystem,
79 const std::string& working_path) {
80 std::vector<std::string> property_paths;
81 std::unordered_set<std::string> excludes = {
82 GetMetadataFileName(), std::string(kWildcardPropertyStorageFileName)};
83 if (!filesystem.ListDirectory(working_path.c_str(), excludes,
84 /*recursive=*/false, &property_paths)) {
85 return absl_ports::InternalError("Failed to list directory");
86 }
87 return property_paths;
88 }
89
90 libtextclassifier3::StatusOr<IntegerIndex::PropertyToStorageMapType>
GetPropertyIntegerIndexStorageMap(const Filesystem & filesystem,const std::string & working_path,PostingListIntegerIndexSerializer * posting_list_serializer,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)91 GetPropertyIntegerIndexStorageMap(
92 const Filesystem& filesystem, const std::string& working_path,
93 PostingListIntegerIndexSerializer* posting_list_serializer,
94 int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv) {
95 ICING_ASSIGN_OR_RETURN(std::vector<std::string> property_paths,
96 GetAllExistingPropertyPaths(filesystem, working_path));
97
98 IntegerIndex::PropertyToStorageMapType property_to_storage_map;
99 for (const std::string& property_path : property_paths) {
100 if (property_path == kWildcardPropertyIndexFileName) {
101 continue;
102 }
103 std::string storage_working_path =
104 GetPropertyIndexStoragePath(working_path, property_path);
105 ICING_ASSIGN_OR_RETURN(
106 std::unique_ptr<IntegerIndexStorage> storage,
107 IntegerIndexStorage::Create(
108 filesystem, storage_working_path,
109 IntegerIndexStorage::Options(num_data_threshold_for_bucket_split,
110 pre_mapping_fbv),
111 posting_list_serializer));
112 property_to_storage_map.insert(
113 std::make_pair(property_path, std::move(storage)));
114 }
115
116 return property_to_storage_map;
117 }
118
119 // RETURNS:
120 // - On success, an unordered_set representing the list of property paths
121 // stored in the WildcardPropertyStorage managed by property_storage
122 // - INTERNAL_ERROR on any failure to successfully read the underlying proto.
CreatePropertySet(const FileBackedProto<WildcardPropertyStorage> & property_storage)123 libtextclassifier3::StatusOr<std::unordered_set<std::string>> CreatePropertySet(
124 const FileBackedProto<WildcardPropertyStorage>& property_storage) {
125 std::unordered_set<std::string> wildcard_properties_set;
126 auto wildcard_properties_or = property_storage.Read();
127 if (!wildcard_properties_or.ok()) {
128 if (absl_ports::IsNotFound(wildcard_properties_or.status())) {
129 return wildcard_properties_set;
130 }
131 return wildcard_properties_or.status();
132 }
133
134 const WildcardPropertyStorage* wildcard_properties =
135 wildcard_properties_or.ValueOrDie();
136 wildcard_properties_set.reserve(wildcard_properties->property_entries_size());
137 for (const std::string& property : wildcard_properties->property_entries()) {
138 wildcard_properties_set.insert(property);
139 }
140 return wildcard_properties_set;
141 }
142
143 } // namespace
144
IndexAllBufferedKeys()145 libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && {
146 integer_index_.SetDirty();
147
148 auto iter = integer_index_.property_to_storage_map_.find(property_path_);
149 IntegerIndexStorage* target_storage = nullptr;
150 // 1. Check if this property already has its own individual index.
151 if (iter != integer_index_.property_to_storage_map_.end()) {
152 target_storage = iter->second.get();
153 // 2. Check if this property was added to wildcard storage.
154 } else if (integer_index_.wildcard_properties_set_.find(property_path_) !=
155 integer_index_.wildcard_properties_set_.end()) {
156 target_storage = integer_index_.wildcard_index_storage_.get();
157 // 3. Check if we've reach the limit of individual property storages.
158 } else if (integer_index_.property_to_storage_map_.size() >=
159 kMaxPropertyStorages) {
160 // 3a. Create the wildcard storage if it doesn't exist.
161 if (integer_index_.wildcard_index_storage_ == nullptr) {
162 ICING_ASSIGN_OR_RETURN(
163 integer_index_.wildcard_index_storage_,
164 IntegerIndexStorage::Create(
165 integer_index_.filesystem_,
166 GetPropertyIndexStoragePath(integer_index_.working_path_,
167 kWildcardPropertyIndexFileName),
168 IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
169 pre_mapping_fbv_),
170 integer_index_.posting_list_serializer_.get()));
171 }
172 ICING_RETURN_IF_ERROR(
173 integer_index_.AddPropertyToWildcardStorage(property_path_));
174 target_storage = integer_index_.wildcard_index_storage_.get();
175 // 4. Create a new individual storage for this new property.
176 } else {
177 ICING_ASSIGN_OR_RETURN(
178 std::unique_ptr<IntegerIndexStorage> new_storage,
179 IntegerIndexStorage::Create(
180 integer_index_.filesystem_,
181 GetPropertyIndexStoragePath(integer_index_.working_path_,
182 property_path_),
183 IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
184 pre_mapping_fbv_),
185 integer_index_.posting_list_serializer_.get()));
186 target_storage = new_storage.get();
187 integer_index_.property_to_storage_map_.insert(
188 std::make_pair(property_path_, std::move(new_storage)));
189 }
190
191 return target_storage->AddKeys(document_id_, section_id_,
192 std::move(seen_keys_));
193 }
194
195 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
Create(const Filesystem & filesystem,std::string working_path,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)196 IntegerIndex::Create(const Filesystem& filesystem, std::string working_path,
197 int32_t num_data_threshold_for_bucket_split,
198 bool pre_mapping_fbv) {
199 if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str())) {
200 // Discard working_path if metadata file is missing, and reinitialize.
201 if (filesystem.DirectoryExists(working_path.c_str())) {
202 ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
203 }
204 return InitializeNewFiles(filesystem, std::move(working_path),
205 num_data_threshold_for_bucket_split,
206 pre_mapping_fbv);
207 }
208 return InitializeExistingFiles(filesystem, std::move(working_path),
209 num_data_threshold_for_bucket_split,
210 pre_mapping_fbv);
211 }
212
~IntegerIndex()213 IntegerIndex::~IntegerIndex() {
214 if (!PersistToDisk().ok()) {
215 ICING_LOG(WARNING)
216 << "Failed to persist integer index to disk while destructing "
217 << working_path_;
218 }
219 }
220
221 libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
GetIterator(std::string_view property_path,int64_t key_lower,int64_t key_upper,const DocumentStore & document_store,const SchemaStore & schema_store,int64_t current_time_ms) const222 IntegerIndex::GetIterator(std::string_view property_path, int64_t key_lower,
223 int64_t key_upper,
224 const DocumentStore& document_store,
225 const SchemaStore& schema_store,
226 int64_t current_time_ms) const {
227 std::string property_path_str(property_path);
228 auto iter = property_to_storage_map_.find(property_path_str);
229 if (iter != property_to_storage_map_.end()) {
230 return iter->second->GetIterator(key_lower, key_upper);
231 }
232
233 if (wildcard_properties_set_.find(property_path_str) !=
234 wildcard_properties_set_.end()) {
235 ICING_ASSIGN_OR_RETURN(
236 std::unique_ptr<DocHitInfoIterator> delegate,
237 wildcard_index_storage_->GetIterator(key_lower, key_upper));
238 std::set<std::string> property_paths = {std::move(property_path_str)};
239 return DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
240 std::move(delegate), &document_store, &schema_store,
241 std::move(property_paths), current_time_ms);
242 }
243
244 // Return an empty iterator.
245 return std::make_unique<DocHitInfoIteratorNumeric<int64_t>>(
246 /*numeric_index_iter=*/nullptr);
247 }
248
AddPropertyToWildcardStorage(const std::string & property_path)249 libtextclassifier3::Status IntegerIndex::AddPropertyToWildcardStorage(
250 const std::string& property_path) {
251 SetDirty();
252
253 WildcardPropertyStorage wildcard_properties;
254 wildcard_properties.mutable_property_entries()->Reserve(
255 wildcard_properties_set_.size());
256 for (const std::string& property_path : wildcard_properties_set_) {
257 wildcard_properties.add_property_entries(property_path);
258 }
259 ICING_RETURN_IF_ERROR(wildcard_property_storage_->Write(
260 std::make_unique<WildcardPropertyStorage>(
261 std::move(wildcard_properties))));
262
263 wildcard_properties_set_.insert(property_path);
264 return libtextclassifier3::Status::OK;
265 }
266
Optimize(const std::vector<DocumentId> & document_id_old_to_new,DocumentId new_last_added_document_id)267 libtextclassifier3::Status IntegerIndex::Optimize(
268 const std::vector<DocumentId>& document_id_old_to_new,
269 DocumentId new_last_added_document_id) {
270 std::string temp_working_path = working_path_ + "_temp";
271 ICING_RETURN_IF_ERROR(Discard(filesystem_, temp_working_path));
272
273 DestructibleDirectory temp_working_path_ddir(&filesystem_,
274 std::move(temp_working_path));
275 if (!temp_working_path_ddir.is_valid()) {
276 return absl_ports::InternalError(
277 "Unable to create temp directory to build new integer index");
278 }
279
280 {
281 // Transfer all indexed data from current integer index to new integer
282 // index. Also PersistToDisk and destruct the instance after finishing, so
283 // we can safely swap directories later.
284 ICING_ASSIGN_OR_RETURN(
285 std::unique_ptr<IntegerIndex> new_integer_index,
286 Create(filesystem_, temp_working_path_ddir.dir(),
287 num_data_threshold_for_bucket_split_, pre_mapping_fbv_));
288 ICING_RETURN_IF_ERROR(
289 TransferIndex(document_id_old_to_new, new_integer_index.get()));
290 new_integer_index->set_last_added_document_id(new_last_added_document_id);
291 ICING_RETURN_IF_ERROR(new_integer_index->PersistToDisk());
292 }
293
294 // Destruct current storage instances to safely swap directories.
295 metadata_mmapped_file_.reset();
296 property_to_storage_map_.clear();
297 wildcard_index_storage_.reset();
298 wildcard_property_storage_.reset();
299 if (!filesystem_.SwapFiles(temp_working_path_ddir.dir().c_str(),
300 working_path_.c_str())) {
301 return absl_ports::InternalError(
302 "Unable to apply new integer index due to failed swap");
303 }
304
305 // Reinitialize the integer index.
306 std::string metadata_file_path = GetMetadataFilePath(working_path_);
307 ICING_ASSIGN_OR_RETURN(
308 MemoryMappedFile metadata_mmapped_file,
309 MemoryMappedFile::Create(filesystem_, metadata_file_path,
310 MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
311 /*max_file_size=*/kMetadataFileSize,
312 /*pre_mapping_file_offset=*/0,
313 /*pre_mapping_mmap_size=*/kMetadataFileSize));
314 if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
315 return absl_ports::InternalError(
316 "Invalid metadata file size after Optimize");
317 }
318 metadata_mmapped_file_ =
319 std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file));
320
321 // Recreate all of the data structures tracking the wildcard storage.
322 std::string wildcard_property_path =
323 GetWildcardPropertyStorageFilePath(working_path_);
324 wildcard_property_storage_ =
325 std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
326 filesystem_, wildcard_property_path);
327
328 ICING_ASSIGN_OR_RETURN(wildcard_properties_set_,
329 CreatePropertySet(*wildcard_property_storage_));
330 if (!wildcard_properties_set_.empty()) {
331 ICING_ASSIGN_OR_RETURN(
332 wildcard_index_storage_,
333 IntegerIndexStorage::Create(
334 filesystem_,
335 GetPropertyIndexStoragePath(working_path_,
336 kWildcardPropertyIndexFileName),
337 IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
338 pre_mapping_fbv_),
339 posting_list_serializer_.get()));
340 }
341
342 // Initialize all existing integer index storages.
343 ICING_ASSIGN_OR_RETURN(
344 property_to_storage_map_,
345 GetPropertyIntegerIndexStorageMap(
346 filesystem_, working_path_, posting_list_serializer_.get(),
347 num_data_threshold_for_bucket_split_, pre_mapping_fbv_));
348
349 return libtextclassifier3::Status::OK;
350 }
351
Clear()352 libtextclassifier3::Status IntegerIndex::Clear() {
353 SetDirty();
354
355 // Step 1: clear property_to_storage_map_.
356 property_to_storage_map_.clear();
357 wildcard_index_storage_.reset();
358
359 // Step 2: delete all IntegerIndexStorages. It is safe because there is no
360 // active IntegerIndexStorage after clearing the map.
361 ICING_ASSIGN_OR_RETURN(
362 std::vector<std::string> property_paths,
363 GetAllExistingPropertyPaths(filesystem_, working_path_));
364 for (const std::string& property_path : property_paths) {
365 ICING_RETURN_IF_ERROR(IntegerIndexStorage::Discard(
366 filesystem_,
367 GetPropertyIndexStoragePath(working_path_, property_path)));
368 }
369
370 // Step 3: Delete the wildcard property storage
371 std::string wildcard_property_path =
372 GetWildcardPropertyStorageFilePath(working_path_);
373 if (filesystem_.FileExists(wildcard_property_path.c_str()) ||
374 !filesystem_.DeleteFile(wildcard_property_path.c_str())) {
375 return absl_ports::InternalError(absl_ports::StrCat(
376 "Unable to delete file at path ", wildcard_property_path));
377 }
378
379 info().last_added_document_id = kInvalidDocumentId;
380 return libtextclassifier3::Status::OK;
381 }
382
383 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
InitializeNewFiles(const Filesystem & filesystem,std::string && working_path,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)384 IntegerIndex::InitializeNewFiles(const Filesystem& filesystem,
385 std::string&& working_path,
386 int32_t num_data_threshold_for_bucket_split,
387 bool pre_mapping_fbv) {
388 // Create working directory.
389 if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
390 return absl_ports::InternalError(
391 absl_ports::StrCat("Failed to create directory: ", working_path));
392 }
393
394 // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and
395 // call GrowAndRemapIfNecessary to grow the underlying file.
396 ICING_ASSIGN_OR_RETURN(
397 MemoryMappedFile metadata_mmapped_file,
398 MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
399 MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
400 /*max_file_size=*/kMetadataFileSize,
401 /*pre_mapping_file_offset=*/0,
402 /*pre_mapping_mmap_size=*/kMetadataFileSize));
403 ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary(
404 /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize));
405
406 std::string wildcard_property_path =
407 GetWildcardPropertyStorageFilePath(working_path);
408 auto wildcard_property_storage =
409 std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
410 filesystem, wildcard_property_path);
411
412 // Create instance.
413 auto new_integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
414 filesystem, std::move(working_path),
415 std::make_unique<PostingListIntegerIndexSerializer>(),
416 std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
417 /*property_to_storage_map=*/{}, std::move(wildcard_property_storage),
418 /*wildcard_properties_set=*/{}, /*wildcard_index_storage=*/nullptr,
419 num_data_threshold_for_bucket_split, pre_mapping_fbv));
420
421 // Initialize info content by writing mapped memory directly.
422 Info& info_ref = new_integer_index->info();
423 info_ref.magic = Info::kMagic;
424 info_ref.last_added_document_id = kInvalidDocumentId;
425 info_ref.num_data_threshold_for_bucket_split =
426 num_data_threshold_for_bucket_split;
427 // Initialize new PersistentStorage. The initial checksums will be computed
428 // and set via InitializeNewStorage.
429 ICING_RETURN_IF_ERROR(new_integer_index->InitializeNewStorage());
430
431 return new_integer_index;
432 }
433
434 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
InitializeExistingFiles(const Filesystem & filesystem,std::string && working_path,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)435 IntegerIndex::InitializeExistingFiles(
436 const Filesystem& filesystem, std::string&& working_path,
437 int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv) {
438 // Mmap the content of the crcs and info.
439 ICING_ASSIGN_OR_RETURN(
440 MemoryMappedFile metadata_mmapped_file,
441 MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
442 MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
443 /*max_file_size=*/kMetadataFileSize,
444 /*pre_mapping_file_offset=*/0,
445 /*pre_mapping_mmap_size=*/kMetadataFileSize));
446 if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
447 return absl_ports::FailedPreconditionError("Incorrect metadata file size");
448 }
449
450 auto posting_list_serializer =
451 std::make_unique<PostingListIntegerIndexSerializer>();
452
453 // Initialize all existing integer index storages.
454 ICING_ASSIGN_OR_RETURN(
455 PropertyToStorageMapType property_to_storage_map,
456 GetPropertyIntegerIndexStorageMap(
457 filesystem, working_path, posting_list_serializer.get(),
458 num_data_threshold_for_bucket_split, pre_mapping_fbv));
459
460 std::string wildcard_property_path =
461 GetWildcardPropertyStorageFilePath(working_path);
462 auto wildcard_property_storage =
463 std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
464 filesystem, wildcard_property_path);
465
466 ICING_ASSIGN_OR_RETURN(
467 std::unordered_set<std::string> wildcard_properties_set,
468 CreatePropertySet(*wildcard_property_storage));
469
470 std::unique_ptr<IntegerIndexStorage> wildcard_index_storage;
471 if (!wildcard_properties_set.empty()) {
472 ICING_ASSIGN_OR_RETURN(
473 wildcard_index_storage,
474 IntegerIndexStorage::Create(
475 filesystem,
476 GetPropertyIndexStoragePath(working_path,
477 kWildcardPropertyIndexFileName),
478 IntegerIndexStorage::Options(num_data_threshold_for_bucket_split,
479 pre_mapping_fbv),
480 posting_list_serializer.get()));
481 }
482
483 // Create instance.
484 auto integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
485 filesystem, std::move(working_path), std::move(posting_list_serializer),
486 std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
487 std::move(property_to_storage_map), std::move(wildcard_property_storage),
488 std::move(wildcard_properties_set), std::move(wildcard_index_storage),
489 num_data_threshold_for_bucket_split, pre_mapping_fbv));
490 // Initialize existing PersistentStorage. Checksums will be validated.
491 ICING_RETURN_IF_ERROR(integer_index->InitializeExistingStorage());
492
493 // Validate magic.
494 if (integer_index->info().magic != Info::kMagic) {
495 return absl_ports::FailedPreconditionError("Incorrect magic value");
496 }
497
498 // If num_data_threshold_for_bucket_split mismatches, then return error to let
499 // caller rebuild.
500 if (integer_index->info().num_data_threshold_for_bucket_split !=
501 num_data_threshold_for_bucket_split) {
502 return absl_ports::FailedPreconditionError(
503 "Mismatch num_data_threshold_for_bucket_split");
504 }
505
506 return integer_index;
507 }
508
509 libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
TransferIntegerIndexStorage(const std::vector<DocumentId> & document_id_old_to_new,const IntegerIndexStorage * old_storage,const std::string & property_path,IntegerIndex * new_integer_index) const510 IntegerIndex::TransferIntegerIndexStorage(
511 const std::vector<DocumentId>& document_id_old_to_new,
512 const IntegerIndexStorage* old_storage, const std::string& property_path,
513 IntegerIndex* new_integer_index) const {
514 std::string new_storage_working_path = GetPropertyIndexStoragePath(
515 new_integer_index->working_path_, property_path);
516 ICING_ASSIGN_OR_RETURN(
517 std::unique_ptr<IntegerIndexStorage> new_storage,
518 IntegerIndexStorage::Create(
519 new_integer_index->filesystem_, new_storage_working_path,
520 IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
521 pre_mapping_fbv_),
522 new_integer_index->posting_list_serializer_.get()));
523
524 ICING_RETURN_IF_ERROR(
525 old_storage->TransferIndex(document_id_old_to_new, new_storage.get()));
526
527 if (new_storage->num_data() == 0) {
528 new_storage.reset();
529 ICING_RETURN_IF_ERROR(
530 IntegerIndexStorage::Discard(filesystem_, new_storage_working_path));
531 }
532 return new_storage;
533 }
534
TransferWildcardStorage(IntegerIndex * new_integer_index) const535 libtextclassifier3::Status IntegerIndex::TransferWildcardStorage(
536 IntegerIndex* new_integer_index) const {
537 auto property_storage = std::make_unique<WildcardPropertyStorage>();
538 property_storage->mutable_property_entries()->Reserve(
539 wildcard_properties_set_.size());
540 for (const std::string& property : wildcard_properties_set_) {
541 property_storage->add_property_entries(property);
542 }
543
544 ICING_RETURN_IF_ERROR(new_integer_index->wildcard_property_storage_->Write(
545 std::move(property_storage)));
546 new_integer_index->wildcard_properties_set_ = wildcard_properties_set_;
547 return libtextclassifier3::Status::OK;
548 }
549
TransferIndex(const std::vector<DocumentId> & document_id_old_to_new,IntegerIndex * new_integer_index) const550 libtextclassifier3::Status IntegerIndex::TransferIndex(
551 const std::vector<DocumentId>& document_id_old_to_new,
552 IntegerIndex* new_integer_index) const {
553 // Transfer over the integer index storages
554 std::unique_ptr<IntegerIndexStorage> new_storage;
555 for (const auto& [property_path, old_storage] : property_to_storage_map_) {
556 ICING_ASSIGN_OR_RETURN(
557 new_storage,
558 TransferIntegerIndexStorage(document_id_old_to_new, old_storage.get(),
559 property_path, new_integer_index));
560 if (new_storage != nullptr) {
561 new_integer_index->property_to_storage_map_.insert(
562 {property_path, std::move(new_storage)});
563 }
564 }
565 if (wildcard_index_storage_ != nullptr) {
566 ICING_ASSIGN_OR_RETURN(
567 new_storage,
568 TransferIntegerIndexStorage(
569 document_id_old_to_new, wildcard_index_storage_.get(),
570 std::string(kWildcardPropertyIndexFileName), new_integer_index));
571 if (new_storage != nullptr) {
572 new_integer_index->wildcard_index_storage_ = std::move(new_storage);
573
574 // The only time we need to copy over the list of properties using
575 // wildcard storage is if wildcard_index_storage and new_storage are both
576 // non-null. Otherwise, the new wildcard index storage won't have any
577 // data.
578 ICING_RETURN_IF_ERROR(TransferWildcardStorage(new_integer_index));
579 }
580 }
581
582 return libtextclassifier3::Status::OK;
583 }
584
PersistStoragesToDisk(bool force)585 libtextclassifier3::Status IntegerIndex::PersistStoragesToDisk(bool force) {
586 if (!force && !is_storage_dirty()) {
587 return libtextclassifier3::Status::OK;
588 }
589
590 for (auto& [_, storage] : property_to_storage_map_) {
591 ICING_RETURN_IF_ERROR(storage->PersistToDisk());
592 }
593 // No need to persist wildcard_properties_storage_. All calls to
594 // FileBackedProto::Write are fully written through at the time of the call.
595 if (wildcard_index_storage_) {
596 ICING_RETURN_IF_ERROR(wildcard_index_storage_->PersistToDisk());
597 }
598 return libtextclassifier3::Status::OK;
599 }
600
PersistMetadataToDisk(bool force)601 libtextclassifier3::Status IntegerIndex::PersistMetadataToDisk(bool force) {
602 if (!force && !is_info_dirty() && !is_storage_dirty()) {
603 return libtextclassifier3::Status::OK;
604 }
605
606 // Changes should have been applied to the underlying file when using
607 // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
608 // extra safety step to ensure they are written out.
609 return metadata_mmapped_file_->PersistToDisk();
610 }
611
ComputeInfoChecksum(bool force)612 libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeInfoChecksum(
613 bool force) {
614 if (!force && !is_info_dirty()) {
615 return Crc32(crcs().component_crcs.info_crc);
616 }
617
618 return info().ComputeChecksum();
619 }
620
ComputeStoragesChecksum(bool force)621 libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeStoragesChecksum(
622 bool force) {
623 if (!force && !is_storage_dirty()) {
624 return Crc32(crcs().component_crcs.storages_crc);
625 }
626
627 // XOR all crcs of all storages. Since XOR is commutative and associative,
628 // the order doesn't matter.
629 uint32_t storages_checksum = 0;
630 for (auto& [property_path, storage] : property_to_storage_map_) {
631 ICING_ASSIGN_OR_RETURN(Crc32 storage_crc, storage->UpdateChecksums());
632 storage_crc.Append(property_path);
633
634 storages_checksum ^= storage_crc.Get();
635 }
636
637 if (wildcard_index_storage_ != nullptr) {
638 ICING_ASSIGN_OR_RETURN(Crc32 storage_crc,
639 wildcard_index_storage_->UpdateChecksums());
640 storages_checksum ^= storage_crc.Get();
641 }
642
643 ICING_ASSIGN_OR_RETURN(Crc32 wildcard_properties_crc,
644 wildcard_property_storage_->ComputeChecksum());
645 storages_checksum ^= wildcard_properties_crc.Get();
646
647 return Crc32(storages_checksum);
648 }
649
650 } // namespace lib
651 } // namespace icing
652