• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/numeric/integer-index.h"
16 
17 #include <algorithm>
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <string_view>
22 #include <utility>
23 #include <vector>
24 
25 #include "icing/text_classifier/lib3/utils/base/status.h"
26 #include "icing/text_classifier/lib3/utils/base/statusor.h"
27 #include "icing/absl_ports/canonical_errors.h"
28 #include "icing/absl_ports/str_cat.h"
29 #include "icing/file/destructible-directory.h"
30 #include "icing/file/filesystem.h"
31 #include "icing/file/memory-mapped-file.h"
32 #include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
33 #include "icing/index/numeric/doc-hit-info-iterator-numeric.h"
34 #include "icing/index/numeric/integer-index-storage.h"
35 #include "icing/index/numeric/posting-list-integer-index-serializer.h"
36 #include "icing/store/document-id.h"
37 #include "icing/util/crc32.h"
38 #include "icing/util/status-macros.h"
39 
40 namespace icing {
41 namespace lib {
42 
43 namespace {
44 
45 // Helper function to get the file name of metadata.
GetMetadataFileName()46 std::string GetMetadataFileName() {
47   return absl_ports::StrCat(IntegerIndex::kFilePrefix, ".m");
48 }
49 
50 // Helper function to get the file path of metadata according to the given
51 // working directory.
GetMetadataFilePath(std::string_view working_path)52 std::string GetMetadataFilePath(std::string_view working_path) {
53   return absl_ports::StrCat(working_path, "/", GetMetadataFileName());
54 }
55 
// Name of the entry under the working directory that serves as the working
// path of the wildcard IntegerIndexStorage.
constexpr std::string_view kWildcardPropertyIndexFileName{
    "wildcard_property_index"};

// Name of the file under the working directory that backs the
// WildcardPropertyStorage proto.
constexpr std::string_view kWildcardPropertyStorageFileName{
    "wildcard_property_storage"};
61 
GetWildcardPropertyStorageFilePath(std::string_view working_path)62 std::string GetWildcardPropertyStorageFilePath(std::string_view working_path) {
63   return absl_ports::StrCat(working_path, "/",
64                             kWildcardPropertyStorageFileName);
65 }
66 
67 // Helper function to get the sub working (directory) path of
68 // IntegerIndexStorage according to the given working directory and property
69 // path.
GetPropertyIndexStoragePath(std::string_view working_path,std::string_view property_path)70 std::string GetPropertyIndexStoragePath(std::string_view working_path,
71                                         std::string_view property_path) {
72   return absl_ports::StrCat(working_path, "/", property_path);
73 }
74 
75 // Helper function to get all existing property paths by listing all
76 // directories.
77 libtextclassifier3::StatusOr<std::vector<std::string>>
GetAllExistingPropertyPaths(const Filesystem & filesystem,const std::string & working_path)78 GetAllExistingPropertyPaths(const Filesystem& filesystem,
79                             const std::string& working_path) {
80   std::vector<std::string> property_paths;
81   std::unordered_set<std::string> excludes = {
82       GetMetadataFileName(), std::string(kWildcardPropertyStorageFileName)};
83   if (!filesystem.ListDirectory(working_path.c_str(), excludes,
84                                 /*recursive=*/false, &property_paths)) {
85     return absl_ports::InternalError("Failed to list directory");
86   }
87   return property_paths;
88 }
89 
90 libtextclassifier3::StatusOr<IntegerIndex::PropertyToStorageMapType>
GetPropertyIntegerIndexStorageMap(const Filesystem & filesystem,const std::string & working_path,PostingListIntegerIndexSerializer * posting_list_serializer,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)91 GetPropertyIntegerIndexStorageMap(
92     const Filesystem& filesystem, const std::string& working_path,
93     PostingListIntegerIndexSerializer* posting_list_serializer,
94     int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv) {
95   ICING_ASSIGN_OR_RETURN(std::vector<std::string> property_paths,
96                          GetAllExistingPropertyPaths(filesystem, working_path));
97 
98   IntegerIndex::PropertyToStorageMapType property_to_storage_map;
99   for (const std::string& property_path : property_paths) {
100     if (property_path == kWildcardPropertyIndexFileName) {
101       continue;
102     }
103     std::string storage_working_path =
104         GetPropertyIndexStoragePath(working_path, property_path);
105     ICING_ASSIGN_OR_RETURN(
106         std::unique_ptr<IntegerIndexStorage> storage,
107         IntegerIndexStorage::Create(
108             filesystem, storage_working_path,
109             IntegerIndexStorage::Options(num_data_threshold_for_bucket_split,
110                                          pre_mapping_fbv),
111             posting_list_serializer));
112     property_to_storage_map.insert(
113         std::make_pair(property_path, std::move(storage)));
114   }
115 
116   return property_to_storage_map;
117 }
118 
119 // RETURNS:
120 //   - On success, an unordered_set representing the list of property paths
121 //     stored in the WildcardPropertyStorage managed by property_storage
122 //   - INTERNAL_ERROR on any failure to successfully read the underlying proto.
CreatePropertySet(const FileBackedProto<WildcardPropertyStorage> & property_storage)123 libtextclassifier3::StatusOr<std::unordered_set<std::string>> CreatePropertySet(
124     const FileBackedProto<WildcardPropertyStorage>& property_storage) {
125   std::unordered_set<std::string> wildcard_properties_set;
126   auto wildcard_properties_or = property_storage.Read();
127   if (!wildcard_properties_or.ok()) {
128     if (absl_ports::IsNotFound(wildcard_properties_or.status())) {
129       return wildcard_properties_set;
130     }
131     return wildcard_properties_or.status();
132   }
133 
134   const WildcardPropertyStorage* wildcard_properties =
135       wildcard_properties_or.ValueOrDie();
136   wildcard_properties_set.reserve(wildcard_properties->property_entries_size());
137   for (const std::string& property : wildcard_properties->property_entries()) {
138     wildcard_properties_set.insert(property);
139   }
140   return wildcard_properties_set;
141 }
142 
143 }  // namespace
144 
IndexAllBufferedKeys()145 libtextclassifier3::Status IntegerIndex::Editor::IndexAllBufferedKeys() && {
146   integer_index_.SetDirty();
147 
148   auto iter = integer_index_.property_to_storage_map_.find(property_path_);
149   IntegerIndexStorage* target_storage = nullptr;
150   // 1. Check if this property already has its own individual index.
151   if (iter != integer_index_.property_to_storage_map_.end()) {
152     target_storage = iter->second.get();
153     // 2. Check if this property was added to wildcard storage.
154   } else if (integer_index_.wildcard_properties_set_.find(property_path_) !=
155              integer_index_.wildcard_properties_set_.end()) {
156     target_storage = integer_index_.wildcard_index_storage_.get();
157     // 3. Check if we've reach the limit of individual property storages.
158   } else if (integer_index_.property_to_storage_map_.size() >=
159              kMaxPropertyStorages) {
160     // 3a. Create the wildcard storage if it doesn't exist.
161     if (integer_index_.wildcard_index_storage_ == nullptr) {
162       ICING_ASSIGN_OR_RETURN(
163           integer_index_.wildcard_index_storage_,
164           IntegerIndexStorage::Create(
165               integer_index_.filesystem_,
166               GetPropertyIndexStoragePath(integer_index_.working_path_,
167                                           kWildcardPropertyIndexFileName),
168               IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
169                                            pre_mapping_fbv_),
170               integer_index_.posting_list_serializer_.get()));
171     }
172     ICING_RETURN_IF_ERROR(
173         integer_index_.AddPropertyToWildcardStorage(property_path_));
174     target_storage = integer_index_.wildcard_index_storage_.get();
175     // 4. Create a new individual storage for this new property.
176   } else {
177     ICING_ASSIGN_OR_RETURN(
178         std::unique_ptr<IntegerIndexStorage> new_storage,
179         IntegerIndexStorage::Create(
180             integer_index_.filesystem_,
181             GetPropertyIndexStoragePath(integer_index_.working_path_,
182                                         property_path_),
183             IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
184                                          pre_mapping_fbv_),
185             integer_index_.posting_list_serializer_.get()));
186     target_storage = new_storage.get();
187     integer_index_.property_to_storage_map_.insert(
188         std::make_pair(property_path_, std::move(new_storage)));
189   }
190 
191   return target_storage->AddKeys(document_id_, section_id_,
192                                  std::move(seen_keys_));
193 }
194 
195 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
Create(const Filesystem & filesystem,std::string working_path,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)196 IntegerIndex::Create(const Filesystem& filesystem, std::string working_path,
197                      int32_t num_data_threshold_for_bucket_split,
198                      bool pre_mapping_fbv) {
199   if (!filesystem.FileExists(GetMetadataFilePath(working_path).c_str())) {
200     // Discard working_path if metadata file is missing, and reinitialize.
201     if (filesystem.DirectoryExists(working_path.c_str())) {
202       ICING_RETURN_IF_ERROR(Discard(filesystem, working_path));
203     }
204     return InitializeNewFiles(filesystem, std::move(working_path),
205                               num_data_threshold_for_bucket_split,
206                               pre_mapping_fbv);
207   }
208   return InitializeExistingFiles(filesystem, std::move(working_path),
209                                  num_data_threshold_for_bucket_split,
210                                  pre_mapping_fbv);
211 }
212 
~IntegerIndex()213 IntegerIndex::~IntegerIndex() {
214   if (!PersistToDisk().ok()) {
215     ICING_LOG(WARNING)
216         << "Failed to persist integer index to disk while destructing "
217         << working_path_;
218   }
219 }
220 
221 libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
GetIterator(std::string_view property_path,int64_t key_lower,int64_t key_upper,const DocumentStore & document_store,const SchemaStore & schema_store,int64_t current_time_ms) const222 IntegerIndex::GetIterator(std::string_view property_path, int64_t key_lower,
223                           int64_t key_upper,
224                           const DocumentStore& document_store,
225                           const SchemaStore& schema_store,
226                           int64_t current_time_ms) const {
227   std::string property_path_str(property_path);
228   auto iter = property_to_storage_map_.find(property_path_str);
229   if (iter != property_to_storage_map_.end()) {
230     return iter->second->GetIterator(key_lower, key_upper);
231   }
232 
233   if (wildcard_properties_set_.find(property_path_str) !=
234       wildcard_properties_set_.end()) {
235     ICING_ASSIGN_OR_RETURN(
236         std::unique_ptr<DocHitInfoIterator> delegate,
237         wildcard_index_storage_->GetIterator(key_lower, key_upper));
238     std::set<std::string> property_paths = {std::move(property_path_str)};
239     return DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
240         std::move(delegate), &document_store, &schema_store,
241         std::move(property_paths), current_time_ms);
242   }
243 
244   // Return an empty iterator.
245   return std::make_unique<DocHitInfoIteratorNumeric<int64_t>>(
246       /*numeric_index_iter=*/nullptr);
247 }
248 
AddPropertyToWildcardStorage(const std::string & property_path)249 libtextclassifier3::Status IntegerIndex::AddPropertyToWildcardStorage(
250     const std::string& property_path) {
251   SetDirty();
252 
253   WildcardPropertyStorage wildcard_properties;
254   wildcard_properties.mutable_property_entries()->Reserve(
255       wildcard_properties_set_.size());
256   for (const std::string& property_path : wildcard_properties_set_) {
257     wildcard_properties.add_property_entries(property_path);
258   }
259   ICING_RETURN_IF_ERROR(wildcard_property_storage_->Write(
260       std::make_unique<WildcardPropertyStorage>(
261           std::move(wildcard_properties))));
262 
263   wildcard_properties_set_.insert(property_path);
264   return libtextclassifier3::Status::OK;
265 }
266 
Optimize(const std::vector<DocumentId> & document_id_old_to_new,DocumentId new_last_added_document_id)267 libtextclassifier3::Status IntegerIndex::Optimize(
268     const std::vector<DocumentId>& document_id_old_to_new,
269     DocumentId new_last_added_document_id) {
270   std::string temp_working_path = working_path_ + "_temp";
271   ICING_RETURN_IF_ERROR(Discard(filesystem_, temp_working_path));
272 
273   DestructibleDirectory temp_working_path_ddir(&filesystem_,
274                                                std::move(temp_working_path));
275   if (!temp_working_path_ddir.is_valid()) {
276     return absl_ports::InternalError(
277         "Unable to create temp directory to build new integer index");
278   }
279 
280   {
281     // Transfer all indexed data from current integer index to new integer
282     // index. Also PersistToDisk and destruct the instance after finishing, so
283     // we can safely swap directories later.
284     ICING_ASSIGN_OR_RETURN(
285         std::unique_ptr<IntegerIndex> new_integer_index,
286         Create(filesystem_, temp_working_path_ddir.dir(),
287                num_data_threshold_for_bucket_split_, pre_mapping_fbv_));
288     ICING_RETURN_IF_ERROR(
289         TransferIndex(document_id_old_to_new, new_integer_index.get()));
290     new_integer_index->set_last_added_document_id(new_last_added_document_id);
291     ICING_RETURN_IF_ERROR(new_integer_index->PersistToDisk());
292   }
293 
294   // Destruct current storage instances to safely swap directories.
295   metadata_mmapped_file_.reset();
296   property_to_storage_map_.clear();
297   wildcard_index_storage_.reset();
298   wildcard_property_storage_.reset();
299   if (!filesystem_.SwapFiles(temp_working_path_ddir.dir().c_str(),
300                              working_path_.c_str())) {
301     return absl_ports::InternalError(
302         "Unable to apply new integer index due to failed swap");
303   }
304 
305   // Reinitialize the integer index.
306   std::string metadata_file_path = GetMetadataFilePath(working_path_);
307   ICING_ASSIGN_OR_RETURN(
308       MemoryMappedFile metadata_mmapped_file,
309       MemoryMappedFile::Create(filesystem_, metadata_file_path,
310                                MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
311                                /*max_file_size=*/kMetadataFileSize,
312                                /*pre_mapping_file_offset=*/0,
313                                /*pre_mapping_mmap_size=*/kMetadataFileSize));
314   if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
315     return absl_ports::InternalError(
316         "Invalid metadata file size after Optimize");
317   }
318   metadata_mmapped_file_ =
319       std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file));
320 
321   // Recreate all of the data structures tracking the wildcard storage.
322   std::string wildcard_property_path =
323       GetWildcardPropertyStorageFilePath(working_path_);
324   wildcard_property_storage_ =
325       std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
326           filesystem_, wildcard_property_path);
327 
328   ICING_ASSIGN_OR_RETURN(wildcard_properties_set_,
329                          CreatePropertySet(*wildcard_property_storage_));
330   if (!wildcard_properties_set_.empty()) {
331     ICING_ASSIGN_OR_RETURN(
332         wildcard_index_storage_,
333         IntegerIndexStorage::Create(
334             filesystem_,
335             GetPropertyIndexStoragePath(working_path_,
336                                         kWildcardPropertyIndexFileName),
337             IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
338                                          pre_mapping_fbv_),
339             posting_list_serializer_.get()));
340   }
341 
342   // Initialize all existing integer index storages.
343   ICING_ASSIGN_OR_RETURN(
344       property_to_storage_map_,
345       GetPropertyIntegerIndexStorageMap(
346           filesystem_, working_path_, posting_list_serializer_.get(),
347           num_data_threshold_for_bucket_split_, pre_mapping_fbv_));
348 
349   return libtextclassifier3::Status::OK;
350 }
351 
Clear()352 libtextclassifier3::Status IntegerIndex::Clear() {
353   SetDirty();
354 
355   // Step 1: clear property_to_storage_map_.
356   property_to_storage_map_.clear();
357   wildcard_index_storage_.reset();
358 
359   // Step 2: delete all IntegerIndexStorages. It is safe because there is no
360   //         active IntegerIndexStorage after clearing the map.
361   ICING_ASSIGN_OR_RETURN(
362       std::vector<std::string> property_paths,
363       GetAllExistingPropertyPaths(filesystem_, working_path_));
364   for (const std::string& property_path : property_paths) {
365     ICING_RETURN_IF_ERROR(IntegerIndexStorage::Discard(
366         filesystem_,
367         GetPropertyIndexStoragePath(working_path_, property_path)));
368   }
369 
370   // Step 3: Delete the wildcard property storage
371   std::string wildcard_property_path =
372       GetWildcardPropertyStorageFilePath(working_path_);
373   if (filesystem_.FileExists(wildcard_property_path.c_str()) ||
374       !filesystem_.DeleteFile(wildcard_property_path.c_str())) {
375     return absl_ports::InternalError(absl_ports::StrCat(
376         "Unable to delete file at path ", wildcard_property_path));
377   }
378 
379   info().last_added_document_id = kInvalidDocumentId;
380   return libtextclassifier3::Status::OK;
381 }
382 
383 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
InitializeNewFiles(const Filesystem & filesystem,std::string && working_path,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)384 IntegerIndex::InitializeNewFiles(const Filesystem& filesystem,
385                                  std::string&& working_path,
386                                  int32_t num_data_threshold_for_bucket_split,
387                                  bool pre_mapping_fbv) {
388   // Create working directory.
389   if (!filesystem.CreateDirectoryRecursively(working_path.c_str())) {
390     return absl_ports::InternalError(
391         absl_ports::StrCat("Failed to create directory: ", working_path));
392   }
393 
394   // Initialize metadata file. Create MemoryMappedFile with pre-mapping, and
395   // call GrowAndRemapIfNecessary to grow the underlying file.
396   ICING_ASSIGN_OR_RETURN(
397       MemoryMappedFile metadata_mmapped_file,
398       MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
399                                MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
400                                /*max_file_size=*/kMetadataFileSize,
401                                /*pre_mapping_file_offset=*/0,
402                                /*pre_mapping_mmap_size=*/kMetadataFileSize));
403   ICING_RETURN_IF_ERROR(metadata_mmapped_file.GrowAndRemapIfNecessary(
404       /*file_offset=*/0, /*mmap_size=*/kMetadataFileSize));
405 
406   std::string wildcard_property_path =
407       GetWildcardPropertyStorageFilePath(working_path);
408   auto wildcard_property_storage =
409       std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
410           filesystem, wildcard_property_path);
411 
412   // Create instance.
413   auto new_integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
414       filesystem, std::move(working_path),
415       std::make_unique<PostingListIntegerIndexSerializer>(),
416       std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
417       /*property_to_storage_map=*/{}, std::move(wildcard_property_storage),
418       /*wildcard_properties_set=*/{}, /*wildcard_index_storage=*/nullptr,
419       num_data_threshold_for_bucket_split, pre_mapping_fbv));
420 
421   // Initialize info content by writing mapped memory directly.
422   Info& info_ref = new_integer_index->info();
423   info_ref.magic = Info::kMagic;
424   info_ref.last_added_document_id = kInvalidDocumentId;
425   info_ref.num_data_threshold_for_bucket_split =
426       num_data_threshold_for_bucket_split;
427   // Initialize new PersistentStorage. The initial checksums will be computed
428   // and set via InitializeNewStorage.
429   ICING_RETURN_IF_ERROR(new_integer_index->InitializeNewStorage());
430 
431   return new_integer_index;
432 }
433 
434 /* static */ libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndex>>
InitializeExistingFiles(const Filesystem & filesystem,std::string && working_path,int32_t num_data_threshold_for_bucket_split,bool pre_mapping_fbv)435 IntegerIndex::InitializeExistingFiles(
436     const Filesystem& filesystem, std::string&& working_path,
437     int32_t num_data_threshold_for_bucket_split, bool pre_mapping_fbv) {
438   // Mmap the content of the crcs and info.
439   ICING_ASSIGN_OR_RETURN(
440       MemoryMappedFile metadata_mmapped_file,
441       MemoryMappedFile::Create(filesystem, GetMetadataFilePath(working_path),
442                                MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
443                                /*max_file_size=*/kMetadataFileSize,
444                                /*pre_mapping_file_offset=*/0,
445                                /*pre_mapping_mmap_size=*/kMetadataFileSize));
446   if (metadata_mmapped_file.available_size() != kMetadataFileSize) {
447     return absl_ports::FailedPreconditionError("Incorrect metadata file size");
448   }
449 
450   auto posting_list_serializer =
451       std::make_unique<PostingListIntegerIndexSerializer>();
452 
453   // Initialize all existing integer index storages.
454   ICING_ASSIGN_OR_RETURN(
455       PropertyToStorageMapType property_to_storage_map,
456       GetPropertyIntegerIndexStorageMap(
457           filesystem, working_path, posting_list_serializer.get(),
458           num_data_threshold_for_bucket_split, pre_mapping_fbv));
459 
460   std::string wildcard_property_path =
461       GetWildcardPropertyStorageFilePath(working_path);
462   auto wildcard_property_storage =
463       std::make_unique<FileBackedProto<WildcardPropertyStorage>>(
464           filesystem, wildcard_property_path);
465 
466   ICING_ASSIGN_OR_RETURN(
467       std::unordered_set<std::string> wildcard_properties_set,
468       CreatePropertySet(*wildcard_property_storage));
469 
470   std::unique_ptr<IntegerIndexStorage> wildcard_index_storage;
471   if (!wildcard_properties_set.empty()) {
472     ICING_ASSIGN_OR_RETURN(
473         wildcard_index_storage,
474         IntegerIndexStorage::Create(
475             filesystem,
476             GetPropertyIndexStoragePath(working_path,
477                                         kWildcardPropertyIndexFileName),
478             IntegerIndexStorage::Options(num_data_threshold_for_bucket_split,
479                                          pre_mapping_fbv),
480             posting_list_serializer.get()));
481   }
482 
483   // Create instance.
484   auto integer_index = std::unique_ptr<IntegerIndex>(new IntegerIndex(
485       filesystem, std::move(working_path), std::move(posting_list_serializer),
486       std::make_unique<MemoryMappedFile>(std::move(metadata_mmapped_file)),
487       std::move(property_to_storage_map), std::move(wildcard_property_storage),
488       std::move(wildcard_properties_set), std::move(wildcard_index_storage),
489       num_data_threshold_for_bucket_split, pre_mapping_fbv));
490   // Initialize existing PersistentStorage. Checksums will be validated.
491   ICING_RETURN_IF_ERROR(integer_index->InitializeExistingStorage());
492 
493   // Validate magic.
494   if (integer_index->info().magic != Info::kMagic) {
495     return absl_ports::FailedPreconditionError("Incorrect magic value");
496   }
497 
498   // If num_data_threshold_for_bucket_split mismatches, then return error to let
499   // caller rebuild.
500   if (integer_index->info().num_data_threshold_for_bucket_split !=
501       num_data_threshold_for_bucket_split) {
502     return absl_ports::FailedPreconditionError(
503         "Mismatch num_data_threshold_for_bucket_split");
504   }
505 
506   return integer_index;
507 }
508 
509 libtextclassifier3::StatusOr<std::unique_ptr<IntegerIndexStorage>>
TransferIntegerIndexStorage(const std::vector<DocumentId> & document_id_old_to_new,const IntegerIndexStorage * old_storage,const std::string & property_path,IntegerIndex * new_integer_index) const510 IntegerIndex::TransferIntegerIndexStorage(
511     const std::vector<DocumentId>& document_id_old_to_new,
512     const IntegerIndexStorage* old_storage, const std::string& property_path,
513     IntegerIndex* new_integer_index) const {
514   std::string new_storage_working_path = GetPropertyIndexStoragePath(
515       new_integer_index->working_path_, property_path);
516   ICING_ASSIGN_OR_RETURN(
517       std::unique_ptr<IntegerIndexStorage> new_storage,
518       IntegerIndexStorage::Create(
519           new_integer_index->filesystem_, new_storage_working_path,
520           IntegerIndexStorage::Options(num_data_threshold_for_bucket_split_,
521                                        pre_mapping_fbv_),
522           new_integer_index->posting_list_serializer_.get()));
523 
524   ICING_RETURN_IF_ERROR(
525       old_storage->TransferIndex(document_id_old_to_new, new_storage.get()));
526 
527   if (new_storage->num_data() == 0) {
528     new_storage.reset();
529     ICING_RETURN_IF_ERROR(
530         IntegerIndexStorage::Discard(filesystem_, new_storage_working_path));
531   }
532   return new_storage;
533 }
534 
TransferWildcardStorage(IntegerIndex * new_integer_index) const535 libtextclassifier3::Status IntegerIndex::TransferWildcardStorage(
536     IntegerIndex* new_integer_index) const {
537   auto property_storage = std::make_unique<WildcardPropertyStorage>();
538   property_storage->mutable_property_entries()->Reserve(
539       wildcard_properties_set_.size());
540   for (const std::string& property : wildcard_properties_set_) {
541     property_storage->add_property_entries(property);
542   }
543 
544   ICING_RETURN_IF_ERROR(new_integer_index->wildcard_property_storage_->Write(
545       std::move(property_storage)));
546   new_integer_index->wildcard_properties_set_ = wildcard_properties_set_;
547   return libtextclassifier3::Status::OK;
548 }
549 
TransferIndex(const std::vector<DocumentId> & document_id_old_to_new,IntegerIndex * new_integer_index) const550 libtextclassifier3::Status IntegerIndex::TransferIndex(
551     const std::vector<DocumentId>& document_id_old_to_new,
552     IntegerIndex* new_integer_index) const {
553   // Transfer over the integer index storages
554   std::unique_ptr<IntegerIndexStorage> new_storage;
555   for (const auto& [property_path, old_storage] : property_to_storage_map_) {
556     ICING_ASSIGN_OR_RETURN(
557         new_storage,
558         TransferIntegerIndexStorage(document_id_old_to_new, old_storage.get(),
559                                     property_path, new_integer_index));
560     if (new_storage != nullptr) {
561       new_integer_index->property_to_storage_map_.insert(
562           {property_path, std::move(new_storage)});
563     }
564   }
565   if (wildcard_index_storage_ != nullptr) {
566     ICING_ASSIGN_OR_RETURN(
567         new_storage,
568         TransferIntegerIndexStorage(
569             document_id_old_to_new, wildcard_index_storage_.get(),
570             std::string(kWildcardPropertyIndexFileName), new_integer_index));
571     if (new_storage != nullptr) {
572       new_integer_index->wildcard_index_storage_ = std::move(new_storage);
573 
574       // The only time we need to copy over the list of properties using
575       // wildcard storage is if wildcard_index_storage and new_storage are both
576       // non-null. Otherwise, the new wildcard index storage won't have any
577       // data.
578       ICING_RETURN_IF_ERROR(TransferWildcardStorage(new_integer_index));
579     }
580   }
581 
582   return libtextclassifier3::Status::OK;
583 }
584 
PersistStoragesToDisk(bool force)585 libtextclassifier3::Status IntegerIndex::PersistStoragesToDisk(bool force) {
586   if (!force && !is_storage_dirty()) {
587     return libtextclassifier3::Status::OK;
588   }
589 
590   for (auto& [_, storage] : property_to_storage_map_) {
591     ICING_RETURN_IF_ERROR(storage->PersistToDisk());
592   }
593   // No need to persist wildcard_properties_storage_. All calls to
594   // FileBackedProto::Write are fully written through at the time of the call.
595   if (wildcard_index_storage_) {
596     ICING_RETURN_IF_ERROR(wildcard_index_storage_->PersistToDisk());
597   }
598   return libtextclassifier3::Status::OK;
599 }
600 
PersistMetadataToDisk(bool force)601 libtextclassifier3::Status IntegerIndex::PersistMetadataToDisk(bool force) {
602   if (!force && !is_info_dirty() && !is_storage_dirty()) {
603     return libtextclassifier3::Status::OK;
604   }
605 
606   // Changes should have been applied to the underlying file when using
607   // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, but call msync() as an
608   // extra safety step to ensure they are written out.
609   return metadata_mmapped_file_->PersistToDisk();
610 }
611 
ComputeInfoChecksum(bool force)612 libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeInfoChecksum(
613     bool force) {
614   if (!force && !is_info_dirty()) {
615     return Crc32(crcs().component_crcs.info_crc);
616   }
617 
618   return info().ComputeChecksum();
619 }
620 
ComputeStoragesChecksum(bool force)621 libtextclassifier3::StatusOr<Crc32> IntegerIndex::ComputeStoragesChecksum(
622     bool force) {
623   if (!force && !is_storage_dirty()) {
624     return Crc32(crcs().component_crcs.storages_crc);
625   }
626 
627   // XOR all crcs of all storages. Since XOR is commutative and associative,
628   // the order doesn't matter.
629   uint32_t storages_checksum = 0;
630   for (auto& [property_path, storage] : property_to_storage_map_) {
631     ICING_ASSIGN_OR_RETURN(Crc32 storage_crc, storage->UpdateChecksums());
632     storage_crc.Append(property_path);
633 
634     storages_checksum ^= storage_crc.Get();
635   }
636 
637   if (wildcard_index_storage_ != nullptr) {
638     ICING_ASSIGN_OR_RETURN(Crc32 storage_crc,
639                            wildcard_index_storage_->UpdateChecksums());
640     storages_checksum ^= storage_crc.Get();
641   }
642 
643   ICING_ASSIGN_OR_RETURN(Crc32 wildcard_properties_crc,
644                          wildcard_property_storage_->ComputeChecksum());
645   storages_checksum ^= wildcard_properties_crc.Get();
646 
647   return Crc32(storages_checksum);
648 }
649 
650 }  // namespace lib
651 }  // namespace icing
652