// Copyright (C) 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ICING_FILE_PERSISTENT_STORAGE_H_
#define ICING_FILE_PERSISTENT_STORAGE_H_

#include <cstdint>
#include <string>
#include <string_view>
#include <utility>

#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/filesystem.h"
#include "icing/util/crc32.h"
#include "icing/util/status-macros.h"

namespace icing {
namespace lib {

// PersistentStorage: an abstract class for all persistent data structures.
// - It provides some common persistent file methods, e.g. PersistToDisk.
// - It encapsulates most of the checksum handling logics (including update and
//   validation).
//
// Terminology:
// - Crcs: checksum section
// - Info: (custom) information for derived class
// - Metadata: Crcs + Info
//
// Usually a persistent data structure will have its own custom Info and
// storages (single or composite storages) definition. To create a new
// persistent data structure via PersistentStorage:
// - Decide what type the working path is (single file or directory). See
//   working_path_ and WorkingPathType for more details.
// - Create a new class that inherits PersistentStorage:
//   - Declare custom Info and design the metadata section layout.
//     Usually the layout is <Crcs><Info>, and there are 2 common ways to
//     manage metadata section:
//     - Have a separate file for metadata. In this case, the new persistent
//       data structure contains multiple files, so working path should be used
//       as directory path and multiple files will be stored under it. Example:
//       PersistentHashMap.
//     - Have a single file for both metadata and storage data. In this case,
//       the file layout should be <Crcs><Info><Storage Data>, and
//       working path should be used as file path. Example: FileBackedVector.
//   - Handle working path file/directory creation and deletion.
//     PersistentStorage only provides static Discard() method to use. The
//     derived class should implement other logics, e.g. working path (file
//     /directory) creation, check condition to discard working path and start
//     over new file(s).
//   - Implement all pure virtual methods:
//     - PersistStoragesToDisk: persist all (composite) storages. In general,
//       the implementation will be calling PersistToDisk for all composite
//       storages.
//     - PersistMetadataToDisk: persist metadata, including Crcs and Info.
//       - If the derived class maintains a concrete Crc and (custom) Info
//         instance, then it should perform write/pwrite into the metadata
//         section.
//       - If the derived class uses memory-mapped region directly for metadata,
//         then it should call MemoryMappedFile::PersistToDisk.
//       - See crcs() for more details.
//     - ComputeInfoChecksum: compute the checksum for custom Info.
//     - ComputeStoragesChecksum: compute the (combined) checksum for all
//       (composite) storages. In general, the implementation will be calling
//       UpdateChecksums for all composite storages and XOR all checksums.
79 // - crcs(): provide the reference for PersistentStorage to write checksums. 80 // The derived class can either maintain a concrete Crcs instance, or 81 // reinterpret_cast the memory-mapped region to Crcs reference. Either 82 // choice is fine as long as PersistMetadataToDisk flushes it to disk 83 // correctly. 84 // - Call either InitializeNewStorage or InitializeExistingStorage when creating 85 // and initializing an instance, depending on initializing new storage or from 86 // existing file(s). 87 class PersistentStorage { 88 public: 89 enum class WorkingPathType { 90 kSingleFile, 91 kDirectory, 92 kDummy, 93 }; 94 95 // Crcs and Info will be written into the metadata section. Info is defined by 96 // the actual implementation of each persistent storage. Usually the Metadata 97 // layout is: <Crcs><Info> 98 struct Crcs { 99 struct ComponentCrcs { 100 uint32_t info_crc; 101 uint32_t storages_crc; 102 103 bool operator==(const ComponentCrcs& other) const { 104 return info_crc == other.info_crc && storages_crc == other.storages_crc; 105 } 106 ComputeChecksumCrcs::ComponentCrcs107 Crc32 ComputeChecksum() const { 108 return Crc32(std::string_view(reinterpret_cast<const char*>(this), 109 sizeof(ComponentCrcs))); 110 } 111 } __attribute__((packed)); 112 113 bool operator==(const Crcs& other) const { 114 return all_crc == other.all_crc && component_crcs == other.component_crcs; 115 } 116 117 uint32_t all_crc; 118 ComponentCrcs component_crcs; 119 } __attribute__((packed)); 120 static_assert(sizeof(Crcs) == 12, ""); 121 122 // Deletes working_path according to its type. 123 // 124 // Returns: 125 // - OK on success 126 // - INTERNAL_ERROR on I/O error 127 // - INVALID_ARGUMENT_ERROR if working_path_type is unknown type 128 static libtextclassifier3::Status Discard(const Filesystem& filesystem, 129 const std::string& working_path, 130 WorkingPathType working_path_type); 131 132 virtual ~PersistentStorage() = default; 133 134 // Initializes new persistent storage. 
It computes the initial checksums and 135 // writes into the metadata file. 136 // 137 // Note: either InitializeNewStorage or InitializeExistingStorage should be 138 // invoked after creating a PersistentStorage instance before using, otherwise 139 // an uninitialized instance will fail to use persistent storage features, 140 // e.g. PersistToDisk, UpdateChecksums. 141 // 142 // Returns: 143 // - OK on success or already initialized 144 // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending 145 // on actual implementation InitializeNewStorage()146 libtextclassifier3::Status InitializeNewStorage() { 147 if (is_initialized_) { 148 return libtextclassifier3::Status::OK; 149 } 150 151 ICING_RETURN_IF_ERROR(UpdateChecksumsInternal()); 152 ICING_RETURN_IF_ERROR(PersistMetadataToDisk()); 153 154 is_initialized_ = true; 155 return libtextclassifier3::Status::OK; 156 } 157 158 // Initializes persistent storage from existing file(s). 159 // 160 // It enforces the following check(s): 161 // - Validate checksums. 162 // 163 // Note: either InitializeNewStorage or InitializeExistingStorage should be 164 // invoked after creating a PersistentStorage instance before using. 165 // 166 // Returns: 167 // - OK on success or already initialized 168 // - FAILED_PRECONDITION_ERROR if checksum validation fails. 169 // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending 170 // on actual implementation InitializeExistingStorage()171 libtextclassifier3::Status InitializeExistingStorage() { 172 if (is_initialized_) { 173 return libtextclassifier3::Status::OK; 174 } 175 176 ICING_RETURN_IF_ERROR(ValidateChecksums()); 177 178 is_initialized_ = true; 179 return libtextclassifier3::Status::OK; 180 } 181 182 // Flushes contents to underlying files. 183 // 1) Flushes storages. 184 // 2) Updates all checksums by new data. 185 // 3) Flushes metadata. 
186 // 187 // Returns: 188 // - OK on success 189 // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized 190 // - Any errors from PersistStoragesToDisk, UpdateChecksums, 191 // PersistMetadataToDisk, depending on actual implementation PersistToDisk()192 libtextclassifier3::Status PersistToDisk() { 193 if (!is_initialized_) { 194 return absl_ports::FailedPreconditionError(absl_ports::StrCat( 195 "PersistentStorage ", working_path_, " not initialized")); 196 } 197 198 ICING_RETURN_IF_ERROR(PersistStoragesToDisk()); 199 ICING_RETURN_IF_ERROR(UpdateChecksums()); 200 ICING_RETURN_IF_ERROR(PersistMetadataToDisk()); 201 return libtextclassifier3::Status::OK; 202 } 203 204 // Updates checksums of all components and returns the overall crc (all_crc) 205 // of the persistent storage. 206 // 207 // Returns: 208 // - Overall crc of the persistent storage on success 209 // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized 210 // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending 211 // on actual implementation UpdateChecksums()212 libtextclassifier3::StatusOr<Crc32> UpdateChecksums() { 213 if (!is_initialized_) { 214 return absl_ports::FailedPreconditionError(absl_ports::StrCat( 215 "PersistentStorage ", working_path_, " not initialized")); 216 } 217 218 return UpdateChecksumsInternal(); 219 } 220 221 protected: PersistentStorage(const Filesystem & filesystem,std::string working_path,WorkingPathType working_path_type)222 explicit PersistentStorage(const Filesystem& filesystem, 223 std::string working_path, 224 WorkingPathType working_path_type) 225 : filesystem_(filesystem), 226 working_path_(std::move(working_path)), 227 working_path_type_(working_path_type), 228 is_initialized_(false) {} 229 230 // Flushes contents of metadata. The implementation should flush Crcs and Info 231 // correctly, depending on whether they're using memory-mapped regions or 232 // concrete instances in the derived class. 
233 // 234 // Returns: 235 // - OK on success 236 // - Any other errors, depending on actual implementation 237 virtual libtextclassifier3::Status PersistMetadataToDisk() = 0; 238 239 // Flushes contents of all storages to underlying files. 240 // 241 // Returns: 242 // - OK on success 243 // - Any other errors, depending on actual implementation 244 virtual libtextclassifier3::Status PersistStoragesToDisk() = 0; 245 246 // Computes and returns Info checksum. 247 // 248 // This function will be mainly called by UpdateChecksums. 249 // 250 // Returns: 251 // - Crc of the Info on success 252 // - Any other errors, depending on actual implementation 253 virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() = 0; 254 255 // Computes and returns all storages checksum. If there are multiple storages, 256 // usually we XOR their checksums together to a single checksum. 257 // 258 // This function will be mainly called by UpdateChecksums. 259 // 260 // Returns: 261 // - Crc of all storages on success 262 // - Any other errors from depending on actual implementation 263 virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() = 0; 264 265 // Returns the Crcs instance reference. The derived class can either own a 266 // concrete Crcs instance, or reinterpret_cast the memory-mapped region to 267 // Crcs reference. PersistMetadataToDisk should flush it to disk correctly. 268 virtual Crcs& crcs() = 0; 269 virtual const Crcs& crcs() const = 0; 270 271 const Filesystem& filesystem_; // Does not own 272 // Path to the storage. It can be a single file path or a directory path 273 // depending on the implementation of the derived class. 274 // 275 // Note that the derived storage class will take full ownership and of 276 // working_path_, including creation/deletion. It is the caller's 277 // responsibility to specify correct working path and avoid mixing different 278 // persistent storages together under the same path. 
Also the caller has the 279 // ownership for the parent directory of working_path_, and it is responsible 280 // for parent directory creation/deletion. 281 std::string working_path_; 282 WorkingPathType working_path_type_; 283 284 bool is_initialized_; 285 286 private: 287 // Updates checksums of all components and returns the overall crc (all_crc) 288 // of the persistent storage. Different from UpdateChecksums, it won't check 289 // if PersistentStorage is initialized or not. 290 // 291 // Returns: 292 // - Overall crc of the persistent storage on success 293 // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending 294 // on actual implementation UpdateChecksumsInternal()295 libtextclassifier3::StatusOr<Crc32> UpdateChecksumsInternal() { 296 Crcs& crcs_ref = crcs(); 297 // Compute and update storages + info checksums. 298 ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum()); 299 ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum()); 300 crcs_ref.component_crcs.info_crc = info_crc.Get(); 301 crcs_ref.component_crcs.storages_crc = storages_crc.Get(); 302 303 // Finally compute and update overall checksum. 304 crcs_ref.all_crc = crcs_ref.component_crcs.ComputeChecksum().Get(); 305 return Crc32(crcs_ref.all_crc); 306 } 307 308 // Validates all checksums of the persistent storage. 309 // 310 // Returns: 311 // - OK on success 312 // - FAILED_PRECONDITION_ERROR if any checksum is incorrect. 
313 // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending 314 // on actual implementation ValidateChecksums()315 libtextclassifier3::Status ValidateChecksums() { 316 const Crcs& crcs_ref = crcs(); 317 if (crcs_ref.all_crc != crcs_ref.component_crcs.ComputeChecksum().Get()) { 318 return absl_ports::FailedPreconditionError("Invalid all crc"); 319 } 320 321 ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum()); 322 if (crcs_ref.component_crcs.info_crc != info_crc.Get()) { 323 return absl_ports::FailedPreconditionError("Invalid info crc"); 324 } 325 326 ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum()); 327 if (crcs_ref.component_crcs.storages_crc != storages_crc.Get()) { 328 return absl_ports::FailedPreconditionError("Invalid storages crc"); 329 } 330 331 return libtextclassifier3::Status::OK; 332 } 333 }; 334 335 } // namespace lib 336 } // namespace icing 337 338 #endif // ICING_FILE_PERSISTENT_STORAGE_H_ 339