• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_FILE_PERSISTENT_STORAGE_H_
16 #define ICING_FILE_PERSISTENT_STORAGE_H_
17 
18 #include <cstdint>
19 #include <string>
20 #include <string_view>
21 
22 #include "icing/text_classifier/lib3/utils/base/status.h"
23 #include "icing/text_classifier/lib3/utils/base/statusor.h"
24 #include "icing/absl_ports/canonical_errors.h"
25 #include "icing/absl_ports/str_cat.h"
26 #include "icing/file/filesystem.h"
27 #include "icing/util/crc32.h"
28 #include "icing/util/status-macros.h"
29 
30 namespace icing {
31 namespace lib {
32 
33 // PersistentStorage: an abstract class for all persistent data structures.
34 // - It provides some common persistent file methods, e.g. PersistToDisk.
// - It encapsulates most of the checksum handling logic (including update and
//   validation).
37 //
38 // Terminology:
39 // - Crcs: checksum section
40 // - Info: (custom) information for derived class
41 // - Metadata: Crcs + Info
42 //
43 // Usually a persistent data structure will have its own custom Info and
44 // storages (single or composite storages) definition. To create a new
45 // persistent data structure via PersistentStorage:
46 // - Decide what type the working path is (single file or directory). See
47 //   working_path_ and WorkingPathType for more details.
48 // - Create a new class that inherits PersistentStorage:
49 //   - Declare custom Info and design the metadata section layout.
50 //     Usually the layout is <Crcs><Info>, and there are 2 common ways to
51 //     manage metadata section:
52 //     - Have a separate file for metadata. In this case, the new persistent
53 //       data structure contains multiple files, so working path should be used
54 //       as directory path and multiple files will be stored under it. Example:
55 //       PersistentHashMap.
56 //     - Have a single file for both metadata and storage data. In this case,
57 //       the file layout should be <Crcs><Info><Storage Data>, and
58 //       working path should be used as file path. Example: FileBackedVector.
//   - Handle working path file/directory creation and deletion.
//     PersistentStorage only provides the static Discard() method. The
//     derived class should implement the remaining logic, e.g. creating the
//     working path (file/directory) and deciding when to discard the working
//     path and start over with new file(s).
64 //   - Implement all pure virtual methods:
65 //     - PersistStoragesToDisk: persist all (composite) storages. In general,
66 //       the implementation will be calling PersistToDisk for all composite
67 //       storages.
68 //     - PersistMetadataToDisk: persist metadata, including Crcs and Info.
69 //       - If the derived class maintains a concrete Crc and (custom) Info
70 //         instance, then it should perform write/pwrite into the metadata
71 //         section.
72 //       - If the derived class uses memory-mapped region directly for metadata,
73 //         then it should call MemoryMappedFile::PersistToDisk.
74 //       - See crcs() for more details.
75 //     - ComputeInfoChecksum: compute the checksum for custom Info.
76 //     - ComputeStoragesChecksum: compute the (combined) checksum for all
77 //       (composite) storages. In general, the implementation will be calling
78 //       UpdateChecksums for all composite storages and XOR all checksums.
79 //     - crcs(): provide the reference for PersistentStorage to write checksums.
80 //       The derived class can either maintain a concrete Crcs instance, or
81 //       reinterpret_cast the memory-mapped region to Crcs reference. Either
82 //       choice is fine as long as PersistMetadataToDisk flushes it to disk
83 //       correctly.
84 // - Call either InitializeNewStorage or InitializeExistingStorage when creating
85 //   and initializing an instance, depending on initializing new storage or from
86 //   existing file(s).
87 class PersistentStorage {
88  public:
89   enum class WorkingPathType {
90     kSingleFile,
91     kDirectory,
92     kDummy,
93   };
94 
95   // Crcs and Info will be written into the metadata section. Info is defined by
96   // the actual implementation of each persistent storage. Usually the Metadata
97   // layout is: <Crcs><Info>
98   struct Crcs {
99     struct ComponentCrcs {
100       uint32_t info_crc;
101       uint32_t storages_crc;
102 
103       bool operator==(const ComponentCrcs& other) const {
104         return info_crc == other.info_crc && storages_crc == other.storages_crc;
105       }
106 
ComputeChecksumCrcs::ComponentCrcs107       Crc32 ComputeChecksum() const {
108         return Crc32(std::string_view(reinterpret_cast<const char*>(this),
109                                       sizeof(ComponentCrcs)));
110       }
111     } __attribute__((packed));
112 
113     bool operator==(const Crcs& other) const {
114       return all_crc == other.all_crc && component_crcs == other.component_crcs;
115     }
116 
117     uint32_t all_crc;
118     ComponentCrcs component_crcs;
119   } __attribute__((packed));
120   static_assert(sizeof(Crcs) == 12, "");
121 
122   // Deletes working_path according to its type.
123   //
124   // Returns:
125   //   - OK on success
126   //   - INTERNAL_ERROR on I/O error
127   //   - INVALID_ARGUMENT_ERROR if working_path_type is unknown type
128   static libtextclassifier3::Status Discard(const Filesystem& filesystem,
129                                             const std::string& working_path,
130                                             WorkingPathType working_path_type);
131 
132   virtual ~PersistentStorage() = default;
133 
134   // Initializes new persistent storage. It computes the initial checksums and
135   // writes into the metadata file.
136   //
137   // Note: either InitializeNewStorage or InitializeExistingStorage should be
138   // invoked after creating a PersistentStorage instance before using, otherwise
139   // an uninitialized instance will fail to use persistent storage features,
140   // e.g. PersistToDisk, UpdateChecksums.
141   //
142   // Returns:
143   //   - OK on success or already initialized
144   //   - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
145   //     on actual implementation
InitializeNewStorage()146   libtextclassifier3::Status InitializeNewStorage() {
147     if (is_initialized_) {
148       return libtextclassifier3::Status::OK;
149     }
150 
151     ICING_RETURN_IF_ERROR(UpdateChecksumsInternal(/*force=*/true));
152     ICING_RETURN_IF_ERROR(PersistStoragesToDisk(/*force=*/true));
153     ICING_RETURN_IF_ERROR(PersistMetadataToDisk(/*force=*/true));
154 
155     is_initialized_ = true;
156     return libtextclassifier3::Status::OK;
157   }
158 
159   // Initializes persistent storage from existing file(s).
160   //
161   // It enforces the following check(s):
162   // - Validate checksums.
163   //
164   // Note: either InitializeNewStorage or InitializeExistingStorage should be
165   // invoked after creating a PersistentStorage instance before using.
166   //
167   // Returns:
168   //   - OK on success or already initialized
169   //   - FAILED_PRECONDITION_ERROR if checksum validation fails.
170   //   - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
171   //     on actual implementation
InitializeExistingStorage()172   libtextclassifier3::Status InitializeExistingStorage() {
173     if (is_initialized_) {
174       return libtextclassifier3::Status::OK;
175     }
176 
177     ICING_RETURN_IF_ERROR(ValidateChecksums());
178 
179     is_initialized_ = true;
180     return libtextclassifier3::Status::OK;
181   }
182 
183   // Flushes contents to underlying files.
184   // 1) Flushes storages.
185   // 2) Updates all checksums by new data.
186   // 3) Flushes metadata.
187   //
188   // Force flag will be passed down to PersistMetadataToDisk,
189   // PersistStoragesToDisk, ComputeInfoChecksum, ComputeStoragesChecksum.
190   // - If force == true, then performs actual persisting operations/recomputes
191   //   the checksum.
192   // - Otherwise, the derived class can decide itself whether skipping
193   //   persisting operations/doing lazy checksum recomputing if the storage is
194   //   not dirty.
195   //
196   // Returns:
197   //   - OK on success
198   //   - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized
199   //   - Any errors from PersistStoragesToDisk, UpdateChecksums,
200   //     PersistMetadataToDisk, depending on actual implementation
201   libtextclassifier3::Status PersistToDisk(bool force = false) {
202     if (!is_initialized_) {
203       return absl_ports::FailedPreconditionError(absl_ports::StrCat(
204           "PersistentStorage ", working_path_, " not initialized"));
205     }
206 
207     ICING_RETURN_IF_ERROR(UpdateChecksumsInternal(force));
208     ICING_RETURN_IF_ERROR(PersistStoragesToDisk(force));
209     ICING_RETURN_IF_ERROR(PersistMetadataToDisk(force));
210     return libtextclassifier3::Status::OK;
211   }
212 
213   // Updates checksums of all components and returns the overall crc (all_crc)
214   // of the persistent storage.
215   //
216   // Force flag will be passed down ComputeInfoChecksum,
217   // ComputeStoragesChecksum.
218   // - If force == true, then recomputes the checksum.
219   // - Otherwise, the derived class can decide itself whether doing lazy
220   //   checksum recomputing if the storage is not dirty.
221   //
222   // Returns:
223   //   - Overall crc of the persistent storage on success
224   //   - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized
225   //   - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
226   //     on actual implementation
227   libtextclassifier3::StatusOr<Crc32> UpdateChecksums(bool force = false) {
228     if (!is_initialized_) {
229       return absl_ports::FailedPreconditionError(absl_ports::StrCat(
230           "PersistentStorage ", working_path_, " not initialized"));
231     }
232 
233     return UpdateChecksumsInternal(force);
234   }
235 
236  protected:
PersistentStorage(const Filesystem & filesystem,std::string working_path,WorkingPathType working_path_type)237   explicit PersistentStorage(const Filesystem& filesystem,
238                              std::string working_path,
239                              WorkingPathType working_path_type)
240       : filesystem_(filesystem),
241         working_path_(std::move(working_path)),
242         working_path_type_(working_path_type),
243         is_initialized_(false) {}
244 
245   // Flushes contents of metadata. The implementation should flush Crcs and Info
246   // correctly, depending on whether they're using memory-mapped regions or
247   // concrete instances in the derived class.
248   //
249   // Returns:
250   //   - OK on success
251   //   - Any other errors, depending on actual implementation
252   virtual libtextclassifier3::Status PersistMetadataToDisk(bool force) = 0;
253 
254   // Flushes contents of all storages to underlying files.
255   //
256   // Returns:
257   //   - OK on success
258   //   - Any other errors, depending on actual implementation
259   virtual libtextclassifier3::Status PersistStoragesToDisk(bool force) = 0;
260 
261   // Computes and returns Info checksum.
262   // - If force = true, then recompute the entire checksum.
263   // - Otherwise, the derived class can decide itself whether doing lazy
264   //   checksum computing if the storage is not dirty.
265   //
266   // This function will be mainly called by UpdateChecksums.
267   //
268   // Returns:
269   //   - Crc of the Info on success
270   //   - Any other errors, depending on actual implementation
271   virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum(
272       bool force) = 0;
273 
274   // Computes and returns all storages checksum. If there are multiple storages,
275   // usually we XOR their checksums together to a single checksum.
276   // - If force = true, then recompute the entire checksum.
277   // - Otherwise, the derived class can decide itself whether doing lazy
278   //   checksum computing if the storage is not dirty.
279   //
280   // This function will be mainly called by UpdateChecksums.
281   //
282   // Returns:
283   //   - Crc of all storages on success
284   //   - Any other errors from depending on actual implementation
285   virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum(
286       bool force) = 0;
287 
288   // Returns the Crcs instance reference. The derived class can either own a
289   // concrete Crcs instance, or reinterpret_cast the memory-mapped region to
290   // Crcs reference. PersistMetadataToDisk should flush it to disk correctly.
291   virtual Crcs& crcs() = 0;
292   virtual const Crcs& crcs() const = 0;
293 
294   const Filesystem& filesystem_;  // Does not own
295   // Path to the storage. It can be a single file path or a directory path
296   // depending on the implementation of the derived class.
297   //
298   // Note that the derived storage class will take full ownership and of
299   // working_path_, including creation/deletion. It is the caller's
300   // responsibility to specify correct working path and avoid mixing different
301   // persistent storages together under the same path. Also the caller has the
302   // ownership for the parent directory of working_path_, and it is responsible
303   // for parent directory creation/deletion.
304   std::string working_path_;
305   WorkingPathType working_path_type_;
306 
307   bool is_initialized_;
308 
309  private:
310   // Updates checksums of all components and returns the overall crc (all_crc)
311   // of the persistent storage. Different from UpdateChecksums, it won't check
312   // if PersistentStorage is initialized or not.
313   //
314   // Returns:
315   //   - Overall crc of the persistent storage on success
316   //   - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
317   //     on actual implementation
UpdateChecksumsInternal(bool force)318   libtextclassifier3::StatusOr<Crc32> UpdateChecksumsInternal(bool force) {
319     Crcs& crcs_ref = crcs();
320     // Compute and update storages + info checksums.
321     ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum(force));
322     ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum(force));
323     if (crcs_ref.component_crcs.info_crc == info_crc.Get() &&
324         crcs_ref.component_crcs.storages_crc == storages_crc.Get()) {
325       // If info and storages crc haven't changed, then we don't have to update
326       // checksums.
327       return Crc32(crcs_ref.all_crc);
328     }
329 
330     crcs_ref.component_crcs.info_crc = info_crc.Get();
331     crcs_ref.component_crcs.storages_crc = storages_crc.Get();
332 
333     // Finally compute and update overall checksum.
334     crcs_ref.all_crc = crcs_ref.component_crcs.ComputeChecksum().Get();
335     return Crc32(crcs_ref.all_crc);
336   }
337 
338   // Validates all checksums of the persistent storage.
339   //
340   // Returns:
341   //   - OK on success
342   //   - FAILED_PRECONDITION_ERROR if any checksum is incorrect.
343   //   - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
344   //     on actual implementation
ValidateChecksums()345   libtextclassifier3::Status ValidateChecksums() {
346     const Crcs& crcs_ref = crcs();
347     if (crcs_ref.all_crc != crcs_ref.component_crcs.ComputeChecksum().Get()) {
348       return absl_ports::FailedPreconditionError("Invalid all crc");
349     }
350 
351     ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum(/*force=*/true));
352     if (crcs_ref.component_crcs.info_crc != info_crc.Get()) {
353       return absl_ports::FailedPreconditionError("Invalid info crc");
354     }
355 
356     ICING_ASSIGN_OR_RETURN(Crc32 storages_crc,
357                            ComputeStoragesChecksum(/*force=*/true));
358     if (crcs_ref.component_crcs.storages_crc != storages_crc.Get()) {
359       return absl_ports::FailedPreconditionError("Invalid storages crc");
360     }
361 
362     return libtextclassifier3::Status::OK;
363   }
364 };
365 
366 }  // namespace lib
367 }  // namespace icing
368 
369 #endif  // ICING_FILE_PERSISTENT_STORAGE_H_
370