• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_FILE_PERSISTENT_STORAGE_H_
16 #define ICING_FILE_PERSISTENT_STORAGE_H_
17 
#include <cstdint>
#include <string>
#include <string_view>
#include <utility>

#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/filesystem.h"
#include "icing/util/crc32.h"
#include "icing/util/status-macros.h"
29 
30 namespace icing {
31 namespace lib {
32 
33 // PersistentStorage: an abstract class for all persistent data structures.
34 // - It provides some common persistent file methods, e.g. PersistToDisk.
35 // - It encapsulates most of the checksum handling logic (including update and
36 //   validation).
37 //
38 // Terminology:
39 // - Crcs: checksum section
40 // - Info: (custom) information for derived class
41 // - Metadata: Crcs + Info
42 //
43 // Usually a persistent data structure will have its own custom Info and
44 // storages (single or composite storages) definition. To create a new
45 // persistent data structure via PersistentStorage:
46 // - Decide what type the working path is (single file or directory). See
47 //   working_path_ and WorkingPathType for more details.
48 // - Create a new class that inherits PersistentStorage:
49 //   - Declare custom Info and design the metadata section layout.
50 //     Usually the layout is <Crcs><Info>, and there are 2 common ways to
51 //     manage metadata section:
52 //     - Have a separate file for metadata. In this case, the new persistent
53 //       data structure contains multiple files, so working path should be used
54 //       as directory path and multiple files will be stored under it. Example:
55 //       PersistentHashMap.
56 //     - Have a single file for both metadata and storage data. In this case,
57 //       the file layout should be <Crcs><Info><Storage Data>, and
58 //       working path should be used as file path. Example: FileBackedVector.
59 //   - Handle working path file/directory creation and deletion.
60 //     PersistentStorage only provides the static Discard() method. The
61 //     derived class should implement the remaining logic, e.g. working path
62 //     (file/directory) creation, and checking the conditions to discard the
63 //     working path and start over with new file(s).
64 //   - Implement all pure virtual methods:
65 //     - PersistStoragesToDisk: persist all (composite) storages. In general,
66 //       the implementation will be calling PersistToDisk for all composite
67 //       storages.
68 //     - PersistMetadataToDisk: persist metadata, including Crcs and Info.
69 //       - If the derived class maintains a concrete Crc and (custom) Info
70 //         instance, then it should perform write/pwrite into the metadata
71 //         section.
72 //       - If the derived class uses memory-mapped region directly for metadata,
73 //         then it should call MemoryMappedFile::PersistToDisk.
74 //       - See crcs() for more details.
75 //     - ComputeInfoChecksum: compute the checksum for custom Info.
76 //     - ComputeStoragesChecksum: compute the (combined) checksum for all
77 //       (composite) storages. In general, the implementation will be calling
78 //       UpdateChecksums for all composite storages and XOR all checksums.
79 //     - crcs(): provide the reference for PersistentStorage to write checksums.
80 //       The derived class can either maintain a concrete Crcs instance, or
81 //       reinterpret_cast the memory-mapped region to Crcs reference. Either
82 //       choice is fine as long as PersistMetadataToDisk flushes it to disk
83 //       correctly.
84 // - Call either InitializeNewStorage or InitializeExistingStorage when creating
85 //   and initializing an instance, depending on initializing new storage or from
86 //   existing file(s).
87 class PersistentStorage {
88  public:
89   enum class WorkingPathType {
90     kSingleFile,
91     kDirectory,
92     kDummy,
93   };
94 
95   // Crcs and Info will be written into the metadata section. Info is defined by
96   // the actual implementation of each persistent storage. Usually the Metadata
97   // layout is: <Crcs><Info>
98   struct Crcs {
99     struct ComponentCrcs {
100       uint32_t info_crc;
101       uint32_t storages_crc;
102 
103       bool operator==(const ComponentCrcs& other) const {
104         return info_crc == other.info_crc && storages_crc == other.storages_crc;
105       }
106 
ComputeChecksumCrcs::ComponentCrcs107       Crc32 ComputeChecksum() const {
108         return Crc32(std::string_view(reinterpret_cast<const char*>(this),
109                                       sizeof(ComponentCrcs)));
110       }
111     } __attribute__((packed));
112 
113     bool operator==(const Crcs& other) const {
114       return all_crc == other.all_crc && component_crcs == other.component_crcs;
115     }
116 
117     uint32_t all_crc;
118     ComponentCrcs component_crcs;
119   } __attribute__((packed));
120   static_assert(sizeof(Crcs) == 12, "");
121 
122   // Deletes working_path according to its type.
123   //
124   // Returns:
125   //   - OK on success
126   //   - INTERNAL_ERROR on I/O error
127   //   - INVALID_ARGUMENT_ERROR if working_path_type is unknown type
128   static libtextclassifier3::Status Discard(const Filesystem& filesystem,
129                                             const std::string& working_path,
130                                             WorkingPathType working_path_type);
131 
132   virtual ~PersistentStorage() = default;
133 
134   // Initializes new persistent storage. It computes the initial checksums and
135   // writes into the metadata file.
136   //
137   // Note: either InitializeNewStorage or InitializeExistingStorage should be
138   // invoked after creating a PersistentStorage instance before using, otherwise
139   // an uninitialized instance will fail to use persistent storage features,
140   // e.g. PersistToDisk, UpdateChecksums.
141   //
142   // Returns:
143   //   - OK on success or already initialized
144   //   - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
145   //     on actual implementation
InitializeNewStorage()146   libtextclassifier3::Status InitializeNewStorage() {
147     if (is_initialized_) {
148       return libtextclassifier3::Status::OK;
149     }
150 
151     ICING_RETURN_IF_ERROR(UpdateChecksumsInternal());
152     ICING_RETURN_IF_ERROR(PersistMetadataToDisk());
153 
154     is_initialized_ = true;
155     return libtextclassifier3::Status::OK;
156   }
157 
158   // Initializes persistent storage from existing file(s).
159   //
160   // It enforces the following check(s):
161   // - Validate checksums.
162   //
163   // Note: either InitializeNewStorage or InitializeExistingStorage should be
164   // invoked after creating a PersistentStorage instance before using.
165   //
166   // Returns:
167   //   - OK on success or already initialized
168   //   - FAILED_PRECONDITION_ERROR if checksum validation fails.
169   //   - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
170   //     on actual implementation
InitializeExistingStorage()171   libtextclassifier3::Status InitializeExistingStorage() {
172     if (is_initialized_) {
173       return libtextclassifier3::Status::OK;
174     }
175 
176     ICING_RETURN_IF_ERROR(ValidateChecksums());
177 
178     is_initialized_ = true;
179     return libtextclassifier3::Status::OK;
180   }
181 
182   // Flushes contents to underlying files.
183   // 1) Flushes storages.
184   // 2) Updates all checksums by new data.
185   // 3) Flushes metadata.
186   //
187   // Returns:
188   //   - OK on success
189   //   - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized
190   //   - Any errors from PersistStoragesToDisk, UpdateChecksums,
191   //     PersistMetadataToDisk, depending on actual implementation
PersistToDisk()192   libtextclassifier3::Status PersistToDisk() {
193     if (!is_initialized_) {
194       return absl_ports::FailedPreconditionError(absl_ports::StrCat(
195           "PersistentStorage ", working_path_, " not initialized"));
196     }
197 
198     ICING_RETURN_IF_ERROR(PersistStoragesToDisk());
199     ICING_RETURN_IF_ERROR(UpdateChecksums());
200     ICING_RETURN_IF_ERROR(PersistMetadataToDisk());
201     return libtextclassifier3::Status::OK;
202   }
203 
204   // Updates checksums of all components and returns the overall crc (all_crc)
205   // of the persistent storage.
206   //
207   // Returns:
208   //   - Overall crc of the persistent storage on success
209   //   - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized
210   //   - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
211   //     on actual implementation
UpdateChecksums()212   libtextclassifier3::StatusOr<Crc32> UpdateChecksums() {
213     if (!is_initialized_) {
214       return absl_ports::FailedPreconditionError(absl_ports::StrCat(
215           "PersistentStorage ", working_path_, " not initialized"));
216     }
217 
218     return UpdateChecksumsInternal();
219   }
220 
221  protected:
PersistentStorage(const Filesystem & filesystem,std::string working_path,WorkingPathType working_path_type)222   explicit PersistentStorage(const Filesystem& filesystem,
223                              std::string working_path,
224                              WorkingPathType working_path_type)
225       : filesystem_(filesystem),
226         working_path_(std::move(working_path)),
227         working_path_type_(working_path_type),
228         is_initialized_(false) {}
229 
230   // Flushes contents of metadata. The implementation should flush Crcs and Info
231   // correctly, depending on whether they're using memory-mapped regions or
232   // concrete instances in the derived class.
233   //
234   // Returns:
235   //   - OK on success
236   //   - Any other errors, depending on actual implementation
237   virtual libtextclassifier3::Status PersistMetadataToDisk() = 0;
238 
239   // Flushes contents of all storages to underlying files.
240   //
241   // Returns:
242   //   - OK on success
243   //   - Any other errors, depending on actual implementation
244   virtual libtextclassifier3::Status PersistStoragesToDisk() = 0;
245 
246   // Computes and returns Info checksum.
247   //
248   // This function will be mainly called by UpdateChecksums.
249   //
250   // Returns:
251   //   - Crc of the Info on success
252   //   - Any other errors, depending on actual implementation
253   virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() = 0;
254 
255   // Computes and returns all storages checksum. If there are multiple storages,
256   // usually we XOR their checksums together to a single checksum.
257   //
258   // This function will be mainly called by UpdateChecksums.
259   //
260   // Returns:
261   //   - Crc of all storages on success
262   //   - Any other errors from depending on actual implementation
263   virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() = 0;
264 
265   // Returns the Crcs instance reference. The derived class can either own a
266   // concrete Crcs instance, or reinterpret_cast the memory-mapped region to
267   // Crcs reference. PersistMetadataToDisk should flush it to disk correctly.
268   virtual Crcs& crcs() = 0;
269   virtual const Crcs& crcs() const = 0;
270 
271   const Filesystem& filesystem_;  // Does not own
272   // Path to the storage. It can be a single file path or a directory path
273   // depending on the implementation of the derived class.
274   //
275   // Note that the derived storage class will take full ownership and of
276   // working_path_, including creation/deletion. It is the caller's
277   // responsibility to specify correct working path and avoid mixing different
278   // persistent storages together under the same path. Also the caller has the
279   // ownership for the parent directory of working_path_, and it is responsible
280   // for parent directory creation/deletion.
281   std::string working_path_;
282   WorkingPathType working_path_type_;
283 
284   bool is_initialized_;
285 
286  private:
287   // Updates checksums of all components and returns the overall crc (all_crc)
288   // of the persistent storage. Different from UpdateChecksums, it won't check
289   // if PersistentStorage is initialized or not.
290   //
291   // Returns:
292   //   - Overall crc of the persistent storage on success
293   //   - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
294   //     on actual implementation
UpdateChecksumsInternal()295   libtextclassifier3::StatusOr<Crc32> UpdateChecksumsInternal() {
296     Crcs& crcs_ref = crcs();
297     // Compute and update storages + info checksums.
298     ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum());
299     ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum());
300     crcs_ref.component_crcs.info_crc = info_crc.Get();
301     crcs_ref.component_crcs.storages_crc = storages_crc.Get();
302 
303     // Finally compute and update overall checksum.
304     crcs_ref.all_crc = crcs_ref.component_crcs.ComputeChecksum().Get();
305     return Crc32(crcs_ref.all_crc);
306   }
307 
308   // Validates all checksums of the persistent storage.
309   //
310   // Returns:
311   //   - OK on success
312   //   - FAILED_PRECONDITION_ERROR if any checksum is incorrect.
313   //   - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending
314   //     on actual implementation
ValidateChecksums()315   libtextclassifier3::Status ValidateChecksums() {
316     const Crcs& crcs_ref = crcs();
317     if (crcs_ref.all_crc != crcs_ref.component_crcs.ComputeChecksum().Get()) {
318       return absl_ports::FailedPreconditionError("Invalid all crc");
319     }
320 
321     ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum());
322     if (crcs_ref.component_crcs.info_crc != info_crc.Get()) {
323       return absl_ports::FailedPreconditionError("Invalid info crc");
324     }
325 
326     ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum());
327     if (crcs_ref.component_crcs.storages_crc != storages_crc.Get()) {
328       return absl_ports::FailedPreconditionError("Invalid storages crc");
329     }
330 
331     return libtextclassifier3::Status::OK;
332   }
333 };
334 
335 }  // namespace lib
336 }  // namespace icing
337 
338 #endif  // ICING_FILE_PERSISTENT_STORAGE_H_
339