• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_STORE_BLOB_STORE_H_
16 #define ICING_STORE_BLOB_STORE_H_
17 
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <unordered_map>
22 #include <unordered_set>
23 #include <utility>
24 #include <vector>
25 
26 #include "icing/text_classifier/lib3/utils/base/status.h"
27 #include "icing/text_classifier/lib3/utils/base/statusor.h"
28 #include "icing/file/filesystem.h"
29 #include "icing/file/portable-file-backed-proto-log.h"
30 #include "icing/proto/blob.pb.h"
31 #include "icing/proto/document.pb.h"
32 #include "icing/proto/storage.pb.h"
33 #include "icing/util/clock.h"
34 
35 namespace icing {
36 namespace lib {
37 
38 // Provides storage interfaces for Blobs.
39 //
40 // The BlobStore is responsible for storing blobs in a directory and for
41 // ensuring that the directory is in a consistent state.
42 //
43 // A blob is a file that is stored in the BlobStore. A blob is identified by
44 // a blob handle, which is a unique identifier for the blob.
45 //
46 // Any blob that is written to the BlobStore must be committed before it can be
47 // read. A blob can be committed only once. After a blob is committed, it is
48 // not allowed to be updated.
49 //
50 // The BlobStore is not thread-safe.
51 class BlobStore {
52  public:
53   // Builds a string representation of a blob handle.
54   // The string is used as the key in the key mapper.
55   static std::string BuildBlobHandleStr(
56       const PropertyProto::BlobHandleProto& blob_handle);
57 
58   // Factory function to create a BlobStore instance. The base directory is
59   // used to persist blobs. If a blob store was previously created with
60   // this directory, it will reload the files saved by the last instance.
61   //
62   // The callers must create the base directory before calling this function.
63   //
64   // Returns:
65   //   A BlobStore on success
66   //   FAILED_PRECONDITION_ERROR on any null pointer input
67   //   INTERNAL_ERROR on I/O error
68   static libtextclassifier3::StatusOr<BlobStore> Create(
69       const Filesystem* filesystem, std::string base_dir, const Clock* clock,
70       int64_t orphan_blob_time_to_live_ms, int32_t compression_level,
71       bool manage_blob_files);
72 
73   // Gets or creates a file for write only purpose for the given blob handle.
74   // To mark the blob is completed written, CommitBlob must be called. Once
75   // CommitBlob is called, the blob is sealed and rewrite is not allowed.
76   //
77   // If Icing does not manage blob files, this method only creates necessary
78   // metadata for the blob but does not open or manage the file descriptor. The
79   // caller is responsible for opening, writing to, and closing the file using
80   // the returned file name.
81   //
82   // Otherwise, a file descriptor is returned, and it is the user's
83   // responsibility to close the file descriptor after writing is done and
84   // should not operate on the file descriptor after commit or remove it.
85   //
86   // Returns:
87   //   OK with results on success
88   //   InvalidArgumentError on invalid blob handle
89   //   FailedPreconditionError if the blob is already opened for write
90   //   AlreadyExistsError if the blob is already committed
91   //   InternalError on IO error
92   BlobProto OpenWrite(const PropertyProto::BlobHandleProto& blob_handle);
93 
94   // Removes a blob file and blob handle from the blob store.
95   //
96   // This will remove the blob on any state. No matter it's committed or not or
97   // it has reference document links or not.
98   //
99   // If Icing does not manage blob files, this method only removes the metadata
100   // entry from the blob store, but does not delete the actual blob file. The
101   // caller is responsible for deleting the blob file.
102   //
103   // Returns:
104   //   OK with results on success
105   //   InvalidArgumentError on invalid blob handle
106   //   NotFoundError if the blob is not found
107   //   InternalError on IO error
108   BlobProto RemoveBlob(const PropertyProto::BlobHandleProto& blob_handle);
109 
110   // Gets a file for read only purpose for the given blob handle.
111   // The blob must be committed by calling CommitBlob otherwise it is not
112   // accessible.
113   //
114   // If Icing does not manage blob files, this method only returns the file name
115   // associated with the blob but does not open or manage the file descriptor.
116   // The caller is responsible for opening, reading from, and closing the file
117   // using the returned file name.
118   //
119   // Otherwise, a file descriptor is returned, and it is the user's
120   // responsibility to close the file descriptor after reading.
121   //
122   // Returns:
123   //   OK with results on success
124   //   InvalidArgumentError on invalid blob handle
125   //   NotFoundError if the blob is not found or is not committed
126   BlobProto OpenRead(const PropertyProto::BlobHandleProto& blob_handle) const;
127 
128   // Commits the given blob when writing of the blob via OpenWrite is complete.
129   // Before the blob is committed, it is not visible to any reader
130   // via OpenRead. After the blob is committed, it is not allowed to rewrite or
131   // update the content.
132   //
133   // If Icing does not manage blob files, this method marks the blob as
134   // committed in the metadata store. The caller is responsible for verifying
135   // the digest of the blob file.
136   //
137   // Returns:
138   //   OK on success
139   //   AlreadyExistsError if the blob is already committed
140   //   InvalidArgumentError on invalid blob handle or if the digest is mismatch
141   //     with file content
142   //   NotFoundError if the blob is not found
143   BlobProto CommitBlob(const PropertyProto::BlobHandleProto& blob_handle);
144 
145   // Persists the blobs to disk.
146   libtextclassifier3::Status PersistToDisk();
147 
148   // Gets the potentially optimizable blob handles.
149   //
150   // A blob will be consider as a potentially optimizable blob if it created
151   // before the orphan_blob_time_to_live_ms. And the blob should be removed if
152   // it has no reference document links to it.
153   std::unordered_set<std::string> GetPotentiallyOptimizableBlobHandles() const;
154 
155   // Optimize the blob store and remove dead blob files.
156   //
157   // A blob will be consider as a dead blob and removed if it meets BOTH of
158   // following conditions
159   //  1: has no reference document links to it
160   //  2: It's mature.
161   //
162   // Returns:
163   //   The list of expired blob file names to be removed on success. If Icing
164   //   manages blob files, this list will be empty.
165   //   INTERNAL_ERROR on IO error
166   libtextclassifier3::StatusOr<std::vector<std::string>> Optimize(
167       const std::unordered_set<std::string>& dead_blob_handles);
168 
169   // Calculates the StorageInfo for the Blob Store.
170   //
171   // Returns:
172   //   Vector of NamespaceBlobStorageInfoProto contains size of each namespace.
173   //   INTERNAL_ERROR on I/O error
174   libtextclassifier3::StatusOr<std::vector<NamespaceBlobStorageInfoProto>>
175   GetStorageInfo() const;
176 
177  private:
BlobStore(const Filesystem * filesystem,std::string base_dir,const Clock * clock,int64_t orphan_blob_time_to_live_ms,int32_t compression_level,bool manage_blob_files,std::unique_ptr<PortableFileBackedProtoLog<BlobInfoProto>> blob_info_log,std::unordered_map<std::string,int32_t> blob_handle_to_offset,std::unordered_set<std::string> known_file_names)178   explicit BlobStore(
179       const Filesystem* filesystem, std::string base_dir, const Clock* clock,
180       int64_t orphan_blob_time_to_live_ms, int32_t compression_level,
181       bool manage_blob_files,
182       std::unique_ptr<PortableFileBackedProtoLog<BlobInfoProto>> blob_info_log,
183       std::unordered_map<std::string, int32_t> blob_handle_to_offset,
184       std::unordered_set<std::string> known_file_names)
185       : filesystem_(*filesystem),
186         base_dir_(std::move(base_dir)),
187         clock_(*clock),
188         orphan_blob_time_to_live_ms_(orphan_blob_time_to_live_ms),
189         compression_level_(compression_level),
190         manage_blob_files_(manage_blob_files),
191         blob_info_log_(std::move(blob_info_log)),
192         blob_handle_to_offset_(std::move(blob_handle_to_offset)),
193         known_file_names_(std::move(known_file_names)) {}
194 
195   libtextclassifier3::StatusOr<BlobInfoProto> GetBlobInfo(
196       const PropertyProto::BlobHandleProto& blob_handle) const;
197 
198   libtextclassifier3::StatusOr<BlobInfoProto> GetOrCreateBlobInfo(
199       const std::string& blob_handle_str,
200       const PropertyProto::BlobHandleProto& blob_handle);
201 
202   libtextclassifier3::Status CommitBlobMetadata(
203       const PropertyProto::BlobHandleProto& blob_handle);
204 
205   const Filesystem& filesystem_;
206   std::string base_dir_;
207   const Clock& clock_;
208   int64_t orphan_blob_time_to_live_ms_;
209   int32_t compression_level_;
210   bool manage_blob_files_;
211 
212   // The ground truth blob info log file, which is used to read/write/erase
213   // BlobInfoProto.
214   std::unique_ptr<PortableFileBackedProtoLog<BlobInfoProto>> blob_info_log_;
215 
216   // The map for BlobHandle string to the offset of BlobInfoProto in the
217   // BlobInfoProto log file.
218   // The keys are the Encoded CString from BlobHandleProto.
219   std::unordered_map<std::string, int32_t> blob_handle_to_offset_;
220 
221   // The set of used file names to store blobs in the blob store.
222   std::unordered_set<std::string> known_file_names_;
223 
224   bool has_mutated_ = false;
225 };
226 
227 }  // namespace lib
228 }  // namespace icing
229 
230 #endif  // ICING_STORE_BLOB_STORE_H_
231