1 // Copyright (C) 2024 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_STORE_BLOB_STORE_H_ 16 #define ICING_STORE_BLOB_STORE_H_ 17 18 #include <cstdint> 19 #include <memory> 20 #include <string> 21 #include <unordered_map> 22 #include <unordered_set> 23 #include <utility> 24 #include <vector> 25 26 #include "icing/text_classifier/lib3/utils/base/status.h" 27 #include "icing/text_classifier/lib3/utils/base/statusor.h" 28 #include "icing/file/filesystem.h" 29 #include "icing/file/portable-file-backed-proto-log.h" 30 #include "icing/proto/blob.pb.h" 31 #include "icing/proto/document.pb.h" 32 #include "icing/proto/storage.pb.h" 33 #include "icing/util/clock.h" 34 35 namespace icing { 36 namespace lib { 37 38 // Provides storage interfaces for Blobs. 39 // 40 // The BlobStore is responsible for storing blobs in a directory and for 41 // ensuring that the directory is in a consistent state. 42 // 43 // A blob is a file that is stored in the BlobStore. A blob is identified by 44 // a blob handle, which is a unique identifier for the blob. 45 // 46 // Any blob that is written to the BlobStore must be committed before it can be 47 // read. A blob can be committed only once. After a blob is committed, it is 48 // not allowed to be updated. 49 // 50 // The BlobStore is not thread-safe. 51 class BlobStore { 52 public: 53 // Builds a string representation of a blob handle. 54 // The string is used as the key in the key mapper. 55 static std::string BuildBlobHandleStr( 56 const PropertyProto::BlobHandleProto& blob_handle); 57 58 // Factory function to create a BlobStore instance. The base directory is 59 // used to persist blobs. If a blob store was previously created with 60 // this directory, it will reload the files saved by the last instance. 61 // 62 // The callers must create the base directory before calling this function. 63 // 64 // Returns: 65 // A BlobStore on success 66 // FAILED_PRECONDITION_ERROR on any null pointer input 67 // INTERNAL_ERROR on I/O error 68 static libtextclassifier3::StatusOr<BlobStore> Create( 69 const Filesystem* filesystem, std::string base_dir, const Clock* clock, 70 int64_t orphan_blob_time_to_live_ms, int32_t compression_level, 71 bool manage_blob_files); 72 73 // Gets or creates a file for write only purpose for the given blob handle. 74 // To mark the blob is completed written, CommitBlob must be called. Once 75 // CommitBlob is called, the blob is sealed and rewrite is not allowed. 76 // 77 // If Icing does not manage blob files, this method only creates necessary 78 // metadata for the blob but does not open or manage the file descriptor. The 79 // caller is responsible for opening, writing to, and closing the file using 80 // the returned file name. 81 // 82 // Otherwise, a file descriptor is returned, and it is the user's 83 // responsibility to close the file descriptor after writing is done and 84 // should not operate on the file descriptor after commit or remove it. 85 // 86 // Returns: 87 // OK with results on success 88 // InvalidArgumentError on invalid blob handle 89 // FailedPreconditionError if the blob is already opened for write 90 // AlreadyExistsError if the blob is already committed 91 // InternalError on IO error 92 BlobProto OpenWrite(const PropertyProto::BlobHandleProto& blob_handle); 93 94 // Removes a blob file and blob handle from the blob store. 95 // 96 // This will remove the blob on any state. No matter it's committed or not or 97 // it has reference document links or not. 98 // 99 // If Icing does not manage blob files, this method only removes the metadata 100 // entry from the blob store, but does not delete the actual blob file. The 101 // caller is responsible for deleting the blob file. 102 // 103 // Returns: 104 // OK with results on success 105 // InvalidArgumentError on invalid blob handle 106 // NotFoundError if the blob is not found 107 // InternalError on IO error 108 BlobProto RemoveBlob(const PropertyProto::BlobHandleProto& blob_handle); 109 110 // Gets a file for read only purpose for the given blob handle. 111 // The blob must be committed by calling CommitBlob otherwise it is not 112 // accessible. 113 // 114 // If Icing does not manage blob files, this method only returns the file name 115 // associated with the blob but does not open or manage the file descriptor. 116 // The caller is responsible for opening, reading from, and closing the file 117 // using the returned file name. 118 // 119 // Otherwise, a file descriptor is returned, and it is the user's 120 // responsibility to close the file descriptor after reading. 121 // 122 // Returns: 123 // OK with results on success 124 // InvalidArgumentError on invalid blob handle 125 // NotFoundError if the blob is not found or is not committed 126 BlobProto OpenRead(const PropertyProto::BlobHandleProto& blob_handle) const; 127 128 // Commits the given blob when writing of the blob via OpenWrite is complete. 129 // Before the blob is committed, it is not visible to any reader 130 // via OpenRead. After the blob is committed, it is not allowed to rewrite or 131 // update the content. 132 // 133 // If Icing does not manage blob files, this method marks the blob as 134 // committed in the metadata store. The caller is responsible for verifying 135 // the digest of the blob file. 136 // 137 // Returns: 138 // OK on success 139 // AlreadyExistsError if the blob is already committed 140 // InvalidArgumentError on invalid blob handle or if the digest is mismatch 141 // with file content 142 // NotFoundError if the blob is not found 143 BlobProto CommitBlob(const PropertyProto::BlobHandleProto& blob_handle); 144 145 // Persists the blobs to disk. 146 libtextclassifier3::Status PersistToDisk(); 147 148 // Gets the potentially optimizable blob handles. 149 // 150 // A blob will be consider as a potentially optimizable blob if it created 151 // before the orphan_blob_time_to_live_ms. And the blob should be removed if 152 // it has no reference document links to it. 153 std::unordered_set<std::string> GetPotentiallyOptimizableBlobHandles() const; 154 155 // Optimize the blob store and remove dead blob files. 156 // 157 // A blob will be consider as a dead blob and removed if it meets BOTH of 158 // following conditions 159 // 1: has no reference document links to it 160 // 2: It's mature. 161 // 162 // Returns: 163 // The list of expired blob file names to be removed on success. If Icing 164 // manages blob files, this list will be empty. 165 // INTERNAL_ERROR on IO error 166 libtextclassifier3::StatusOr<std::vector<std::string>> Optimize( 167 const std::unordered_set<std::string>& dead_blob_handles); 168 169 // Calculates the StorageInfo for the Blob Store. 170 // 171 // Returns: 172 // Vector of NamespaceBlobStorageInfoProto contains size of each namespace. 173 // INTERNAL_ERROR on I/O error 174 libtextclassifier3::StatusOr<std::vector<NamespaceBlobStorageInfoProto>> 175 GetStorageInfo() const; 176 177 private: BlobStore(const Filesystem * filesystem,std::string base_dir,const Clock * clock,int64_t orphan_blob_time_to_live_ms,int32_t compression_level,bool manage_blob_files,std::unique_ptr<PortableFileBackedProtoLog<BlobInfoProto>> blob_info_log,std::unordered_map<std::string,int32_t> blob_handle_to_offset,std::unordered_set<std::string> known_file_names)178 explicit BlobStore( 179 const Filesystem* filesystem, std::string base_dir, const Clock* clock, 180 int64_t orphan_blob_time_to_live_ms, int32_t compression_level, 181 bool manage_blob_files, 182 std::unique_ptr<PortableFileBackedProtoLog<BlobInfoProto>> blob_info_log, 183 std::unordered_map<std::string, int32_t> blob_handle_to_offset, 184 std::unordered_set<std::string> known_file_names) 185 : filesystem_(*filesystem), 186 base_dir_(std::move(base_dir)), 187 clock_(*clock), 188 orphan_blob_time_to_live_ms_(orphan_blob_time_to_live_ms), 189 compression_level_(compression_level), 190 manage_blob_files_(manage_blob_files), 191 blob_info_log_(std::move(blob_info_log)), 192 blob_handle_to_offset_(std::move(blob_handle_to_offset)), 193 known_file_names_(std::move(known_file_names)) {} 194 195 libtextclassifier3::StatusOr<BlobInfoProto> GetBlobInfo( 196 const PropertyProto::BlobHandleProto& blob_handle) const; 197 198 libtextclassifier3::StatusOr<BlobInfoProto> GetOrCreateBlobInfo( 199 const std::string& blob_handle_str, 200 const PropertyProto::BlobHandleProto& blob_handle); 201 202 libtextclassifier3::Status CommitBlobMetadata( 203 const PropertyProto::BlobHandleProto& blob_handle); 204 205 const Filesystem& filesystem_; 206 std::string base_dir_; 207 const Clock& clock_; 208 int64_t orphan_blob_time_to_live_ms_; 209 int32_t compression_level_; 210 bool manage_blob_files_; 211 212 // The ground truth blob info log file, which is used to read/write/erase 213 // BlobInfoProto. 214 std::unique_ptr<PortableFileBackedProtoLog<BlobInfoProto>> blob_info_log_; 215 216 // The map for BlobHandle string to the offset of BlobInfoProto in the 217 // BlobInfoProto log file. 218 // The keys are the Encoded CString from BlobHandleProto. 219 std::unordered_map<std::string, int32_t> blob_handle_to_offset_; 220 221 // The set of used file names to store blobs in the blob store. 222 std::unordered_set<std::string> known_file_names_; 223 224 bool has_mutated_ = false; 225 }; 226 227 } // namespace lib 228 } // namespace icing 229 230 #endif // ICING_STORE_BLOB_STORE_H_ 231