1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/store/blob-store.h"
16
17 #include <fcntl.h>
18
19 #include <algorithm>
20 #include <array>
21 #include <cstdint>
22 #include <iterator>
23 #include <limits>
24 #include <memory>
25 #include <string>
26 #include <string_view>
27 #include <unordered_map>
28 #include <unordered_set>
29 #include <utility>
30 #include <vector>
31
32 #include "icing/text_classifier/lib3/utils/base/status.h"
33 #include "icing/text_classifier/lib3/utils/base/statusor.h"
34 #include "icing/absl_ports/canonical_errors.h"
35 #include "icing/absl_ports/str_cat.h"
36 #include "icing/file/constants.h"
37 #include "icing/file/filesystem.h"
38 #include "icing/file/portable-file-backed-proto-log.h"
39 #include "icing/proto/blob.pb.h"
40 #include "icing/proto/document.pb.h"
41 #include "icing/util/clock.h"
42 #include "icing/util/encode-util.h"
43 #include "icing/util/logging.h"
44 #include "icing/util/sha256.h"
45 #include "icing/util/status-macros.h"
46 #include "icing/util/status-util.h"
47
48 namespace icing {
49 namespace lib {
50
// Name of the sub-directory (under the blob store base directory) that holds
// the individual blob files.
static constexpr std::string_view kBlobFileDir{"blob_files"};

// Name of the proto log file (under the blob store base directory) that holds
// the BlobInfoProto records.
static constexpr std::string_view kBlobInfoProtoLogFileName{
    "blob_info_proto_file"};

// Expected size, in bytes, of the digest carried by a blob handle.
static constexpr int32_t kSha256LengthBytes{32};

// Size, in bytes, of the buffer used when reading a blob file back (8 KiB).
static constexpr int32_t kReadBufferSize{8 * 1024};
56
57 namespace {
58
59 using ::icing::lib::status_util::TransformStatus;
60
MakeBlobInfoProtoLogFileName(const std::string & base_dir)61 std::string MakeBlobInfoProtoLogFileName(const std::string& base_dir) {
62 return absl_ports::StrCat(base_dir, "/", kBlobInfoProtoLogFileName);
63 }
64
MakeBlobFileDir(const std::string & base_dir)65 std::string MakeBlobFileDir(const std::string& base_dir) {
66 return absl_ports::StrCat(base_dir, "/", kBlobFileDir);
67 }
68
MakeBlobFilePath(const std::string & base_dir,int64_t creation_time_ms)69 std::string MakeBlobFilePath(const std::string& base_dir,
70 int64_t creation_time_ms) {
71 return absl_ports::StrCat(MakeBlobFileDir(base_dir), "/",
72 std::to_string(creation_time_ms));
73 }
74
ValidateBlobHandle(const PropertyProto::BlobHandleProto & blob_handle)75 libtextclassifier3::Status ValidateBlobHandle(
76 const PropertyProto::BlobHandleProto& blob_handle) {
77 if (blob_handle.digest().size() != kSha256LengthBytes) {
78 return absl_ports::InvalidArgumentError(
79 "Invalid blob handle. The digest is not sha 256 digest.");
80 }
81 if (blob_handle.namespace_().empty()) {
82 return absl_ports::InvalidArgumentError(
83 "Invalid blob handle. The namespace is empty.");
84 }
85 return libtextclassifier3::Status::OK;
86 }
87
88 libtextclassifier3::StatusOr<std::unordered_map<std::string, int32_t>>
LoadBlobHandleToOffsetMapper(PortableFileBackedProtoLog<BlobInfoProto> * blob_info_log)89 LoadBlobHandleToOffsetMapper(
90 PortableFileBackedProtoLog<BlobInfoProto>* blob_info_log) {
91 std::unordered_map<std::string, int32_t> blob_handle_to_offset;
92 auto itr = blob_info_log->GetIterator();
93 while (itr.Advance().ok()) {
94 auto blob_info_proto_or = blob_info_log->ReadProto(itr.GetOffset());
95 if (!blob_info_proto_or.ok()) {
96 if (absl_ports::IsNotFound(blob_info_proto_or.status())) {
97 // Skip erased proto.
98 continue;
99 }
100
101 // Return real error.
102 return std::move(blob_info_proto_or).status();
103 }
104 BlobInfoProto blob_info_proto = std::move(blob_info_proto_or).ValueOrDie();
105
106 std::string blob_handle_str =
107 BlobStore::BuildBlobHandleStr(blob_info_proto.blob_handle());
108 blob_handle_to_offset.insert({std::move(blob_handle_str), itr.GetOffset()});
109 }
110 return blob_handle_to_offset;
111 }
112
CreateBlobProtoFromError(const libtextclassifier3::Status & status)113 BlobProto CreateBlobProtoFromError(const libtextclassifier3::Status& status) {
114 BlobProto blob_proto;
115 TransformStatus(status, blob_proto.mutable_status());
116 return blob_proto;
117 }
118
CreateBlobProtoFromFilename(std::string filename)119 BlobProto CreateBlobProtoFromFilename(std::string filename) {
120 BlobProto blob_proto;
121 blob_proto.mutable_status()->set_code(StatusProto::OK);
122 blob_proto.set_file_name(std::move(filename));
123 return blob_proto;
124 }
125
CreateBlobProtoFromFileDescriptor(int file_descriptor)126 BlobProto CreateBlobProtoFromFileDescriptor(int file_descriptor) {
127 BlobProto blob_proto;
128 blob_proto.mutable_status()->set_code(StatusProto::OK);
129 blob_proto.set_file_descriptor(file_descriptor);
130 return blob_proto;
131 }
132
133 } // namespace
134
BuildBlobHandleStr(const PropertyProto::BlobHandleProto & blob_handle)135 /* static */ std::string BlobStore::BuildBlobHandleStr(
136 const PropertyProto::BlobHandleProto& blob_handle) {
137 return encode_util::EncodeStringToCString(blob_handle.digest() +
138 blob_handle.namespace_());
139 }
140
Create(const Filesystem * filesystem,std::string base_dir,const Clock * clock,int64_t orphan_blob_time_to_live_ms,int32_t compression_level,bool manage_blob_files)141 libtextclassifier3::StatusOr<BlobStore> BlobStore::Create(
142 const Filesystem* filesystem, std::string base_dir, const Clock* clock,
143 int64_t orphan_blob_time_to_live_ms, int32_t compression_level,
144 bool manage_blob_files) {
145 ICING_RETURN_ERROR_IF_NULL(filesystem);
146 ICING_RETURN_ERROR_IF_NULL(clock);
147
148 // Make sure the blob file directory exists.
149 if (!filesystem->CreateDirectoryRecursively(
150 MakeBlobFileDir(base_dir).c_str())) {
151 return absl_ports::InternalError(
152 absl_ports::StrCat("Could not create blob file directory."));
153 }
154
155 // Load existing file names (excluding the directory of key mapper).
156 std::vector<std::string> file_names;
157 if (!filesystem->ListDirectory(MakeBlobFileDir(base_dir).c_str(),
158 &file_names)) {
159 return absl_ports::InternalError("Failed to list directory.");
160 }
161 std::unordered_set<std::string> known_file_names(
162 std::make_move_iterator(file_names.begin()),
163 std::make_move_iterator(file_names.end()));
164 if (orphan_blob_time_to_live_ms <= 0) {
165 orphan_blob_time_to_live_ms = std::numeric_limits<int64_t>::max();
166 }
167
168 std::string blob_info_proto_file_name =
169 MakeBlobInfoProtoLogFileName(base_dir);
170
171 ICING_ASSIGN_OR_RETURN(
172 PortableFileBackedProtoLog<BlobInfoProto>::CreateResult log_create_result,
173 PortableFileBackedProtoLog<BlobInfoProto>::Create(
174 filesystem, blob_info_proto_file_name,
175 PortableFileBackedProtoLog<BlobInfoProto>::Options(
176 /*compress_in=*/true, constants::kMaxProtoSize,
177 compression_level)));
178
179 std::unordered_map<std::string, int> blob_handle_to_offset;
180 ICING_ASSIGN_OR_RETURN(
181 blob_handle_to_offset,
182 LoadBlobHandleToOffsetMapper(log_create_result.proto_log.get()));
183
184 return BlobStore(filesystem, std::move(base_dir), clock,
185 orphan_blob_time_to_live_ms, compression_level,
186 manage_blob_files, std::move(log_create_result.proto_log),
187 std::move(blob_handle_to_offset),
188 std::move(known_file_names));
189 }
190
OpenWrite(const PropertyProto::BlobHandleProto & blob_handle)191 BlobProto BlobStore::OpenWrite(
192 const PropertyProto::BlobHandleProto& blob_handle) {
193 ICING_RETURN_EXPRESSION_IF_ERROR(ValidateBlobHandle(blob_handle),
194 CreateBlobProtoFromError(_));
195 std::string blob_handle_str = BuildBlobHandleStr(blob_handle);
196
197 auto blob_info_itr = blob_handle_to_offset_.find(blob_handle_str);
198 if (blob_info_itr != blob_handle_to_offset_.end()) {
199 ICING_ASSIGN_OR_RETURN(BlobInfoProto blob_info,
200 blob_info_log_->ReadProto(blob_info_itr->second),
201 CreateBlobProtoFromError(_));
202 if (blob_info.is_committed()) {
203 // The blob is already committed, return error.
204 return CreateBlobProtoFromError(
205 absl_ports::AlreadyExistsError(absl_ports::StrCat(
206 "Rewriting the committed blob is not allowed for blob handle: ",
207 blob_handle.digest())));
208 }
209 }
210
211 // Create a new blob info and blob file.
212 ICING_ASSIGN_OR_RETURN(BlobInfoProto blob_info,
213 GetOrCreateBlobInfo(blob_handle_str, blob_handle),
214 CreateBlobProtoFromError(_));
215
216 if (!manage_blob_files_) {
217 return CreateBlobProtoFromFilename(
218 std::to_string(blob_info.creation_time_ms()));
219 }
220
221 std::string file_path =
222 MakeBlobFilePath(base_dir_, blob_info.creation_time_ms());
223 int file_descriptor = filesystem_.OpenForWrite(file_path.c_str());
224 if (file_descriptor < 0) {
225 return CreateBlobProtoFromError(
226 absl_ports::InternalError(absl_ports::StrCat(
227 "Failed to open blob file for handle: ", blob_handle.digest())));
228 }
229 return CreateBlobProtoFromFileDescriptor(file_descriptor);
230 }
231
RemoveBlob(const PropertyProto::BlobHandleProto & blob_handle)232 BlobProto BlobStore::RemoveBlob(
233 const PropertyProto::BlobHandleProto& blob_handle) {
234 ICING_RETURN_EXPRESSION_IF_ERROR(ValidateBlobHandle(blob_handle),
235 CreateBlobProtoFromError(_));
236 std::string blob_handle_str = BuildBlobHandleStr(blob_handle);
237
238 auto blob_info_itr = blob_handle_to_offset_.find(blob_handle_str);
239 if (blob_info_itr == blob_handle_to_offset_.end()) {
240 return CreateBlobProtoFromError(
241 absl_ports::NotFoundError(absl_ports::StrCat(
242 "Cannot find the blob for handle: ", blob_handle.digest())));
243 }
244
245 int64_t blob_info_offset = blob_info_itr->second;
246 ICING_ASSIGN_OR_RETURN(BlobInfoProto blob_info,
247 blob_info_log_->ReadProto(blob_info_offset),
248 CreateBlobProtoFromError(_));
249
250 ICING_RETURN_EXPRESSION_IF_ERROR(blob_info_log_->EraseProto(blob_info_offset),
251 CreateBlobProtoFromError(_));
252 blob_handle_to_offset_.erase(blob_info_itr);
253 has_mutated_ = true;
254
255 if (!manage_blob_files_) {
256 return CreateBlobProtoFromFilename(
257 std::to_string(blob_info.creation_time_ms()));
258 }
259
260 std::string file_path =
261 MakeBlobFilePath(base_dir_, blob_info.creation_time_ms());
262 if (!filesystem_.DeleteFile(file_path.c_str())) {
263 return CreateBlobProtoFromError(
264 absl_ports::InternalError(absl_ports::StrCat(
265 "Failed to abandon blob file for handle: ", blob_handle.digest())));
266 }
267
268 BlobProto blob_proto;
269 blob_proto.mutable_status()->set_code(StatusProto::OK);
270 return blob_proto;
271 }
272
GetBlobInfo(const PropertyProto::BlobHandleProto & blob_handle) const273 libtextclassifier3::StatusOr<BlobInfoProto> BlobStore::GetBlobInfo(
274 const PropertyProto::BlobHandleProto& blob_handle) const {
275 ICING_RETURN_IF_ERROR(ValidateBlobHandle(blob_handle));
276 std::string blob_handle_str = BuildBlobHandleStr(blob_handle);
277 auto itr = blob_handle_to_offset_.find(blob_handle_str);
278 if (itr == blob_handle_to_offset_.end()) {
279 return absl_ports::NotFoundError(absl_ports::StrCat(
280 "Cannot find the blob for handle: ", blob_handle.digest()));
281 }
282 return blob_info_log_->ReadProto(itr->second);
283 }
284
OpenRead(const PropertyProto::BlobHandleProto & blob_handle) const285 BlobProto BlobStore::OpenRead(
286 const PropertyProto::BlobHandleProto& blob_handle) const {
287 ICING_ASSIGN_OR_RETURN(BlobInfoProto blob_info, GetBlobInfo(blob_handle),
288 CreateBlobProtoFromError(_));
289 if (!blob_info.is_committed()) {
290 // The blob is not committed, return error.
291 return CreateBlobProtoFromError(
292 absl_ports::NotFoundError(absl_ports::StrCat(
293 "Cannot find the blob for handle: ", blob_handle.digest())));
294 }
295
296 if (!manage_blob_files_) {
297 return CreateBlobProtoFromFilename(
298 std::to_string(blob_info.creation_time_ms()));
299 }
300
301 std::string file_path =
302 MakeBlobFilePath(base_dir_, blob_info.creation_time_ms());
303 int file_descriptor = filesystem_.OpenForRead(file_path.c_str());
304 if (file_descriptor < 0) {
305 return CreateBlobProtoFromError(
306 absl_ports::InternalError(absl_ports::StrCat(
307 "Failed to open blob file for handle: ", blob_handle.digest())));
308 }
309 return CreateBlobProtoFromFileDescriptor(file_descriptor);
310 }
311
CommitBlobMetadata(const PropertyProto::BlobHandleProto & blob_handle)312 libtextclassifier3::Status BlobStore::CommitBlobMetadata(
313 const PropertyProto::BlobHandleProto& blob_handle) {
314 ICING_RETURN_IF_ERROR(ValidateBlobHandle(blob_handle));
315
316 std::string blob_handle_str = BuildBlobHandleStr(blob_handle);
317
318 auto pending_blob_info_itr = blob_handle_to_offset_.find(blob_handle_str);
319 if (pending_blob_info_itr == blob_handle_to_offset_.end()) {
320 return absl_ports::NotFoundError(absl_ports::StrCat(
321 "Cannot find the blob for handle: ", blob_handle.digest()));
322 }
323 int64_t pending_blob_info_offset = pending_blob_info_itr->second;
324
325 ICING_ASSIGN_OR_RETURN(BlobInfoProto blob_info_proto,
326 blob_info_log_->ReadProto(pending_blob_info_offset));
327
328 // Check if the blob is already committed.
329 if (blob_info_proto.is_committed()) {
330 return absl_ports::AlreadyExistsError(absl_ports::StrCat(
331 "The blob is already committed for handle: ", blob_handle.digest()));
332 }
333
334 // Update the blob info proto to committed.
335 ICING_RETURN_IF_ERROR(blob_info_log_->EraseProto(pending_blob_info_offset));
336 has_mutated_ = true;
337 blob_info_proto.set_is_committed(true);
338 auto blob_info_offset_or = blob_info_log_->WriteProto(blob_info_proto);
339 if (!blob_info_offset_or.ok()) {
340 ICING_LOG(ERROR) << blob_info_offset_or.status().error_message()
341 << "Failed to write blob info";
342 return blob_info_offset_or.status();
343 }
344 blob_handle_to_offset_[blob_handle_str] = blob_info_offset_or.ValueOrDie();
345 return libtextclassifier3::Status::OK;
346 }
347
CommitBlob(const PropertyProto::BlobHandleProto & blob_handle)348 BlobProto BlobStore::CommitBlob(
349 const PropertyProto::BlobHandleProto& blob_handle) {
350 BlobProto blob_proto;
351 blob_proto.mutable_status()->set_code(StatusProto::OK);
352
353 if (!manage_blob_files_) {
354 ICING_RETURN_EXPRESSION_IF_ERROR(CommitBlobMetadata(blob_handle),
355 CreateBlobProtoFromError(_))
356 return blob_proto;
357 }
358
359 ICING_ASSIGN_OR_RETURN(BlobInfoProto blob_info, GetBlobInfo(blob_handle),
360 CreateBlobProtoFromError(_));
361 std::string file_path =
362 MakeBlobFilePath(base_dir_, blob_info.creation_time_ms());
363 // Read the file and verify the digest.
364 Sha256 sha256;
365 {
366 ScopedFd sfd(filesystem_.OpenForRead(file_path.c_str()));
367 if (!sfd.is_valid()) {
368 return CreateBlobProtoFromError(
369 absl_ports::InternalError(absl_ports::StrCat(
370 "Failed to open blob file for handle: ", blob_handle.digest())));
371 }
372
373 int64_t file_size = filesystem_.GetFileSize(sfd.get());
374 if (file_size == Filesystem::kBadFileSize) {
375 return CreateBlobProtoFromError(
376 absl_ports::InternalError(absl_ports::StrCat(
377 "Failed to get file size for handle: ", blob_handle.digest())));
378 }
379
380 // Read 8 KiB per iteration
381 int64_t prev_total_read_size = 0;
382 uint8_t buffer[kReadBufferSize];
383 while (prev_total_read_size < file_size) {
384 int32_t size_to_read =
385 std::min<int32_t>(kReadBufferSize, file_size - prev_total_read_size);
386 if (!filesystem_.Read(sfd.get(), buffer, size_to_read)) {
387 return CreateBlobProtoFromError(absl_ports::InternalError(
388 absl_ports::StrCat("Failed to read blob file for handle: ",
389 blob_handle.digest())));
390 }
391
392 sha256.Update(buffer, size_to_read);
393 prev_total_read_size += size_to_read;
394 }
395 }
396
397 std::array<uint8_t, 32> hash = std::move(sha256).Finalize();
398 const std::string& digest = blob_handle.digest();
399
400 if (digest.length() != hash.size() ||
401 digest.compare(0, digest.length(),
402 reinterpret_cast<const char*>(hash.data()),
403 hash.size()) != 0) {
404 // The blob content doesn't match to the digest. Delete this corrupted blob.
405 BlobProto remove_blob_result = RemoveBlob(blob_handle);
406 if (remove_blob_result.status().code() != StatusProto::OK) {
407 return remove_blob_result;
408 }
409 return CreateBlobProtoFromError(absl_ports::InvalidArgumentError(
410 "The blob content doesn't match to the digest."));
411 }
412 // Mark the blob as committed.
413 ICING_RETURN_EXPRESSION_IF_ERROR(CommitBlobMetadata(blob_handle),
414 CreateBlobProtoFromError(_));
415 return blob_proto;
416 }
417
PersistToDisk()418 libtextclassifier3::Status BlobStore::PersistToDisk() {
419 if (has_mutated_) {
420 ICING_RETURN_IF_ERROR(blob_info_log_->PersistToDisk());
421 has_mutated_ = false;
422 }
423 return libtextclassifier3::Status::OK;
424 }
425
GetOrCreateBlobInfo(const std::string & blob_handle_str,const PropertyProto::BlobHandleProto & blob_handle)426 libtextclassifier3::StatusOr<BlobInfoProto> BlobStore::GetOrCreateBlobInfo(
427 const std::string& blob_handle_str,
428 const PropertyProto::BlobHandleProto& blob_handle) {
429 auto itr = blob_handle_to_offset_.find(blob_handle_str);
430 if (itr != blob_handle_to_offset_.end()) {
431 return blob_info_log_->ReadProto(itr->second);
432 }
433
434 // Create a new blob info, we are using creation time as the unique file
435 // name.
436 int64_t timestamp = clock_.GetSystemTimeMilliseconds();
437 std::string file_name = std::to_string(timestamp);
438 while (known_file_names_.find(file_name) != known_file_names_.end()) {
439 ++timestamp;
440 file_name = std::to_string(timestamp);
441 }
442 known_file_names_.insert(file_name);
443
444 BlobInfoProto blob_info_proto;
445 *blob_info_proto.mutable_blob_handle() = blob_handle;
446 blob_info_proto.set_creation_time_ms(timestamp);
447 blob_info_proto.set_is_committed(false);
448
449 auto blob_info_offset_or = blob_info_log_->WriteProto(blob_info_proto);
450 if (!blob_info_offset_or.ok()) {
451 ICING_LOG(ERROR) << blob_info_offset_or.status().error_message()
452 << "Failed to write blob info";
453 return blob_info_offset_or.status();
454 }
455
456 has_mutated_ = true;
457 blob_handle_to_offset_[blob_handle_str] = blob_info_offset_or.ValueOrDie();
458
459 return blob_info_proto;
460 }
461
462 std::unordered_set<std::string>
GetPotentiallyOptimizableBlobHandles() const463 BlobStore::GetPotentiallyOptimizableBlobHandles() const {
464 int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
465 if (orphan_blob_time_to_live_ms_ > current_time_ms) {
466 // Nothing to optimize, return empty set.
467 return std::unordered_set<std::string>();
468 }
469 int64_t expired_threshold = current_time_ms - orphan_blob_time_to_live_ms_;
470 std::unordered_set<std::string> expired_blob_handles;
471 auto itr = blob_info_log_->GetIterator();
472 while (itr.Advance().ok()) {
473 auto blob_info_proto_or = blob_info_log_->ReadProto(itr.GetOffset());
474 if (!blob_info_proto_or.ok()) {
475 continue;
476 }
477 BlobInfoProto blob_info_proto = std::move(blob_info_proto_or).ValueOrDie();
478 if (blob_info_proto.creation_time_ms() < expired_threshold) {
479 expired_blob_handles.insert(
480 BuildBlobHandleStr(blob_info_proto.blob_handle()));
481 }
482 }
483 return expired_blob_handles;
484 }
485
Optimize(const std::unordered_set<std::string> & dead_blob_handles)486 libtextclassifier3::StatusOr<std::vector<std::string>> BlobStore::Optimize(
487 const std::unordered_set<std::string>& dead_blob_handles) {
488 std::vector<std::string> blob_file_names_to_remove;
489 blob_file_names_to_remove.reserve(dead_blob_handles.size());
490
491 // Create the temp blob info log file.
492 std::string temp_blob_info_proto_file_name =
493 absl_ports::StrCat(MakeBlobInfoProtoLogFileName(base_dir_), "_temp");
494 if (!filesystem_.DeleteFile(temp_blob_info_proto_file_name.c_str())) {
495 return absl_ports::InternalError(
496 "Unable to delete temp file to prepare to build new blob proto file.");
497 }
498
499 ICING_ASSIGN_OR_RETURN(PortableFileBackedProtoLog<BlobInfoProto>::CreateResult
500 temp_log_create_result,
501 PortableFileBackedProtoLog<BlobInfoProto>::Create(
502 &filesystem_, temp_blob_info_proto_file_name,
503 PortableFileBackedProtoLog<BlobInfoProto>::Options(
504 /*compress_in=*/true, constants::kMaxProtoSize,
505 compression_level_)));
506 std::unique_ptr<PortableFileBackedProtoLog<BlobInfoProto>> new_blob_info_log =
507 std::move(temp_log_create_result.proto_log);
508
509 auto itr = blob_info_log_->GetIterator();
510 std::unordered_map<std::string, int32_t> new_blob_handle_to_offset;
511 while (itr.Advance().ok()) {
512 auto blob_info_proto_or = blob_info_log_->ReadProto(itr.GetOffset());
513 if (!blob_info_proto_or.ok()) {
514 if (absl_ports::IsNotFound(blob_info_proto_or.status())) {
515 // Skip erased proto.
516 continue;
517 }
518
519 // Return real error.
520 return std::move(blob_info_proto_or).status();
521 }
522 BlobInfoProto blob_info_proto = std::move(blob_info_proto_or).ValueOrDie();
523 std::string blob_handle_str =
524 BuildBlobHandleStr(blob_info_proto.blob_handle());
525 if (dead_blob_handles.find(blob_handle_str) != dead_blob_handles.end()) {
526 // Delete all dead blob files.
527
528 if (manage_blob_files_) {
529 std::string file_path =
530 MakeBlobFilePath(base_dir_, blob_info_proto.creation_time_ms());
531 if (!filesystem_.DeleteFile(file_path.c_str())) {
532 return absl_ports::InternalError(
533 absl_ports::StrCat("Failed to delete blob file: ", file_path));
534 }
535 } else {
536 blob_file_names_to_remove.push_back(
537 std::to_string(blob_info_proto.creation_time_ms()));
538 }
539 } else {
540 // Write the alive blob info to the new blob info log file.
541 ICING_ASSIGN_OR_RETURN(int32_t new_offset,
542 new_blob_info_log->WriteProto(blob_info_proto));
543 new_blob_handle_to_offset[blob_handle_str] = new_offset;
544 }
545 }
546 new_blob_info_log->PersistToDisk();
547 new_blob_info_log.reset();
548 blob_info_log_.reset();
549 std::string old_blob_info_proto_file_name =
550 MakeBlobInfoProtoLogFileName(base_dir_);
551 // Then we swap the new key mapper directory with the old one.
552 if (!filesystem_.SwapFiles(old_blob_info_proto_file_name.c_str(),
553 temp_blob_info_proto_file_name.c_str())) {
554 return absl_ports::InternalError(
555 "Unable to apply new blob store due to failed swap!");
556 }
557
558 // Delete the temp file, don't need to throw error if it fails, it will be
559 // deleted in the next run.
560 filesystem_.DeleteFile(temp_blob_info_proto_file_name.c_str());
561
562 ICING_ASSIGN_OR_RETURN(
563 PortableFileBackedProtoLog<BlobInfoProto>::CreateResult log_create_result,
564 PortableFileBackedProtoLog<BlobInfoProto>::Create(
565 &filesystem_, old_blob_info_proto_file_name,
566 PortableFileBackedProtoLog<BlobInfoProto>::Options(
567 /*compress_in=*/true, constants::kMaxProtoSize,
568 compression_level_)));
569 blob_info_log_ = std::move(log_create_result.proto_log);
570 blob_handle_to_offset_ = std::move(new_blob_handle_to_offset);
571 return blob_file_names_to_remove;
572 }
573
574 libtextclassifier3::StatusOr<std::vector<NamespaceBlobStorageInfoProto>>
GetStorageInfo() const575 BlobStore::GetStorageInfo() const {
576 // Get the file size of each namespace offset.
577 std::unordered_map<std::string, NamespaceBlobStorageInfoProto>
578 namespace_to_storage_info;
579 auto itr = blob_info_log_->GetIterator();
580 while (itr.Advance().ok()) {
581 auto blob_info_proto_or = blob_info_log_->ReadProto(itr.GetOffset());
582 if (!blob_info_proto_or.ok()) {
583 if (absl_ports::IsNotFound(blob_info_proto_or.status())) {
584 // Skip erased proto.
585 continue;
586 }
587
588 // Return real error.
589 return std::move(blob_info_proto_or).status();
590 }
591 BlobInfoProto blob_info_proto = std::move(blob_info_proto_or).ValueOrDie();
592
593 std::string file_path =
594 MakeBlobFilePath(base_dir_, blob_info_proto.creation_time_ms());
595 std::string name_space = blob_info_proto.blob_handle().namespace_();
596 NamespaceBlobStorageInfoProto& namespace_blob_storage_info =
597 namespace_to_storage_info[name_space];
598 namespace_blob_storage_info.set_namespace_(name_space);
599
600 if (manage_blob_files_) {
601 int64_t file_size = filesystem_.GetFileSize(file_path.c_str());
602 if (file_size == Filesystem::kBadFileSize) {
603 ICING_LOG(WARNING) << "Bad file size for blob file: " << file_path;
604 continue;
605 }
606 namespace_blob_storage_info.set_blob_size(
607 namespace_blob_storage_info.blob_size() + file_size);
608 namespace_blob_storage_info.set_num_blobs(
609 namespace_blob_storage_info.num_blobs() + 1);
610 } else {
611 namespace_blob_storage_info.add_blob_file_names(
612 std::to_string(blob_info_proto.creation_time_ms()));
613 }
614 }
615
616 // Create the namespace blob storage info for each namespace.
617 std::vector<NamespaceBlobStorageInfoProto> namespace_blob_storage_infos;
618 namespace_blob_storage_infos.reserve(namespace_to_storage_info.size());
619 for (const auto& [_, namespace_blob_storage_info] :
620 namespace_to_storage_info) {
621 namespace_blob_storage_infos.push_back(
622 std::move(namespace_blob_storage_info));
623 }
624
625 return namespace_blob_storage_infos;
626 }
627
628 } // namespace lib
629 } // namespace icing
630