• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/store/blob-store.h"
16 
17 #include <fcntl.h>
18 
19 #include <algorithm>
20 #include <array>
21 #include <cstdint>
22 #include <iterator>
23 #include <limits>
24 #include <memory>
25 #include <string>
26 #include <string_view>
27 #include <unordered_map>
28 #include <unordered_set>
29 #include <utility>
30 #include <vector>
31 
32 #include "icing/text_classifier/lib3/utils/base/status.h"
33 #include "icing/text_classifier/lib3/utils/base/statusor.h"
34 #include "icing/absl_ports/canonical_errors.h"
35 #include "icing/absl_ports/str_cat.h"
36 #include "icing/file/constants.h"
37 #include "icing/file/filesystem.h"
38 #include "icing/file/portable-file-backed-proto-log.h"
39 #include "icing/proto/blob.pb.h"
40 #include "icing/proto/document.pb.h"
41 #include "icing/util/clock.h"
42 #include "icing/util/encode-util.h"
43 #include "icing/util/logging.h"
44 #include "icing/util/sha256.h"
45 #include "icing/util/status-macros.h"
46 #include "icing/util/status-util.h"
47 
48 namespace icing {
49 namespace lib {
50 
// Name of the sub-directory (under the store's base directory) that holds the
// blob files.
static constexpr std::string_view kBlobFileDir{"blob_files"};
// Name of the proto log file (under the store's base directory) that holds the
// BlobInfoProto records.
static constexpr std::string_view kBlobInfoProtoLogFileName{
    "blob_info_proto_file"};
// Expected size, in bytes, of the digest carried by a blob handle.
static constexpr int32_t kSha256LengthBytes{32};
// Size, in bytes, of the buffer used when reading a blob file to hash it.
static constexpr int32_t kReadBufferSize{8192};
56 
57 namespace {
58 
59 using ::icing::lib::status_util::TransformStatus;
60 
MakeBlobInfoProtoLogFileName(const std::string & base_dir)61 std::string MakeBlobInfoProtoLogFileName(const std::string& base_dir) {
62   return absl_ports::StrCat(base_dir, "/", kBlobInfoProtoLogFileName);
63 }
64 
MakeBlobFileDir(const std::string & base_dir)65 std::string MakeBlobFileDir(const std::string& base_dir) {
66   return absl_ports::StrCat(base_dir, "/", kBlobFileDir);
67 }
68 
MakeBlobFilePath(const std::string & base_dir,int64_t creation_time_ms)69 std::string MakeBlobFilePath(const std::string& base_dir,
70                              int64_t creation_time_ms) {
71   return absl_ports::StrCat(MakeBlobFileDir(base_dir), "/",
72                             std::to_string(creation_time_ms));
73 }
74 
ValidateBlobHandle(const PropertyProto::BlobHandleProto & blob_handle)75 libtextclassifier3::Status ValidateBlobHandle(
76     const PropertyProto::BlobHandleProto& blob_handle) {
77   if (blob_handle.digest().size() != kSha256LengthBytes) {
78     return absl_ports::InvalidArgumentError(
79         "Invalid blob handle. The digest is not sha 256 digest.");
80   }
81   if (blob_handle.namespace_().empty()) {
82     return absl_ports::InvalidArgumentError(
83         "Invalid blob handle. The namespace is empty.");
84   }
85   return libtextclassifier3::Status::OK;
86 }
87 
88 libtextclassifier3::StatusOr<std::unordered_map<std::string, int32_t>>
LoadBlobHandleToOffsetMapper(PortableFileBackedProtoLog<BlobInfoProto> * blob_info_log)89 LoadBlobHandleToOffsetMapper(
90     PortableFileBackedProtoLog<BlobInfoProto>* blob_info_log) {
91   std::unordered_map<std::string, int32_t> blob_handle_to_offset;
92   auto itr = blob_info_log->GetIterator();
93   while (itr.Advance().ok()) {
94     auto blob_info_proto_or = blob_info_log->ReadProto(itr.GetOffset());
95     if (!blob_info_proto_or.ok()) {
96       if (absl_ports::IsNotFound(blob_info_proto_or.status())) {
97         // Skip erased proto.
98         continue;
99       }
100 
101       // Return real error.
102       return std::move(blob_info_proto_or).status();
103     }
104     BlobInfoProto blob_info_proto = std::move(blob_info_proto_or).ValueOrDie();
105 
106     std::string blob_handle_str =
107         BlobStore::BuildBlobHandleStr(blob_info_proto.blob_handle());
108     blob_handle_to_offset.insert({std::move(blob_handle_str), itr.GetOffset()});
109   }
110   return blob_handle_to_offset;
111 }
112 
CreateBlobProtoFromError(const libtextclassifier3::Status & status)113 BlobProto CreateBlobProtoFromError(const libtextclassifier3::Status& status) {
114   BlobProto blob_proto;
115   TransformStatus(status, blob_proto.mutable_status());
116   return blob_proto;
117 }
118 
CreateBlobProtoFromFilename(std::string filename)119 BlobProto CreateBlobProtoFromFilename(std::string filename) {
120   BlobProto blob_proto;
121   blob_proto.mutable_status()->set_code(StatusProto::OK);
122   blob_proto.set_file_name(std::move(filename));
123   return blob_proto;
124 }
125 
CreateBlobProtoFromFileDescriptor(int file_descriptor)126 BlobProto CreateBlobProtoFromFileDescriptor(int file_descriptor) {
127   BlobProto blob_proto;
128   blob_proto.mutable_status()->set_code(StatusProto::OK);
129   blob_proto.set_file_descriptor(file_descriptor);
130   return blob_proto;
131 }
132 
133 }  // namespace
134 
BuildBlobHandleStr(const PropertyProto::BlobHandleProto & blob_handle)135 /* static */ std::string BlobStore::BuildBlobHandleStr(
136     const PropertyProto::BlobHandleProto& blob_handle) {
137   return encode_util::EncodeStringToCString(blob_handle.digest() +
138                                             blob_handle.namespace_());
139 }
140 
Create(const Filesystem * filesystem,std::string base_dir,const Clock * clock,int64_t orphan_blob_time_to_live_ms,int32_t compression_level,bool manage_blob_files)141 libtextclassifier3::StatusOr<BlobStore> BlobStore::Create(
142     const Filesystem* filesystem, std::string base_dir, const Clock* clock,
143     int64_t orphan_blob_time_to_live_ms, int32_t compression_level,
144     bool manage_blob_files) {
145   ICING_RETURN_ERROR_IF_NULL(filesystem);
146   ICING_RETURN_ERROR_IF_NULL(clock);
147 
148   // Make sure the blob file directory exists.
149   if (!filesystem->CreateDirectoryRecursively(
150           MakeBlobFileDir(base_dir).c_str())) {
151     return absl_ports::InternalError(
152         absl_ports::StrCat("Could not create blob file directory."));
153   }
154 
155   // Load existing file names (excluding the directory of key mapper).
156   std::vector<std::string> file_names;
157   if (!filesystem->ListDirectory(MakeBlobFileDir(base_dir).c_str(),
158                                  &file_names)) {
159     return absl_ports::InternalError("Failed to list directory.");
160   }
161   std::unordered_set<std::string> known_file_names(
162       std::make_move_iterator(file_names.begin()),
163       std::make_move_iterator(file_names.end()));
164   if (orphan_blob_time_to_live_ms <= 0) {
165     orphan_blob_time_to_live_ms = std::numeric_limits<int64_t>::max();
166   }
167 
168   std::string blob_info_proto_file_name =
169       MakeBlobInfoProtoLogFileName(base_dir);
170 
171   ICING_ASSIGN_OR_RETURN(
172       PortableFileBackedProtoLog<BlobInfoProto>::CreateResult log_create_result,
173       PortableFileBackedProtoLog<BlobInfoProto>::Create(
174           filesystem, blob_info_proto_file_name,
175           PortableFileBackedProtoLog<BlobInfoProto>::Options(
176               /*compress_in=*/true, constants::kMaxProtoSize,
177               compression_level)));
178 
179   std::unordered_map<std::string, int> blob_handle_to_offset;
180   ICING_ASSIGN_OR_RETURN(
181       blob_handle_to_offset,
182       LoadBlobHandleToOffsetMapper(log_create_result.proto_log.get()));
183 
184   return BlobStore(filesystem, std::move(base_dir), clock,
185                    orphan_blob_time_to_live_ms, compression_level,
186                    manage_blob_files, std::move(log_create_result.proto_log),
187                    std::move(blob_handle_to_offset),
188                    std::move(known_file_names));
189 }
190 
OpenWrite(const PropertyProto::BlobHandleProto & blob_handle)191 BlobProto BlobStore::OpenWrite(
192     const PropertyProto::BlobHandleProto& blob_handle) {
193   ICING_RETURN_EXPRESSION_IF_ERROR(ValidateBlobHandle(blob_handle),
194                                    CreateBlobProtoFromError(_));
195   std::string blob_handle_str = BuildBlobHandleStr(blob_handle);
196 
197   auto blob_info_itr = blob_handle_to_offset_.find(blob_handle_str);
198   if (blob_info_itr != blob_handle_to_offset_.end()) {
199     ICING_ASSIGN_OR_RETURN(BlobInfoProto blob_info,
200                            blob_info_log_->ReadProto(blob_info_itr->second),
201                            CreateBlobProtoFromError(_));
202     if (blob_info.is_committed()) {
203       // The blob is already committed, return error.
204       return CreateBlobProtoFromError(
205           absl_ports::AlreadyExistsError(absl_ports::StrCat(
206               "Rewriting the committed blob is not allowed for blob handle: ",
207               blob_handle.digest())));
208     }
209   }
210 
211   // Create a new blob info and blob file.
212   ICING_ASSIGN_OR_RETURN(BlobInfoProto blob_info,
213                          GetOrCreateBlobInfo(blob_handle_str, blob_handle),
214                          CreateBlobProtoFromError(_));
215 
216   if (!manage_blob_files_) {
217     return CreateBlobProtoFromFilename(
218         std::to_string(blob_info.creation_time_ms()));
219   }
220 
221   std::string file_path =
222       MakeBlobFilePath(base_dir_, blob_info.creation_time_ms());
223   int file_descriptor = filesystem_.OpenForWrite(file_path.c_str());
224   if (file_descriptor < 0) {
225     return CreateBlobProtoFromError(
226         absl_ports::InternalError(absl_ports::StrCat(
227             "Failed to open blob file for handle: ", blob_handle.digest())));
228   }
229   return CreateBlobProtoFromFileDescriptor(file_descriptor);
230 }
231 
RemoveBlob(const PropertyProto::BlobHandleProto & blob_handle)232 BlobProto BlobStore::RemoveBlob(
233     const PropertyProto::BlobHandleProto& blob_handle) {
234   ICING_RETURN_EXPRESSION_IF_ERROR(ValidateBlobHandle(blob_handle),
235                                    CreateBlobProtoFromError(_));
236   std::string blob_handle_str = BuildBlobHandleStr(blob_handle);
237 
238   auto blob_info_itr = blob_handle_to_offset_.find(blob_handle_str);
239   if (blob_info_itr == blob_handle_to_offset_.end()) {
240     return CreateBlobProtoFromError(
241         absl_ports::NotFoundError(absl_ports::StrCat(
242             "Cannot find the blob for handle: ", blob_handle.digest())));
243   }
244 
245   int64_t blob_info_offset = blob_info_itr->second;
246   ICING_ASSIGN_OR_RETURN(BlobInfoProto blob_info,
247                          blob_info_log_->ReadProto(blob_info_offset),
248                          CreateBlobProtoFromError(_));
249 
250   ICING_RETURN_EXPRESSION_IF_ERROR(blob_info_log_->EraseProto(blob_info_offset),
251                                    CreateBlobProtoFromError(_));
252   blob_handle_to_offset_.erase(blob_info_itr);
253   has_mutated_ = true;
254 
255   if (!manage_blob_files_) {
256     return CreateBlobProtoFromFilename(
257         std::to_string(blob_info.creation_time_ms()));
258   }
259 
260   std::string file_path =
261       MakeBlobFilePath(base_dir_, blob_info.creation_time_ms());
262   if (!filesystem_.DeleteFile(file_path.c_str())) {
263     return CreateBlobProtoFromError(
264         absl_ports::InternalError(absl_ports::StrCat(
265             "Failed to abandon blob file for handle: ", blob_handle.digest())));
266   }
267 
268   BlobProto blob_proto;
269   blob_proto.mutable_status()->set_code(StatusProto::OK);
270   return blob_proto;
271 }
272 
GetBlobInfo(const PropertyProto::BlobHandleProto & blob_handle) const273 libtextclassifier3::StatusOr<BlobInfoProto> BlobStore::GetBlobInfo(
274     const PropertyProto::BlobHandleProto& blob_handle) const {
275   ICING_RETURN_IF_ERROR(ValidateBlobHandle(blob_handle));
276   std::string blob_handle_str = BuildBlobHandleStr(blob_handle);
277   auto itr = blob_handle_to_offset_.find(blob_handle_str);
278   if (itr == blob_handle_to_offset_.end()) {
279     return absl_ports::NotFoundError(absl_ports::StrCat(
280         "Cannot find the blob for handle: ", blob_handle.digest()));
281   }
282   return blob_info_log_->ReadProto(itr->second);
283 }
284 
OpenRead(const PropertyProto::BlobHandleProto & blob_handle) const285 BlobProto BlobStore::OpenRead(
286     const PropertyProto::BlobHandleProto& blob_handle) const {
287   ICING_ASSIGN_OR_RETURN(BlobInfoProto blob_info, GetBlobInfo(blob_handle),
288                          CreateBlobProtoFromError(_));
289   if (!blob_info.is_committed()) {
290     // The blob is not committed, return error.
291     return CreateBlobProtoFromError(
292         absl_ports::NotFoundError(absl_ports::StrCat(
293             "Cannot find the blob for handle: ", blob_handle.digest())));
294   }
295 
296   if (!manage_blob_files_) {
297     return CreateBlobProtoFromFilename(
298         std::to_string(blob_info.creation_time_ms()));
299   }
300 
301   std::string file_path =
302       MakeBlobFilePath(base_dir_, blob_info.creation_time_ms());
303   int file_descriptor = filesystem_.OpenForRead(file_path.c_str());
304   if (file_descriptor < 0) {
305     return CreateBlobProtoFromError(
306         absl_ports::InternalError(absl_ports::StrCat(
307             "Failed to open blob file for handle: ", blob_handle.digest())));
308   }
309   return CreateBlobProtoFromFileDescriptor(file_descriptor);
310 }
311 
CommitBlobMetadata(const PropertyProto::BlobHandleProto & blob_handle)312 libtextclassifier3::Status BlobStore::CommitBlobMetadata(
313     const PropertyProto::BlobHandleProto& blob_handle) {
314   ICING_RETURN_IF_ERROR(ValidateBlobHandle(blob_handle));
315 
316   std::string blob_handle_str = BuildBlobHandleStr(blob_handle);
317 
318   auto pending_blob_info_itr = blob_handle_to_offset_.find(blob_handle_str);
319   if (pending_blob_info_itr == blob_handle_to_offset_.end()) {
320     return absl_ports::NotFoundError(absl_ports::StrCat(
321         "Cannot find the blob for handle: ", blob_handle.digest()));
322   }
323   int64_t pending_blob_info_offset = pending_blob_info_itr->second;
324 
325   ICING_ASSIGN_OR_RETURN(BlobInfoProto blob_info_proto,
326                          blob_info_log_->ReadProto(pending_blob_info_offset));
327 
328   // Check if the blob is already committed.
329   if (blob_info_proto.is_committed()) {
330     return absl_ports::AlreadyExistsError(absl_ports::StrCat(
331         "The blob is already committed for handle: ", blob_handle.digest()));
332   }
333 
334   // Update the blob info proto to committed.
335   ICING_RETURN_IF_ERROR(blob_info_log_->EraseProto(pending_blob_info_offset));
336   has_mutated_ = true;
337   blob_info_proto.set_is_committed(true);
338   auto blob_info_offset_or = blob_info_log_->WriteProto(blob_info_proto);
339   if (!blob_info_offset_or.ok()) {
340     ICING_LOG(ERROR) << blob_info_offset_or.status().error_message()
341                      << "Failed to write blob info";
342     return blob_info_offset_or.status();
343   }
344   blob_handle_to_offset_[blob_handle_str] = blob_info_offset_or.ValueOrDie();
345   return libtextclassifier3::Status::OK;
346 }
347 
CommitBlob(const PropertyProto::BlobHandleProto & blob_handle)348 BlobProto BlobStore::CommitBlob(
349     const PropertyProto::BlobHandleProto& blob_handle) {
350   BlobProto blob_proto;
351   blob_proto.mutable_status()->set_code(StatusProto::OK);
352 
353   if (!manage_blob_files_) {
354     ICING_RETURN_EXPRESSION_IF_ERROR(CommitBlobMetadata(blob_handle),
355                                      CreateBlobProtoFromError(_))
356     return blob_proto;
357   }
358 
359   ICING_ASSIGN_OR_RETURN(BlobInfoProto blob_info, GetBlobInfo(blob_handle),
360                          CreateBlobProtoFromError(_));
361   std::string file_path =
362       MakeBlobFilePath(base_dir_, blob_info.creation_time_ms());
363   // Read the file and verify the digest.
364   Sha256 sha256;
365   {
366     ScopedFd sfd(filesystem_.OpenForRead(file_path.c_str()));
367     if (!sfd.is_valid()) {
368       return CreateBlobProtoFromError(
369           absl_ports::InternalError(absl_ports::StrCat(
370               "Failed to open blob file for handle: ", blob_handle.digest())));
371     }
372 
373     int64_t file_size = filesystem_.GetFileSize(sfd.get());
374     if (file_size == Filesystem::kBadFileSize) {
375       return CreateBlobProtoFromError(
376           absl_ports::InternalError(absl_ports::StrCat(
377               "Failed to get file size for handle: ", blob_handle.digest())));
378     }
379 
380     // Read 8 KiB per iteration
381     int64_t prev_total_read_size = 0;
382     uint8_t buffer[kReadBufferSize];
383     while (prev_total_read_size < file_size) {
384       int32_t size_to_read =
385           std::min<int32_t>(kReadBufferSize, file_size - prev_total_read_size);
386       if (!filesystem_.Read(sfd.get(), buffer, size_to_read)) {
387         return CreateBlobProtoFromError(absl_ports::InternalError(
388             absl_ports::StrCat("Failed to read blob file for handle: ",
389                                blob_handle.digest())));
390       }
391 
392       sha256.Update(buffer, size_to_read);
393       prev_total_read_size += size_to_read;
394     }
395   }
396 
397   std::array<uint8_t, 32> hash = std::move(sha256).Finalize();
398   const std::string& digest = blob_handle.digest();
399 
400   if (digest.length() != hash.size() ||
401       digest.compare(0, digest.length(),
402                      reinterpret_cast<const char*>(hash.data()),
403                      hash.size()) != 0) {
404     // The blob content doesn't match to the digest. Delete this corrupted blob.
405     BlobProto remove_blob_result = RemoveBlob(blob_handle);
406     if (remove_blob_result.status().code() != StatusProto::OK) {
407       return remove_blob_result;
408     }
409     return CreateBlobProtoFromError(absl_ports::InvalidArgumentError(
410         "The blob content doesn't match to the digest."));
411   }
412   // Mark the blob as committed.
413   ICING_RETURN_EXPRESSION_IF_ERROR(CommitBlobMetadata(blob_handle),
414                                    CreateBlobProtoFromError(_));
415   return blob_proto;
416 }
417 
PersistToDisk()418 libtextclassifier3::Status BlobStore::PersistToDisk() {
419   if (has_mutated_) {
420     ICING_RETURN_IF_ERROR(blob_info_log_->PersistToDisk());
421     has_mutated_ = false;
422   }
423   return libtextclassifier3::Status::OK;
424 }
425 
GetOrCreateBlobInfo(const std::string & blob_handle_str,const PropertyProto::BlobHandleProto & blob_handle)426 libtextclassifier3::StatusOr<BlobInfoProto> BlobStore::GetOrCreateBlobInfo(
427     const std::string& blob_handle_str,
428     const PropertyProto::BlobHandleProto& blob_handle) {
429   auto itr = blob_handle_to_offset_.find(blob_handle_str);
430   if (itr != blob_handle_to_offset_.end()) {
431     return blob_info_log_->ReadProto(itr->second);
432   }
433 
434   // Create a new blob info, we are using creation time as the unique file
435   // name.
436   int64_t timestamp = clock_.GetSystemTimeMilliseconds();
437   std::string file_name = std::to_string(timestamp);
438   while (known_file_names_.find(file_name) != known_file_names_.end()) {
439     ++timestamp;
440     file_name = std::to_string(timestamp);
441   }
442   known_file_names_.insert(file_name);
443 
444   BlobInfoProto blob_info_proto;
445   *blob_info_proto.mutable_blob_handle() = blob_handle;
446   blob_info_proto.set_creation_time_ms(timestamp);
447   blob_info_proto.set_is_committed(false);
448 
449   auto blob_info_offset_or = blob_info_log_->WriteProto(blob_info_proto);
450   if (!blob_info_offset_or.ok()) {
451     ICING_LOG(ERROR) << blob_info_offset_or.status().error_message()
452                      << "Failed to write blob info";
453     return blob_info_offset_or.status();
454   }
455 
456   has_mutated_ = true;
457   blob_handle_to_offset_[blob_handle_str] = blob_info_offset_or.ValueOrDie();
458 
459   return blob_info_proto;
460 }
461 
462 std::unordered_set<std::string>
GetPotentiallyOptimizableBlobHandles() const463 BlobStore::GetPotentiallyOptimizableBlobHandles() const {
464   int64_t current_time_ms = clock_.GetSystemTimeMilliseconds();
465   if (orphan_blob_time_to_live_ms_ > current_time_ms) {
466     // Nothing to optimize, return empty set.
467     return std::unordered_set<std::string>();
468   }
469   int64_t expired_threshold = current_time_ms - orphan_blob_time_to_live_ms_;
470   std::unordered_set<std::string> expired_blob_handles;
471   auto itr = blob_info_log_->GetIterator();
472   while (itr.Advance().ok()) {
473     auto blob_info_proto_or = blob_info_log_->ReadProto(itr.GetOffset());
474     if (!blob_info_proto_or.ok()) {
475       continue;
476     }
477     BlobInfoProto blob_info_proto = std::move(blob_info_proto_or).ValueOrDie();
478     if (blob_info_proto.creation_time_ms() < expired_threshold) {
479       expired_blob_handles.insert(
480           BuildBlobHandleStr(blob_info_proto.blob_handle()));
481     }
482   }
483   return expired_blob_handles;
484 }
485 
Optimize(const std::unordered_set<std::string> & dead_blob_handles)486 libtextclassifier3::StatusOr<std::vector<std::string>> BlobStore::Optimize(
487     const std::unordered_set<std::string>& dead_blob_handles) {
488   std::vector<std::string> blob_file_names_to_remove;
489   blob_file_names_to_remove.reserve(dead_blob_handles.size());
490 
491   // Create the temp blob info log file.
492   std::string temp_blob_info_proto_file_name =
493       absl_ports::StrCat(MakeBlobInfoProtoLogFileName(base_dir_), "_temp");
494   if (!filesystem_.DeleteFile(temp_blob_info_proto_file_name.c_str())) {
495     return absl_ports::InternalError(
496         "Unable to delete temp file to prepare to build new blob proto file.");
497   }
498 
499   ICING_ASSIGN_OR_RETURN(PortableFileBackedProtoLog<BlobInfoProto>::CreateResult
500                              temp_log_create_result,
501                          PortableFileBackedProtoLog<BlobInfoProto>::Create(
502                              &filesystem_, temp_blob_info_proto_file_name,
503                              PortableFileBackedProtoLog<BlobInfoProto>::Options(
504                                  /*compress_in=*/true, constants::kMaxProtoSize,
505                                  compression_level_)));
506   std::unique_ptr<PortableFileBackedProtoLog<BlobInfoProto>> new_blob_info_log =
507       std::move(temp_log_create_result.proto_log);
508 
509   auto itr = blob_info_log_->GetIterator();
510   std::unordered_map<std::string, int32_t> new_blob_handle_to_offset;
511   while (itr.Advance().ok()) {
512     auto blob_info_proto_or = blob_info_log_->ReadProto(itr.GetOffset());
513     if (!blob_info_proto_or.ok()) {
514       if (absl_ports::IsNotFound(blob_info_proto_or.status())) {
515         // Skip erased proto.
516         continue;
517       }
518 
519       // Return real error.
520       return std::move(blob_info_proto_or).status();
521     }
522     BlobInfoProto blob_info_proto = std::move(blob_info_proto_or).ValueOrDie();
523     std::string blob_handle_str =
524         BuildBlobHandleStr(blob_info_proto.blob_handle());
525     if (dead_blob_handles.find(blob_handle_str) != dead_blob_handles.end()) {
526       // Delete all dead blob files.
527 
528       if (manage_blob_files_) {
529         std::string file_path =
530             MakeBlobFilePath(base_dir_, blob_info_proto.creation_time_ms());
531         if (!filesystem_.DeleteFile(file_path.c_str())) {
532           return absl_ports::InternalError(
533               absl_ports::StrCat("Failed to delete blob file: ", file_path));
534         }
535       } else {
536         blob_file_names_to_remove.push_back(
537             std::to_string(blob_info_proto.creation_time_ms()));
538       }
539     } else {
540       // Write the alive blob info to the new blob info log file.
541       ICING_ASSIGN_OR_RETURN(int32_t new_offset,
542                              new_blob_info_log->WriteProto(blob_info_proto));
543       new_blob_handle_to_offset[blob_handle_str] = new_offset;
544     }
545   }
546   new_blob_info_log->PersistToDisk();
547   new_blob_info_log.reset();
548   blob_info_log_.reset();
549   std::string old_blob_info_proto_file_name =
550       MakeBlobInfoProtoLogFileName(base_dir_);
551   // Then we swap the new key mapper directory with the old one.
552   if (!filesystem_.SwapFiles(old_blob_info_proto_file_name.c_str(),
553                              temp_blob_info_proto_file_name.c_str())) {
554     return absl_ports::InternalError(
555         "Unable to apply new blob store due to failed swap!");
556   }
557 
558   // Delete the temp file, don't need to throw error if it fails, it will be
559   // deleted in the next run.
560   filesystem_.DeleteFile(temp_blob_info_proto_file_name.c_str());
561 
562   ICING_ASSIGN_OR_RETURN(
563       PortableFileBackedProtoLog<BlobInfoProto>::CreateResult log_create_result,
564       PortableFileBackedProtoLog<BlobInfoProto>::Create(
565           &filesystem_, old_blob_info_proto_file_name,
566           PortableFileBackedProtoLog<BlobInfoProto>::Options(
567               /*compress_in=*/true, constants::kMaxProtoSize,
568               compression_level_)));
569   blob_info_log_ = std::move(log_create_result.proto_log);
570   blob_handle_to_offset_ = std::move(new_blob_handle_to_offset);
571   return blob_file_names_to_remove;
572 }
573 
574 libtextclassifier3::StatusOr<std::vector<NamespaceBlobStorageInfoProto>>
GetStorageInfo() const575 BlobStore::GetStorageInfo() const {
576   // Get the file size of each namespace offset.
577   std::unordered_map<std::string, NamespaceBlobStorageInfoProto>
578       namespace_to_storage_info;
579   auto itr = blob_info_log_->GetIterator();
580   while (itr.Advance().ok()) {
581     auto blob_info_proto_or = blob_info_log_->ReadProto(itr.GetOffset());
582     if (!blob_info_proto_or.ok()) {
583       if (absl_ports::IsNotFound(blob_info_proto_or.status())) {
584         // Skip erased proto.
585         continue;
586       }
587 
588       // Return real error.
589       return std::move(blob_info_proto_or).status();
590     }
591     BlobInfoProto blob_info_proto = std::move(blob_info_proto_or).ValueOrDie();
592 
593     std::string file_path =
594         MakeBlobFilePath(base_dir_, blob_info_proto.creation_time_ms());
595     std::string name_space = blob_info_proto.blob_handle().namespace_();
596     NamespaceBlobStorageInfoProto& namespace_blob_storage_info =
597         namespace_to_storage_info[name_space];
598     namespace_blob_storage_info.set_namespace_(name_space);
599 
600     if (manage_blob_files_) {
601       int64_t file_size = filesystem_.GetFileSize(file_path.c_str());
602       if (file_size == Filesystem::kBadFileSize) {
603         ICING_LOG(WARNING) << "Bad file size for blob file: " << file_path;
604         continue;
605       }
606       namespace_blob_storage_info.set_blob_size(
607           namespace_blob_storage_info.blob_size() + file_size);
608       namespace_blob_storage_info.set_num_blobs(
609           namespace_blob_storage_info.num_blobs() + 1);
610     } else {
611       namespace_blob_storage_info.add_blob_file_names(
612           std::to_string(blob_info_proto.creation_time_ms()));
613     }
614   }
615 
616   // Create the namespace blob storage info for each namespace.
617   std::vector<NamespaceBlobStorageInfoProto> namespace_blob_storage_infos;
618   namespace_blob_storage_infos.reserve(namespace_to_storage_info.size());
619   for (const auto& [_, namespace_blob_storage_info] :
620        namespace_to_storage_info) {
621     namespace_blob_storage_infos.push_back(
622         std::move(namespace_blob_storage_info));
623   }
624 
625   return namespace_blob_storage_infos;
626 }
627 
628 }  // namespace lib
629 }  // namespace icing
630