• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2021 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/store/document-log-creator.h"
16 
17 #include <memory>
18 #include <string>
19 #include <utility>
20 
21 #include "icing/text_classifier/lib3/utils/base/logging.h"
22 #include "icing/text_classifier/lib3/utils/base/status.h"
23 #include "icing/text_classifier/lib3/utils/base/statusor.h"
24 #include "icing/absl_ports/annotate.h"
25 #include "icing/absl_ports/canonical_errors.h"
26 #include "icing/absl_ports/str_cat.h"
27 #include "icing/file/file-backed-proto-log.h"
28 #include "icing/file/filesystem.h"
29 #include "icing/file/portable-file-backed-proto-log.h"
30 #include "icing/proto/document_wrapper.pb.h"
31 #include "icing/util/logging.h"
32 #include "icing/util/status-macros.h"
33 
34 namespace icing {
35 namespace lib {
36 
37 namespace {
38 
39 // Used in DocumentId mapper to mark a document as deleted
40 constexpr char kDocumentLogFilename[] = "document_log";
41 
DocumentLogFilenameV0()42 std::string DocumentLogFilenameV0() {
43   // Originally only had this one version, no suffix.
44   return kDocumentLogFilename;
45 }
46 
DocumentLogFilenameV1()47 std::string DocumentLogFilenameV1() {
48   return absl_ports::StrCat(kDocumentLogFilename, "_v1");
49 }
50 
MakeDocumentLogFilenameV0(const std::string & base_dir)51 std::string MakeDocumentLogFilenameV0(const std::string& base_dir) {
52   return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV0());
53 }
54 
MakeDocumentLogFilenameV1(const std::string & base_dir)55 std::string MakeDocumentLogFilenameV1(const std::string& base_dir) {
56   return absl_ports::StrCat(base_dir, "/", DocumentLogFilenameV1());
57 }
58 
59 }  // namespace
60 
GetDocumentLogFilename()61 std::string DocumentLogCreator::GetDocumentLogFilename() {
62   // This should always return the latest version of the document log in use.
63   // The current latest version is V1.
64   return DocumentLogFilenameV1();
65 }
66 
67 libtextclassifier3::StatusOr<DocumentLogCreator::CreateResult>
Create(const Filesystem * filesystem,const std::string & base_dir)68 DocumentLogCreator::Create(const Filesystem* filesystem,
69                            const std::string& base_dir) {
70   bool v0_exists =
71       filesystem->FileExists(MakeDocumentLogFilenameV0(base_dir).c_str());
72   bool regen_derived_files = false;
73 
74 #ifdef ENABLE_V1_MIGRATION
75   bool v1_exists =
76       filesystem->FileExists(MakeDocumentLogFilenameV1(base_dir).c_str());
77 
78   if (v0_exists && !v1_exists) {
79     ICING_RETURN_IF_ERROR(MigrateFromV0ToV1(filesystem, base_dir));
80 
81     // Need to regenerate derived files since documents may be written to a
82     // different file offset in the log.
83     regen_derived_files = true;
84   } else if (!v1_exists) {
85     // First time initializing a v1 log. There are no existing derived files at
86     // this point, so we should generate some. "regenerate" here also means
87     // "generate for the first time", i.e. we shouldn't expect there to be any
88     // existing derived files.
89     regen_derived_files = true;
90   }
91 #else  // !ENABLE_V1_MIGRATION
92   if (v0_exists) {
93     // If migration from v0 to v1 is not enabled, then simply delete the v0 file
94     // and treat this as if it's our first time initializing a v1 log.
95     regen_derived_files = true;
96     filesystem->DeleteFile(MakeDocumentLogFilenameV0(base_dir).c_str());
97   }
98 #endif  // ENABLED_V1_MIGRATION
99 
100   ICING_ASSIGN_OR_RETURN(
101       PortableFileBackedProtoLog<DocumentWrapper>::CreateResult
102           log_create_result,
103       PortableFileBackedProtoLog<DocumentWrapper>::Create(
104           filesystem, MakeDocumentLogFilenameV1(base_dir),
105           PortableFileBackedProtoLog<DocumentWrapper>::Options(
106               /*compress_in=*/true)));
107 
108   CreateResult create_result = {std::move(log_create_result),
109                                 regen_derived_files};
110   return create_result;
111 }
112 
MigrateFromV0ToV1(const Filesystem * filesystem,const std::string & base_dir)113 libtextclassifier3::Status DocumentLogCreator::MigrateFromV0ToV1(
114     const Filesystem* filesystem, const std::string& base_dir) {
115   ICING_VLOG(1) << "Migrating from v0 to v1 document log.";
116 
117   // Our v0 proto log was non-portable, create it so we can read protos out from
118   // it.
119   auto v0_create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
120       filesystem, MakeDocumentLogFilenameV0(base_dir),
121       FileBackedProtoLog<DocumentWrapper>::Options(
122           /*compress_in=*/true));
123   if (!v0_create_result_or.ok()) {
124     return absl_ports::Annotate(
125         v0_create_result_or.status(),
126         "Failed to initialize v0 document log while migrating.");
127     return v0_create_result_or.status();
128   }
129   FileBackedProtoLog<DocumentWrapper>::CreateResult v0_create_result =
130       std::move(v0_create_result_or).ValueOrDie();
131   std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> v0_proto_log =
132       std::move(v0_create_result.proto_log);
133 
134   // Create a v1 portable proto log that we will write our protos to.
135   auto v1_create_result_or =
136       PortableFileBackedProtoLog<DocumentWrapper>::Create(
137           filesystem, MakeDocumentLogFilenameV1(base_dir),
138           PortableFileBackedProtoLog<DocumentWrapper>::Options(
139               /*compress_in=*/true));
140   if (!v1_create_result_or.ok()) {
141     return absl_ports::Annotate(
142         v1_create_result_or.status(),
143         "Failed to initialize v1 document log while migrating.");
144   }
145   PortableFileBackedProtoLog<DocumentWrapper>::CreateResult v1_create_result =
146       std::move(v1_create_result_or).ValueOrDie();
147   std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> v1_proto_log =
148       std::move(v1_create_result.proto_log);
149 
150   // Dummy empty document to be used when copying over deleted documents.
151   DocumentProto empty_document;
152 
153   // Start reading out from the old log and putting them in the new log.
154   auto iterator = v0_proto_log->GetIterator();
155   auto iterator_status = iterator.Advance();
156   while (iterator_status.ok()) {
157     libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or =
158         v0_proto_log->ReadProto(iterator.GetOffset());
159 
160     bool deleted_document = false;
161     DocumentWrapper document_wrapper;
162     if (absl_ports::IsNotFound(document_wrapper_or.status())) {
163       // Proto was erased, we can skip copying this into our new log.
164       *document_wrapper.mutable_document() = empty_document;
165       deleted_document = true;
166     } else if (!document_wrapper_or.ok()) {
167       // Some real error, pass up
168       return document_wrapper_or.status();
169     } else {
170       document_wrapper = std::move(document_wrapper_or).ValueOrDie();
171     }
172 
173     auto offset_or = v1_proto_log->WriteProto(document_wrapper);
174     if (!offset_or.ok()) {
175       return absl_ports::Annotate(
176           offset_or.status(),
177           "Failed to write proto to v1 document log while migrating.");
178     }
179 
180     // If the original document was deleted, erase the proto we just wrote.
181     // We do this to maintain the document_ids, i.e. we still want document_id 2
182     // to point to a deleted document even though we may not have the document
183     // contents anymore. DocumentStore guarantees that the document_ids don't
184     // change unless an Optimize is triggered.
185     if (deleted_document) {
186       int64_t offset = offset_or.ValueOrDie();
187       auto erased_status = v1_proto_log->EraseProto(offset);
188       if (!erased_status.ok()) {
189         return absl_ports::Annotate(
190             erased_status,
191             "Failed to erase proto in v1 document log while migrating.");
192       }
193     }
194 
195     iterator_status = iterator.Advance();
196   }
197 
198   // Close out our file log pointers.
199   v0_proto_log.reset();
200   v1_proto_log.reset();
201 
202   return libtextclassifier3::Status::OK;
203 }
204 
205 }  // namespace lib
206 }  // namespace icing
207