1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/file/version-util.h"
16
17 #include <cstdint>
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <unordered_set>
22 #include <utility>
23
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/text_classifier/lib3/utils/base/statusor.h"
26 #include "icing/absl_ports/canonical_errors.h"
27 #include "icing/absl_ports/str_cat.h"
28 #include "icing/file/file-backed-proto.h"
29 #include "icing/file/filesystem.h"
30 #include "icing/index/index.h"
31 #include "icing/proto/initialize.pb.h"
32 #include "icing/util/status-macros.h"
33
34 namespace icing {
35 namespace lib {
36
37 namespace version_util {
38
39 namespace {
40
ReadV1VersionInfo(const Filesystem & filesystem,const std::string & version_file_dir,const std::string & index_base_dir)41 libtextclassifier3::StatusOr<VersionInfo> ReadV1VersionInfo(
42 const Filesystem& filesystem, const std::string& version_file_dir,
43 const std::string& index_base_dir) {
44 // 1. Read the version info.
45 const std::string v1_version_filepath =
46 MakeVersionFilePath(version_file_dir, kVersionFilenameV1);
47 VersionInfo existing_version_info(-1, -1);
48 if (filesystem.FileExists(v1_version_filepath.c_str()) &&
49 !filesystem.PRead(v1_version_filepath.c_str(), &existing_version_info,
50 sizeof(VersionInfo), /*offset=*/0)) {
51 return absl_ports::InternalError("Failed to read v1 version file");
52 }
53
54 // 2. Check the Index magic to see if we're actually on version 0.
55 libtextclassifier3::StatusOr<int> existing_flash_index_magic =
56 Index::ReadFlashIndexMagic(&filesystem, index_base_dir);
57 if (!existing_flash_index_magic.ok()) {
58 if (absl_ports::IsNotFound(existing_flash_index_magic.status())) {
59 // Flash index magic doesn't exist. In this case, we're unable to
60 // determine the version change state correctly (regardless of the
61 // existence of the version file), so invalidate VersionInfo by setting
62 // version to -1, but still keep the max_version value read in step 1.
63 existing_version_info.version = -1;
64 return existing_version_info;
65 }
66 // Real error.
67 return std::move(existing_flash_index_magic).status();
68 }
69 if (existing_flash_index_magic.ValueOrDie() ==
70 kVersionZeroFlashIndexMagic) {
71 existing_version_info.version = 0;
72 if (existing_version_info.max_version == -1) {
73 existing_version_info.max_version = 0;
74 }
75 }
76
77 return existing_version_info;
78 }
79
ReadV2VersionInfo(const Filesystem & filesystem,const std::string & version_file_dir)80 libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadV2VersionInfo(
81 const Filesystem& filesystem, const std::string& version_file_dir) {
82 // Read the v2 version file. V2 version file stores the
83 // IcingSearchEngineVersionProto as a file-backed proto.
84 const std::string v2_version_filepath =
85 MakeVersionFilePath(version_file_dir, kVersionFilenameV2);
86 FileBackedProto<IcingSearchEngineVersionProto> v2_version_file(
87 filesystem, v2_version_filepath);
88 ICING_ASSIGN_OR_RETURN(const IcingSearchEngineVersionProto* v2_version_proto,
89 v2_version_file.Read());
90
91 return *v2_version_proto;
92 }
93
94 } // namespace
95
ReadVersion(const Filesystem & filesystem,const std::string & version_file_dir,const std::string & index_base_dir)96 libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadVersion(
97 const Filesystem& filesystem, const std::string& version_file_dir,
98 const std::string& index_base_dir) {
99 // 1. Read the v1 version file
100 ICING_ASSIGN_OR_RETURN(
101 VersionInfo v1_version_info,
102 ReadV1VersionInfo(filesystem, version_file_dir, index_base_dir));
103 if (!v1_version_info.IsValid()) {
104 // This happens if IcingLib's state is invalid (e.g. flash index header file
105 // is missing). Return the invalid version numbers in this case.
106 IcingSearchEngineVersionProto version_proto;
107 version_proto.set_version(v1_version_info.version);
108 version_proto.set_max_version(v1_version_info.max_version);
109 return version_proto;
110 }
111
112 // 2. Read the v2 version file
113 auto v2_version_proto = ReadV2VersionInfo(filesystem, version_file_dir);
114 if (!v2_version_proto.ok()) {
115 if (!absl_ports::IsNotFound(v2_version_proto.status())) {
116 // Real error.
117 return std::move(v2_version_proto).status();
118 }
119 // The v2 version file has not been written
120 IcingSearchEngineVersionProto version_proto;
121 if (v1_version_info.version < kFirstV2Version) {
122 // There are two scenarios for this case:
123 // 1. It's the first time that we're upgrading from a lower version to a
124 // version >= kFirstV2Version.
125 // - It's expected that the v2 version file has not been written yet in
126 // this case and we return the v1 version numbers instead.
127 // 2. We're rolling forward from a version < kFirstV2Version, after
128 // rolling back from a previous version >= kFirstV2Version, and for
129 // some unknown reason we lost the v2 version file in the previous
130 // version.
131 // - e.g. version #4 -> version #1 -> version #4, but we lost the v2
132 // file during version #1.
133 // - This is a rollforward case, but it's still fine to return the v1
134 // version number here as ShouldRebuildDerivedFiles can handle
135 // rollforwards correctly.
136 version_proto.set_version(v1_version_info.version);
137 version_proto.set_max_version(v1_version_info.max_version);
138 } else {
139 // Something weird has happened. During last initialization we were
140 // already on a version >= kFirstV2Version, so the v2 version file
141 // should have been written.
142 // Return an invalid version number in this case and trigger rebuilding
143 // everything.
144 version_proto.set_version(-1);
145 version_proto.set_max_version(v1_version_info.max_version);
146 }
147 return version_proto;
148 }
149
150 // 3. Check if versions match. If not, it means that we're rolling forward
151 // from a version < kFirstV2Version. In order to trigger rebuilding
152 // everything, we return an invalid version number in this case.
153 IcingSearchEngineVersionProto v2_version_proto_value =
154 std::move(v2_version_proto).ValueOrDie();
155 if (v1_version_info.version != v2_version_proto_value.version()) {
156 v2_version_proto_value.set_version(-1);
157 v2_version_proto_value.mutable_enabled_features()->Clear();
158 }
159
160 return v2_version_proto_value;
161 }
162
WriteV1Version(const Filesystem & filesystem,const std::string & version_file_dir,const VersionInfo & version_info)163 libtextclassifier3::Status WriteV1Version(const Filesystem& filesystem,
164 const std::string& version_file_dir,
165 const VersionInfo& version_info) {
166 ScopedFd scoped_fd(filesystem.OpenForWrite(
167 MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()));
168 if (!scoped_fd.is_valid() ||
169 !filesystem.PWrite(scoped_fd.get(), /*offset=*/0, &version_info,
170 sizeof(VersionInfo)) ||
171 !filesystem.DataSync(scoped_fd.get())) {
172 return absl_ports::InternalError("Failed to write v1 version file");
173 }
174 return libtextclassifier3::Status::OK;
175 }
176
WriteV2Version(const Filesystem & filesystem,const std::string & version_file_dir,std::unique_ptr<IcingSearchEngineVersionProto> version_proto)177 libtextclassifier3::Status WriteV2Version(
178 const Filesystem& filesystem, const std::string& version_file_dir,
179 std::unique_ptr<IcingSearchEngineVersionProto> version_proto) {
180 FileBackedProto<IcingSearchEngineVersionProto> v2_version_file(
181 filesystem, MakeVersionFilePath(version_file_dir, kVersionFilenameV2));
182 libtextclassifier3::Status v2_write_status =
183 v2_version_file.Write(std::move(version_proto));
184 if (!v2_write_status.ok()) {
185 return absl_ports::InternalError(absl_ports::StrCat(
186 "Failed to write v2 version file: ", v2_write_status.error_message()));
187 }
188 return libtextclassifier3::Status::OK;
189 }
190
DiscardVersionFiles(const Filesystem & filesystem,std::string_view version_file_dir)191 libtextclassifier3::Status DiscardVersionFiles(
192 const Filesystem& filesystem, std::string_view version_file_dir) {
193 if (!filesystem.DeleteFile(
194 MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()) ||
195 !filesystem.DeleteFile(
196 MakeVersionFilePath(version_file_dir, kVersionFilenameV2).c_str())) {
197 return absl_ports::InternalError("Failed to discard version files");
198 }
199 return libtextclassifier3::Status::OK;
200 }
201
GetVersionStateChange(const VersionInfo & existing_version_info,int32_t curr_version)202 StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
203 int32_t curr_version) {
204 if (!existing_version_info.IsValid()) {
205 return StateChange::kUndetermined;
206 }
207
208 if (existing_version_info.version == 0) {
209 return (existing_version_info.max_version == existing_version_info.version)
210 ? StateChange::kVersionZeroUpgrade
211 : StateChange::kVersionZeroRollForward;
212 }
213
214 if (existing_version_info.version == curr_version) {
215 return StateChange::kCompatible;
216 } else if (existing_version_info.version > curr_version) {
217 return StateChange::kRollBack;
218 } else { // existing_version_info.version < curr_version
219 return (existing_version_info.max_version == existing_version_info.version)
220 ? StateChange::kUpgrade
221 : StateChange::kRollForward;
222 }
223 }
224
CalculateRequiredDerivedFilesRebuild(const IcingSearchEngineVersionProto & prev_version_proto,const IcingSearchEngineVersionProto & curr_version_proto)225 DerivedFilesRebuildResult CalculateRequiredDerivedFilesRebuild(
226 const IcingSearchEngineVersionProto& prev_version_proto,
227 const IcingSearchEngineVersionProto& curr_version_proto) {
228 // 1. Do version check using version and max_version numbers
229 if (ShouldRebuildDerivedFiles(GetVersionInfoFromProto(prev_version_proto),
230 curr_version_proto.version())) {
231 return DerivedFilesRebuildResult(
232 /*needs_document_store_derived_files_rebuild=*/true,
233 /*needs_schema_store_derived_files_rebuild=*/true,
234 /*needs_term_index_rebuild=*/true,
235 /*needs_integer_index_rebuild=*/true,
236 /*needs_qualified_id_join_index_rebuild=*/true);
237 }
238
239 // 2. Compare the previous enabled features with the current enabled features
240 // and rebuild if there are differences.
241 std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
242 prev_features;
243 for (const auto& feature : prev_version_proto.enabled_features()) {
244 prev_features.insert(feature.feature_type());
245 }
246 std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
247 curr_features;
248 for (const auto& feature : curr_version_proto.enabled_features()) {
249 curr_features.insert(feature.feature_type());
250 }
251 DerivedFilesRebuildResult result;
252 for (const auto& prev_feature : prev_features) {
253 // If there is an UNKNOWN feature in the previous feature set (note that we
254 // never use UNKNOWN when writing the version proto), it means that:
255 // - The previous version proto contains a feature enum that is only defined
256 // in a newer version.
257 // - We've now rolled back to an old version that doesn't understand this
258 // new enum value, and proto serialization defaults it to 0 (UNKNOWN).
259 // - In this case we need to rebuild everything.
260 if (prev_feature == IcingSearchEngineFeatureInfoProto::UNKNOWN) {
261 return DerivedFilesRebuildResult(
262 /*needs_document_store_derived_files_rebuild=*/true,
263 /*needs_schema_store_derived_files_rebuild=*/true,
264 /*needs_term_index_rebuild=*/true,
265 /*needs_integer_index_rebuild=*/true,
266 /*needs_qualified_id_join_index_rebuild=*/true);
267 }
268 if (curr_features.find(prev_feature) == curr_features.end()) {
269 DerivedFilesRebuildResult required_rebuilds =
270 GetFeatureDerivedFilesRebuildResult(prev_feature);
271 result.CombineWithOtherRebuildResultOr(required_rebuilds);
272 }
273 }
274 for (const auto& curr_feature : curr_features) {
275 if (prev_features.find(curr_feature) == prev_features.end()) {
276 DerivedFilesRebuildResult required_rebuilds =
277 GetFeatureDerivedFilesRebuildResult(curr_feature);
278 result.CombineWithOtherRebuildResultOr(required_rebuilds);
279 }
280 }
281 return result;
282 }
283
ShouldRebuildDerivedFiles(const VersionInfo & existing_version_info,int32_t curr_version)284 bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
285 int32_t curr_version) {
286 StateChange state_change =
287 GetVersionStateChange(existing_version_info, curr_version);
288 switch (state_change) {
289 case StateChange::kCompatible:
290 return false;
291 case StateChange::kUndetermined:
292 [[fallthrough]];
293 case StateChange::kRollBack:
294 [[fallthrough]];
295 case StateChange::kRollForward:
296 [[fallthrough]];
297 case StateChange::kVersionZeroRollForward:
298 [[fallthrough]];
299 case StateChange::kVersionZeroUpgrade:
300 return true;
301 case StateChange::kUpgrade:
302 break;
303 }
304
305 bool should_rebuild = false;
306 int32_t existing_version = existing_version_info.version;
307 while (existing_version < curr_version) {
308 switch (existing_version) {
309 case 1: {
310 // version 1 -> version 2 upgrade, no need to rebuild
311 break;
312 }
313 case 2: {
314 // version 2 -> version 3 upgrade, no need to rebuild
315 break;
316 }
317 case 3: {
318 // version 3 -> version 4 upgrade, no need to rebuild
319 break;
320 }
321 default:
322 // This should not happen. Rebuild anyway if unsure.
323 should_rebuild |= true;
324 }
325 ++existing_version;
326 }
327 return should_rebuild;
328 }
329
GetFeatureDerivedFilesRebuildResult(IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature)330 DerivedFilesRebuildResult GetFeatureDerivedFilesRebuildResult(
331 IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) {
332 switch (feature) {
333 case IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR: {
334 return DerivedFilesRebuildResult(
335 /*needs_document_store_derived_files_rebuild=*/false,
336 /*needs_schema_store_derived_files_rebuild=*/false,
337 /*needs_term_index_rebuild=*/true,
338 /*needs_integer_index_rebuild=*/false,
339 /*needs_qualified_id_join_index_rebuild=*/false);
340 }
341 case IcingSearchEngineFeatureInfoProto::UNKNOWN:
342 return DerivedFilesRebuildResult(
343 /*needs_document_store_derived_files_rebuild=*/true,
344 /*needs_schema_store_derived_files_rebuild=*/true,
345 /*needs_term_index_rebuild=*/true,
346 /*needs_integer_index_rebuild=*/true,
347 /*needs_qualified_id_join_index_rebuild=*/true);
348 }
349 }
350
GetFeatureInfoProto(IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature)351 IcingSearchEngineFeatureInfoProto GetFeatureInfoProto(
352 IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) {
353 IcingSearchEngineFeatureInfoProto info;
354 info.set_feature_type(feature);
355
356 DerivedFilesRebuildResult result =
357 GetFeatureDerivedFilesRebuildResult(feature);
358 info.set_needs_document_store_rebuild(
359 result.needs_document_store_derived_files_rebuild);
360 info.set_needs_schema_store_rebuild(
361 result.needs_schema_store_derived_files_rebuild);
362 info.set_needs_term_index_rebuild(result.needs_term_index_rebuild);
363 info.set_needs_integer_index_rebuild(result.needs_integer_index_rebuild);
364 info.set_needs_qualified_id_join_index_rebuild(
365 result.needs_qualified_id_join_index_rebuild);
366
367 return info;
368 }
369
AddEnabledFeatures(const IcingSearchEngineOptions & options,IcingSearchEngineVersionProto * version_proto)370 void AddEnabledFeatures(const IcingSearchEngineOptions& options,
371 IcingSearchEngineVersionProto* version_proto) {
372 auto* enabled_features = version_proto->mutable_enabled_features();
373 // HasPropertyOperator feature
374 if (options.build_property_existence_metadata_hits()) {
375 enabled_features->Add(GetFeatureInfoProto(
376 IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR));
377 }
378 }
379
380 } // namespace version_util
381
382 } // namespace lib
383 } // namespace icing
384