1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/file/version-util.h"
16
17 #include <cstdint>
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <unordered_set>
22 #include <utility>
23
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/text_classifier/lib3/utils/base/statusor.h"
26 #include "icing/absl_ports/canonical_errors.h"
27 #include "icing/absl_ports/str_cat.h"
28 #include "icing/file/derived-file-util.h"
29 #include "icing/file/file-backed-proto.h"
30 #include "icing/file/filesystem.h"
31 #include "icing/index/index.h"
32 #include "icing/proto/initialize.pb.h"
33 #include "icing/util/status-macros.h"
34
35 namespace icing {
36 namespace lib {
37
38 namespace version_util {
39
40 namespace {
41
ReadV1VersionInfo(const Filesystem & filesystem,const std::string & version_file_dir,const std::string & index_base_dir)42 libtextclassifier3::StatusOr<VersionInfo> ReadV1VersionInfo(
43 const Filesystem& filesystem, const std::string& version_file_dir,
44 const std::string& index_base_dir) {
45 // 1. Read the version info.
46 const std::string v1_version_filepath =
47 MakeVersionFilePath(version_file_dir, kVersionFilenameV1);
48 VersionInfo existing_version_info(-1, -1);
49 if (filesystem.FileExists(v1_version_filepath.c_str()) &&
50 !filesystem.PRead(v1_version_filepath.c_str(), &existing_version_info,
51 sizeof(VersionInfo), /*offset=*/0)) {
52 return absl_ports::InternalError("Failed to read v1 version file");
53 }
54
55 // 2. Check the Index magic to see if we're actually on version 0.
56 libtextclassifier3::StatusOr<int> existing_flash_index_magic =
57 Index::ReadFlashIndexMagic(&filesystem, index_base_dir);
58 if (!existing_flash_index_magic.ok()) {
59 if (absl_ports::IsNotFound(existing_flash_index_magic.status())) {
60 // Flash index magic doesn't exist. In this case, we're unable to
61 // determine the version change state correctly (regardless of the
62 // existence of the version file), so invalidate VersionInfo by setting
63 // version to -1, but still keep the max_version value read in step 1.
64 existing_version_info.version = -1;
65 return existing_version_info;
66 }
67 // Real error.
68 return std::move(existing_flash_index_magic).status();
69 }
70 if (existing_flash_index_magic.ValueOrDie() == kVersionZeroFlashIndexMagic) {
71 existing_version_info.version = 0;
72 if (existing_version_info.max_version == -1) {
73 existing_version_info.max_version = 0;
74 }
75 }
76
77 return existing_version_info;
78 }
79
ReadV2VersionInfo(const Filesystem & filesystem,const std::string & version_file_dir)80 libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadV2VersionInfo(
81 const Filesystem& filesystem, const std::string& version_file_dir) {
82 // Read the v2 version file. V2 version file stores the
83 // IcingSearchEngineVersionProto as a file-backed proto.
84 const std::string v2_version_filepath =
85 MakeVersionFilePath(version_file_dir, kVersionFilenameV2);
86 FileBackedProto<IcingSearchEngineVersionProto> v2_version_file(
87 filesystem, v2_version_filepath);
88 ICING_ASSIGN_OR_RETURN(const IcingSearchEngineVersionProto* v2_version_proto,
89 v2_version_file.Read());
90
91 return *v2_version_proto;
92 }
93
94 } // namespace
95
ReadVersion(const Filesystem & filesystem,const std::string & version_file_dir,const std::string & index_base_dir)96 libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadVersion(
97 const Filesystem& filesystem, const std::string& version_file_dir,
98 const std::string& index_base_dir) {
99 // 1. Read the v1 version file
100 ICING_ASSIGN_OR_RETURN(
101 VersionInfo v1_version_info,
102 ReadV1VersionInfo(filesystem, version_file_dir, index_base_dir));
103 if (!v1_version_info.IsValid()) {
104 // This happens if IcingLib's state is invalid (e.g. flash index header file
105 // is missing). Return the invalid version numbers in this case.
106 IcingSearchEngineVersionProto version_proto;
107 version_proto.set_version(v1_version_info.version);
108 version_proto.set_max_version(v1_version_info.max_version);
109 return version_proto;
110 }
111
112 // 2. Read the v2 version file
113 auto v2_version_proto = ReadV2VersionInfo(filesystem, version_file_dir);
114 if (!v2_version_proto.ok()) {
115 if (!absl_ports::IsNotFound(v2_version_proto.status())) {
116 // Real error.
117 return std::move(v2_version_proto).status();
118 }
119 // The v2 version file has not been written
120 IcingSearchEngineVersionProto version_proto;
121 if (v1_version_info.version < kFirstV2Version) {
122 // There are two scenarios for this case:
123 // 1. It's the first time that we're upgrading from a lower version to a
124 // version >= kFirstV2Version.
125 // - It's expected that the v2 version file has not been written yet in
126 // this case and we return the v1 version numbers instead.
127 // 2. We're rolling forward from a version < kFirstV2Version, after
128 // rolling back from a previous version >= kFirstV2Version, and for
129 // some unknown reason we lost the v2 version file in the previous
130 // version.
131 // - e.g. version #4 -> version #1 -> version #4, but we lost the v2
132 // file during version #1.
133 // - This is a rollforward case, but it's still fine to return the v1
134 // version number here as ShouldRebuildDerivedFiles can handle
135 // rollforwards correctly.
136 version_proto.set_version(v1_version_info.version);
137 version_proto.set_max_version(v1_version_info.max_version);
138 } else {
139 // Something weird has happened. During last initialization we were
140 // already on a version >= kFirstV2Version, so the v2 version file
141 // should have been written.
142 // Return an invalid version number in this case and trigger rebuilding
143 // everything.
144 version_proto.set_version(-1);
145 version_proto.set_max_version(v1_version_info.max_version);
146 }
147 return version_proto;
148 }
149
150 // 3. Check if versions match. If not, it means that we're rolling forward
151 // from a version < kFirstV2Version. In order to trigger rebuilding
152 // everything, we return an invalid version number in this case.
153 IcingSearchEngineVersionProto v2_version_proto_value =
154 std::move(v2_version_proto).ValueOrDie();
155 if (v1_version_info.version != v2_version_proto_value.version()) {
156 v2_version_proto_value.set_version(-1);
157 v2_version_proto_value.mutable_enabled_features()->Clear();
158 }
159
160 return v2_version_proto_value;
161 }
162
WriteV1Version(const Filesystem & filesystem,const std::string & version_file_dir,const VersionInfo & version_info)163 libtextclassifier3::Status WriteV1Version(const Filesystem& filesystem,
164 const std::string& version_file_dir,
165 const VersionInfo& version_info) {
166 ScopedFd scoped_fd(filesystem.OpenForWrite(
167 MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()));
168 if (!scoped_fd.is_valid() ||
169 !filesystem.PWrite(scoped_fd.get(), /*offset=*/0, &version_info,
170 sizeof(VersionInfo)) ||
171 !filesystem.DataSync(scoped_fd.get())) {
172 return absl_ports::InternalError("Failed to write v1 version file");
173 }
174 return libtextclassifier3::Status::OK;
175 }
176
WriteV2Version(const Filesystem & filesystem,const std::string & version_file_dir,std::unique_ptr<IcingSearchEngineVersionProto> version_proto)177 libtextclassifier3::Status WriteV2Version(
178 const Filesystem& filesystem, const std::string& version_file_dir,
179 std::unique_ptr<IcingSearchEngineVersionProto> version_proto) {
180 FileBackedProto<IcingSearchEngineVersionProto> v2_version_file(
181 filesystem, MakeVersionFilePath(version_file_dir, kVersionFilenameV2));
182 libtextclassifier3::Status v2_write_status =
183 v2_version_file.Write(std::move(version_proto));
184 if (!v2_write_status.ok()) {
185 return absl_ports::InternalError(absl_ports::StrCat(
186 "Failed to write v2 version file: ", v2_write_status.error_message()));
187 }
188 return libtextclassifier3::Status::OK;
189 }
190
DiscardVersionFiles(const Filesystem & filesystem,std::string_view version_file_dir)191 libtextclassifier3::Status DiscardVersionFiles(
192 const Filesystem& filesystem, std::string_view version_file_dir) {
193 if (!filesystem.DeleteFile(
194 MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()) ||
195 !filesystem.DeleteFile(
196 MakeVersionFilePath(version_file_dir, kVersionFilenameV2).c_str())) {
197 return absl_ports::InternalError("Failed to discard version files");
198 }
199 return libtextclassifier3::Status::OK;
200 }
201
GetVersionStateChange(const VersionInfo & existing_version_info,int32_t curr_version)202 StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
203 int32_t curr_version) {
204 if (!existing_version_info.IsValid()) {
205 return StateChange::kUndetermined;
206 }
207
208 if (existing_version_info.version == 0) {
209 return (existing_version_info.max_version == existing_version_info.version)
210 ? StateChange::kVersionZeroUpgrade
211 : StateChange::kVersionZeroRollForward;
212 }
213
214 if (existing_version_info.version == curr_version) {
215 return StateChange::kCompatible;
216 } else if (existing_version_info.version > curr_version) {
217 return StateChange::kRollBack;
218 } else { // existing_version_info.version < curr_version
219 return (existing_version_info.max_version == existing_version_info.version)
220 ? StateChange::kUpgrade
221 : StateChange::kRollForward;
222 }
223 }
224
CalculateRequiredDerivedFilesRebuild(const IcingSearchEngineVersionProto & prev_version_proto,const IcingSearchEngineVersionProto & curr_version_proto)225 derived_file_util::DerivedFilesRebuildInfo CalculateRequiredDerivedFilesRebuild(
226 const IcingSearchEngineVersionProto& prev_version_proto,
227 const IcingSearchEngineVersionProto& curr_version_proto) {
228 // 1. Do version check using version and max_version numbers
229 if (ShouldRebuildDerivedFiles(GetVersionInfoFromProto(prev_version_proto),
230 curr_version_proto.version())) {
231 return derived_file_util::DerivedFilesRebuildInfo(
232 /*needs_document_store_derived_files_rebuild=*/true,
233 /*needs_schema_store_derived_files_rebuild=*/true,
234 /*needs_term_index_rebuild=*/true,
235 /*needs_integer_index_rebuild=*/true,
236 /*needs_qualified_id_join_index_rebuild=*/true,
237 /*needs_embedding_index_rebuild=*/true);
238 }
239
240 // 2. Compare the previous enabled features with the current enabled features
241 // and rebuild if there are differences.
242 std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
243 prev_features;
244 for (const auto& feature : prev_version_proto.enabled_features()) {
245 prev_features.insert(feature.feature_type());
246 }
247 std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
248 curr_features;
249 for (const auto& feature : curr_version_proto.enabled_features()) {
250 curr_features.insert(feature.feature_type());
251 }
252 derived_file_util::DerivedFilesRebuildInfo result;
253 for (const auto& prev_feature : prev_features) {
254 // If there is an UNKNOWN feature in the previous feature set (note that we
255 // never use UNKNOWN when writing the version proto), it means that:
256 // - The previous version proto contains a feature enum that is only defined
257 // in a newer version.
258 // - We've now rolled back to an old version that doesn't understand this
259 // new enum value, and proto serialization defaults it to 0 (UNKNOWN).
260 // - In this case we need to rebuild everything.
261 if (prev_feature == IcingSearchEngineFeatureInfoProto::UNKNOWN) {
262 return derived_file_util::DerivedFilesRebuildInfo(
263 /*needs_document_store_derived_files_rebuild=*/true,
264 /*needs_schema_store_derived_files_rebuild=*/true,
265 /*needs_term_index_rebuild=*/true,
266 /*needs_integer_index_rebuild=*/true,
267 /*needs_qualified_id_join_index_rebuild=*/true,
268 /*needs_embedding_index_rebuild=*/true);
269 }
270 if (curr_features.find(prev_feature) == curr_features.end()) {
271 derived_file_util::DerivedFilesRebuildInfo required_rebuilds =
272 GetFeatureDerivedFilesRebuildInfo(prev_feature);
273 result |= required_rebuilds;
274 }
275 }
276 for (const auto& curr_feature : curr_features) {
277 if (prev_features.find(curr_feature) == prev_features.end()) {
278 derived_file_util::DerivedFilesRebuildInfo required_rebuilds =
279 GetFeatureDerivedFilesRebuildInfo(curr_feature);
280 result |= required_rebuilds;
281 }
282 }
283 return result;
284 }
285
ShouldRebuildDerivedFiles(const VersionInfo & existing_version_info,int32_t curr_version)286 bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
287 int32_t curr_version) {
288 StateChange state_change =
289 GetVersionStateChange(existing_version_info, curr_version);
290 switch (state_change) {
291 case StateChange::kCompatible:
292 return false;
293 case StateChange::kUndetermined:
294 [[fallthrough]];
295 case StateChange::kRollBack:
296 [[fallthrough]];
297 case StateChange::kRollForward:
298 [[fallthrough]];
299 case StateChange::kVersionZeroRollForward:
300 [[fallthrough]];
301 case StateChange::kVersionZeroUpgrade:
302 return true;
303 case StateChange::kUpgrade:
304 break;
305 }
306
307 bool should_rebuild = false;
308 int32_t existing_version = existing_version_info.version;
309 while (existing_version < curr_version) {
310 // LINT.IfChange(should_rebuild_derived_files_upgrade_check)
311 switch (existing_version) {
312 case 1: {
313 // version 1 -> version 2 upgrade, no need to rebuild
314 break;
315 }
316 case 2: {
317 // version 2 -> version 3 upgrade, no need to rebuild
318 break;
319 }
320 case 3: {
321 // version 3 -> version 4 upgrade, no need to rebuild
322 break;
323 }
324 case 4: {
325 // version 4 -> version 5 upgrade, no need to rebuild
326 break;
327 }
328 case 5: {
329 // version 5 -> version 6 upgrade, no need to rebuild
330 break;
331 }
332 case 6: {
333 // version 6 -> version 7 upgrade, no need to rebuild
334 break;
335 }
336 default:
337 // This should not happen. Rebuild anyway if unsure.
338 should_rebuild |= true;
339 }
340 // LINT.ThenChange(//depot/google3/icing/file/version-util.h:kVersion)
341 ++existing_version;
342 }
343 return should_rebuild;
344 }
345
GetFeatureDerivedFilesRebuildInfo(IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature)346 derived_file_util::DerivedFilesRebuildInfo GetFeatureDerivedFilesRebuildInfo(
347 IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) {
348 switch (feature) {
349 case IcingSearchEngineFeatureInfoProto::FEATURE_SCORABLE_PROPERTIES: {
350 return derived_file_util::DerivedFilesRebuildInfo(
351 /*needs_document_store_derived_files_rebuild=*/true,
352 /*needs_schema_store_derived_files_rebuild=*/false,
353 /*needs_term_index_rebuild=*/false,
354 /*needs_integer_index_rebuild=*/false,
355 /*needs_qualified_id_join_index_rebuild=*/false,
356 /*needs_embedding_index_rebuild=*/false);
357 }
358 case IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR: {
359 return derived_file_util::DerivedFilesRebuildInfo(
360 /*needs_document_store_derived_files_rebuild=*/false,
361 /*needs_schema_store_derived_files_rebuild=*/false,
362 /*needs_term_index_rebuild=*/true,
363 /*needs_integer_index_rebuild=*/false,
364 /*needs_qualified_id_join_index_rebuild=*/false,
365 /*needs_embedding_index_rebuild=*/false);
366 }
367 case IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_INDEX: {
368 return derived_file_util::DerivedFilesRebuildInfo(
369 /*needs_document_store_derived_files_rebuild=*/false,
370 /*needs_schema_store_derived_files_rebuild=*/false,
371 /*needs_term_index_rebuild=*/false,
372 /*needs_integer_index_rebuild=*/false,
373 /*needs_qualified_id_join_index_rebuild=*/false,
374 /*needs_embedding_index_rebuild=*/true);
375 }
376 case IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_QUANTIZATION: {
377 return derived_file_util::DerivedFilesRebuildInfo(
378 /*needs_document_store_derived_files_rebuild=*/false,
379 /*needs_schema_store_derived_files_rebuild=*/false,
380 /*needs_term_index_rebuild=*/false,
381 /*needs_integer_index_rebuild=*/false,
382 /*needs_qualified_id_join_index_rebuild=*/false,
383 /*needs_embedding_index_rebuild=*/true);
384 }
385 case IcingSearchEngineFeatureInfoProto::FEATURE_SCHEMA_DATABASE: {
386 // The schema database feature requires schema-store migration, which is
387 // done separately from derived files rebuild.
388 return derived_file_util::DerivedFilesRebuildInfo(
389 /*needs_document_store_derived_files_rebuild=*/false,
390 /*needs_schema_store_derived_files_rebuild=*/false,
391 /*needs_term_index_rebuild=*/false,
392 /*needs_integer_index_rebuild=*/false,
393 /*needs_qualified_id_join_index_rebuild=*/false,
394 /*needs_embedding_index_rebuild=*/false);
395 }
396 case IcingSearchEngineFeatureInfoProto::
397 FEATURE_QUALIFIED_ID_JOIN_INDEX_V3: {
398 return derived_file_util::DerivedFilesRebuildInfo(
399 /*needs_document_store_derived_files_rebuild=*/false,
400 /*needs_schema_store_derived_files_rebuild=*/false,
401 /*needs_term_index_rebuild=*/false,
402 /*needs_integer_index_rebuild=*/false,
403 /*needs_qualified_id_join_index_rebuild=*/true,
404 /*needs_embedding_index_rebuild=*/false);
405 }
406 case IcingSearchEngineFeatureInfoProto::UNKNOWN:
407 return derived_file_util::DerivedFilesRebuildInfo(
408 /*needs_document_store_derived_files_rebuild=*/true,
409 /*needs_schema_store_derived_files_rebuild=*/true,
410 /*needs_term_index_rebuild=*/true,
411 /*needs_integer_index_rebuild=*/true,
412 /*needs_qualified_id_join_index_rebuild=*/true,
413 /*needs_embedding_index_rebuild=*/true);
414 }
415 }
416
SchemaDatabaseMigrationRequired(const IcingSearchEngineVersionProto & prev_version_proto)417 bool SchemaDatabaseMigrationRequired(
418 const IcingSearchEngineVersionProto& prev_version_proto) {
419 if (prev_version_proto.version() < kSchemaDatabaseVersion) {
420 return true;
421 }
422 for (const auto& feature : prev_version_proto.enabled_features()) {
423 // The schema database feature was enabled in the previous version, so no
424 // need to migrate.
425 if (feature.feature_type() ==
426 IcingSearchEngineFeatureInfoProto::FEATURE_SCHEMA_DATABASE) {
427 return false;
428 }
429 }
430 return true;
431 }
432
GetFeatureInfoProto(IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature)433 IcingSearchEngineFeatureInfoProto GetFeatureInfoProto(
434 IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) {
435 IcingSearchEngineFeatureInfoProto info;
436 info.set_feature_type(feature);
437
438 derived_file_util::DerivedFilesRebuildInfo result =
439 GetFeatureDerivedFilesRebuildInfo(feature);
440 info.set_needs_document_store_rebuild(
441 result.needs_document_store_derived_files_rebuild);
442 info.set_needs_schema_store_rebuild(
443 result.needs_schema_store_derived_files_rebuild);
444 info.set_needs_term_index_rebuild(result.needs_term_index_rebuild);
445 info.set_needs_integer_index_rebuild(result.needs_integer_index_rebuild);
446 info.set_needs_qualified_id_join_index_rebuild(
447 result.needs_qualified_id_join_index_rebuild);
448 info.set_needs_embedding_index_rebuild(result.needs_embedding_index_rebuild);
449
450 return info;
451 }
452
AddEnabledFeatures(const IcingSearchEngineOptions & options,IcingSearchEngineVersionProto * version_proto)453 void AddEnabledFeatures(const IcingSearchEngineOptions& options,
454 IcingSearchEngineVersionProto* version_proto) {
455 auto* enabled_features = version_proto->mutable_enabled_features();
456 // HasPropertyOperator feature
457 if (options.build_property_existence_metadata_hits()) {
458 enabled_features->Add(GetFeatureInfoProto(
459 IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR));
460 }
461 // EmbeddingIndex feature
462 if (options.enable_embedding_index()) {
463 enabled_features->Add(GetFeatureInfoProto(
464 IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_INDEX));
465 }
466 if (options.enable_scorable_properties()) {
467 enabled_features->Add(GetFeatureInfoProto(
468 IcingSearchEngineFeatureInfoProto::FEATURE_SCORABLE_PROPERTIES));
469 }
470 // EmbeddingQuantization feature
471 if (options.enable_embedding_quantization()) {
472 enabled_features->Add(GetFeatureInfoProto(
473 IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_QUANTIZATION));
474 }
475 // SchemaDatabase feature
476 if (options.enable_schema_database()) {
477 enabled_features->Add(GetFeatureInfoProto(
478 IcingSearchEngineFeatureInfoProto::FEATURE_SCHEMA_DATABASE));
479 }
480 // QualifiedIdJoinIndex V3 feature
481 if (options.enable_qualified_id_join_index_v3()) {
482 enabled_features->Add(GetFeatureInfoProto(
483 IcingSearchEngineFeatureInfoProto::FEATURE_QUALIFIED_ID_JOIN_INDEX_V3));
484 }
485 }
486
487 } // namespace version_util
488
489 } // namespace lib
490 } // namespace icing
491