• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/file/version-util.h"
16 
17 #include <cstdint>
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <unordered_set>
22 #include <utility>
23 
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/text_classifier/lib3/utils/base/statusor.h"
26 #include "icing/absl_ports/canonical_errors.h"
27 #include "icing/absl_ports/str_cat.h"
28 #include "icing/file/derived-file-util.h"
29 #include "icing/file/file-backed-proto.h"
30 #include "icing/file/filesystem.h"
31 #include "icing/index/index.h"
32 #include "icing/proto/initialize.pb.h"
33 #include "icing/util/status-macros.h"
34 
35 namespace icing {
36 namespace lib {
37 
38 namespace version_util {
39 
40 namespace {
41 
ReadV1VersionInfo(const Filesystem & filesystem,const std::string & version_file_dir,const std::string & index_base_dir)42 libtextclassifier3::StatusOr<VersionInfo> ReadV1VersionInfo(
43     const Filesystem& filesystem, const std::string& version_file_dir,
44     const std::string& index_base_dir) {
45   // 1. Read the version info.
46   const std::string v1_version_filepath =
47       MakeVersionFilePath(version_file_dir, kVersionFilenameV1);
48   VersionInfo existing_version_info(-1, -1);
49   if (filesystem.FileExists(v1_version_filepath.c_str()) &&
50       !filesystem.PRead(v1_version_filepath.c_str(), &existing_version_info,
51                         sizeof(VersionInfo), /*offset=*/0)) {
52     return absl_ports::InternalError("Failed to read v1 version file");
53   }
54 
55   // 2. Check the Index magic to see if we're actually on version 0.
56   libtextclassifier3::StatusOr<int> existing_flash_index_magic =
57       Index::ReadFlashIndexMagic(&filesystem, index_base_dir);
58   if (!existing_flash_index_magic.ok()) {
59     if (absl_ports::IsNotFound(existing_flash_index_magic.status())) {
60       // Flash index magic doesn't exist. In this case, we're unable to
61       // determine the version change state correctly (regardless of the
62       // existence of the version file), so invalidate VersionInfo by setting
63       // version to -1, but still keep the max_version value read in step 1.
64       existing_version_info.version = -1;
65       return existing_version_info;
66     }
67     // Real error.
68     return std::move(existing_flash_index_magic).status();
69   }
70   if (existing_flash_index_magic.ValueOrDie() == kVersionZeroFlashIndexMagic) {
71     existing_version_info.version = 0;
72     if (existing_version_info.max_version == -1) {
73       existing_version_info.max_version = 0;
74     }
75   }
76 
77   return existing_version_info;
78 }
79 
ReadV2VersionInfo(const Filesystem & filesystem,const std::string & version_file_dir)80 libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadV2VersionInfo(
81     const Filesystem& filesystem, const std::string& version_file_dir) {
82   // Read the v2 version file. V2 version file stores the
83   // IcingSearchEngineVersionProto as a file-backed proto.
84   const std::string v2_version_filepath =
85       MakeVersionFilePath(version_file_dir, kVersionFilenameV2);
86   FileBackedProto<IcingSearchEngineVersionProto> v2_version_file(
87       filesystem, v2_version_filepath);
88   ICING_ASSIGN_OR_RETURN(const IcingSearchEngineVersionProto* v2_version_proto,
89                          v2_version_file.Read());
90 
91   return *v2_version_proto;
92 }
93 
94 }  // namespace
95 
ReadVersion(const Filesystem & filesystem,const std::string & version_file_dir,const std::string & index_base_dir)96 libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadVersion(
97     const Filesystem& filesystem, const std::string& version_file_dir,
98     const std::string& index_base_dir) {
99   // 1. Read the v1 version file
100   ICING_ASSIGN_OR_RETURN(
101       VersionInfo v1_version_info,
102       ReadV1VersionInfo(filesystem, version_file_dir, index_base_dir));
103   if (!v1_version_info.IsValid()) {
104     // This happens if IcingLib's state is invalid (e.g. flash index header file
105     // is missing). Return the invalid version numbers in this case.
106     IcingSearchEngineVersionProto version_proto;
107     version_proto.set_version(v1_version_info.version);
108     version_proto.set_max_version(v1_version_info.max_version);
109     return version_proto;
110   }
111 
112   // 2. Read the v2 version file
113   auto v2_version_proto = ReadV2VersionInfo(filesystem, version_file_dir);
114   if (!v2_version_proto.ok()) {
115     if (!absl_ports::IsNotFound(v2_version_proto.status())) {
116       // Real error.
117       return std::move(v2_version_proto).status();
118     }
119     // The v2 version file has not been written
120     IcingSearchEngineVersionProto version_proto;
121     if (v1_version_info.version < kFirstV2Version) {
122       // There are two scenarios for this case:
123       // 1. It's the first time that we're upgrading from a lower version to a
124       //    version >= kFirstV2Version.
125       //    - It's expected that the v2 version file has not been written yet in
126       //      this case and we return the v1 version numbers instead.
127       // 2. We're rolling forward from a version < kFirstV2Version, after
128       //    rolling back from a previous version >= kFirstV2Version, and for
129       //    some unknown reason we lost the v2 version file in the previous
130       //    version.
131       //    - e.g. version #4 -> version #1 -> version #4, but we lost the v2
132       //      file during version #1.
133       //    - This is a rollforward case, but it's still fine to return the v1
134       //      version number here as ShouldRebuildDerivedFiles can handle
135       //      rollforwards correctly.
136       version_proto.set_version(v1_version_info.version);
137       version_proto.set_max_version(v1_version_info.max_version);
138     } else {
139       // Something weird has happened. During last initialization we were
140       // already on a version >= kFirstV2Version, so the v2 version file
141       // should have been written.
142       // Return an invalid version number in this case and trigger rebuilding
143       // everything.
144       version_proto.set_version(-1);
145       version_proto.set_max_version(v1_version_info.max_version);
146     }
147     return version_proto;
148   }
149 
150   // 3. Check if versions match. If not, it means that we're rolling forward
151   // from a version < kFirstV2Version. In order to trigger rebuilding
152   // everything, we return an invalid version number in this case.
153   IcingSearchEngineVersionProto v2_version_proto_value =
154       std::move(v2_version_proto).ValueOrDie();
155   if (v1_version_info.version != v2_version_proto_value.version()) {
156     v2_version_proto_value.set_version(-1);
157     v2_version_proto_value.mutable_enabled_features()->Clear();
158   }
159 
160   return v2_version_proto_value;
161 }
162 
WriteV1Version(const Filesystem & filesystem,const std::string & version_file_dir,const VersionInfo & version_info)163 libtextclassifier3::Status WriteV1Version(const Filesystem& filesystem,
164                                           const std::string& version_file_dir,
165                                           const VersionInfo& version_info) {
166   ScopedFd scoped_fd(filesystem.OpenForWrite(
167       MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()));
168   if (!scoped_fd.is_valid() ||
169       !filesystem.PWrite(scoped_fd.get(), /*offset=*/0, &version_info,
170                          sizeof(VersionInfo)) ||
171       !filesystem.DataSync(scoped_fd.get())) {
172     return absl_ports::InternalError("Failed to write v1 version file");
173   }
174   return libtextclassifier3::Status::OK;
175 }
176 
WriteV2Version(const Filesystem & filesystem,const std::string & version_file_dir,std::unique_ptr<IcingSearchEngineVersionProto> version_proto)177 libtextclassifier3::Status WriteV2Version(
178     const Filesystem& filesystem, const std::string& version_file_dir,
179     std::unique_ptr<IcingSearchEngineVersionProto> version_proto) {
180   FileBackedProto<IcingSearchEngineVersionProto> v2_version_file(
181       filesystem, MakeVersionFilePath(version_file_dir, kVersionFilenameV2));
182   libtextclassifier3::Status v2_write_status =
183       v2_version_file.Write(std::move(version_proto));
184   if (!v2_write_status.ok()) {
185     return absl_ports::InternalError(absl_ports::StrCat(
186         "Failed to write v2 version file: ", v2_write_status.error_message()));
187   }
188   return libtextclassifier3::Status::OK;
189 }
190 
DiscardVersionFiles(const Filesystem & filesystem,std::string_view version_file_dir)191 libtextclassifier3::Status DiscardVersionFiles(
192     const Filesystem& filesystem, std::string_view version_file_dir) {
193   if (!filesystem.DeleteFile(
194           MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()) ||
195       !filesystem.DeleteFile(
196           MakeVersionFilePath(version_file_dir, kVersionFilenameV2).c_str())) {
197     return absl_ports::InternalError("Failed to discard version files");
198   }
199   return libtextclassifier3::Status::OK;
200 }
201 
GetVersionStateChange(const VersionInfo & existing_version_info,int32_t curr_version)202 StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
203                                   int32_t curr_version) {
204   if (!existing_version_info.IsValid()) {
205     return StateChange::kUndetermined;
206   }
207 
208   if (existing_version_info.version == 0) {
209     return (existing_version_info.max_version == existing_version_info.version)
210                ? StateChange::kVersionZeroUpgrade
211                : StateChange::kVersionZeroRollForward;
212   }
213 
214   if (existing_version_info.version == curr_version) {
215     return StateChange::kCompatible;
216   } else if (existing_version_info.version > curr_version) {
217     return StateChange::kRollBack;
218   } else {  // existing_version_info.version < curr_version
219     return (existing_version_info.max_version == existing_version_info.version)
220                ? StateChange::kUpgrade
221                : StateChange::kRollForward;
222   }
223 }
224 
CalculateRequiredDerivedFilesRebuild(const IcingSearchEngineVersionProto & prev_version_proto,const IcingSearchEngineVersionProto & curr_version_proto)225 derived_file_util::DerivedFilesRebuildInfo CalculateRequiredDerivedFilesRebuild(
226     const IcingSearchEngineVersionProto& prev_version_proto,
227     const IcingSearchEngineVersionProto& curr_version_proto) {
228   // 1. Do version check using version and max_version numbers
229   if (ShouldRebuildDerivedFiles(GetVersionInfoFromProto(prev_version_proto),
230                                 curr_version_proto.version())) {
231     return derived_file_util::DerivedFilesRebuildInfo(
232         /*needs_document_store_derived_files_rebuild=*/true,
233         /*needs_schema_store_derived_files_rebuild=*/true,
234         /*needs_term_index_rebuild=*/true,
235         /*needs_integer_index_rebuild=*/true,
236         /*needs_qualified_id_join_index_rebuild=*/true,
237         /*needs_embedding_index_rebuild=*/true);
238   }
239 
240   // 2. Compare the previous enabled features with the current enabled features
241   // and rebuild if there are differences.
242   std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
243       prev_features;
244   for (const auto& feature : prev_version_proto.enabled_features()) {
245     prev_features.insert(feature.feature_type());
246   }
247   std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
248       curr_features;
249   for (const auto& feature : curr_version_proto.enabled_features()) {
250     curr_features.insert(feature.feature_type());
251   }
252   derived_file_util::DerivedFilesRebuildInfo result;
253   for (const auto& prev_feature : prev_features) {
254     // If there is an UNKNOWN feature in the previous feature set (note that we
255     // never use UNKNOWN  when writing the version proto), it means that:
256     // - The previous version proto contains a feature enum that is only defined
257     //   in a newer version.
258     // - We've now rolled back to an old version that doesn't understand this
259     //   new enum value, and proto serialization defaults it to 0 (UNKNOWN).
260     // - In this case we need to rebuild everything.
261     if (prev_feature == IcingSearchEngineFeatureInfoProto::UNKNOWN) {
262       return derived_file_util::DerivedFilesRebuildInfo(
263           /*needs_document_store_derived_files_rebuild=*/true,
264           /*needs_schema_store_derived_files_rebuild=*/true,
265           /*needs_term_index_rebuild=*/true,
266           /*needs_integer_index_rebuild=*/true,
267           /*needs_qualified_id_join_index_rebuild=*/true,
268           /*needs_embedding_index_rebuild=*/true);
269     }
270     if (curr_features.find(prev_feature) == curr_features.end()) {
271       derived_file_util::DerivedFilesRebuildInfo required_rebuilds =
272           GetFeatureDerivedFilesRebuildInfo(prev_feature);
273       result |= required_rebuilds;
274     }
275   }
276   for (const auto& curr_feature : curr_features) {
277     if (prev_features.find(curr_feature) == prev_features.end()) {
278       derived_file_util::DerivedFilesRebuildInfo required_rebuilds =
279           GetFeatureDerivedFilesRebuildInfo(curr_feature);
280       result |= required_rebuilds;
281     }
282   }
283   return result;
284 }
285 
ShouldRebuildDerivedFiles(const VersionInfo & existing_version_info,int32_t curr_version)286 bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
287                                int32_t curr_version) {
288   StateChange state_change =
289       GetVersionStateChange(existing_version_info, curr_version);
290   switch (state_change) {
291     case StateChange::kCompatible:
292       return false;
293     case StateChange::kUndetermined:
294       [[fallthrough]];
295     case StateChange::kRollBack:
296       [[fallthrough]];
297     case StateChange::kRollForward:
298       [[fallthrough]];
299     case StateChange::kVersionZeroRollForward:
300       [[fallthrough]];
301     case StateChange::kVersionZeroUpgrade:
302       return true;
303     case StateChange::kUpgrade:
304       break;
305   }
306 
307   bool should_rebuild = false;
308   int32_t existing_version = existing_version_info.version;
309   while (existing_version < curr_version) {
310     // LINT.IfChange(should_rebuild_derived_files_upgrade_check)
311     switch (existing_version) {
312       case 1: {
313         // version 1 -> version 2 upgrade, no need to rebuild
314         break;
315       }
316       case 2: {
317         // version 2 -> version 3 upgrade, no need to rebuild
318         break;
319       }
320       case 3: {
321         // version 3 -> version 4 upgrade, no need to rebuild
322         break;
323       }
324       case 4: {
325         // version 4 -> version 5 upgrade, no need to rebuild
326         break;
327       }
328       case 5: {
329         // version 5 -> version 6 upgrade, no need to rebuild
330         break;
331       }
332       case 6: {
333         // version 6 -> version 7 upgrade, no need to rebuild
334         break;
335       }
336       default:
337         // This should not happen. Rebuild anyway if unsure.
338         should_rebuild |= true;
339     }
340     // LINT.ThenChange(//depot/google3/icing/file/version-util.h:kVersion)
341     ++existing_version;
342   }
343   return should_rebuild;
344 }
345 
GetFeatureDerivedFilesRebuildInfo(IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature)346 derived_file_util::DerivedFilesRebuildInfo GetFeatureDerivedFilesRebuildInfo(
347     IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) {
348   switch (feature) {
349     case IcingSearchEngineFeatureInfoProto::FEATURE_SCORABLE_PROPERTIES: {
350       return derived_file_util::DerivedFilesRebuildInfo(
351           /*needs_document_store_derived_files_rebuild=*/true,
352           /*needs_schema_store_derived_files_rebuild=*/false,
353           /*needs_term_index_rebuild=*/false,
354           /*needs_integer_index_rebuild=*/false,
355           /*needs_qualified_id_join_index_rebuild=*/false,
356           /*needs_embedding_index_rebuild=*/false);
357     }
358     case IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR: {
359       return derived_file_util::DerivedFilesRebuildInfo(
360           /*needs_document_store_derived_files_rebuild=*/false,
361           /*needs_schema_store_derived_files_rebuild=*/false,
362           /*needs_term_index_rebuild=*/true,
363           /*needs_integer_index_rebuild=*/false,
364           /*needs_qualified_id_join_index_rebuild=*/false,
365           /*needs_embedding_index_rebuild=*/false);
366     }
367     case IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_INDEX: {
368       return derived_file_util::DerivedFilesRebuildInfo(
369           /*needs_document_store_derived_files_rebuild=*/false,
370           /*needs_schema_store_derived_files_rebuild=*/false,
371           /*needs_term_index_rebuild=*/false,
372           /*needs_integer_index_rebuild=*/false,
373           /*needs_qualified_id_join_index_rebuild=*/false,
374           /*needs_embedding_index_rebuild=*/true);
375     }
376     case IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_QUANTIZATION: {
377       return derived_file_util::DerivedFilesRebuildInfo(
378           /*needs_document_store_derived_files_rebuild=*/false,
379           /*needs_schema_store_derived_files_rebuild=*/false,
380           /*needs_term_index_rebuild=*/false,
381           /*needs_integer_index_rebuild=*/false,
382           /*needs_qualified_id_join_index_rebuild=*/false,
383           /*needs_embedding_index_rebuild=*/true);
384     }
385     case IcingSearchEngineFeatureInfoProto::FEATURE_SCHEMA_DATABASE: {
386       // The schema database feature requires schema-store migration, which is
387       // done separately from derived files rebuild.
388       return derived_file_util::DerivedFilesRebuildInfo(
389           /*needs_document_store_derived_files_rebuild=*/false,
390           /*needs_schema_store_derived_files_rebuild=*/false,
391           /*needs_term_index_rebuild=*/false,
392           /*needs_integer_index_rebuild=*/false,
393           /*needs_qualified_id_join_index_rebuild=*/false,
394           /*needs_embedding_index_rebuild=*/false);
395     }
396     case IcingSearchEngineFeatureInfoProto::
397         FEATURE_QUALIFIED_ID_JOIN_INDEX_V3: {
398       return derived_file_util::DerivedFilesRebuildInfo(
399           /*needs_document_store_derived_files_rebuild=*/false,
400           /*needs_schema_store_derived_files_rebuild=*/false,
401           /*needs_term_index_rebuild=*/false,
402           /*needs_integer_index_rebuild=*/false,
403           /*needs_qualified_id_join_index_rebuild=*/true,
404           /*needs_embedding_index_rebuild=*/false);
405     }
406     case IcingSearchEngineFeatureInfoProto::UNKNOWN:
407       return derived_file_util::DerivedFilesRebuildInfo(
408           /*needs_document_store_derived_files_rebuild=*/true,
409           /*needs_schema_store_derived_files_rebuild=*/true,
410           /*needs_term_index_rebuild=*/true,
411           /*needs_integer_index_rebuild=*/true,
412           /*needs_qualified_id_join_index_rebuild=*/true,
413           /*needs_embedding_index_rebuild=*/true);
414   }
415 }
416 
SchemaDatabaseMigrationRequired(const IcingSearchEngineVersionProto & prev_version_proto)417 bool SchemaDatabaseMigrationRequired(
418     const IcingSearchEngineVersionProto& prev_version_proto) {
419   if (prev_version_proto.version() < kSchemaDatabaseVersion) {
420     return true;
421   }
422   for (const auto& feature : prev_version_proto.enabled_features()) {
423     // The schema database feature was enabled in the previous version, so no
424     // need to migrate.
425     if (feature.feature_type() ==
426         IcingSearchEngineFeatureInfoProto::FEATURE_SCHEMA_DATABASE) {
427       return false;
428     }
429   }
430   return true;
431 }
432 
GetFeatureInfoProto(IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature)433 IcingSearchEngineFeatureInfoProto GetFeatureInfoProto(
434     IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) {
435   IcingSearchEngineFeatureInfoProto info;
436   info.set_feature_type(feature);
437 
438   derived_file_util::DerivedFilesRebuildInfo result =
439       GetFeatureDerivedFilesRebuildInfo(feature);
440   info.set_needs_document_store_rebuild(
441       result.needs_document_store_derived_files_rebuild);
442   info.set_needs_schema_store_rebuild(
443       result.needs_schema_store_derived_files_rebuild);
444   info.set_needs_term_index_rebuild(result.needs_term_index_rebuild);
445   info.set_needs_integer_index_rebuild(result.needs_integer_index_rebuild);
446   info.set_needs_qualified_id_join_index_rebuild(
447       result.needs_qualified_id_join_index_rebuild);
448   info.set_needs_embedding_index_rebuild(result.needs_embedding_index_rebuild);
449 
450   return info;
451 }
452 
AddEnabledFeatures(const IcingSearchEngineOptions & options,IcingSearchEngineVersionProto * version_proto)453 void AddEnabledFeatures(const IcingSearchEngineOptions& options,
454                         IcingSearchEngineVersionProto* version_proto) {
455   auto* enabled_features = version_proto->mutable_enabled_features();
456   // HasPropertyOperator feature
457   if (options.build_property_existence_metadata_hits()) {
458     enabled_features->Add(GetFeatureInfoProto(
459         IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR));
460   }
461   // EmbeddingIndex feature
462   if (options.enable_embedding_index()) {
463     enabled_features->Add(GetFeatureInfoProto(
464         IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_INDEX));
465   }
466   if (options.enable_scorable_properties()) {
467     enabled_features->Add(GetFeatureInfoProto(
468         IcingSearchEngineFeatureInfoProto::FEATURE_SCORABLE_PROPERTIES));
469   }
470   // EmbeddingQuantization feature
471   if (options.enable_embedding_quantization()) {
472     enabled_features->Add(GetFeatureInfoProto(
473         IcingSearchEngineFeatureInfoProto::FEATURE_EMBEDDING_QUANTIZATION));
474   }
475   // SchemaDatabase feature
476   if (options.enable_schema_database()) {
477     enabled_features->Add(GetFeatureInfoProto(
478         IcingSearchEngineFeatureInfoProto::FEATURE_SCHEMA_DATABASE));
479   }
480   // QualifiedIdJoinIndex V3 feature
481   if (options.enable_qualified_id_join_index_v3()) {
482     enabled_features->Add(GetFeatureInfoProto(
483         IcingSearchEngineFeatureInfoProto::FEATURE_QUALIFIED_ID_JOIN_INDEX_V3));
484   }
485 }
486 
487 }  // namespace version_util
488 
489 }  // namespace lib
490 }  // namespace icing
491