• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/file/version-util.h"
16 
17 #include <cstdint>
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <unordered_set>
22 #include <utility>
23 
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/text_classifier/lib3/utils/base/statusor.h"
26 #include "icing/absl_ports/canonical_errors.h"
27 #include "icing/absl_ports/str_cat.h"
28 #include "icing/file/file-backed-proto.h"
29 #include "icing/file/filesystem.h"
30 #include "icing/index/index.h"
31 #include "icing/proto/initialize.pb.h"
32 #include "icing/util/status-macros.h"
33 
34 namespace icing {
35 namespace lib {
36 
37 namespace version_util {
38 
39 namespace {
40 
ReadV1VersionInfo(const Filesystem & filesystem,const std::string & version_file_dir,const std::string & index_base_dir)41 libtextclassifier3::StatusOr<VersionInfo> ReadV1VersionInfo(
42     const Filesystem& filesystem, const std::string& version_file_dir,
43     const std::string& index_base_dir) {
44   // 1. Read the version info.
45   const std::string v1_version_filepath =
46       MakeVersionFilePath(version_file_dir, kVersionFilenameV1);
47   VersionInfo existing_version_info(-1, -1);
48   if (filesystem.FileExists(v1_version_filepath.c_str()) &&
49       !filesystem.PRead(v1_version_filepath.c_str(), &existing_version_info,
50                         sizeof(VersionInfo), /*offset=*/0)) {
51     return absl_ports::InternalError("Failed to read v1 version file");
52   }
53 
54   // 2. Check the Index magic to see if we're actually on version 0.
55   libtextclassifier3::StatusOr<int> existing_flash_index_magic =
56       Index::ReadFlashIndexMagic(&filesystem, index_base_dir);
57   if (!existing_flash_index_magic.ok()) {
58     if (absl_ports::IsNotFound(existing_flash_index_magic.status())) {
59       // Flash index magic doesn't exist. In this case, we're unable to
60       // determine the version change state correctly (regardless of the
61       // existence of the version file), so invalidate VersionInfo by setting
62       // version to -1, but still keep the max_version value read in step 1.
63       existing_version_info.version = -1;
64       return existing_version_info;
65     }
66     // Real error.
67     return std::move(existing_flash_index_magic).status();
68   }
69   if (existing_flash_index_magic.ValueOrDie() ==
70       kVersionZeroFlashIndexMagic) {
71     existing_version_info.version = 0;
72     if (existing_version_info.max_version == -1) {
73       existing_version_info.max_version = 0;
74     }
75   }
76 
77   return existing_version_info;
78 }
79 
ReadV2VersionInfo(const Filesystem & filesystem,const std::string & version_file_dir)80 libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadV2VersionInfo(
81     const Filesystem& filesystem, const std::string& version_file_dir) {
82   // Read the v2 version file. V2 version file stores the
83   // IcingSearchEngineVersionProto as a file-backed proto.
84   const std::string v2_version_filepath =
85       MakeVersionFilePath(version_file_dir, kVersionFilenameV2);
86   FileBackedProto<IcingSearchEngineVersionProto> v2_version_file(
87       filesystem, v2_version_filepath);
88   ICING_ASSIGN_OR_RETURN(const IcingSearchEngineVersionProto* v2_version_proto,
89                          v2_version_file.Read());
90 
91   return *v2_version_proto;
92 }
93 
94 }  // namespace
95 
ReadVersion(const Filesystem & filesystem,const std::string & version_file_dir,const std::string & index_base_dir)96 libtextclassifier3::StatusOr<IcingSearchEngineVersionProto> ReadVersion(
97     const Filesystem& filesystem, const std::string& version_file_dir,
98     const std::string& index_base_dir) {
99   // 1. Read the v1 version file
100   ICING_ASSIGN_OR_RETURN(
101       VersionInfo v1_version_info,
102       ReadV1VersionInfo(filesystem, version_file_dir, index_base_dir));
103   if (!v1_version_info.IsValid()) {
104     // This happens if IcingLib's state is invalid (e.g. flash index header file
105     // is missing). Return the invalid version numbers in this case.
106     IcingSearchEngineVersionProto version_proto;
107     version_proto.set_version(v1_version_info.version);
108     version_proto.set_max_version(v1_version_info.max_version);
109     return version_proto;
110   }
111 
112   // 2. Read the v2 version file
113   auto v2_version_proto = ReadV2VersionInfo(filesystem, version_file_dir);
114   if (!v2_version_proto.ok()) {
115     if (!absl_ports::IsNotFound(v2_version_proto.status())) {
116       // Real error.
117       return std::move(v2_version_proto).status();
118     }
119     // The v2 version file has not been written
120     IcingSearchEngineVersionProto version_proto;
121     if (v1_version_info.version < kFirstV2Version) {
122       // There are two scenarios for this case:
123       // 1. It's the first time that we're upgrading from a lower version to a
124       //    version >= kFirstV2Version.
125       //    - It's expected that the v2 version file has not been written yet in
126       //      this case and we return the v1 version numbers instead.
127       // 2. We're rolling forward from a version < kFirstV2Version, after
128       //    rolling back from a previous version >= kFirstV2Version, and for
129       //    some unknown reason we lost the v2 version file in the previous
130       //    version.
131       //    - e.g. version #4 -> version #1 -> version #4, but we lost the v2
132       //      file during version #1.
133       //    - This is a rollforward case, but it's still fine to return the v1
134       //      version number here as ShouldRebuildDerivedFiles can handle
135       //      rollforwards correctly.
136       version_proto.set_version(v1_version_info.version);
137       version_proto.set_max_version(v1_version_info.max_version);
138     } else {
139       // Something weird has happened. During last initialization we were
140       // already on a version >= kFirstV2Version, so the v2 version file
141       // should have been written.
142       // Return an invalid version number in this case and trigger rebuilding
143       // everything.
144       version_proto.set_version(-1);
145       version_proto.set_max_version(v1_version_info.max_version);
146     }
147     return version_proto;
148   }
149 
150   // 3. Check if versions match. If not, it means that we're rolling forward
151   // from a version < kFirstV2Version. In order to trigger rebuilding
152   // everything, we return an invalid version number in this case.
153   IcingSearchEngineVersionProto v2_version_proto_value =
154       std::move(v2_version_proto).ValueOrDie();
155   if (v1_version_info.version != v2_version_proto_value.version()) {
156     v2_version_proto_value.set_version(-1);
157     v2_version_proto_value.mutable_enabled_features()->Clear();
158   }
159 
160   return v2_version_proto_value;
161 }
162 
WriteV1Version(const Filesystem & filesystem,const std::string & version_file_dir,const VersionInfo & version_info)163 libtextclassifier3::Status WriteV1Version(const Filesystem& filesystem,
164                                           const std::string& version_file_dir,
165                                           const VersionInfo& version_info) {
166   ScopedFd scoped_fd(filesystem.OpenForWrite(
167       MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()));
168   if (!scoped_fd.is_valid() ||
169       !filesystem.PWrite(scoped_fd.get(), /*offset=*/0, &version_info,
170                          sizeof(VersionInfo)) ||
171       !filesystem.DataSync(scoped_fd.get())) {
172     return absl_ports::InternalError("Failed to write v1 version file");
173   }
174   return libtextclassifier3::Status::OK;
175 }
176 
WriteV2Version(const Filesystem & filesystem,const std::string & version_file_dir,std::unique_ptr<IcingSearchEngineVersionProto> version_proto)177 libtextclassifier3::Status WriteV2Version(
178     const Filesystem& filesystem, const std::string& version_file_dir,
179     std::unique_ptr<IcingSearchEngineVersionProto> version_proto) {
180   FileBackedProto<IcingSearchEngineVersionProto> v2_version_file(
181       filesystem, MakeVersionFilePath(version_file_dir, kVersionFilenameV2));
182   libtextclassifier3::Status v2_write_status =
183       v2_version_file.Write(std::move(version_proto));
184   if (!v2_write_status.ok()) {
185     return absl_ports::InternalError(absl_ports::StrCat(
186         "Failed to write v2 version file: ", v2_write_status.error_message()));
187   }
188   return libtextclassifier3::Status::OK;
189 }
190 
DiscardVersionFiles(const Filesystem & filesystem,std::string_view version_file_dir)191 libtextclassifier3::Status DiscardVersionFiles(
192     const Filesystem& filesystem, std::string_view version_file_dir) {
193   if (!filesystem.DeleteFile(
194           MakeVersionFilePath(version_file_dir, kVersionFilenameV1).c_str()) ||
195       !filesystem.DeleteFile(
196           MakeVersionFilePath(version_file_dir, kVersionFilenameV2).c_str())) {
197     return absl_ports::InternalError("Failed to discard version files");
198   }
199   return libtextclassifier3::Status::OK;
200 }
201 
GetVersionStateChange(const VersionInfo & existing_version_info,int32_t curr_version)202 StateChange GetVersionStateChange(const VersionInfo& existing_version_info,
203                                   int32_t curr_version) {
204   if (!existing_version_info.IsValid()) {
205     return StateChange::kUndetermined;
206   }
207 
208   if (existing_version_info.version == 0) {
209     return (existing_version_info.max_version == existing_version_info.version)
210                ? StateChange::kVersionZeroUpgrade
211                : StateChange::kVersionZeroRollForward;
212   }
213 
214   if (existing_version_info.version == curr_version) {
215     return StateChange::kCompatible;
216   } else if (existing_version_info.version > curr_version) {
217     return StateChange::kRollBack;
218   } else {  // existing_version_info.version < curr_version
219     return (existing_version_info.max_version == existing_version_info.version)
220                ? StateChange::kUpgrade
221                : StateChange::kRollForward;
222   }
223 }
224 
CalculateRequiredDerivedFilesRebuild(const IcingSearchEngineVersionProto & prev_version_proto,const IcingSearchEngineVersionProto & curr_version_proto)225 DerivedFilesRebuildResult CalculateRequiredDerivedFilesRebuild(
226     const IcingSearchEngineVersionProto& prev_version_proto,
227     const IcingSearchEngineVersionProto& curr_version_proto) {
228   // 1. Do version check using version and max_version numbers
229   if (ShouldRebuildDerivedFiles(GetVersionInfoFromProto(prev_version_proto),
230                                 curr_version_proto.version())) {
231     return DerivedFilesRebuildResult(
232         /*needs_document_store_derived_files_rebuild=*/true,
233         /*needs_schema_store_derived_files_rebuild=*/true,
234         /*needs_term_index_rebuild=*/true,
235         /*needs_integer_index_rebuild=*/true,
236         /*needs_qualified_id_join_index_rebuild=*/true);
237   }
238 
239   // 2. Compare the previous enabled features with the current enabled features
240   // and rebuild if there are differences.
241   std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
242       prev_features;
243   for (const auto& feature : prev_version_proto.enabled_features()) {
244     prev_features.insert(feature.feature_type());
245   }
246   std::unordered_set<IcingSearchEngineFeatureInfoProto::FlaggedFeatureType>
247       curr_features;
248   for (const auto& feature : curr_version_proto.enabled_features()) {
249     curr_features.insert(feature.feature_type());
250   }
251   DerivedFilesRebuildResult result;
252   for (const auto& prev_feature : prev_features) {
253     // If there is an UNKNOWN feature in the previous feature set (note that we
254     // never use UNKNOWN  when writing the version proto), it means that:
255     // - The previous version proto contains a feature enum that is only defined
256     //   in a newer version.
257     // - We've now rolled back to an old version that doesn't understand this
258     //   new enum value, and proto serialization defaults it to 0 (UNKNOWN).
259     // - In this case we need to rebuild everything.
260     if (prev_feature == IcingSearchEngineFeatureInfoProto::UNKNOWN) {
261       return DerivedFilesRebuildResult(
262           /*needs_document_store_derived_files_rebuild=*/true,
263           /*needs_schema_store_derived_files_rebuild=*/true,
264           /*needs_term_index_rebuild=*/true,
265           /*needs_integer_index_rebuild=*/true,
266           /*needs_qualified_id_join_index_rebuild=*/true);
267     }
268     if (curr_features.find(prev_feature) == curr_features.end()) {
269       DerivedFilesRebuildResult required_rebuilds =
270           GetFeatureDerivedFilesRebuildResult(prev_feature);
271       result.CombineWithOtherRebuildResultOr(required_rebuilds);
272     }
273   }
274   for (const auto& curr_feature : curr_features) {
275     if (prev_features.find(curr_feature) == prev_features.end()) {
276       DerivedFilesRebuildResult required_rebuilds =
277           GetFeatureDerivedFilesRebuildResult(curr_feature);
278       result.CombineWithOtherRebuildResultOr(required_rebuilds);
279     }
280   }
281   return result;
282 }
283 
ShouldRebuildDerivedFiles(const VersionInfo & existing_version_info,int32_t curr_version)284 bool ShouldRebuildDerivedFiles(const VersionInfo& existing_version_info,
285                                int32_t curr_version) {
286   StateChange state_change =
287       GetVersionStateChange(existing_version_info, curr_version);
288   switch (state_change) {
289     case StateChange::kCompatible:
290       return false;
291     case StateChange::kUndetermined:
292       [[fallthrough]];
293     case StateChange::kRollBack:
294       [[fallthrough]];
295     case StateChange::kRollForward:
296       [[fallthrough]];
297     case StateChange::kVersionZeroRollForward:
298       [[fallthrough]];
299     case StateChange::kVersionZeroUpgrade:
300       return true;
301     case StateChange::kUpgrade:
302       break;
303   }
304 
305   bool should_rebuild = false;
306   int32_t existing_version = existing_version_info.version;
307   while (existing_version < curr_version) {
308     switch (existing_version) {
309       case 1: {
310         // version 1 -> version 2 upgrade, no need to rebuild
311         break;
312       }
313       case 2: {
314         // version 2 -> version 3 upgrade, no need to rebuild
315         break;
316       }
317       case 3: {
318         // version 3 -> version 4 upgrade, no need to rebuild
319         break;
320       }
321       default:
322         // This should not happen. Rebuild anyway if unsure.
323         should_rebuild |= true;
324     }
325     ++existing_version;
326   }
327   return should_rebuild;
328 }
329 
GetFeatureDerivedFilesRebuildResult(IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature)330 DerivedFilesRebuildResult GetFeatureDerivedFilesRebuildResult(
331     IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) {
332   switch (feature) {
333     case IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR: {
334       return DerivedFilesRebuildResult(
335           /*needs_document_store_derived_files_rebuild=*/false,
336           /*needs_schema_store_derived_files_rebuild=*/false,
337           /*needs_term_index_rebuild=*/true,
338           /*needs_integer_index_rebuild=*/false,
339           /*needs_qualified_id_join_index_rebuild=*/false);
340     }
341     case IcingSearchEngineFeatureInfoProto::UNKNOWN:
342       return DerivedFilesRebuildResult(
343           /*needs_document_store_derived_files_rebuild=*/true,
344           /*needs_schema_store_derived_files_rebuild=*/true,
345           /*needs_term_index_rebuild=*/true,
346           /*needs_integer_index_rebuild=*/true,
347           /*needs_qualified_id_join_index_rebuild=*/true);
348   }
349 }
350 
GetFeatureInfoProto(IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature)351 IcingSearchEngineFeatureInfoProto GetFeatureInfoProto(
352     IcingSearchEngineFeatureInfoProto::FlaggedFeatureType feature) {
353   IcingSearchEngineFeatureInfoProto info;
354   info.set_feature_type(feature);
355 
356   DerivedFilesRebuildResult result =
357       GetFeatureDerivedFilesRebuildResult(feature);
358   info.set_needs_document_store_rebuild(
359       result.needs_document_store_derived_files_rebuild);
360   info.set_needs_schema_store_rebuild(
361       result.needs_schema_store_derived_files_rebuild);
362   info.set_needs_term_index_rebuild(result.needs_term_index_rebuild);
363   info.set_needs_integer_index_rebuild(result.needs_integer_index_rebuild);
364   info.set_needs_qualified_id_join_index_rebuild(
365       result.needs_qualified_id_join_index_rebuild);
366 
367   return info;
368 }
369 
AddEnabledFeatures(const IcingSearchEngineOptions & options,IcingSearchEngineVersionProto * version_proto)370 void AddEnabledFeatures(const IcingSearchEngineOptions& options,
371                         IcingSearchEngineVersionProto* version_proto) {
372   auto* enabled_features = version_proto->mutable_enabled_features();
373   // HasPropertyOperator feature
374   if (options.build_property_existence_metadata_hits()) {
375     enabled_features->Add(GetFeatureInfoProto(
376         IcingSearchEngineFeatureInfoProto::FEATURE_HAS_PROPERTY_OPERATOR));
377   }
378 }
379 
380 }  // namespace version_util
381 
382 }  // namespace lib
383 }  // namespace icing
384