• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/icing-search-engine.h"
16 
17 #include <cstdint>
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <unordered_map>
22 #include <unordered_set>
23 #include <utility>
24 #include <vector>
25 
26 #include "icing/text_classifier/lib3/utils/base/status.h"
27 #include "icing/text_classifier/lib3/utils/base/statusor.h"
28 #include "icing/absl_ports/annotate.h"
29 #include "icing/absl_ports/canonical_errors.h"
30 #include "icing/absl_ports/mutex.h"
31 #include "icing/absl_ports/str_cat.h"
32 #include "icing/file/destructible-file.h"
33 #include "icing/file/file-backed-proto.h"
34 #include "icing/file/filesystem.h"
35 #include "icing/file/version-util.h"
36 #include "icing/index/data-indexing-handler.h"
37 #include "icing/index/hit/doc-hit-info.h"
38 #include "icing/index/index-processor.h"
39 #include "icing/index/index.h"
40 #include "icing/index/integer-section-indexing-handler.h"
41 #include "icing/index/iterator/doc-hit-info-iterator.h"
42 #include "icing/index/numeric/integer-index.h"
43 #include "icing/index/string-section-indexing-handler.h"
44 #include "icing/join/join-processor.h"
45 #include "icing/join/qualified-id-join-indexing-handler.h"
46 #include "icing/join/qualified-id-type-joinable-index.h"
47 #include "icing/legacy/index/icing-filesystem.h"
48 #include "icing/portable/endian.h"
49 #include "icing/proto/debug.pb.h"
50 #include "icing/proto/document.pb.h"
51 #include "icing/proto/initialize.pb.h"
52 #include "icing/proto/internal/optimize.pb.h"
53 #include "icing/proto/logging.pb.h"
54 #include "icing/proto/optimize.pb.h"
55 #include "icing/proto/persist.pb.h"
56 #include "icing/proto/reset.pb.h"
57 #include "icing/proto/schema.pb.h"
58 #include "icing/proto/scoring.pb.h"
59 #include "icing/proto/search.pb.h"
60 #include "icing/proto/status.pb.h"
61 #include "icing/proto/storage.pb.h"
62 #include "icing/proto/term.pb.h"
63 #include "icing/proto/usage.pb.h"
64 #include "icing/query/advanced_query_parser/lexer.h"
65 #include "icing/query/query-features.h"
66 #include "icing/query/query-processor.h"
67 #include "icing/query/query-results.h"
68 #include "icing/query/suggestion-processor.h"
69 #include "icing/result/page-result.h"
70 #include "icing/result/projection-tree.h"
71 #include "icing/result/projector.h"
72 #include "icing/result/result-adjustment-info.h"
73 #include "icing/result/result-retriever-v2.h"
74 #include "icing/schema/schema-store.h"
75 #include "icing/schema/schema-util.h"
76 #include "icing/schema/section.h"
77 #include "icing/scoring/advanced_scoring/score-expression.h"
78 #include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
79 #include "icing/scoring/scored-document-hit.h"
80 #include "icing/scoring/scored-document-hits-ranker.h"
81 #include "icing/scoring/scoring-processor.h"
82 #include "icing/store/document-id.h"
83 #include "icing/store/document-store.h"
84 #include "icing/tokenization/language-segmenter-factory.h"
85 #include "icing/tokenization/language-segmenter.h"
86 #include "icing/transform/normalizer-factory.h"
87 #include "icing/transform/normalizer.h"
88 #include "icing/util/clock.h"
89 #include "icing/util/crc32.h"
90 #include "icing/util/logging.h"
91 #include "icing/util/status-macros.h"
92 #include "icing/util/tokenized-document.h"
93 #include "unicode/uloc.h"
94 
95 namespace icing {
96 namespace lib {
97 
98 namespace {
99 
// Names of the files and subfolders that live directly under the engine's
// base directory. The Make*Path() helpers in this file join them onto
// base_dir with a "/" separator.
constexpr std::string_view kVersionFilename = "version";
constexpr std::string_view kDocumentSubfolderName = "document_dir";
constexpr std::string_view kIndexSubfolderName = "index_dir";
constexpr std::string_view kIntegerIndexSubfolderName = "integer_index_dir";
constexpr std::string_view kQualifiedIdJoinIndexSubfolderName =
    "qualified_id_join_index_dir";
constexpr std::string_view kSchemaSubfolderName = "schema_dir";
// Marker files: the init marker stores the count of init attempts, and the
// set-schema marker signals that a previous schema change did not finish.
constexpr std::string_view kSetSchemaMarkerFilename = "set_schema_marker";
constexpr std::string_view kInitMarkerFilename = "init_marker";
// NOTE(review): presumably holds the status of the last optimize run; its use
// is outside the visible part of this file - confirm.
constexpr std::string_view kOptimizeStatusFilename = "optimize_status";

// The maximum number of unsuccessful initialization attempts from the current
// state that we will tolerate before deleting all data and starting from a
// fresh state.
constexpr int kMaxUnsuccessfulInitAttempts = 5;
115 
// A (namespace, schema type) pair. Used as the key of hash maps that group
// documents by namespace and type.
struct NamespaceTypePair {
  std::string namespace_;
  std::string type;

  // Two pairs are equal only when both the namespace and the type match.
  bool operator==(const NamespaceTypePair& other) const {
    return namespace_ == other.namespace_ && type == other.type;
  }
};

// Hash functor that lets NamespaceTypePair key a std::unordered_map.
struct NamespaceTypePairHasher {
  // Returns a hash that depends on both fields AND on their order.
  //
  // A plain XOR of the two field hashes is symmetric, so {"a", "b"} and
  // {"b", "a"} would always collide, and any pair whose namespace equals its
  // type would hash to 0. Mixing the first hash into the second (the
  // well-known hash_combine recipe) avoids both degenerate cases.
  std::size_t operator()(const NamespaceTypePair& pair) const {
    const std::size_t namespace_hash =
        std::hash<std::string>()(pair.namespace_);
    const std::size_t type_hash = std::hash<std::string>()(pair.type);
    return namespace_hash ^ (type_hash + 0x9e3779b9 + (namespace_hash << 6) +
                             (namespace_hash >> 2));
  }
};
132 
ValidateResultSpec(const DocumentStore * document_store,const ResultSpecProto & result_spec)133 libtextclassifier3::Status ValidateResultSpec(
134     const DocumentStore* document_store, const ResultSpecProto& result_spec) {
135   if (result_spec.num_per_page() < 0) {
136     return absl_ports::InvalidArgumentError(
137         "ResultSpecProto.num_per_page cannot be negative.");
138   }
139   if (result_spec.num_total_bytes_per_page_threshold() <= 0) {
140     return absl_ports::InvalidArgumentError(
141         "ResultSpecProto.num_total_bytes_per_page_threshold cannot be "
142         "non-positive.");
143   }
144   // Validate ResultGroupings.
145   std::unordered_set<int32_t> unique_entry_ids;
146   ResultSpecProto::ResultGroupingType result_grouping_type =
147       result_spec.result_group_type();
148   for (const ResultSpecProto::ResultGrouping& result_grouping :
149        result_spec.result_groupings()) {
150     if (result_grouping.max_results() <= 0) {
151       return absl_ports::InvalidArgumentError(
152           "Cannot specify a result grouping with max results <= 0.");
153     }
154     for (const ResultSpecProto::ResultGrouping::Entry& entry :
155          result_grouping.entry_groupings()) {
156       const std::string& name_space = entry.namespace_();
157       const std::string& schema = entry.schema();
158       auto entry_id_or = document_store->GetResultGroupingEntryId(
159           result_grouping_type, name_space, schema);
160       if (!entry_id_or.ok()) {
161         continue;
162       }
163       int32_t entry_id = entry_id_or.ValueOrDie();
164       if (unique_entry_ids.find(entry_id) != unique_entry_ids.end()) {
165         return absl_ports::InvalidArgumentError(
166             "Entry Ids must be unique across result groups.");
167       }
168       unique_entry_ids.insert(entry_id);
169     }
170   }
171   return libtextclassifier3::Status::OK;
172 }
173 
ValidateSearchSpec(const SearchSpecProto & search_spec,const PerformanceConfiguration & configuration)174 libtextclassifier3::Status ValidateSearchSpec(
175     const SearchSpecProto& search_spec,
176     const PerformanceConfiguration& configuration) {
177   if (search_spec.query().size() > configuration.max_query_length) {
178     return absl_ports::InvalidArgumentError(
179         absl_ports::StrCat("SearchSpecProto.query is longer than the maximum "
180                            "allowed query length: ",
181                            std::to_string(configuration.max_query_length)));
182   }
183   // Check that no unknown features have been enabled in the search spec.
184   std::unordered_set<Feature> query_features_set = GetQueryFeaturesSet();
185   for (const Feature feature : search_spec.enabled_features()) {
186     if (query_features_set.find(feature) == query_features_set.end()) {
187       return absl_ports::InvalidArgumentError(
188           absl_ports::StrCat("Unknown feature in "
189                              "SearchSpecProto.enabled_features: ",
190                              feature));
191     }
192   }
193   return libtextclassifier3::Status::OK;
194 }
195 
ValidateSuggestionSpec(const SuggestionSpecProto & suggestion_spec,const PerformanceConfiguration & configuration)196 libtextclassifier3::Status ValidateSuggestionSpec(
197     const SuggestionSpecProto& suggestion_spec,
198     const PerformanceConfiguration& configuration) {
199   if (suggestion_spec.prefix().empty()) {
200     return absl_ports::InvalidArgumentError(
201         absl_ports::StrCat("SuggestionSpecProto.prefix is empty!"));
202   }
203   if (suggestion_spec.scoring_spec().scoring_match_type() ==
204       TermMatchType::UNKNOWN) {
205     return absl_ports::InvalidArgumentError(
206         absl_ports::StrCat("SuggestionSpecProto.term_match_type is unknown!"));
207   }
208   if (suggestion_spec.num_to_return() <= 0) {
209     return absl_ports::InvalidArgumentError(absl_ports::StrCat(
210         "SuggestionSpecProto.num_to_return must be positive."));
211   }
212   if (suggestion_spec.prefix().size() > configuration.max_query_length) {
213     return absl_ports::InvalidArgumentError(
214         absl_ports::StrCat("SuggestionSpecProto.prefix is longer than the "
215                            "maximum allowed prefix length: ",
216                            std::to_string(configuration.max_query_length)));
217   }
218   return libtextclassifier3::Status::OK;
219 }
220 
221 // Version file is a single file under base_dir containing version info of the
222 // existing data.
MakeVersionFilePath(const std::string & base_dir)223 std::string MakeVersionFilePath(const std::string& base_dir) {
224   return absl_ports::StrCat(base_dir, "/", kVersionFilename);
225 }
226 
227 // Document store files are in a standalone subfolder for easier file
228 // management. We can delete and recreate the subfolder and not touch/affect
229 // anything else.
MakeDocumentDirectoryPath(const std::string & base_dir)230 std::string MakeDocumentDirectoryPath(const std::string& base_dir) {
231   return absl_ports::StrCat(base_dir, "/", kDocumentSubfolderName);
232 }
233 
234 // Makes a temporary folder path for the document store which will be used
235 // during full optimization.
MakeDocumentTemporaryDirectoryPath(const std::string & base_dir)236 std::string MakeDocumentTemporaryDirectoryPath(const std::string& base_dir) {
237   return absl_ports::StrCat(base_dir, "/", kDocumentSubfolderName,
238                             "_optimize_tmp");
239 }
240 
241 // Index files are in a standalone subfolder because for easier file management.
242 // We can delete and recreate the subfolder and not touch/affect anything
243 // else.
MakeIndexDirectoryPath(const std::string & base_dir)244 std::string MakeIndexDirectoryPath(const std::string& base_dir) {
245   return absl_ports::StrCat(base_dir, "/", kIndexSubfolderName);
246 }
247 
248 // Working path for integer index. Integer index is derived from
249 // PersistentStorage and it will take full ownership of this working path,
250 // including creation/deletion. See PersistentStorage for more details about
251 // working path.
MakeIntegerIndexWorkingPath(const std::string & base_dir)252 std::string MakeIntegerIndexWorkingPath(const std::string& base_dir) {
253   return absl_ports::StrCat(base_dir, "/", kIntegerIndexSubfolderName);
254 }
255 
256 // Working path for qualified id join index. It is derived from
257 // PersistentStorage and it will take full ownership of this working path,
258 // including creation/deletion. See PersistentStorage for more details about
259 // working path.
MakeQualifiedIdJoinIndexWorkingPath(const std::string & base_dir)260 std::string MakeQualifiedIdJoinIndexWorkingPath(const std::string& base_dir) {
261   return absl_ports::StrCat(base_dir, "/", kQualifiedIdJoinIndexSubfolderName);
262 }
263 
264 // SchemaStore files are in a standalone subfolder for easier file management.
265 // We can delete and recreate the subfolder and not touch/affect anything
266 // else.
MakeSchemaDirectoryPath(const std::string & base_dir)267 std::string MakeSchemaDirectoryPath(const std::string& base_dir) {
268   return absl_ports::StrCat(base_dir, "/", kSchemaSubfolderName);
269 }
270 
MakeSetSchemaMarkerFilePath(const std::string & base_dir)271 std::string MakeSetSchemaMarkerFilePath(const std::string& base_dir) {
272   return absl_ports::StrCat(base_dir, "/", kSetSchemaMarkerFilename);
273 }
274 
MakeInitMarkerFilePath(const std::string & base_dir)275 std::string MakeInitMarkerFilePath(const std::string& base_dir) {
276   return absl_ports::StrCat(base_dir, "/", kInitMarkerFilename);
277 }
278 
TransformStatus(const libtextclassifier3::Status & internal_status,StatusProto * status_proto)279 void TransformStatus(const libtextclassifier3::Status& internal_status,
280                      StatusProto* status_proto) {
281   StatusProto::Code code;
282   if (!internal_status.ok()) {
283     ICING_LOG(WARNING) << "Error: " << internal_status.error_code()
284                        << ", Message: " << internal_status.error_message();
285   }
286   switch (internal_status.CanonicalCode()) {
287     case libtextclassifier3::StatusCode::OK:
288       code = StatusProto::OK;
289       break;
290     case libtextclassifier3::StatusCode::DATA_LOSS:
291       code = StatusProto::WARNING_DATA_LOSS;
292       break;
293     case libtextclassifier3::StatusCode::INVALID_ARGUMENT:
294       code = StatusProto::INVALID_ARGUMENT;
295       break;
296     case libtextclassifier3::StatusCode::NOT_FOUND:
297       code = StatusProto::NOT_FOUND;
298       break;
299     case libtextclassifier3::StatusCode::FAILED_PRECONDITION:
300       code = StatusProto::FAILED_PRECONDITION;
301       break;
302     case libtextclassifier3::StatusCode::ABORTED:
303       code = StatusProto::ABORTED;
304       break;
305     case libtextclassifier3::StatusCode::INTERNAL:
306       // TODO(b/147699081): Cleanup our internal use of INTERNAL since it
307       // doesn't match with what it *should* indicate as described in
308       // go/icing-library-apis.
309       code = StatusProto::INTERNAL;
310       break;
311     case libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED:
312       // TODO(b/147699081): Note that we don't detect all cases of OUT_OF_SPACE
313       // (e.g. if the document log is full). And we use RESOURCE_EXHAUSTED
314       // internally to indicate other resources are exhausted (e.g.
315       // DocHitInfos) - although none of these are exposed through the API.
316       // Consider separating the two cases out more clearly.
317       code = StatusProto::OUT_OF_SPACE;
318       break;
319     case libtextclassifier3::StatusCode::ALREADY_EXISTS:
320       code = StatusProto::ALREADY_EXISTS;
321       break;
322     case libtextclassifier3::StatusCode::CANCELLED:
323       [[fallthrough]];
324     case libtextclassifier3::StatusCode::UNKNOWN:
325       [[fallthrough]];
326     case libtextclassifier3::StatusCode::DEADLINE_EXCEEDED:
327       [[fallthrough]];
328     case libtextclassifier3::StatusCode::PERMISSION_DENIED:
329       [[fallthrough]];
330     case libtextclassifier3::StatusCode::OUT_OF_RANGE:
331       [[fallthrough]];
332     case libtextclassifier3::StatusCode::UNIMPLEMENTED:
333       [[fallthrough]];
334     case libtextclassifier3::StatusCode::UNAVAILABLE:
335       [[fallthrough]];
336     case libtextclassifier3::StatusCode::UNAUTHENTICATED:
337       // Other internal status codes aren't supported externally yet. If it
338       // should be supported, add another switch-case above.
339       ICING_LOG(ERROR) << "Internal status code "
340                        << internal_status.error_code()
341                        << " not supported in the external API";
342       code = StatusProto::UNKNOWN;
343       break;
344   }
345   status_proto->set_code(code);
346   status_proto->set_message(internal_status.error_message());
347 }
348 
RetrieveAndAddDocumentInfo(const DocumentStore * document_store,DeleteByQueryResultProto & result_proto,std::unordered_map<NamespaceTypePair,DeleteByQueryResultProto::DocumentGroupInfo *,NamespaceTypePairHasher> & info_map,DocumentId document_id)349 libtextclassifier3::Status RetrieveAndAddDocumentInfo(
350     const DocumentStore* document_store, DeleteByQueryResultProto& result_proto,
351     std::unordered_map<NamespaceTypePair,
352                        DeleteByQueryResultProto::DocumentGroupInfo*,
353                        NamespaceTypePairHasher>& info_map,
354     DocumentId document_id) {
355   ICING_ASSIGN_OR_RETURN(DocumentProto document,
356                          document_store->Get(document_id));
357   NamespaceTypePair key = {document.namespace_(), document.schema()};
358   auto iter = info_map.find(key);
359   if (iter == info_map.end()) {
360     auto entry = result_proto.add_deleted_documents();
361     entry->set_namespace_(std::move(document.namespace_()));
362     entry->set_schema(std::move(document.schema()));
363     entry->add_uris(std::move(document.uri()));
364     info_map[key] = entry;
365   } else {
366     iter->second->add_uris(std::move(document.uri()));
367   }
368   return libtextclassifier3::Status::OK;
369 }
370 
ShouldRebuildIndex(const OptimizeStatsProto & optimize_stats,float optimize_rebuild_index_threshold)371 bool ShouldRebuildIndex(const OptimizeStatsProto& optimize_stats,
372                         float optimize_rebuild_index_threshold) {
373   int num_invalid_documents = optimize_stats.num_deleted_documents() +
374                               optimize_stats.num_expired_documents();
375   return num_invalid_documents >= optimize_stats.num_original_documents() *
376                                       optimize_rebuild_index_threshold;
377 }
378 
379 // Useful method to get RankingStrategy if advanced scoring is enabled. When the
380 // "RelevanceScore" function is used in the advanced scoring expression,
381 // RankingStrategy will be treated as RELEVANCE_SCORE in order to prepare the
382 // necessary information needed for calculating relevance score.
383 libtextclassifier3::StatusOr<ScoringSpecProto::RankingStrategy::Code>
GetRankingStrategyFromScoringSpec(const ScoringSpecProto & scoring_spec)384 GetRankingStrategyFromScoringSpec(const ScoringSpecProto& scoring_spec) {
385   if (scoring_spec.advanced_scoring_expression().empty()) {
386     return scoring_spec.rank_by();
387   }
388   // TODO(b/261474063) The Lexer will be called again when creating the
389   // AdvancedScorer instance. Consider refactoring the code to allow the Lexer
390   // to be called only once.
391   Lexer lexer(scoring_spec.advanced_scoring_expression(),
392               Lexer::Language::SCORING);
393   ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
394                          lexer.ExtractTokens());
395   for (const Lexer::LexerToken& token : lexer_tokens) {
396     if (token.type == Lexer::TokenType::FUNCTION_NAME &&
397         token.text == RelevanceScoreFunctionScoreExpression::kFunctionName) {
398       return ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE;
399     }
400   }
401   return ScoringSpecProto::RankingStrategy::NONE;
402 }
403 
404 }  // namespace
405 
// Convenience constructor: delegates to the constructor that takes explicit
// dependencies, supplying freshly created Filesystem, IcingFilesystem and
// Clock instances and forwarding `jni_cache`.
IcingSearchEngine::IcingSearchEngine(const IcingSearchEngineOptions& options,
                                     std::unique_ptr<const JniCache> jni_cache)
    : IcingSearchEngine(options, std::make_unique<Filesystem>(),
                        std::make_unique<IcingFilesystem>(),
                        std::make_unique<Clock>(), std::move(jni_cache)) {}
411 
// Constructs the engine from explicitly supplied dependencies, taking
// ownership of each of them. Only stores the members and logs the base
// directory; no initialization work happens here (see Initialize()).
IcingSearchEngine::IcingSearchEngine(
    IcingSearchEngineOptions options,
    std::unique_ptr<const Filesystem> filesystem,
    std::unique_ptr<const IcingFilesystem> icing_filesystem,
    std::unique_ptr<Clock> clock, std::unique_ptr<const JniCache> jni_cache)
    : options_(std::move(options)),
      filesystem_(std::move(filesystem)),
      icing_filesystem_(std::move(icing_filesystem)),
      clock_(std::move(clock)),
      jni_cache_(std::move(jni_cache)) {
  ICING_VLOG(1) << "Creating IcingSearchEngine in dir: " << options_.base_dir();
}
424 
~IcingSearchEngine()425 IcingSearchEngine::~IcingSearchEngine() {
426   if (initialized_) {
427     if (PersistToDisk(PersistType::FULL).status().code() != StatusProto::OK) {
428       ICING_LOG(ERROR)
429           << "Error persisting to disk in IcingSearchEngine destructor";
430     }
431   }
432 }
433 
// Public entry point for initialization: takes the engine mutex exclusively
// and forwards to InternalInitialize(), returning its result.
InitializeResultProto IcingSearchEngine::Initialize() {
  // This method does both read and write so we need a writer lock. Using two
  // locks (reader and writer) has the chance to be interrupted during
  // switching.
  absl_ports::unique_lock l(&mutex_);
  return InternalInitialize();
}
441 
// Releases every component owned by the engine (schema store, document store,
// segmenter, normalizer and the three indices) by resetting its owning
// pointer.
// NOTE(review): the reset order may matter if components reference each
// other - keep it as is unless verified.
void IcingSearchEngine::ResetMembers() {
  schema_store_.reset();
  document_store_.reset();
  language_segmenter_.reset();
  normalizer_.reset();
  index_.reset();
  integer_index_.reset();
  qualified_id_join_index_.reset();
}
451 
// Tracks consecutive initialization attempts through the init marker file.
//
// Reads the attempt count stored in the marker file (kept in network byte
// order). If that count exceeds kMaxUnsuccessfulInitAttempts, all members are
// reset, the whole base directory is deleted and recreated, and the count
// restarts from zero. In every case the incremented count is then written back
// and synced before returning, and the count that was read is recorded in
// `initialize_stats` as the number of previous init failures.
//
// Returns:
//   OK if the marker file was updated and no data had to be cleared.
//   DATA_LOSS if the failure limit was hit and all data was cleared.
//   INTERNAL if the base directory could not be deleted/recreated or the
//     marker file could not be written and synced.
libtextclassifier3::Status IcingSearchEngine::CheckInitMarkerFile(
    InitializeStatsProto* initialize_stats) {
  // Check to see if the marker file exists and if we've already passed our max
  // number of init attempts.
  std::string marker_filepath = MakeInitMarkerFilePath(options_.base_dir());
  // Existence is sampled before the file is opened for writing below.
  bool file_exists = filesystem_->FileExists(marker_filepath.c_str());
  int network_init_attempts = 0;
  int host_init_attempts = 0;

  // Read the number of previous failed init attempts from the file. If it
  // fails, then just assume the value is zero (the most likely reason for
  // failure would be non-existence because the last init was successful
  // anyways).
  std::unique_ptr<ScopedFd> marker_file_fd = std::make_unique<ScopedFd>(
      filesystem_->OpenForWrite(marker_filepath.c_str()));
  // Default-constructed status; returned as is unless the wipe below replaces
  // it with DATA_LOSS.
  libtextclassifier3::Status status;
  if (file_exists &&
      filesystem_->PRead(marker_file_fd->get(), &network_init_attempts,
                         sizeof(network_init_attempts), /*offset=*/0)) {
    host_init_attempts = GNetworkToHostL(network_init_attempts);
    if (host_init_attempts > kMaxUnsuccessfulInitAttempts) {
      // We've tried and failed to init too many times. We need to throw
      // everything out and start from scratch.
      ResetMembers();
      // Drop the marker file descriptor before its directory is deleted.
      marker_file_fd.reset();

      // Delete the entire base directory.
      if (!filesystem_->DeleteDirectoryRecursively(
              options_.base_dir().c_str())) {
        return absl_ports::InternalError("Failed to delete icing base dir!");
      }

      // Create the base directory again and reopen marker file.
      if (!filesystem_->CreateDirectoryRecursively(
              options_.base_dir().c_str())) {
        return absl_ports::InternalError("Failed to create icing base dir!");
      }

      marker_file_fd = std::make_unique<ScopedFd>(
          filesystem_->OpenForWrite(marker_filepath.c_str()));

      status = absl_ports::DataLossError(
          "Encountered failed initialization limit. Cleared all data.");
      host_init_attempts = 0;
    }
  }

  // Use network_init_attempts here because we might have set host_init_attempts
  // to 0 if it exceeded the max threshold.
  initialize_stats->set_num_previous_init_failures(
      GNetworkToHostL(network_init_attempts));

  ++host_init_attempts;
  network_init_attempts = GHostToNetworkL(host_init_attempts);
  // Write the updated number of attempts before we get started.
  if (!filesystem_->PWrite(marker_file_fd->get(), /*offset=*/0,
                           &network_init_attempts,
                           sizeof(network_init_attempts)) ||
      !filesystem_->DataSync(marker_file_fd->get())) {
    return absl_ports::InternalError(
        "Failed to write and sync init marker file");
  }

  return status;
}
517 
InternalInitialize()518 InitializeResultProto IcingSearchEngine::InternalInitialize() {
519   ICING_VLOG(1) << "Initializing IcingSearchEngine in dir: "
520                 << options_.base_dir();
521 
522   // Measure the latency of the initialization process.
523   std::unique_ptr<Timer> initialize_timer = clock_->GetNewTimer();
524 
525   InitializeResultProto result_proto;
526   StatusProto* result_status = result_proto.mutable_status();
527   InitializeStatsProto* initialize_stats =
528       result_proto.mutable_initialize_stats();
529   if (initialized_) {
530     // Already initialized.
531     result_status->set_code(StatusProto::OK);
532     initialize_stats->set_latency_ms(
533         initialize_timer->GetElapsedMilliseconds());
534     initialize_stats->set_num_documents(document_store_->num_documents());
535     return result_proto;
536   }
537 
538   // Now go ahead and try to initialize.
539   libtextclassifier3::Status status = InitializeMembers(initialize_stats);
540   if (status.ok() || absl_ports::IsDataLoss(status)) {
541     // We successfully initialized. We should delete the init marker file to
542     // indicate a successful init.
543     std::string marker_filepath = MakeInitMarkerFilePath(options_.base_dir());
544     if (!filesystem_->DeleteFile(marker_filepath.c_str())) {
545       status = absl_ports::InternalError("Failed to delete init marker file!");
546     } else {
547       initialized_ = true;
548     }
549   }
550   TransformStatus(status, result_status);
551   initialize_stats->set_latency_ms(initialize_timer->GetElapsedMilliseconds());
552   return result_proto;
553 }
554 
InitializeMembers(InitializeStatsProto * initialize_stats)555 libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
556     InitializeStatsProto* initialize_stats) {
557   ICING_RETURN_ERROR_IF_NULL(initialize_stats);
558 
559   // Make sure the base directory exists
560   if (!filesystem_->CreateDirectoryRecursively(options_.base_dir().c_str())) {
561     return absl_ports::InternalError(absl_ports::StrCat(
562         "Could not create directory: ", options_.base_dir()));
563   }
564 
565   // Check to see if the marker file exists and if we've already passed our max
566   // number of init attempts.
567   libtextclassifier3::Status status = CheckInitMarkerFile(initialize_stats);
568   if (!status.ok() && !absl_ports::IsDataLoss(status)) {
569     return status;
570   }
571 
572   // Read version file and determine the state change.
573   const std::string version_filepath = MakeVersionFilePath(options_.base_dir());
574   const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
575   ICING_ASSIGN_OR_RETURN(
576       version_util::VersionInfo version_info,
577       version_util::ReadVersion(*filesystem_, version_filepath, index_dir));
578   version_util::StateChange version_state_change =
579       version_util::GetVersionStateChange(version_info);
580   if (version_state_change != version_util::StateChange::kCompatible) {
581     // Step 1: migrate schema according to the version state change.
582     ICING_RETURN_IF_ERROR(SchemaStore::MigrateSchema(
583         filesystem_.get(), MakeSchemaDirectoryPath(options_.base_dir()),
584         version_state_change, version_util::kVersion));
585 
586     // Step 2: discard all derived data
587     ICING_RETURN_IF_ERROR(DiscardDerivedFiles());
588 
589     // Step 3: update version file
590     version_util::VersionInfo new_version_info(
591         version_util::kVersion,
592         std::max(version_info.max_version, version_util::kVersion));
593     ICING_RETURN_IF_ERROR(version_util::WriteVersion(
594         *filesystem_, version_filepath, new_version_info));
595   }
596 
597   ICING_RETURN_IF_ERROR(InitializeSchemaStore(initialize_stats));
598 
599   // TODO(b/156383798) : Resolve how to specify the locale.
600   language_segmenter_factory::SegmenterOptions segmenter_options(
601       ULOC_US, jni_cache_.get());
602   TC3_ASSIGN_OR_RETURN(language_segmenter_, language_segmenter_factory::Create(
603                                                 std::move(segmenter_options)));
604 
605   TC3_ASSIGN_OR_RETURN(normalizer_,
606                        normalizer_factory::Create(options_.max_token_length()));
607 
608   std::string marker_filepath =
609       MakeSetSchemaMarkerFilePath(options_.base_dir());
610 
611   libtextclassifier3::Status index_init_status;
612   if (absl_ports::IsNotFound(schema_store_->GetSchema().status())) {
613     // The schema was either lost or never set before. Wipe out the doc store
614     // and index directories and initialize them from scratch.
615     const std::string doc_store_dir =
616         MakeDocumentDirectoryPath(options_.base_dir());
617     const std::string integer_index_dir =
618         MakeIntegerIndexWorkingPath(options_.base_dir());
619     const std::string qualified_id_join_index_dir =
620         MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
621     if (!filesystem_->DeleteDirectoryRecursively(doc_store_dir.c_str()) ||
622         !filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
623         !IntegerIndex::Discard(*filesystem_, integer_index_dir).ok() ||
624         !QualifiedIdTypeJoinableIndex::Discard(*filesystem_,
625                                                qualified_id_join_index_dir)
626              .ok()) {
627       return absl_ports::InternalError(absl_ports::StrCat(
628           "Could not delete directories: ", index_dir, ", ", integer_index_dir,
629           ", ", qualified_id_join_index_dir, " and ", doc_store_dir));
630     }
631     ICING_RETURN_IF_ERROR(InitializeDocumentStore(
632         /*force_recovery_and_revalidate_documents=*/false, initialize_stats));
633     index_init_status = InitializeIndex(initialize_stats);
634     if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
635       return index_init_status;
636     }
637   } else if (filesystem_->FileExists(marker_filepath.c_str())) {
638     // If the marker file is still around then something wonky happened when we
639     // last tried to set the schema.
640     ICING_RETURN_IF_ERROR(InitializeDocumentStore(
641         /*force_recovery_and_revalidate_documents=*/true, initialize_stats));
642 
643     // We're going to need to build the index from scratch. So just delete its
644     // directory now.
645     // Discard index directory and instantiate a new one.
646     Index::Options index_options(index_dir, options_.index_merge_size());
647     if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
648         !filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
649       return absl_ports::InternalError(
650           absl_ports::StrCat("Could not recreate directory: ", index_dir));
651     }
652     ICING_ASSIGN_OR_RETURN(index_,
653                            Index::Create(index_options, filesystem_.get(),
654                                          icing_filesystem_.get()));
655 
656     // Discard integer index directory and instantiate a new one.
657     std::string integer_index_dir =
658         MakeIntegerIndexWorkingPath(options_.base_dir());
659     ICING_RETURN_IF_ERROR(
660         IntegerIndex::Discard(*filesystem_, integer_index_dir));
661     ICING_ASSIGN_OR_RETURN(
662         integer_index_,
663         IntegerIndex::Create(*filesystem_, std::move(integer_index_dir),
664                              options_.pre_mapping_fbv()));
665 
666     // Discard qualified id join index directory and instantiate a new one.
667     std::string qualified_id_join_index_dir =
668         MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
669     ICING_RETURN_IF_ERROR(QualifiedIdTypeJoinableIndex::Discard(
670         *filesystem_, qualified_id_join_index_dir));
671     ICING_ASSIGN_OR_RETURN(
672         qualified_id_join_index_,
673         QualifiedIdTypeJoinableIndex::Create(
674             *filesystem_, std::move(qualified_id_join_index_dir),
675             options_.pre_mapping_fbv(), options_.use_persistent_hash_map()));
676 
677     std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
678     IndexRestorationResult restore_result = RestoreIndexIfNeeded();
679     index_init_status = std::move(restore_result.status);
680     // DATA_LOSS means that we have successfully initialized and re-added
681     // content to the index. Some indexed content was lost, but otherwise the
682     // index is in a valid state and can be queried.
683     if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
684       return index_init_status;
685     }
686 
687     // Delete the marker file to indicate that everything is now in sync with
688     // whatever changes were made to the schema.
689     filesystem_->DeleteFile(marker_filepath.c_str());
690 
691     initialize_stats->set_index_restoration_latency_ms(
692         restore_timer->GetElapsedMilliseconds());
693     initialize_stats->set_index_restoration_cause(
694         InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
695     initialize_stats->set_integer_index_restoration_cause(
696         InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
697     initialize_stats->set_qualified_id_join_index_restoration_cause(
698         InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
699   } else if (version_state_change != version_util::StateChange::kCompatible) {
700     ICING_RETURN_IF_ERROR(InitializeDocumentStore(
701         /*force_recovery_and_revalidate_documents=*/true, initialize_stats));
702     index_init_status = InitializeIndex(initialize_stats);
703     if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
704       return index_init_status;
705     }
706 
707     initialize_stats->set_schema_store_recovery_cause(
708         InitializeStatsProto::VERSION_CHANGED);
709     initialize_stats->set_document_store_recovery_cause(
710         InitializeStatsProto::VERSION_CHANGED);
711     initialize_stats->set_index_restoration_cause(
712         InitializeStatsProto::VERSION_CHANGED);
713     initialize_stats->set_integer_index_restoration_cause(
714         InitializeStatsProto::VERSION_CHANGED);
715     initialize_stats->set_qualified_id_join_index_restoration_cause(
716         InitializeStatsProto::VERSION_CHANGED);
717   } else {
718     ICING_RETURN_IF_ERROR(InitializeDocumentStore(
719         /*force_recovery_and_revalidate_documents=*/false, initialize_stats));
720     index_init_status = InitializeIndex(initialize_stats);
721     if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
722       return index_init_status;
723     }
724   }
725 
726   if (status.ok()) {
727     status = index_init_status;
728   }
729 
730   result_state_manager_ = std::make_unique<ResultStateManager>(
731       performance_configuration_.max_num_total_hits, *document_store_);
732 
733   return status;
734 }
735 
InitializeSchemaStore(InitializeStatsProto * initialize_stats)736 libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore(
737     InitializeStatsProto* initialize_stats) {
738   ICING_RETURN_ERROR_IF_NULL(initialize_stats);
739 
740   const std::string schema_store_dir =
741       MakeSchemaDirectoryPath(options_.base_dir());
742   // Make sure the sub-directory exists
743   if (!filesystem_->CreateDirectoryRecursively(schema_store_dir.c_str())) {
744     return absl_ports::InternalError(
745         absl_ports::StrCat("Could not create directory: ", schema_store_dir));
746   }
747   ICING_ASSIGN_OR_RETURN(
748       schema_store_, SchemaStore::Create(filesystem_.get(), schema_store_dir,
749                                          clock_.get(), initialize_stats));
750 
751   return libtextclassifier3::Status::OK;
752 }
753 
InitializeDocumentStore(bool force_recovery_and_revalidate_documents,InitializeStatsProto * initialize_stats)754 libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore(
755     bool force_recovery_and_revalidate_documents,
756     InitializeStatsProto* initialize_stats) {
757   ICING_RETURN_ERROR_IF_NULL(initialize_stats);
758 
759   const std::string document_dir =
760       MakeDocumentDirectoryPath(options_.base_dir());
761   // Make sure the sub-directory exists
762   if (!filesystem_->CreateDirectoryRecursively(document_dir.c_str())) {
763     return absl_ports::InternalError(
764         absl_ports::StrCat("Could not create directory: ", document_dir));
765   }
766   ICING_ASSIGN_OR_RETURN(
767       DocumentStore::CreateResult create_result,
768       DocumentStore::Create(filesystem_.get(), document_dir, clock_.get(),
769                             schema_store_.get(),
770                             force_recovery_and_revalidate_documents,
771                             options_.document_store_namespace_id_fingerprint(),
772                             options_.compression_level(), initialize_stats));
773   document_store_ = std::move(create_result.document_store);
774 
775   return libtextclassifier3::Status::OK;
776 }
777 
InitializeIndex(InitializeStatsProto * initialize_stats)778 libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
779     InitializeStatsProto* initialize_stats) {
780   ICING_RETURN_ERROR_IF_NULL(initialize_stats);
781 
782   const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
783   // Make sure the sub-directory exists
784   if (!filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
785     return absl_ports::InternalError(
786         absl_ports::StrCat("Could not create directory: ", index_dir));
787   }
788   Index::Options index_options(index_dir, options_.index_merge_size());
789 
790   // Term index
791   InitializeStatsProto::RecoveryCause index_recovery_cause;
792   auto index_or =
793       Index::Create(index_options, filesystem_.get(), icing_filesystem_.get());
794   if (!index_or.ok()) {
795     if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
796         !filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
797       return absl_ports::InternalError(
798           absl_ports::StrCat("Could not recreate directory: ", index_dir));
799     }
800 
801     index_recovery_cause = InitializeStatsProto::IO_ERROR;
802 
803     // Try recreating it from scratch and re-indexing everything.
804     ICING_ASSIGN_OR_RETURN(index_,
805                            Index::Create(index_options, filesystem_.get(),
806                                          icing_filesystem_.get()));
807   } else {
808     // Index was created fine.
809     index_ = std::move(index_or).ValueOrDie();
810     // If a recover does have to happen, then it must be because the index is
811     // out of sync with the document store.
812     index_recovery_cause = InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
813   }
814 
815   // Integer index
816   std::string integer_index_dir =
817       MakeIntegerIndexWorkingPath(options_.base_dir());
818   InitializeStatsProto::RecoveryCause integer_index_recovery_cause;
819   auto integer_index_or = IntegerIndex::Create(*filesystem_, integer_index_dir,
820                                                options_.pre_mapping_fbv());
821   if (!integer_index_or.ok()) {
822     ICING_RETURN_IF_ERROR(
823         IntegerIndex::Discard(*filesystem_, integer_index_dir));
824 
825     integer_index_recovery_cause = InitializeStatsProto::IO_ERROR;
826 
827     // Try recreating it from scratch and re-indexing everything.
828     ICING_ASSIGN_OR_RETURN(
829         integer_index_,
830         IntegerIndex::Create(*filesystem_, std::move(integer_index_dir),
831                              options_.pre_mapping_fbv()));
832   } else {
833     // Integer index was created fine.
834     integer_index_ = std::move(integer_index_or).ValueOrDie();
835     // If a recover does have to happen, then it must be because the index is
836     // out of sync with the document store.
837     integer_index_recovery_cause =
838         InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
839   }
840 
841   // Qualified id join index
842   std::string qualified_id_join_index_dir =
843       MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
844   InitializeStatsProto::RecoveryCause qualified_id_join_index_recovery_cause;
845   auto qualified_id_join_index_or = QualifiedIdTypeJoinableIndex::Create(
846       *filesystem_, qualified_id_join_index_dir, options_.pre_mapping_fbv(),
847       options_.use_persistent_hash_map());
848   if (!qualified_id_join_index_or.ok()) {
849     ICING_RETURN_IF_ERROR(QualifiedIdTypeJoinableIndex::Discard(
850         *filesystem_, qualified_id_join_index_dir));
851 
852     qualified_id_join_index_recovery_cause = InitializeStatsProto::IO_ERROR;
853 
854     // Try recreating it from scratch and rebuild everything.
855     ICING_ASSIGN_OR_RETURN(
856         qualified_id_join_index_,
857         QualifiedIdTypeJoinableIndex::Create(
858             *filesystem_, std::move(qualified_id_join_index_dir),
859             options_.pre_mapping_fbv(), options_.use_persistent_hash_map()));
860   } else {
861     // Qualified id join index was created fine.
862     qualified_id_join_index_ =
863         std::move(qualified_id_join_index_or).ValueOrDie();
864     // If a recover does have to happen, then it must be because the index is
865     // out of sync with the document store.
866     qualified_id_join_index_recovery_cause =
867         InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
868   }
869 
870   std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
871   IndexRestorationResult restore_result = RestoreIndexIfNeeded();
872   if (restore_result.index_needed_restoration ||
873       restore_result.integer_index_needed_restoration ||
874       restore_result.qualified_id_join_index_needed_restoration) {
875     initialize_stats->set_index_restoration_latency_ms(
876         restore_timer->GetElapsedMilliseconds());
877 
878     if (restore_result.index_needed_restoration) {
879       initialize_stats->set_index_restoration_cause(index_recovery_cause);
880     }
881     if (restore_result.integer_index_needed_restoration) {
882       initialize_stats->set_integer_index_restoration_cause(
883           integer_index_recovery_cause);
884     }
885     if (restore_result.qualified_id_join_index_needed_restoration) {
886       initialize_stats->set_qualified_id_join_index_restoration_cause(
887           qualified_id_join_index_recovery_cause);
888     }
889   }
890   return restore_result.status;
891 }
892 
SetSchema(const SchemaProto & new_schema,bool ignore_errors_and_delete_documents)893 SetSchemaResultProto IcingSearchEngine::SetSchema(
894     const SchemaProto& new_schema, bool ignore_errors_and_delete_documents) {
895   return SetSchema(SchemaProto(new_schema), ignore_errors_and_delete_documents);
896 }
897 
SetSchema(SchemaProto && new_schema,bool ignore_errors_and_delete_documents)898 SetSchemaResultProto IcingSearchEngine::SetSchema(
899     SchemaProto&& new_schema, bool ignore_errors_and_delete_documents) {
900   ICING_VLOG(1) << "Setting new Schema";
901 
902   SetSchemaResultProto result_proto;
903   StatusProto* result_status = result_proto.mutable_status();
904 
905   absl_ports::unique_lock l(&mutex_);
906   ScopedTimer timer(clock_->GetNewTimer(), [&result_proto](int64_t t) {
907     result_proto.set_latency_ms(t);
908   });
909   if (!initialized_) {
910     result_status->set_code(StatusProto::FAILED_PRECONDITION);
911     result_status->set_message("IcingSearchEngine has not been initialized!");
912     return result_proto;
913   }
914 
915   auto lost_previous_schema_or = LostPreviousSchema();
916   if (!lost_previous_schema_or.ok()) {
917     TransformStatus(lost_previous_schema_or.status(), result_status);
918     return result_proto;
919   }
920   bool lost_previous_schema = lost_previous_schema_or.ValueOrDie();
921 
922   std::string marker_filepath =
923       MakeSetSchemaMarkerFilePath(options_.base_dir());
924   // Create the marker file indicating that we are going to apply a schema
925   // change. No need to write anything to the marker file - its existence is the
926   // only thing that matters. The marker file is used to indicate if we
927   // encountered a crash or a power loss while updating the schema and other
928   // files. So set it up to be deleted as long as we return from this function.
929   DestructibleFile marker_file(marker_filepath, filesystem_.get());
930 
931   auto set_schema_result_or = schema_store_->SetSchema(
932       std::move(new_schema), ignore_errors_and_delete_documents,
933       options_.allow_circular_schema_definitions());
934   if (!set_schema_result_or.ok()) {
935     TransformStatus(set_schema_result_or.status(), result_status);
936     return result_proto;
937   }
938   SchemaStore::SetSchemaResult set_schema_result =
939       std::move(set_schema_result_or).ValueOrDie();
940 
941   for (const std::string& deleted_type :
942        set_schema_result.schema_types_deleted_by_name) {
943     result_proto.add_deleted_schema_types(deleted_type);
944   }
945 
946   for (const std::string& incompatible_type :
947        set_schema_result.schema_types_incompatible_by_name) {
948     result_proto.add_incompatible_schema_types(incompatible_type);
949   }
950 
951   for (const std::string& new_type :
952        set_schema_result.schema_types_new_by_name) {
953     result_proto.add_new_schema_types(std::move(new_type));
954   }
955 
956   for (const std::string& compatible_type :
957        set_schema_result.schema_types_changed_fully_compatible_by_name) {
958     result_proto.add_fully_compatible_changed_schema_types(
959         std::move(compatible_type));
960   }
961 
962   bool index_incompatible =
963       !set_schema_result.schema_types_index_incompatible_by_name.empty();
964   for (const std::string& index_incompatible_type :
965        set_schema_result.schema_types_index_incompatible_by_name) {
966     result_proto.add_index_incompatible_changed_schema_types(
967         std::move(index_incompatible_type));
968   }
969 
970   bool join_incompatible =
971       !set_schema_result.schema_types_join_incompatible_by_name.empty();
972   for (const std::string& join_incompatible_type :
973        set_schema_result.schema_types_join_incompatible_by_name) {
974     result_proto.add_join_incompatible_changed_schema_types(
975         std::move(join_incompatible_type));
976   }
977 
978   libtextclassifier3::Status status;
979   if (set_schema_result.success) {
980     if (lost_previous_schema) {
981       // No previous schema to calculate a diff against. We have to go through
982       // and revalidate all the Documents in the DocumentStore
983       status = document_store_->UpdateSchemaStore(schema_store_.get());
984       if (!status.ok()) {
985         TransformStatus(status, result_status);
986         return result_proto;
987       }
988     } else if (!set_schema_result.old_schema_type_ids_changed.empty() ||
989                !set_schema_result.schema_types_incompatible_by_id.empty() ||
990                !set_schema_result.schema_types_deleted_by_id.empty()) {
991       status = document_store_->OptimizedUpdateSchemaStore(schema_store_.get(),
992                                                            set_schema_result);
993       if (!status.ok()) {
994         TransformStatus(status, result_status);
995         return result_proto;
996       }
997     }
998 
999     if (lost_previous_schema || index_incompatible) {
1000       // Clears search indices
1001       status = ClearSearchIndices();
1002       if (!status.ok()) {
1003         TransformStatus(status, result_status);
1004         return result_proto;
1005       }
1006     }
1007 
1008     if (lost_previous_schema || join_incompatible) {
1009       // Clears join indices
1010       status = ClearJoinIndices();
1011       if (!status.ok()) {
1012         TransformStatus(status, result_status);
1013         return result_proto;
1014       }
1015     }
1016 
1017     if (lost_previous_schema || index_incompatible || join_incompatible) {
1018       IndexRestorationResult restore_result = RestoreIndexIfNeeded();
1019       // DATA_LOSS means that we have successfully re-added content to the
1020       // index. Some indexed content was lost, but otherwise the index is in a
1021       // valid state and can be queried.
1022       if (!restore_result.status.ok() &&
1023           !absl_ports::IsDataLoss(restore_result.status)) {
1024         TransformStatus(status, result_status);
1025         return result_proto;
1026       }
1027     }
1028 
1029     result_status->set_code(StatusProto::OK);
1030   } else {
1031     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1032     result_status->set_message("Schema is incompatible.");
1033   }
1034 
1035   return result_proto;
1036 }
1037 
GetSchema()1038 GetSchemaResultProto IcingSearchEngine::GetSchema() {
1039   GetSchemaResultProto result_proto;
1040   StatusProto* result_status = result_proto.mutable_status();
1041 
1042   absl_ports::shared_lock l(&mutex_);
1043   if (!initialized_) {
1044     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1045     result_status->set_message("IcingSearchEngine has not been initialized!");
1046     return result_proto;
1047   }
1048 
1049   auto schema_or = schema_store_->GetSchema();
1050   if (!schema_or.ok()) {
1051     TransformStatus(schema_or.status(), result_status);
1052     return result_proto;
1053   }
1054 
1055   result_status->set_code(StatusProto::OK);
1056   *result_proto.mutable_schema() = *std::move(schema_or).ValueOrDie();
1057   return result_proto;
1058 }
1059 
GetSchemaType(std::string_view schema_type)1060 GetSchemaTypeResultProto IcingSearchEngine::GetSchemaType(
1061     std::string_view schema_type) {
1062   GetSchemaTypeResultProto result_proto;
1063   StatusProto* result_status = result_proto.mutable_status();
1064 
1065   absl_ports::shared_lock l(&mutex_);
1066   if (!initialized_) {
1067     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1068     result_status->set_message("IcingSearchEngine has not been initialized!");
1069     return result_proto;
1070   }
1071 
1072   auto type_config_or = schema_store_->GetSchemaTypeConfig(schema_type);
1073   if (!type_config_or.ok()) {
1074     TransformStatus(type_config_or.status(), result_status);
1075     return result_proto;
1076   }
1077 
1078   result_status->set_code(StatusProto::OK);
1079   *result_proto.mutable_schema_type_config() = *(type_config_or.ValueOrDie());
1080   return result_proto;
1081 }
1082 
Put(const DocumentProto & document)1083 PutResultProto IcingSearchEngine::Put(const DocumentProto& document) {
1084   return Put(DocumentProto(document));
1085 }
1086 
Put(DocumentProto && document)1087 PutResultProto IcingSearchEngine::Put(DocumentProto&& document) {
1088   ICING_VLOG(1) << "Writing document to document store";
1089 
1090   PutResultProto result_proto;
1091   StatusProto* result_status = result_proto.mutable_status();
1092   PutDocumentStatsProto* put_document_stats =
1093       result_proto.mutable_put_document_stats();
1094   ScopedTimer put_timer(clock_->GetNewTimer(), [put_document_stats](int64_t t) {
1095     put_document_stats->set_latency_ms(t);
1096   });
1097 
1098   // Lock must be acquired before validation because the DocumentStore uses
1099   // the schema file to validate, and the schema could be changed in
1100   // SetSchema() which is protected by the same mutex.
1101   absl_ports::unique_lock l(&mutex_);
1102   if (!initialized_) {
1103     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1104     result_status->set_message("IcingSearchEngine has not been initialized!");
1105     return result_proto;
1106   }
1107 
1108   auto tokenized_document_or = TokenizedDocument::Create(
1109       schema_store_.get(), language_segmenter_.get(), std::move(document));
1110   if (!tokenized_document_or.ok()) {
1111     TransformStatus(tokenized_document_or.status(), result_status);
1112     return result_proto;
1113   }
1114   TokenizedDocument tokenized_document(
1115       std::move(tokenized_document_or).ValueOrDie());
1116 
1117   auto document_id_or = document_store_->Put(
1118       tokenized_document.document(), tokenized_document.num_string_tokens(),
1119       put_document_stats);
1120   if (!document_id_or.ok()) {
1121     TransformStatus(document_id_or.status(), result_status);
1122     return result_proto;
1123   }
1124   DocumentId document_id = document_id_or.ValueOrDie();
1125 
1126   auto data_indexing_handlers_or = CreateDataIndexingHandlers();
1127   if (!data_indexing_handlers_or.ok()) {
1128     TransformStatus(data_indexing_handlers_or.status(), result_status);
1129     return result_proto;
1130   }
1131   IndexProcessor index_processor(
1132       std::move(data_indexing_handlers_or).ValueOrDie(), clock_.get());
1133 
1134   auto index_status = index_processor.IndexDocument(
1135       tokenized_document, document_id, put_document_stats);
1136   // Getting an internal error from the index could possibly mean that the index
1137   // is broken. Try to rebuild them to recover.
1138   if (absl_ports::IsInternal(index_status)) {
1139     ICING_LOG(ERROR) << "Got an internal error from the index. Trying to "
1140                         "rebuild the index!\n"
1141                      << index_status.error_message();
1142     index_status = ClearAllIndices();
1143     if (index_status.ok()) {
1144       index_status = RestoreIndexIfNeeded().status;
1145       if (!index_status.ok()) {
1146         ICING_LOG(ERROR) << "Failed to reindex documents after a failure of "
1147                             "indexing a document.";
1148       }
1149     } else {
1150       ICING_LOG(ERROR)
1151           << "Failed to clear indices after a failure of indexing a document.";
1152     }
1153   }
1154 
1155   if (!index_status.ok()) {
1156     // If we encountered a failure or cannot resolve an internal error while
1157     // indexing this document, then mark it as deleted.
1158     int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
1159     libtextclassifier3::Status delete_status =
1160         document_store_->Delete(document_id, current_time_ms);
1161     if (!delete_status.ok()) {
1162       // This is pretty dire (and, hopefully, unlikely). We can't roll back the
1163       // document that we just added. Wipeout the whole index.
1164       ICING_LOG(ERROR) << "Cannot delete the document that is failed to index. "
1165                           "Wiping out the whole Icing search engine.";
1166       ResetInternal();
1167     }
1168   }
1169 
1170   TransformStatus(index_status, result_status);
1171   return result_proto;
1172 }
1173 
Get(const std::string_view name_space,const std::string_view uri,const GetResultSpecProto & result_spec)1174 GetResultProto IcingSearchEngine::Get(const std::string_view name_space,
1175                                       const std::string_view uri,
1176                                       const GetResultSpecProto& result_spec) {
1177   GetResultProto result_proto;
1178   StatusProto* result_status = result_proto.mutable_status();
1179 
1180   absl_ports::shared_lock l(&mutex_);
1181   if (!initialized_) {
1182     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1183     result_status->set_message("IcingSearchEngine has not been initialized!");
1184     return result_proto;
1185   }
1186 
1187   auto document_or = document_store_->Get(name_space, uri);
1188   if (!document_or.ok()) {
1189     TransformStatus(document_or.status(), result_status);
1190     return result_proto;
1191   }
1192 
1193   DocumentProto document = std::move(document_or).ValueOrDie();
1194   std::unique_ptr<ProjectionTree> type_projection_tree;
1195   std::unique_ptr<ProjectionTree> wildcard_projection_tree;
1196   for (const SchemaStore::ExpandedTypePropertyMask& type_field_mask :
1197        schema_store_->ExpandTypePropertyMasks(
1198            result_spec.type_property_masks())) {
1199     if (type_field_mask.schema_type == document.schema()) {
1200       type_projection_tree = std::make_unique<ProjectionTree>(type_field_mask);
1201     } else if (type_field_mask.schema_type ==
1202                SchemaStore::kSchemaTypeWildcard) {
1203       wildcard_projection_tree =
1204           std::make_unique<ProjectionTree>(type_field_mask);
1205     }
1206   }
1207 
1208   // Apply projection
1209   if (type_projection_tree != nullptr) {
1210     projector::Project(type_projection_tree->root().children, &document);
1211   } else if (wildcard_projection_tree != nullptr) {
1212     projector::Project(wildcard_projection_tree->root().children, &document);
1213   }
1214 
1215   result_status->set_code(StatusProto::OK);
1216   *result_proto.mutable_document() = std::move(document);
1217   return result_proto;
1218 }
1219 
ReportUsage(const UsageReport & usage_report)1220 ReportUsageResultProto IcingSearchEngine::ReportUsage(
1221     const UsageReport& usage_report) {
1222   ReportUsageResultProto result_proto;
1223   StatusProto* result_status = result_proto.mutable_status();
1224 
1225   absl_ports::unique_lock l(&mutex_);
1226   if (!initialized_) {
1227     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1228     result_status->set_message("IcingSearchEngine has not been initialized!");
1229     return result_proto;
1230   }
1231 
1232   libtextclassifier3::Status status =
1233       document_store_->ReportUsage(usage_report);
1234   TransformStatus(status, result_status);
1235   return result_proto;
1236 }
1237 
GetAllNamespaces()1238 GetAllNamespacesResultProto IcingSearchEngine::GetAllNamespaces() {
1239   GetAllNamespacesResultProto result_proto;
1240   StatusProto* result_status = result_proto.mutable_status();
1241 
1242   absl_ports::shared_lock l(&mutex_);
1243   if (!initialized_) {
1244     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1245     result_status->set_message("IcingSearchEngine has not been initialized!");
1246     return result_proto;
1247   }
1248 
1249   std::vector<std::string> namespaces = document_store_->GetAllNamespaces();
1250 
1251   for (const std::string& namespace_ : namespaces) {
1252     result_proto.add_namespaces(namespace_);
1253   }
1254 
1255   result_status->set_code(StatusProto::OK);
1256   return result_proto;
1257 }
1258 
Delete(const std::string_view name_space,const std::string_view uri)1259 DeleteResultProto IcingSearchEngine::Delete(const std::string_view name_space,
1260                                             const std::string_view uri) {
1261   ICING_VLOG(1) << "Deleting document from doc store";
1262 
1263   DeleteResultProto result_proto;
1264   StatusProto* result_status = result_proto.mutable_status();
1265 
1266   absl_ports::unique_lock l(&mutex_);
1267   if (!initialized_) {
1268     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1269     result_status->set_message("IcingSearchEngine has not been initialized!");
1270     return result_proto;
1271   }
1272 
1273   DeleteStatsProto* delete_stats = result_proto.mutable_delete_stats();
1274   delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SINGLE);
1275 
1276   std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
1277   // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
1278   // that can support error logging.
1279   int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
1280   libtextclassifier3::Status status =
1281       document_store_->Delete(name_space, uri, current_time_ms);
1282   if (!status.ok()) {
1283     LogSeverity::Code severity = ERROR;
1284     if (absl_ports::IsNotFound(status)) {
1285       severity = DBG;
1286     }
1287     ICING_LOG(severity) << status.error_message()
1288                         << "Failed to delete Document. namespace: "
1289                         << name_space << ", uri: " << uri;
1290     TransformStatus(status, result_status);
1291     return result_proto;
1292   }
1293 
1294   result_status->set_code(StatusProto::OK);
1295   delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
1296   delete_stats->set_num_documents_deleted(1);
1297   return result_proto;
1298 }
1299 
DeleteByNamespace(const std::string_view name_space)1300 DeleteByNamespaceResultProto IcingSearchEngine::DeleteByNamespace(
1301     const std::string_view name_space) {
1302   ICING_VLOG(1) << "Deleting namespace from doc store";
1303 
1304   DeleteByNamespaceResultProto delete_result;
1305   StatusProto* result_status = delete_result.mutable_status();
1306   absl_ports::unique_lock l(&mutex_);
1307   if (!initialized_) {
1308     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1309     result_status->set_message("IcingSearchEngine has not been initialized!");
1310     return delete_result;
1311   }
1312 
1313   DeleteStatsProto* delete_stats = delete_result.mutable_delete_stats();
1314   delete_stats->set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE);
1315 
1316   std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
1317   // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
1318   // that can support error logging.
1319   DocumentStore::DeleteByGroupResult doc_store_result =
1320       document_store_->DeleteByNamespace(name_space);
1321   if (!doc_store_result.status.ok()) {
1322     ICING_LOG(ERROR) << doc_store_result.status.error_message()
1323                      << "Failed to delete Namespace: " << name_space;
1324     TransformStatus(doc_store_result.status, result_status);
1325     return delete_result;
1326   }
1327 
1328   result_status->set_code(StatusProto::OK);
1329   delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
1330   delete_stats->set_num_documents_deleted(doc_store_result.num_docs_deleted);
1331   return delete_result;
1332 }
1333 
DeleteBySchemaType(const std::string_view schema_type)1334 DeleteBySchemaTypeResultProto IcingSearchEngine::DeleteBySchemaType(
1335     const std::string_view schema_type) {
1336   ICING_VLOG(1) << "Deleting type from doc store";
1337 
1338   DeleteBySchemaTypeResultProto delete_result;
1339   StatusProto* result_status = delete_result.mutable_status();
1340   absl_ports::unique_lock l(&mutex_);
1341   if (!initialized_) {
1342     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1343     result_status->set_message("IcingSearchEngine has not been initialized!");
1344     return delete_result;
1345   }
1346 
1347   DeleteStatsProto* delete_stats = delete_result.mutable_delete_stats();
1348   delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE);
1349 
1350   std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
1351   // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
1352   // that can support error logging.
1353   DocumentStore::DeleteByGroupResult doc_store_result =
1354       document_store_->DeleteBySchemaType(schema_type);
1355   if (!doc_store_result.status.ok()) {
1356     ICING_LOG(ERROR) << doc_store_result.status.error_message()
1357                      << "Failed to delete SchemaType: " << schema_type;
1358     TransformStatus(doc_store_result.status, result_status);
1359     return delete_result;
1360   }
1361 
1362   result_status->set_code(StatusProto::OK);
1363   delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
1364   delete_stats->set_num_documents_deleted(doc_store_result.num_docs_deleted);
1365   return delete_result;
1366 }
1367 
DeleteByQuery(const SearchSpecProto & search_spec,bool return_deleted_document_info)1368 DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery(
1369     const SearchSpecProto& search_spec, bool return_deleted_document_info) {
1370   ICING_VLOG(1) << "Deleting documents for query " << search_spec.query()
1371                 << " from doc store";
1372 
1373   DeleteByQueryResultProto result_proto;
1374   StatusProto* result_status = result_proto.mutable_status();
1375 
1376   absl_ports::unique_lock l(&mutex_);
1377   if (!initialized_) {
1378     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1379     result_status->set_message("IcingSearchEngine has not been initialized!");
1380     return result_proto;
1381   }
1382 
1383   DeleteByQueryStatsProto* delete_stats =
1384       result_proto.mutable_delete_by_query_stats();
1385   delete_stats->set_query_length(search_spec.query().length());
1386   delete_stats->set_num_namespaces_filtered(
1387       search_spec.namespace_filters_size());
1388   delete_stats->set_num_schema_types_filtered(
1389       search_spec.schema_type_filters_size());
1390 
1391   ScopedTimer delete_timer(clock_->GetNewTimer(), [delete_stats](int64_t t) {
1392     delete_stats->set_latency_ms(t);
1393   });
1394   libtextclassifier3::Status status =
1395       ValidateSearchSpec(search_spec, performance_configuration_);
1396   if (!status.ok()) {
1397     TransformStatus(status, result_status);
1398     return result_proto;
1399   }
1400 
1401   std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
1402   // Gets unordered results from query processor
1403   auto query_processor_or = QueryProcessor::Create(
1404       index_.get(), integer_index_.get(), language_segmenter_.get(),
1405       normalizer_.get(), document_store_.get(), schema_store_.get());
1406   if (!query_processor_or.ok()) {
1407     TransformStatus(query_processor_or.status(), result_status);
1408     delete_stats->set_parse_query_latency_ms(
1409         component_timer->GetElapsedMilliseconds());
1410     return result_proto;
1411   }
1412   std::unique_ptr<QueryProcessor> query_processor =
1413       std::move(query_processor_or).ValueOrDie();
1414 
1415   int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
1416   auto query_results_or = query_processor->ParseSearch(
1417       search_spec, ScoringSpecProto::RankingStrategy::NONE, current_time_ms);
1418   if (!query_results_or.ok()) {
1419     TransformStatus(query_results_or.status(), result_status);
1420     delete_stats->set_parse_query_latency_ms(
1421         component_timer->GetElapsedMilliseconds());
1422     return result_proto;
1423   }
1424   QueryResults query_results = std::move(query_results_or).ValueOrDie();
1425   delete_stats->set_parse_query_latency_ms(
1426       component_timer->GetElapsedMilliseconds());
1427 
1428   ICING_VLOG(2) << "Deleting the docs that matched the query.";
1429   int num_deleted = 0;
1430   // A map used to group deleted documents.
1431   // From the (namespace, type) pair to a list of uris.
1432   std::unordered_map<NamespaceTypePair,
1433                      DeleteByQueryResultProto::DocumentGroupInfo*,
1434                      NamespaceTypePairHasher>
1435       deleted_info_map;
1436 
1437   component_timer = clock_->GetNewTimer();
1438   while (query_results.root_iterator->Advance().ok()) {
1439     ICING_VLOG(3) << "Deleting doc "
1440                   << query_results.root_iterator->doc_hit_info().document_id();
1441     ++num_deleted;
1442     if (return_deleted_document_info) {
1443       status = RetrieveAndAddDocumentInfo(
1444           document_store_.get(), result_proto, deleted_info_map,
1445           query_results.root_iterator->doc_hit_info().document_id());
1446       if (!status.ok()) {
1447         TransformStatus(status, result_status);
1448         delete_stats->set_document_removal_latency_ms(
1449             component_timer->GetElapsedMilliseconds());
1450         return result_proto;
1451       }
1452     }
1453     status = document_store_->Delete(
1454         query_results.root_iterator->doc_hit_info().document_id(),
1455         current_time_ms);
1456     if (!status.ok()) {
1457       TransformStatus(status, result_status);
1458       delete_stats->set_document_removal_latency_ms(
1459           component_timer->GetElapsedMilliseconds());
1460       return result_proto;
1461     }
1462   }
1463   delete_stats->set_document_removal_latency_ms(
1464       component_timer->GetElapsedMilliseconds());
1465   int term_count = 0;
1466   for (const auto& section_and_terms : query_results.query_terms) {
1467     term_count += section_and_terms.second.size();
1468   }
1469   delete_stats->set_num_terms(term_count);
1470 
1471   if (num_deleted > 0) {
1472     result_proto.mutable_status()->set_code(StatusProto::OK);
1473   } else {
1474     result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
1475     result_proto.mutable_status()->set_message(
1476         "No documents matched the query to delete by!");
1477   }
1478   delete_stats->set_num_documents_deleted(num_deleted);
1479   return result_proto;
1480 }
1481 
PersistToDisk(PersistType::Code persist_type)1482 PersistToDiskResultProto IcingSearchEngine::PersistToDisk(
1483     PersistType::Code persist_type) {
1484   ICING_VLOG(1) << "Persisting data to disk";
1485 
1486   PersistToDiskResultProto result_proto;
1487   StatusProto* result_status = result_proto.mutable_status();
1488 
1489   absl_ports::unique_lock l(&mutex_);
1490   if (!initialized_) {
1491     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1492     result_status->set_message("IcingSearchEngine has not been initialized!");
1493     return result_proto;
1494   }
1495 
1496   auto status = InternalPersistToDisk(persist_type);
1497   TransformStatus(status, result_status);
1498   return result_proto;
1499 }
1500 
1501 // Optimizes Icing's storage
1502 //
1503 // Steps:
1504 // 1. Flush data to disk.
1505 // 2. Copy data needed to a tmp directory.
1506 // 3. Swap current directory and tmp directory.
Optimize()1507 OptimizeResultProto IcingSearchEngine::Optimize() {
1508   ICING_VLOG(1) << "Optimizing icing storage";
1509 
1510   OptimizeResultProto result_proto;
1511   StatusProto* result_status = result_proto.mutable_status();
1512 
1513   absl_ports::unique_lock l(&mutex_);
1514   if (!initialized_) {
1515     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1516     result_status->set_message("IcingSearchEngine has not been initialized!");
1517     return result_proto;
1518   }
1519 
1520   OptimizeStatsProto* optimize_stats = result_proto.mutable_optimize_stats();
1521   ScopedTimer optimize_timer(
1522       clock_->GetNewTimer(),
1523       [optimize_stats](int64_t t) { optimize_stats->set_latency_ms(t); });
1524 
1525   // Flushes data to disk before doing optimization
1526   auto status = InternalPersistToDisk(PersistType::FULL);
1527   if (!status.ok()) {
1528     TransformStatus(status, result_status);
1529     return result_proto;
1530   }
1531 
1532   int64_t before_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
1533   optimize_stats->set_storage_size_before(
1534       Filesystem::SanitizeFileSize(before_size));
1535 
1536   // TODO(b/143646633): figure out if we need to optimize index and doc store
1537   // at the same time.
1538   std::unique_ptr<Timer> optimize_doc_store_timer = clock_->GetNewTimer();
1539   libtextclassifier3::StatusOr<std::vector<DocumentId>>
1540       document_id_old_to_new_or = OptimizeDocumentStore(optimize_stats);
1541   optimize_stats->set_document_store_optimize_latency_ms(
1542       optimize_doc_store_timer->GetElapsedMilliseconds());
1543 
1544   if (!document_id_old_to_new_or.ok() &&
1545       !absl_ports::IsDataLoss(document_id_old_to_new_or.status())) {
1546     // The status now is either ABORTED_ERROR or INTERNAL_ERROR.
1547     // If ABORTED_ERROR, Icing should still be working.
1548     // If INTERNAL_ERROR, we're having IO errors or other errors that we can't
1549     // recover from.
1550     TransformStatus(document_id_old_to_new_or.status(), result_status);
1551     return result_proto;
1552   }
1553 
1554   // The status is either OK or DATA_LOSS. The optimized document store is
1555   // guaranteed to work, so we update index according to the new document store.
1556   std::unique_ptr<Timer> optimize_index_timer = clock_->GetNewTimer();
1557   bool should_rebuild_index =
1558       !document_id_old_to_new_or.ok() ||
1559       ShouldRebuildIndex(*optimize_stats,
1560                          options_.optimize_rebuild_index_threshold());
1561   if (!should_rebuild_index) {
1562     optimize_stats->set_index_restoration_mode(
1563         OptimizeStatsProto::INDEX_TRANSLATION);
1564     libtextclassifier3::Status index_optimize_status =
1565         index_->Optimize(document_id_old_to_new_or.ValueOrDie(),
1566                          document_store_->last_added_document_id());
1567     if (!index_optimize_status.ok()) {
1568       ICING_LOG(WARNING) << "Failed to optimize index. Error: "
1569                          << index_optimize_status.error_message();
1570       should_rebuild_index = true;
1571     }
1572 
1573     libtextclassifier3::Status integer_index_optimize_status =
1574         integer_index_->Optimize(document_id_old_to_new_or.ValueOrDie(),
1575                                  document_store_->last_added_document_id());
1576     if (!integer_index_optimize_status.ok()) {
1577       ICING_LOG(WARNING) << "Failed to optimize integer index. Error: "
1578                          << integer_index_optimize_status.error_message();
1579       should_rebuild_index = true;
1580     }
1581 
1582     libtextclassifier3::Status qualified_id_join_index_optimize_status =
1583         qualified_id_join_index_->Optimize(
1584             document_id_old_to_new_or.ValueOrDie(),
1585             document_store_->last_added_document_id());
1586     if (!qualified_id_join_index_optimize_status.ok()) {
1587       ICING_LOG(WARNING)
1588           << "Failed to optimize qualified id join index. Error: "
1589           << qualified_id_join_index_optimize_status.error_message();
1590       should_rebuild_index = true;
1591     }
1592   }
1593   // If we received a DATA_LOSS error from OptimizeDocumentStore, we have a
1594   // valid document store, but it might be the old one or the new one. So throw
1595   // out the index data and rebuild from scratch.
1596   // Likewise, if Index::Optimize failed, then attempt to recover the index by
1597   // rebuilding from scratch.
1598   // If ShouldRebuildIndex() returns true, we will also rebuild the index for
1599   // better performance.
1600   if (should_rebuild_index) {
1601     optimize_stats->set_index_restoration_mode(
1602         OptimizeStatsProto::FULL_INDEX_REBUILD);
1603     ICING_LOG(WARNING) << "Clearing the entire index!";
1604 
1605     libtextclassifier3::Status index_clear_status = ClearAllIndices();
1606     if (!index_clear_status.ok()) {
1607       status = absl_ports::Annotate(
1608           absl_ports::InternalError("Failed to clear index."),
1609           index_clear_status.error_message());
1610       TransformStatus(status, result_status);
1611       optimize_stats->set_index_restoration_latency_ms(
1612           optimize_index_timer->GetElapsedMilliseconds());
1613       return result_proto;
1614     }
1615 
1616     IndexRestorationResult index_restoration_status = RestoreIndexIfNeeded();
1617     // DATA_LOSS means that we have successfully re-added content to the index.
1618     // Some indexed content was lost, but otherwise the index is in a valid
1619     // state and can be queried.
1620     if (!index_restoration_status.status.ok() &&
1621         !absl_ports::IsDataLoss(index_restoration_status.status)) {
1622       status = absl_ports::Annotate(
1623           absl_ports::InternalError(
1624               "Failed to reindex documents after optimization."),
1625           index_restoration_status.status.error_message());
1626 
1627       TransformStatus(status, result_status);
1628       optimize_stats->set_index_restoration_latency_ms(
1629           optimize_index_timer->GetElapsedMilliseconds());
1630       return result_proto;
1631     }
1632   }
1633   optimize_stats->set_index_restoration_latency_ms(
1634       optimize_index_timer->GetElapsedMilliseconds());
1635 
1636   // Read the optimize status to get the time that we last ran.
1637   std::string optimize_status_filename =
1638       absl_ports::StrCat(options_.base_dir(), "/", kOptimizeStatusFilename);
1639   FileBackedProto<OptimizeStatusProto> optimize_status_file(
1640       *filesystem_, optimize_status_filename);
1641   auto optimize_status_or = optimize_status_file.Read();
1642   int64_t current_time = clock_->GetSystemTimeMilliseconds();
1643   if (optimize_status_or.ok()) {
1644     // If we have trouble reading the status or this is the first time that
1645     // we've ever run, don't set this field.
1646     optimize_stats->set_time_since_last_optimize_ms(
1647         current_time - optimize_status_or.ValueOrDie()
1648                            ->last_successful_optimize_run_time_ms());
1649   }
1650 
1651   // Update the status for this run and write it.
1652   auto optimize_status = std::make_unique<OptimizeStatusProto>();
1653   optimize_status->set_last_successful_optimize_run_time_ms(current_time);
1654   optimize_status_file.Write(std::move(optimize_status));
1655 
1656   // Flushes data to disk after doing optimization
1657   status = InternalPersistToDisk(PersistType::FULL);
1658   if (!status.ok()) {
1659     TransformStatus(status, result_status);
1660     return result_proto;
1661   }
1662 
1663   int64_t after_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
1664   optimize_stats->set_storage_size_after(
1665       Filesystem::SanitizeFileSize(after_size));
1666 
1667   TransformStatus(document_id_old_to_new_or.status(), result_status);
1668   return result_proto;
1669 }
1670 
GetOptimizeInfo()1671 GetOptimizeInfoResultProto IcingSearchEngine::GetOptimizeInfo() {
1672   ICING_VLOG(1) << "Getting optimize info from IcingSearchEngine";
1673 
1674   GetOptimizeInfoResultProto result_proto;
1675   StatusProto* result_status = result_proto.mutable_status();
1676 
1677   absl_ports::shared_lock l(&mutex_);
1678   if (!initialized_) {
1679     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1680     result_status->set_message("IcingSearchEngine has not been initialized!");
1681     return result_proto;
1682   }
1683 
1684   // Read the optimize status to get the time that we last ran.
1685   std::string optimize_status_filename =
1686       absl_ports::StrCat(options_.base_dir(), "/", kOptimizeStatusFilename);
1687   FileBackedProto<OptimizeStatusProto> optimize_status_file(
1688       *filesystem_, optimize_status_filename);
1689   auto optimize_status_or = optimize_status_file.Read();
1690   int64_t current_time = clock_->GetSystemTimeMilliseconds();
1691 
1692   if (optimize_status_or.ok()) {
1693     // If we have trouble reading the status or this is the first time that
1694     // we've ever run, don't set this field.
1695     result_proto.set_time_since_last_optimize_ms(
1696         current_time - optimize_status_or.ValueOrDie()
1697                            ->last_successful_optimize_run_time_ms());
1698   }
1699 
1700   // Get stats from DocumentStore
1701   auto doc_store_optimize_info_or = document_store_->GetOptimizeInfo();
1702   if (!doc_store_optimize_info_or.ok()) {
1703     TransformStatus(doc_store_optimize_info_or.status(), result_status);
1704     return result_proto;
1705   }
1706   DocumentStore::OptimizeInfo doc_store_optimize_info =
1707       doc_store_optimize_info_or.ValueOrDie();
1708   result_proto.set_optimizable_docs(doc_store_optimize_info.optimizable_docs);
1709 
1710   if (doc_store_optimize_info.optimizable_docs == 0) {
1711     // Can return early since there's nothing to calculate on the index side
1712     result_proto.set_estimated_optimizable_bytes(0);
1713     result_status->set_code(StatusProto::OK);
1714     return result_proto;
1715   }
1716 
1717   // Get stats from Index.
1718   auto index_elements_size_or = index_->GetElementsSize();
1719   if (!index_elements_size_or.ok()) {
1720     TransformStatus(index_elements_size_or.status(), result_status);
1721     return result_proto;
1722   }
1723   int64_t index_elements_size = index_elements_size_or.ValueOrDie();
1724 
1725   // TODO(b/259744228): add stats for integer index
1726 
1727   // Sum up the optimizable sizes from DocumentStore and Index
1728   result_proto.set_estimated_optimizable_bytes(
1729       index_elements_size * doc_store_optimize_info.optimizable_docs /
1730           doc_store_optimize_info.total_docs +
1731       doc_store_optimize_info.estimated_optimizable_bytes);
1732 
1733   result_status->set_code(StatusProto::OK);
1734   return result_proto;
1735 }
1736 
GetStorageInfo()1737 StorageInfoResultProto IcingSearchEngine::GetStorageInfo() {
1738   StorageInfoResultProto result;
1739   absl_ports::shared_lock l(&mutex_);
1740   if (!initialized_) {
1741     result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
1742     result.mutable_status()->set_message(
1743         "IcingSearchEngine has not been initialized!");
1744     return result;
1745   }
1746 
1747   int64_t index_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
1748   result.mutable_storage_info()->set_total_storage_size(
1749       Filesystem::SanitizeFileSize(index_size));
1750   *result.mutable_storage_info()->mutable_document_storage_info() =
1751       document_store_->GetStorageInfo();
1752   *result.mutable_storage_info()->mutable_schema_store_storage_info() =
1753       schema_store_->GetStorageInfo();
1754   *result.mutable_storage_info()->mutable_index_storage_info() =
1755       index_->GetStorageInfo();
1756   // TODO(b/259744228): add stats for integer index
1757   result.mutable_status()->set_code(StatusProto::OK);
1758   return result;
1759 }
1760 
GetDebugInfo(DebugInfoVerbosity::Code verbosity)1761 DebugInfoResultProto IcingSearchEngine::GetDebugInfo(
1762     DebugInfoVerbosity::Code verbosity) {
1763   DebugInfoResultProto debug_info;
1764   StatusProto* result_status = debug_info.mutable_status();
1765   absl_ports::shared_lock l(&mutex_);
1766   if (!initialized_) {
1767     debug_info.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
1768     debug_info.mutable_status()->set_message(
1769         "IcingSearchEngine has not been initialized!");
1770     return debug_info;
1771   }
1772 
1773   // Index
1774   *debug_info.mutable_debug_info()->mutable_index_info() =
1775       index_->GetDebugInfo(verbosity);
1776 
1777   // TODO(b/259744228): add debug info for integer index
1778 
1779   // Document Store
1780   libtextclassifier3::StatusOr<DocumentDebugInfoProto> document_debug_info =
1781       document_store_->GetDebugInfo(verbosity);
1782   if (!document_debug_info.ok()) {
1783     TransformStatus(document_debug_info.status(), result_status);
1784     return debug_info;
1785   }
1786   *debug_info.mutable_debug_info()->mutable_document_info() =
1787       std::move(document_debug_info).ValueOrDie();
1788 
1789   // Schema Store
1790   libtextclassifier3::StatusOr<SchemaDebugInfoProto> schema_debug_info =
1791       schema_store_->GetDebugInfo();
1792   if (!schema_debug_info.ok()) {
1793     TransformStatus(schema_debug_info.status(), result_status);
1794     return debug_info;
1795   }
1796   *debug_info.mutable_debug_info()->mutable_schema_info() =
1797       std::move(schema_debug_info).ValueOrDie();
1798 
1799   result_status->set_code(StatusProto::OK);
1800   return debug_info;
1801 }
1802 
InternalPersistToDisk(PersistType::Code persist_type)1803 libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk(
1804     PersistType::Code persist_type) {
1805   if (persist_type == PersistType::LITE) {
1806     return document_store_->PersistToDisk(persist_type);
1807   }
1808   ICING_RETURN_IF_ERROR(schema_store_->PersistToDisk());
1809   ICING_RETURN_IF_ERROR(document_store_->PersistToDisk(PersistType::FULL));
1810   ICING_RETURN_IF_ERROR(index_->PersistToDisk());
1811   ICING_RETURN_IF_ERROR(integer_index_->PersistToDisk());
1812   ICING_RETURN_IF_ERROR(qualified_id_join_index_->PersistToDisk());
1813 
1814   return libtextclassifier3::Status::OK;
1815 }
1816 
Search(const SearchSpecProto & search_spec,const ScoringSpecProto & scoring_spec,const ResultSpecProto & result_spec)1817 SearchResultProto IcingSearchEngine::Search(
1818     const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
1819     const ResultSpecProto& result_spec) {
1820   if (search_spec.use_read_only_search()) {
1821     return SearchLockedShared(search_spec, scoring_spec, result_spec);
1822   } else {
1823     return SearchLockedExclusive(search_spec, scoring_spec, result_spec);
1824   }
1825 }
1826 
SearchLockedShared(const SearchSpecProto & search_spec,const ScoringSpecProto & scoring_spec,const ResultSpecProto & result_spec)1827 SearchResultProto IcingSearchEngine::SearchLockedShared(
1828     const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
1829     const ResultSpecProto& result_spec) {
1830   std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
1831 
1832   // Only acquire an overall read-lock for this implementation. Finer-grained
1833   // locks are implemented around code paths that write changes to Icing's data
1834   // members.
1835   absl_ports::shared_lock l(&mutex_);
1836   int64_t lock_acquisition_latency = overall_timer->GetElapsedMilliseconds();
1837 
1838   SearchResultProto result_proto =
1839       InternalSearch(search_spec, scoring_spec, result_spec);
1840 
1841   result_proto.mutable_query_stats()->set_lock_acquisition_latency_ms(
1842       lock_acquisition_latency);
1843   result_proto.mutable_query_stats()->set_latency_ms(
1844       overall_timer->GetElapsedMilliseconds());
1845   return result_proto;
1846 }
1847 
SearchLockedExclusive(const SearchSpecProto & search_spec,const ScoringSpecProto & scoring_spec,const ResultSpecProto & result_spec)1848 SearchResultProto IcingSearchEngine::SearchLockedExclusive(
1849     const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
1850     const ResultSpecProto& result_spec) {
1851   std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
1852 
1853   // Acquire the overall write-lock for this locked implementation.
1854   absl_ports::unique_lock l(&mutex_);
1855   int64_t lock_acquisition_latency = overall_timer->GetElapsedMilliseconds();
1856 
1857   SearchResultProto result_proto =
1858       InternalSearch(search_spec, scoring_spec, result_spec);
1859 
1860   result_proto.mutable_query_stats()->set_lock_acquisition_latency_ms(
1861       lock_acquisition_latency);
1862   result_proto.mutable_query_stats()->set_latency_ms(
1863       overall_timer->GetElapsedMilliseconds());
1864   return result_proto;
1865 }
1866 
InternalSearch(const SearchSpecProto & search_spec,const ScoringSpecProto & scoring_spec,const ResultSpecProto & result_spec)1867 SearchResultProto IcingSearchEngine::InternalSearch(
1868     const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
1869     const ResultSpecProto& result_spec) {
1870   SearchResultProto result_proto;
1871   StatusProto* result_status = result_proto.mutable_status();
1872 
1873   QueryStatsProto* query_stats = result_proto.mutable_query_stats();
1874   query_stats->set_query_length(search_spec.query().length());
1875   if (!initialized_) {
1876     result_status->set_code(StatusProto::FAILED_PRECONDITION);
1877     result_status->set_message("IcingSearchEngine has not been initialized!");
1878     return result_proto;
1879   }
1880 
1881   libtextclassifier3::Status status =
1882       ValidateResultSpec(document_store_.get(), result_spec);
1883   if (!status.ok()) {
1884     TransformStatus(status, result_status);
1885     return result_proto;
1886   }
1887   status = ValidateSearchSpec(search_spec, performance_configuration_);
1888   if (!status.ok()) {
1889     TransformStatus(status, result_status);
1890     return result_proto;
1891   }
1892 
1893   query_stats->set_num_namespaces_filtered(
1894       search_spec.namespace_filters_size());
1895   query_stats->set_num_schema_types_filtered(
1896       search_spec.schema_type_filters_size());
1897   query_stats->set_ranking_strategy(scoring_spec.rank_by());
1898   query_stats->set_is_first_page(true);
1899   query_stats->set_requested_page_size(result_spec.num_per_page());
1900 
1901   const JoinSpecProto& join_spec = search_spec.join_spec();
1902   std::unique_ptr<JoinChildrenFetcher> join_children_fetcher;
1903   std::unique_ptr<ResultAdjustmentInfo> child_result_adjustment_info;
1904   int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
1905   if (!join_spec.parent_property_expression().empty() &&
1906       !join_spec.child_property_expression().empty()) {
1907     // Process child query
1908     QueryScoringResults nested_query_scoring_results = ProcessQueryAndScore(
1909         join_spec.nested_spec().search_spec(),
1910         join_spec.nested_spec().scoring_spec(),
1911         join_spec.nested_spec().result_spec(),
1912         /*join_children_fetcher=*/nullptr, current_time_ms);
1913     // TOOD(b/256022027): set different kinds of latency for 2nd query.
1914     if (!nested_query_scoring_results.status.ok()) {
1915       TransformStatus(nested_query_scoring_results.status, result_status);
1916       return result_proto;
1917     }
1918 
1919     JoinProcessor join_processor(document_store_.get(), schema_store_.get(),
1920                                  qualified_id_join_index_.get(),
1921                                  current_time_ms);
1922     // Building a JoinChildrenFetcher where child documents are grouped by
1923     // their joinable values.
1924     libtextclassifier3::StatusOr<JoinChildrenFetcher> join_children_fetcher_or =
1925         join_processor.GetChildrenFetcher(
1926             search_spec.join_spec(),
1927             std::move(nested_query_scoring_results.scored_document_hits));
1928     if (!join_children_fetcher_or.ok()) {
1929       TransformStatus(join_children_fetcher_or.status(), result_status);
1930       return result_proto;
1931     }
1932     join_children_fetcher = std::make_unique<JoinChildrenFetcher>(
1933         std::move(join_children_fetcher_or).ValueOrDie());
1934 
1935     // Assign child's ResultAdjustmentInfo.
1936     child_result_adjustment_info = std::make_unique<ResultAdjustmentInfo>(
1937         join_spec.nested_spec().search_spec(),
1938         join_spec.nested_spec().scoring_spec(),
1939         join_spec.nested_spec().result_spec(), schema_store_.get(),
1940         std::move(nested_query_scoring_results.query_terms));
1941   }
1942 
1943   // Process parent query
1944   QueryScoringResults query_scoring_results =
1945       ProcessQueryAndScore(search_spec, scoring_spec, result_spec,
1946                            join_children_fetcher.get(), current_time_ms);
1947   int term_count = 0;
1948   for (const auto& section_and_terms : query_scoring_results.query_terms) {
1949     term_count += section_and_terms.second.size();
1950   }
1951   query_stats->set_num_terms(term_count);
1952   query_stats->set_parse_query_latency_ms(
1953       query_scoring_results.parse_query_latency_ms);
1954   query_stats->set_scoring_latency_ms(query_scoring_results.scoring_latency_ms);
1955   if (!query_scoring_results.status.ok()) {
1956     TransformStatus(query_scoring_results.status, result_status);
1957     return result_proto;
1958   }
1959 
1960   query_stats->set_num_documents_scored(
1961       query_scoring_results.scored_document_hits.size());
1962   // Returns early for empty result
1963   if (query_scoring_results.scored_document_hits.empty()) {
1964     result_status->set_code(StatusProto::OK);
1965     return result_proto;
1966   }
1967 
1968   // Construct parent's result adjustment info.
1969   auto parent_result_adjustment_info = std::make_unique<ResultAdjustmentInfo>(
1970       search_spec, scoring_spec, result_spec, schema_store_.get(),
1971       std::move(query_scoring_results.query_terms));
1972 
1973   std::unique_ptr<ScoredDocumentHitsRanker> ranker;
1974   if (join_children_fetcher != nullptr) {
1975     std::unique_ptr<Timer> join_timer = clock_->GetNewTimer();
1976     // Join 2 scored document hits
1977     JoinProcessor join_processor(document_store_.get(), schema_store_.get(),
1978                                  qualified_id_join_index_.get(),
1979                                  current_time_ms);
1980     libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>>
1981         joined_result_document_hits_or = join_processor.Join(
1982             join_spec, std::move(query_scoring_results.scored_document_hits),
1983             *join_children_fetcher);
1984     if (!joined_result_document_hits_or.ok()) {
1985       TransformStatus(joined_result_document_hits_or.status(), result_status);
1986       return result_proto;
1987     }
1988     std::vector<JoinedScoredDocumentHit> joined_result_document_hits =
1989         std::move(joined_result_document_hits_or).ValueOrDie();
1990 
1991     query_stats->set_join_latency_ms(join_timer->GetElapsedMilliseconds());
1992 
1993     std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
1994     // Ranks results
1995     ranker = std::make_unique<
1996         PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
1997         std::move(joined_result_document_hits),
1998         /*is_descending=*/scoring_spec.order_by() ==
1999             ScoringSpecProto::Order::DESC);
2000     query_stats->set_ranking_latency_ms(
2001         component_timer->GetElapsedMilliseconds());
2002   } else {
2003     // Non-join query
2004     std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
2005     // Ranks results
2006     ranker = std::make_unique<
2007         PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
2008         std::move(query_scoring_results.scored_document_hits),
2009         /*is_descending=*/scoring_spec.order_by() ==
2010             ScoringSpecProto::Order::DESC);
2011     query_stats->set_ranking_latency_ms(
2012         component_timer->GetElapsedMilliseconds());
2013   }
2014 
2015   std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
2016   // CacheAndRetrieveFirstPage and retrieves the document protos and snippets if
2017   // requested
2018   auto result_retriever_or =
2019       ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
2020                                 language_segmenter_.get(), normalizer_.get());
2021   if (!result_retriever_or.ok()) {
2022     TransformStatus(result_retriever_or.status(), result_status);
2023     query_stats->set_document_retrieval_latency_ms(
2024         component_timer->GetElapsedMilliseconds());
2025     return result_proto;
2026   }
2027   std::unique_ptr<ResultRetrieverV2> result_retriever =
2028       std::move(result_retriever_or).ValueOrDie();
2029 
2030   libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
2031       page_result_info_or = result_state_manager_->CacheAndRetrieveFirstPage(
2032           std::move(ranker), std::move(parent_result_adjustment_info),
2033           std::move(child_result_adjustment_info), result_spec,
2034           *document_store_, *result_retriever, current_time_ms);
2035   if (!page_result_info_or.ok()) {
2036     TransformStatus(page_result_info_or.status(), result_status);
2037     query_stats->set_document_retrieval_latency_ms(
2038         component_timer->GetElapsedMilliseconds());
2039     return result_proto;
2040   }
2041   std::pair<uint64_t, PageResult> page_result_info =
2042       std::move(page_result_info_or).ValueOrDie();
2043 
2044   // Assembles the final search result proto
2045   result_proto.mutable_results()->Reserve(
2046       page_result_info.second.results.size());
2047 
2048   int32_t child_count = 0;
2049   for (SearchResultProto::ResultProto& result :
2050        page_result_info.second.results) {
2051     child_count += result.joined_results_size();
2052     result_proto.mutable_results()->Add(std::move(result));
2053   }
2054 
2055   result_status->set_code(StatusProto::OK);
2056   if (page_result_info.first != kInvalidNextPageToken) {
2057     result_proto.set_next_page_token(page_result_info.first);
2058   }
2059 
2060   query_stats->set_document_retrieval_latency_ms(
2061       component_timer->GetElapsedMilliseconds());
2062   query_stats->set_num_results_returned_current_page(
2063       result_proto.results_size());
2064 
2065   query_stats->set_num_joined_results_returned_current_page(child_count);
2066 
2067   query_stats->set_num_results_with_snippets(
2068       page_result_info.second.num_results_with_snippets);
2069   return result_proto;
2070 }
2071 
ProcessQueryAndScore(const SearchSpecProto & search_spec,const ScoringSpecProto & scoring_spec,const ResultSpecProto & result_spec,const JoinChildrenFetcher * join_children_fetcher,int64_t current_time_ms)2072 IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
2073     const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
2074     const ResultSpecProto& result_spec,
2075     const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms) {
2076   std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
2077 
2078   // Gets unordered results from query processor
2079   auto query_processor_or = QueryProcessor::Create(
2080       index_.get(), integer_index_.get(), language_segmenter_.get(),
2081       normalizer_.get(), document_store_.get(), schema_store_.get());
2082   if (!query_processor_or.ok()) {
2083     return QueryScoringResults(
2084         std::move(query_processor_or).status(), /*query_terms_in=*/{},
2085         /*scored_document_hits_in=*/{},
2086         /*parse_query_latency_ms_in=*/component_timer->GetElapsedMilliseconds(),
2087         /*scoring_latency_ms_in=*/0);
2088   }
2089   std::unique_ptr<QueryProcessor> query_processor =
2090       std::move(query_processor_or).ValueOrDie();
2091 
2092   auto ranking_strategy_or = GetRankingStrategyFromScoringSpec(scoring_spec);
2093   libtextclassifier3::StatusOr<QueryResults> query_results_or;
2094   if (ranking_strategy_or.ok()) {
2095     query_results_or = query_processor->ParseSearch(
2096         search_spec, ranking_strategy_or.ValueOrDie(), current_time_ms);
2097   } else {
2098     query_results_or = ranking_strategy_or.status();
2099   }
2100   if (!query_results_or.ok()) {
2101     return QueryScoringResults(
2102         std::move(query_results_or).status(), /*query_terms_in=*/{},
2103         /*scored_document_hits_in=*/{},
2104         /*parse_query_latency_ms_in=*/component_timer->GetElapsedMilliseconds(),
2105         /*scoring_latency_ms_in=*/0);
2106   }
2107   QueryResults query_results = std::move(query_results_or).ValueOrDie();
2108   int64_t parse_query_latency_ms = component_timer->GetElapsedMilliseconds();
2109 
2110   component_timer = clock_->GetNewTimer();
2111   // Scores but does not rank the results.
2112   libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>>
2113       scoring_processor_or = ScoringProcessor::Create(
2114           scoring_spec, document_store_.get(), schema_store_.get(),
2115           current_time_ms, join_children_fetcher);
2116   if (!scoring_processor_or.ok()) {
2117     return QueryScoringResults(std::move(scoring_processor_or).status(),
2118                                std::move(query_results.query_terms),
2119                                /*scored_document_hits_in=*/{},
2120                                parse_query_latency_ms,
2121                                /*scoring_latency_ms_in=*/0);
2122   }
2123   std::unique_ptr<ScoringProcessor> scoring_processor =
2124       std::move(scoring_processor_or).ValueOrDie();
2125   std::vector<ScoredDocumentHit> scored_document_hits =
2126       scoring_processor->Score(std::move(query_results.root_iterator),
2127                                performance_configuration_.num_to_score,
2128                                &query_results.query_term_iterators);
2129   int64_t scoring_latency_ms = component_timer->GetElapsedMilliseconds();
2130 
2131   return QueryScoringResults(libtextclassifier3::Status::OK,
2132                              std::move(query_results.query_terms),
2133                              std::move(scored_document_hits),
2134                              parse_query_latency_ms, scoring_latency_ms);
2135 }
2136 
GetNextPage(uint64_t next_page_token)2137 SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) {
2138   SearchResultProto result_proto;
2139   StatusProto* result_status = result_proto.mutable_status();
2140 
2141   QueryStatsProto* query_stats = result_proto.mutable_query_stats();
2142   query_stats->set_is_first_page(false);
2143   std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
2144   // ResultStateManager has its own writer lock, so here we only need a reader
2145   // lock for other components.
2146   absl_ports::shared_lock l(&mutex_);
2147   query_stats->set_lock_acquisition_latency_ms(
2148       overall_timer->GetElapsedMilliseconds());
2149   if (!initialized_) {
2150     result_status->set_code(StatusProto::FAILED_PRECONDITION);
2151     result_status->set_message("IcingSearchEngine has not been initialized!");
2152     return result_proto;
2153   }
2154 
2155   auto result_retriever_or =
2156       ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
2157                                 language_segmenter_.get(), normalizer_.get());
2158   if (!result_retriever_or.ok()) {
2159     TransformStatus(result_retriever_or.status(), result_status);
2160     return result_proto;
2161   }
2162   std::unique_ptr<ResultRetrieverV2> result_retriever =
2163       std::move(result_retriever_or).ValueOrDie();
2164 
2165   int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
2166   libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
2167       page_result_info_or = result_state_manager_->GetNextPage(
2168           next_page_token, *result_retriever, current_time_ms);
2169   if (!page_result_info_or.ok()) {
2170     if (absl_ports::IsNotFound(page_result_info_or.status())) {
2171       // NOT_FOUND means an empty result.
2172       result_status->set_code(StatusProto::OK);
2173     } else {
2174       // Real error, pass up.
2175       TransformStatus(page_result_info_or.status(), result_status);
2176     }
2177     return result_proto;
2178   }
2179 
2180   std::pair<uint64_t, PageResult> page_result_info =
2181       std::move(page_result_info_or).ValueOrDie();
2182   query_stats->set_requested_page_size(
2183       page_result_info.second.requested_page_size);
2184 
2185   // Assembles the final search result proto
2186   result_proto.mutable_results()->Reserve(
2187       page_result_info.second.results.size());
2188 
2189   int32_t child_count = 0;
2190   for (SearchResultProto::ResultProto& result :
2191        page_result_info.second.results) {
2192     child_count += result.joined_results_size();
2193     result_proto.mutable_results()->Add(std::move(result));
2194   }
2195 
2196   result_status->set_code(StatusProto::OK);
2197   if (page_result_info.first != kInvalidNextPageToken) {
2198     result_proto.set_next_page_token(page_result_info.first);
2199   }
2200 
2201   // The only thing that we're doing is document retrieval. So document
2202   // retrieval latency and overall latency are the same and can use the same
2203   // timer.
2204   query_stats->set_document_retrieval_latency_ms(
2205       overall_timer->GetElapsedMilliseconds());
2206   query_stats->set_latency_ms(overall_timer->GetElapsedMilliseconds());
2207   query_stats->set_num_results_returned_current_page(
2208       result_proto.results_size());
2209   query_stats->set_num_results_with_snippets(
2210       page_result_info.second.num_results_with_snippets);
2211   query_stats->set_num_joined_results_returned_current_page(child_count);
2212 
2213   return result_proto;
2214 }
2215 
InvalidateNextPageToken(uint64_t next_page_token)2216 void IcingSearchEngine::InvalidateNextPageToken(uint64_t next_page_token) {
2217   absl_ports::shared_lock l(&mutex_);
2218   if (!initialized_) {
2219     ICING_LOG(ERROR) << "IcingSearchEngine has not been initialized!";
2220     return;
2221   }
2222   result_state_manager_->InvalidateResultState(next_page_token);
2223 }
2224 
2225 libtextclassifier3::StatusOr<std::vector<DocumentId>>
OptimizeDocumentStore(OptimizeStatsProto * optimize_stats)2226 IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
2227   // Gets the current directory path and an empty tmp directory path for
2228   // document store optimization.
2229   const std::string current_document_dir =
2230       MakeDocumentDirectoryPath(options_.base_dir());
2231   const std::string temporary_document_dir =
2232       MakeDocumentTemporaryDirectoryPath(options_.base_dir());
2233   if (!filesystem_->DeleteDirectoryRecursively(
2234           temporary_document_dir.c_str()) ||
2235       !filesystem_->CreateDirectoryRecursively(
2236           temporary_document_dir.c_str())) {
2237     return absl_ports::AbortedError(absl_ports::StrCat(
2238         "Failed to create a tmp directory: ", temporary_document_dir));
2239   }
2240 
2241   // Copies valid document data to tmp directory
2242   libtextclassifier3::StatusOr<std::vector<DocumentId>>
2243       document_id_old_to_new_or = document_store_->OptimizeInto(
2244           temporary_document_dir, language_segmenter_.get(),
2245           options_.document_store_namespace_id_fingerprint(), optimize_stats);
2246 
2247   // Handles error if any
2248   if (!document_id_old_to_new_or.ok()) {
2249     filesystem_->DeleteDirectoryRecursively(temporary_document_dir.c_str());
2250     return absl_ports::Annotate(
2251         absl_ports::AbortedError("Failed to optimize document store"),
2252         document_id_old_to_new_or.status().error_message());
2253   }
2254 
2255   // result_state_manager_ depends on document_store_. So we need to reset it at
2256   // the same time that we reset the document_store_.
2257   result_state_manager_.reset();
2258   document_store_.reset();
2259 
2260   // When swapping files, always put the current working directory at the
2261   // second place because it is renamed at the latter position so we're less
2262   // vulnerable to errors.
2263   if (!filesystem_->SwapFiles(temporary_document_dir.c_str(),
2264                               current_document_dir.c_str())) {
2265     ICING_LOG(ERROR) << "Failed to swap files";
2266 
2267     // Ensures that current directory is still present.
2268     if (!filesystem_->CreateDirectoryRecursively(
2269             current_document_dir.c_str())) {
2270       // Can't even create the old directory. Mark as uninitialized and return
2271       // INTERNAL.
2272       initialized_ = false;
2273       return absl_ports::InternalError(
2274           "Failed to create file directory for document store");
2275     }
2276 
2277     // Tries to rebuild document store if swapping fails, to avoid leaving the
2278     // system in the broken state for future operations.
2279     auto create_result_or = DocumentStore::Create(
2280         filesystem_.get(), current_document_dir, clock_.get(),
2281         schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false,
2282         options_.document_store_namespace_id_fingerprint(),
2283         options_.compression_level(), /*initialize_stats=*/nullptr);
2284     // TODO(b/144458732): Implement a more robust version of
2285     // TC_ASSIGN_OR_RETURN that can support error logging.
2286     if (!create_result_or.ok()) {
2287       // Unable to create DocumentStore from the old file. Mark as uninitialized
2288       // and return INTERNAL.
2289       initialized_ = false;
2290       ICING_LOG(ERROR) << "Failed to create document store instance";
2291       return absl_ports::Annotate(
2292           absl_ports::InternalError("Failed to create document store instance"),
2293           create_result_or.status().error_message());
2294     }
2295     document_store_ = std::move(create_result_or.ValueOrDie().document_store);
2296     result_state_manager_ = std::make_unique<ResultStateManager>(
2297         performance_configuration_.max_num_total_hits, *document_store_);
2298 
2299     // Potential data loss
2300     // TODO(b/147373249): Find a way to detect true data loss error
2301     return absl_ports::DataLossError(
2302         "Failed to optimize document store, there might be data loss");
2303   }
2304 
2305   // Recreates the doc store instance
2306   auto create_result_or = DocumentStore::Create(
2307       filesystem_.get(), current_document_dir, clock_.get(),
2308       schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false,
2309       options_.document_store_namespace_id_fingerprint(),
2310       options_.compression_level(), /*initialize_stats=*/nullptr);
2311   if (!create_result_or.ok()) {
2312     // Unable to create DocumentStore from the new file. Mark as uninitialized
2313     // and return INTERNAL.
2314     initialized_ = false;
2315     return absl_ports::InternalError(
2316         "Document store has been optimized, but a valid document store "
2317         "instance can't be created");
2318   }
2319   document_store_ = std::move(create_result_or.ValueOrDie().document_store);
2320   result_state_manager_ = std::make_unique<ResultStateManager>(
2321       performance_configuration_.max_num_total_hits, *document_store_);
2322 
2323   // Deletes tmp directory
2324   if (!filesystem_->DeleteDirectoryRecursively(
2325           temporary_document_dir.c_str())) {
2326     ICING_LOG(ERROR) << "Document store has been optimized, but it failed to "
2327                         "delete temporary file directory";
2328   }
2329   return document_id_old_to_new_or;
2330 }
2331 
2332 IcingSearchEngine::IndexRestorationResult
RestoreIndexIfNeeded()2333 IcingSearchEngine::RestoreIndexIfNeeded() {
2334   DocumentId last_stored_document_id =
2335       document_store_->last_added_document_id();
2336   if (last_stored_document_id == index_->last_added_document_id() &&
2337       last_stored_document_id == integer_index_->last_added_document_id() &&
2338       last_stored_document_id ==
2339           qualified_id_join_index_->last_added_document_id()) {
2340     // No need to recover.
2341     return {libtextclassifier3::Status::OK, false, false, false};
2342   }
2343 
2344   if (last_stored_document_id == kInvalidDocumentId) {
2345     // Document store is empty but index is not. Clear the index.
2346     return {ClearAllIndices(), false, false, false};
2347   }
2348 
2349   // Truncate indices first.
2350   auto truncate_result_or = TruncateIndicesTo(last_stored_document_id);
2351   if (!truncate_result_or.ok()) {
2352     return {std::move(truncate_result_or).status(), false, false, false};
2353   }
2354   TruncateIndexResult truncate_result =
2355       std::move(truncate_result_or).ValueOrDie();
2356 
2357   if (truncate_result.first_document_to_reindex > last_stored_document_id) {
2358     // Nothing to restore. Just return.
2359     return {libtextclassifier3::Status::OK, false, false, false};
2360   }
2361 
2362   auto data_indexing_handlers_or = CreateDataIndexingHandlers();
2363   if (!data_indexing_handlers_or.ok()) {
2364     return {data_indexing_handlers_or.status(),
2365             truncate_result.index_needed_restoration,
2366             truncate_result.integer_index_needed_restoration,
2367             truncate_result.qualified_id_join_index_needed_restoration};
2368   }
2369   // By using recovery_mode for IndexProcessor, we're able to replay documents
2370   // from smaller document id and it will skip documents that are already been
2371   // indexed.
2372   IndexProcessor index_processor(
2373       std::move(data_indexing_handlers_or).ValueOrDie(), clock_.get(),
2374       /*recovery_mode=*/true);
2375 
2376   ICING_VLOG(1) << "Restoring index by replaying documents from document id "
2377                 << truncate_result.first_document_to_reindex
2378                 << " to document id " << last_stored_document_id;
2379   libtextclassifier3::Status overall_status;
2380   for (DocumentId document_id = truncate_result.first_document_to_reindex;
2381        document_id <= last_stored_document_id; ++document_id) {
2382     libtextclassifier3::StatusOr<DocumentProto> document_or =
2383         document_store_->Get(document_id);
2384 
2385     if (!document_or.ok()) {
2386       if (absl_ports::IsInvalidArgument(document_or.status()) ||
2387           absl_ports::IsNotFound(document_or.status())) {
2388         // Skips invalid and non-existing documents.
2389         continue;
2390       } else {
2391         // Returns other errors
2392         return {document_or.status(), truncate_result.index_needed_restoration,
2393                 truncate_result.integer_index_needed_restoration,
2394                 truncate_result.qualified_id_join_index_needed_restoration};
2395       }
2396     }
2397     DocumentProto document(std::move(document_or).ValueOrDie());
2398 
2399     libtextclassifier3::StatusOr<TokenizedDocument> tokenized_document_or =
2400         TokenizedDocument::Create(schema_store_.get(),
2401                                   language_segmenter_.get(),
2402                                   std::move(document));
2403     if (!tokenized_document_or.ok()) {
2404       return {tokenized_document_or.status(),
2405               truncate_result.index_needed_restoration,
2406               truncate_result.integer_index_needed_restoration,
2407               truncate_result.qualified_id_join_index_needed_restoration};
2408     }
2409     TokenizedDocument tokenized_document(
2410         std::move(tokenized_document_or).ValueOrDie());
2411 
2412     libtextclassifier3::Status status =
2413         index_processor.IndexDocument(tokenized_document, document_id);
2414     if (!status.ok()) {
2415       if (!absl_ports::IsDataLoss(status)) {
2416         // Real error. Stop recovering and pass it up.
2417         return {status, truncate_result.index_needed_restoration,
2418                 truncate_result.integer_index_needed_restoration,
2419                 truncate_result.qualified_id_join_index_needed_restoration};
2420       }
2421       // FIXME: why can we skip data loss error here?
2422       // Just a data loss. Keep trying to add the remaining docs, but report the
2423       // data loss when we're done.
2424       overall_status = status;
2425     }
2426   }
2427 
2428   return {overall_status, truncate_result.index_needed_restoration,
2429           truncate_result.integer_index_needed_restoration,
2430           truncate_result.qualified_id_join_index_needed_restoration};
2431 }
2432 
LostPreviousSchema()2433 libtextclassifier3::StatusOr<bool> IcingSearchEngine::LostPreviousSchema() {
2434   auto status_or = schema_store_->GetSchema();
2435   if (status_or.ok()) {
2436     // Found a schema.
2437     return false;
2438   }
2439 
2440   if (!absl_ports::IsNotFound(status_or.status())) {
2441     // Any other type of error
2442     return status_or.status();
2443   }
2444 
2445   // We know: We don't have a schema now.
2446   //
2447   // We know: If no documents have been added, then the last_added_document_id
2448   // will be invalid.
2449   //
2450   // So: If documents have been added before and we don't have a schema now,
2451   // then that means we must have had a schema at some point. Since we wouldn't
2452   // accept documents without a schema to validate them against.
2453   return document_store_->last_added_document_id() != kInvalidDocumentId;
2454 }
2455 
2456 libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DataIndexingHandler>>>
CreateDataIndexingHandlers()2457 IcingSearchEngine::CreateDataIndexingHandlers() {
2458   std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
2459 
2460   // Term index handler
2461   ICING_ASSIGN_OR_RETURN(std::unique_ptr<StringSectionIndexingHandler>
2462                              string_section_indexing_handler,
2463                          StringSectionIndexingHandler::Create(
2464                              clock_.get(), normalizer_.get(), index_.get()));
2465   handlers.push_back(std::move(string_section_indexing_handler));
2466 
2467   // Integer index handler
2468   ICING_ASSIGN_OR_RETURN(std::unique_ptr<IntegerSectionIndexingHandler>
2469                              integer_section_indexing_handler,
2470                          IntegerSectionIndexingHandler::Create(
2471                              clock_.get(), integer_index_.get()));
2472   handlers.push_back(std::move(integer_section_indexing_handler));
2473 
2474   // Qualified id joinable property index handler
2475   ICING_ASSIGN_OR_RETURN(std::unique_ptr<QualifiedIdJoinIndexingHandler>
2476                              qualified_id_joinable_property_indexing_handler,
2477                          QualifiedIdJoinIndexingHandler::Create(
2478                              clock_.get(), qualified_id_join_index_.get()));
2479   handlers.push_back(
2480       std::move(qualified_id_joinable_property_indexing_handler));
2481 
2482   return handlers;
2483 }
2484 
2485 libtextclassifier3::StatusOr<IcingSearchEngine::TruncateIndexResult>
TruncateIndicesTo(DocumentId last_stored_document_id)2486 IcingSearchEngine::TruncateIndicesTo(DocumentId last_stored_document_id) {
2487   // Attempt to truncate term index.
2488   // TruncateTo ensures that the index does not hold any data that is not
2489   // present in the ground truth. If the document store lost some documents,
2490   // TruncateTo will ensure that the index does not contain any hits from those
2491   // lost documents. If the index does not contain any hits for documents with
2492   // document id greater than last_stored_document_id, then TruncateTo will have
2493   // no effect.
2494   ICING_RETURN_IF_ERROR(index_->TruncateTo(last_stored_document_id));
2495 
2496   // Get last indexed document id for term index after truncating.
2497   DocumentId term_index_last_added_document_id =
2498       index_->last_added_document_id();
2499   DocumentId first_document_to_reindex =
2500       (term_index_last_added_document_id != kInvalidDocumentId)
2501           ? term_index_last_added_document_id + 1
2502           : kMinDocumentId;
2503   bool index_needed_restoration =
2504       (last_stored_document_id != term_index_last_added_document_id);
2505 
2506   // Attempt to truncate integer index.
2507   bool integer_index_needed_restoration = false;
2508   DocumentId integer_index_last_added_document_id =
2509       integer_index_->last_added_document_id();
2510   if (integer_index_last_added_document_id == kInvalidDocumentId ||
2511       last_stored_document_id > integer_index_last_added_document_id) {
2512     // If last_stored_document_id is greater than
2513     // integer_index_last_added_document_id, then we only have to replay docs
2514     // starting from integer_index_last_added_document_id + 1. Also use std::min
2515     // since we might need to replay even smaller doc ids for term index.
2516     integer_index_needed_restoration = true;
2517     if (integer_index_last_added_document_id != kInvalidDocumentId) {
2518       first_document_to_reindex = std::min(
2519           first_document_to_reindex, integer_index_last_added_document_id + 1);
2520     } else {
2521       first_document_to_reindex = kMinDocumentId;
2522     }
2523   } else if (last_stored_document_id < integer_index_last_added_document_id) {
2524     // Clear the entire integer index if last_stored_document_id is smaller than
2525     // integer_index_last_added_document_id, because there is no way to remove
2526     // data with doc_id > last_stored_document_id from integer index and we have
2527     // to rebuild.
2528     ICING_RETURN_IF_ERROR(integer_index_->Clear());
2529 
2530     // Since the entire integer index is discarded, we start to rebuild it by
2531     // setting first_document_to_reindex to kMinDocumentId.
2532     integer_index_needed_restoration = true;
2533     first_document_to_reindex = kMinDocumentId;
2534   }
2535 
2536   // Attempt to truncate qualified id join index
2537   bool qualified_id_join_index_needed_restoration = false;
2538   DocumentId qualified_id_join_index_last_added_document_id =
2539       qualified_id_join_index_->last_added_document_id();
2540   if (qualified_id_join_index_last_added_document_id == kInvalidDocumentId ||
2541       last_stored_document_id >
2542           qualified_id_join_index_last_added_document_id) {
2543     // If last_stored_document_id is greater than
2544     // qualified_id_join_index_last_added_document_id, then we only have to
2545     // replay docs starting from (qualified_id_join_index_last_added_document_id
2546     // + 1). Also use std::min since we might need to replay even smaller doc
2547     // ids for other components.
2548     qualified_id_join_index_needed_restoration = true;
2549     if (qualified_id_join_index_last_added_document_id != kInvalidDocumentId) {
2550       first_document_to_reindex =
2551           std::min(first_document_to_reindex,
2552                    qualified_id_join_index_last_added_document_id + 1);
2553     } else {
2554       first_document_to_reindex = kMinDocumentId;
2555     }
2556   } else if (last_stored_document_id <
2557              qualified_id_join_index_last_added_document_id) {
2558     // Clear the entire qualified id join index if last_stored_document_id is
2559     // smaller than qualified_id_join_index_last_added_document_id, because
2560     // there is no way to remove data with doc_id > last_stored_document_id from
2561     // join index efficiently and we have to rebuild.
2562     ICING_RETURN_IF_ERROR(qualified_id_join_index_->Clear());
2563 
2564     // Since the entire qualified id join index is discarded, we start to
2565     // rebuild it by setting first_document_to_reindex to kMinDocumentId.
2566     qualified_id_join_index_needed_restoration = true;
2567     first_document_to_reindex = kMinDocumentId;
2568   }
2569 
2570   return TruncateIndexResult(first_document_to_reindex,
2571                              index_needed_restoration,
2572                              integer_index_needed_restoration,
2573                              qualified_id_join_index_needed_restoration);
2574 }
2575 
DiscardDerivedFiles()2576 libtextclassifier3::Status IcingSearchEngine::DiscardDerivedFiles() {
2577   if (schema_store_ != nullptr || document_store_ != nullptr ||
2578       index_ != nullptr || integer_index_ != nullptr ||
2579       qualified_id_join_index_ != nullptr) {
2580     return absl_ports::FailedPreconditionError(
2581         "Cannot discard derived files while having valid instances");
2582   }
2583 
2584   // Schema store
2585   ICING_RETURN_IF_ERROR(
2586       SchemaStore::DiscardDerivedFiles(filesystem_.get(), options_.base_dir()));
2587 
2588   // Document store
2589   ICING_RETURN_IF_ERROR(DocumentStore::DiscardDerivedFiles(
2590       filesystem_.get(), options_.base_dir()));
2591 
2592   // Term index
2593   if (!filesystem_->DeleteDirectoryRecursively(
2594           MakeIndexDirectoryPath(options_.base_dir()).c_str())) {
2595     return absl_ports::InternalError("Failed to discard index");
2596   }
2597 
2598   // Integer index
2599   if (!filesystem_->DeleteDirectoryRecursively(
2600           MakeIntegerIndexWorkingPath(options_.base_dir()).c_str())) {
2601     return absl_ports::InternalError("Failed to discard integer index");
2602   }
2603 
2604   // Qualified id join index
2605   if (!filesystem_->DeleteDirectoryRecursively(
2606           MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir()).c_str())) {
2607     return absl_ports::InternalError(
2608         "Failed to discard qualified id join index");
2609   }
2610 
2611   return libtextclassifier3::Status::OK;
2612 }
2613 
ClearSearchIndices()2614 libtextclassifier3::Status IcingSearchEngine::ClearSearchIndices() {
2615   ICING_RETURN_IF_ERROR(index_->Reset());
2616   ICING_RETURN_IF_ERROR(integer_index_->Clear());
2617   return libtextclassifier3::Status::OK;
2618 }
2619 
ClearJoinIndices()2620 libtextclassifier3::Status IcingSearchEngine::ClearJoinIndices() {
2621   return qualified_id_join_index_->Clear();
2622 }
2623 
ClearAllIndices()2624 libtextclassifier3::Status IcingSearchEngine::ClearAllIndices() {
2625   ICING_RETURN_IF_ERROR(ClearSearchIndices());
2626   ICING_RETURN_IF_ERROR(ClearJoinIndices());
2627   return libtextclassifier3::Status::OK;
2628 }
2629 
Reset()2630 ResetResultProto IcingSearchEngine::Reset() {
2631   absl_ports::unique_lock l(&mutex_);
2632   return ResetInternal();
2633 }
2634 
ResetInternal()2635 ResetResultProto IcingSearchEngine::ResetInternal() {
2636   ICING_VLOG(1) << "Resetting IcingSearchEngine";
2637 
2638   ResetResultProto result_proto;
2639   StatusProto* result_status = result_proto.mutable_status();
2640 
2641   initialized_ = false;
2642   ResetMembers();
2643   if (!filesystem_->DeleteDirectoryRecursively(options_.base_dir().c_str())) {
2644     result_status->set_code(StatusProto::INTERNAL);
2645     return result_proto;
2646   }
2647 
2648   if (InternalInitialize().status().code() != StatusProto::OK) {
2649     // We shouldn't hit the following Initialize errors:
2650     //   NOT_FOUND: all data was cleared, we aren't expecting anything
2651     //   DATA_LOSS: all data was cleared, we aren't expecting anything
2652     //   RESOURCE_EXHAUSTED: just deleted files, shouldn't run out of space
2653     //
2654     // We can't tell if Initialize failed and left Icing in an inconsistent
2655     // state or if it was a temporary I/O error. Group everything under INTERNAL
2656     // to be safe.
2657     //
2658     // TODO(b/147699081): Once Initialize returns the proper ABORTED/INTERNAL
2659     // status code, we can just propagate it up from here.
2660     result_status->set_code(StatusProto::INTERNAL);
2661     return result_proto;
2662   }
2663 
2664   result_status->set_code(StatusProto::OK);
2665   return result_proto;
2666 }
2667 
SearchSuggestions(const SuggestionSpecProto & suggestion_spec)2668 SuggestionResponse IcingSearchEngine::SearchSuggestions(
2669     const SuggestionSpecProto& suggestion_spec) {
2670   // TODO(b/146008613) Explore ideas to make this function read-only.
2671   absl_ports::unique_lock l(&mutex_);
2672   SuggestionResponse response;
2673   StatusProto* response_status = response.mutable_status();
2674   if (!initialized_) {
2675     response_status->set_code(StatusProto::FAILED_PRECONDITION);
2676     response_status->set_message("IcingSearchEngine has not been initialized!");
2677     return response;
2678   }
2679 
2680   libtextclassifier3::Status status =
2681       ValidateSuggestionSpec(suggestion_spec, performance_configuration_);
2682   if (!status.ok()) {
2683     TransformStatus(status, response_status);
2684     return response;
2685   }
2686 
2687   // Create the suggestion processor.
2688   auto suggestion_processor_or = SuggestionProcessor::Create(
2689       index_.get(), integer_index_.get(), language_segmenter_.get(),
2690       normalizer_.get(), document_store_.get(), schema_store_.get());
2691   if (!suggestion_processor_or.ok()) {
2692     TransformStatus(suggestion_processor_or.status(), response_status);
2693     return response;
2694   }
2695   std::unique_ptr<SuggestionProcessor> suggestion_processor =
2696       std::move(suggestion_processor_or).ValueOrDie();
2697 
2698   // Run suggestion based on given SuggestionSpec.
2699   int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
2700   libtextclassifier3::StatusOr<std::vector<TermMetadata>> terms_or =
2701       suggestion_processor->QuerySuggestions(suggestion_spec, current_time_ms);
2702   if (!terms_or.ok()) {
2703     TransformStatus(terms_or.status(), response_status);
2704     return response;
2705   }
2706 
2707   // Convert vector<TermMetaData> into final SuggestionResponse proto.
2708   for (TermMetadata& term : terms_or.ValueOrDie()) {
2709     SuggestionResponse::Suggestion suggestion;
2710     suggestion.set_query(std::move(term.content));
2711     response.mutable_suggestions()->Add(std::move(suggestion));
2712   }
2713   response_status->set_code(StatusProto::OK);
2714   return response;
2715 }
2716 
2717 }  // namespace lib
2718 }  // namespace icing
2719