1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/icing-search-engine.h"
16
17 #include <cstdint>
18 #include <memory>
19 #include <string>
20 #include <string_view>
21 #include <unordered_map>
22 #include <unordered_set>
23 #include <utility>
24 #include <vector>
25
26 #include "icing/text_classifier/lib3/utils/base/status.h"
27 #include "icing/text_classifier/lib3/utils/base/statusor.h"
28 #include "icing/absl_ports/annotate.h"
29 #include "icing/absl_ports/canonical_errors.h"
30 #include "icing/absl_ports/mutex.h"
31 #include "icing/absl_ports/str_cat.h"
32 #include "icing/file/destructible-file.h"
33 #include "icing/file/file-backed-proto.h"
34 #include "icing/file/filesystem.h"
35 #include "icing/file/version-util.h"
36 #include "icing/index/data-indexing-handler.h"
37 #include "icing/index/hit/doc-hit-info.h"
38 #include "icing/index/index-processor.h"
39 #include "icing/index/index.h"
40 #include "icing/index/integer-section-indexing-handler.h"
41 #include "icing/index/iterator/doc-hit-info-iterator.h"
42 #include "icing/index/numeric/integer-index.h"
43 #include "icing/index/string-section-indexing-handler.h"
44 #include "icing/join/join-processor.h"
45 #include "icing/join/qualified-id-join-indexing-handler.h"
46 #include "icing/join/qualified-id-type-joinable-index.h"
47 #include "icing/legacy/index/icing-filesystem.h"
48 #include "icing/portable/endian.h"
49 #include "icing/proto/debug.pb.h"
50 #include "icing/proto/document.pb.h"
51 #include "icing/proto/initialize.pb.h"
52 #include "icing/proto/internal/optimize.pb.h"
53 #include "icing/proto/logging.pb.h"
54 #include "icing/proto/optimize.pb.h"
55 #include "icing/proto/persist.pb.h"
56 #include "icing/proto/reset.pb.h"
57 #include "icing/proto/schema.pb.h"
58 #include "icing/proto/scoring.pb.h"
59 #include "icing/proto/search.pb.h"
60 #include "icing/proto/status.pb.h"
61 #include "icing/proto/storage.pb.h"
62 #include "icing/proto/term.pb.h"
63 #include "icing/proto/usage.pb.h"
64 #include "icing/query/advanced_query_parser/lexer.h"
65 #include "icing/query/query-features.h"
66 #include "icing/query/query-processor.h"
67 #include "icing/query/query-results.h"
68 #include "icing/query/suggestion-processor.h"
69 #include "icing/result/page-result.h"
70 #include "icing/result/projection-tree.h"
71 #include "icing/result/projector.h"
72 #include "icing/result/result-adjustment-info.h"
73 #include "icing/result/result-retriever-v2.h"
74 #include "icing/schema/schema-store.h"
75 #include "icing/schema/schema-util.h"
76 #include "icing/schema/section.h"
77 #include "icing/scoring/advanced_scoring/score-expression.h"
78 #include "icing/scoring/priority-queue-scored-document-hits-ranker.h"
79 #include "icing/scoring/scored-document-hit.h"
80 #include "icing/scoring/scored-document-hits-ranker.h"
81 #include "icing/scoring/scoring-processor.h"
82 #include "icing/store/document-id.h"
83 #include "icing/store/document-store.h"
84 #include "icing/tokenization/language-segmenter-factory.h"
85 #include "icing/tokenization/language-segmenter.h"
86 #include "icing/transform/normalizer-factory.h"
87 #include "icing/transform/normalizer.h"
88 #include "icing/util/clock.h"
89 #include "icing/util/crc32.h"
90 #include "icing/util/logging.h"
91 #include "icing/util/status-macros.h"
92 #include "icing/util/tokenized-document.h"
93 #include "unicode/uloc.h"
94
95 namespace icing {
96 namespace lib {
97
98 namespace {
99
// Names of the individual files that the path helpers in this file append to
// the base directory.
constexpr std::string_view kVersionFilename = "version";
constexpr std::string_view kSetSchemaMarkerFilename = "set_schema_marker";
constexpr std::string_view kInitMarkerFilename = "init_marker";
constexpr std::string_view kOptimizeStatusFilename = "optimize_status";

// Names of the per-component subfolders.
constexpr std::string_view kSchemaSubfolderName = "schema_dir";
constexpr std::string_view kDocumentSubfolderName = "document_dir";
constexpr std::string_view kIndexSubfolderName = "index_dir";
constexpr std::string_view kIntegerIndexSubfolderName = "integer_index_dir";
constexpr std::string_view kQualifiedIdJoinIndexSubfolderName =
    "qualified_id_join_index_dir";

// How many unsuccessful initialization attempts from the current state are
// tolerated before all data is deleted and initialization restarts from a
// fresh state.
constexpr int kMaxUnsuccessfulInitAttempts = 5;
115
// A (namespace, schema type) pair. Two pairs are equal when both fields are
// equal, which makes the struct usable as a hash-map key together with
// NamespaceTypePairHasher.
struct NamespaceTypePair {
  std::string namespace_;
  std::string type;

  bool operator==(const NamespaceTypePair& other) const {
    return namespace_ == other.namespace_ && type == other.type;
  }
};

// Hash functor for NamespaceTypePair.
//
// The two field hashes are mixed with an order-sensitive combine instead of a
// plain XOR. A plain XOR maps every pair whose namespace equals its type to 0
// and gives (a, b) the same hash as (b, a), which needlessly piles such keys
// into the same bucket.
struct NamespaceTypePairHasher {
  std::size_t operator()(const NamespaceTypePair& pair) const {
    const std::size_t namespace_hash =
        std::hash<std::string>()(pair.namespace_);
    const std::size_t type_hash = std::hash<std::string>()(pair.type);
    // Same mixing step as the widely used boost::hash_combine.
    return namespace_hash ^ (type_hash + 0x9e3779b9 + (namespace_hash << 6) +
                             (namespace_hash >> 2));
  }
};
132
ValidateResultSpec(const DocumentStore * document_store,const ResultSpecProto & result_spec)133 libtextclassifier3::Status ValidateResultSpec(
134 const DocumentStore* document_store, const ResultSpecProto& result_spec) {
135 if (result_spec.num_per_page() < 0) {
136 return absl_ports::InvalidArgumentError(
137 "ResultSpecProto.num_per_page cannot be negative.");
138 }
139 if (result_spec.num_total_bytes_per_page_threshold() <= 0) {
140 return absl_ports::InvalidArgumentError(
141 "ResultSpecProto.num_total_bytes_per_page_threshold cannot be "
142 "non-positive.");
143 }
144 // Validate ResultGroupings.
145 std::unordered_set<int32_t> unique_entry_ids;
146 ResultSpecProto::ResultGroupingType result_grouping_type =
147 result_spec.result_group_type();
148 for (const ResultSpecProto::ResultGrouping& result_grouping :
149 result_spec.result_groupings()) {
150 if (result_grouping.max_results() <= 0) {
151 return absl_ports::InvalidArgumentError(
152 "Cannot specify a result grouping with max results <= 0.");
153 }
154 for (const ResultSpecProto::ResultGrouping::Entry& entry :
155 result_grouping.entry_groupings()) {
156 const std::string& name_space = entry.namespace_();
157 const std::string& schema = entry.schema();
158 auto entry_id_or = document_store->GetResultGroupingEntryId(
159 result_grouping_type, name_space, schema);
160 if (!entry_id_or.ok()) {
161 continue;
162 }
163 int32_t entry_id = entry_id_or.ValueOrDie();
164 if (unique_entry_ids.find(entry_id) != unique_entry_ids.end()) {
165 return absl_ports::InvalidArgumentError(
166 "Entry Ids must be unique across result groups.");
167 }
168 unique_entry_ids.insert(entry_id);
169 }
170 }
171 return libtextclassifier3::Status::OK;
172 }
173
ValidateSearchSpec(const SearchSpecProto & search_spec,const PerformanceConfiguration & configuration)174 libtextclassifier3::Status ValidateSearchSpec(
175 const SearchSpecProto& search_spec,
176 const PerformanceConfiguration& configuration) {
177 if (search_spec.query().size() > configuration.max_query_length) {
178 return absl_ports::InvalidArgumentError(
179 absl_ports::StrCat("SearchSpecProto.query is longer than the maximum "
180 "allowed query length: ",
181 std::to_string(configuration.max_query_length)));
182 }
183 // Check that no unknown features have been enabled in the search spec.
184 std::unordered_set<Feature> query_features_set = GetQueryFeaturesSet();
185 for (const Feature feature : search_spec.enabled_features()) {
186 if (query_features_set.find(feature) == query_features_set.end()) {
187 return absl_ports::InvalidArgumentError(
188 absl_ports::StrCat("Unknown feature in "
189 "SearchSpecProto.enabled_features: ",
190 feature));
191 }
192 }
193 return libtextclassifier3::Status::OK;
194 }
195
ValidateSuggestionSpec(const SuggestionSpecProto & suggestion_spec,const PerformanceConfiguration & configuration)196 libtextclassifier3::Status ValidateSuggestionSpec(
197 const SuggestionSpecProto& suggestion_spec,
198 const PerformanceConfiguration& configuration) {
199 if (suggestion_spec.prefix().empty()) {
200 return absl_ports::InvalidArgumentError(
201 absl_ports::StrCat("SuggestionSpecProto.prefix is empty!"));
202 }
203 if (suggestion_spec.scoring_spec().scoring_match_type() ==
204 TermMatchType::UNKNOWN) {
205 return absl_ports::InvalidArgumentError(
206 absl_ports::StrCat("SuggestionSpecProto.term_match_type is unknown!"));
207 }
208 if (suggestion_spec.num_to_return() <= 0) {
209 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
210 "SuggestionSpecProto.num_to_return must be positive."));
211 }
212 if (suggestion_spec.prefix().size() > configuration.max_query_length) {
213 return absl_ports::InvalidArgumentError(
214 absl_ports::StrCat("SuggestionSpecProto.prefix is longer than the "
215 "maximum allowed prefix length: ",
216 std::to_string(configuration.max_query_length)));
217 }
218 return libtextclassifier3::Status::OK;
219 }
220
221 // Version file is a single file under base_dir containing version info of the
222 // existing data.
MakeVersionFilePath(const std::string & base_dir)223 std::string MakeVersionFilePath(const std::string& base_dir) {
224 return absl_ports::StrCat(base_dir, "/", kVersionFilename);
225 }
226
227 // Document store files are in a standalone subfolder for easier file
228 // management. We can delete and recreate the subfolder and not touch/affect
229 // anything else.
MakeDocumentDirectoryPath(const std::string & base_dir)230 std::string MakeDocumentDirectoryPath(const std::string& base_dir) {
231 return absl_ports::StrCat(base_dir, "/", kDocumentSubfolderName);
232 }
233
234 // Makes a temporary folder path for the document store which will be used
235 // during full optimization.
MakeDocumentTemporaryDirectoryPath(const std::string & base_dir)236 std::string MakeDocumentTemporaryDirectoryPath(const std::string& base_dir) {
237 return absl_ports::StrCat(base_dir, "/", kDocumentSubfolderName,
238 "_optimize_tmp");
239 }
240
241 // Index files are in a standalone subfolder because for easier file management.
242 // We can delete and recreate the subfolder and not touch/affect anything
243 // else.
MakeIndexDirectoryPath(const std::string & base_dir)244 std::string MakeIndexDirectoryPath(const std::string& base_dir) {
245 return absl_ports::StrCat(base_dir, "/", kIndexSubfolderName);
246 }
247
248 // Working path for integer index. Integer index is derived from
249 // PersistentStorage and it will take full ownership of this working path,
250 // including creation/deletion. See PersistentStorage for more details about
251 // working path.
MakeIntegerIndexWorkingPath(const std::string & base_dir)252 std::string MakeIntegerIndexWorkingPath(const std::string& base_dir) {
253 return absl_ports::StrCat(base_dir, "/", kIntegerIndexSubfolderName);
254 }
255
256 // Working path for qualified id join index. It is derived from
257 // PersistentStorage and it will take full ownership of this working path,
258 // including creation/deletion. See PersistentStorage for more details about
259 // working path.
MakeQualifiedIdJoinIndexWorkingPath(const std::string & base_dir)260 std::string MakeQualifiedIdJoinIndexWorkingPath(const std::string& base_dir) {
261 return absl_ports::StrCat(base_dir, "/", kQualifiedIdJoinIndexSubfolderName);
262 }
263
264 // SchemaStore files are in a standalone subfolder for easier file management.
265 // We can delete and recreate the subfolder and not touch/affect anything
266 // else.
MakeSchemaDirectoryPath(const std::string & base_dir)267 std::string MakeSchemaDirectoryPath(const std::string& base_dir) {
268 return absl_ports::StrCat(base_dir, "/", kSchemaSubfolderName);
269 }
270
MakeSetSchemaMarkerFilePath(const std::string & base_dir)271 std::string MakeSetSchemaMarkerFilePath(const std::string& base_dir) {
272 return absl_ports::StrCat(base_dir, "/", kSetSchemaMarkerFilename);
273 }
274
MakeInitMarkerFilePath(const std::string & base_dir)275 std::string MakeInitMarkerFilePath(const std::string& base_dir) {
276 return absl_ports::StrCat(base_dir, "/", kInitMarkerFilename);
277 }
278
TransformStatus(const libtextclassifier3::Status & internal_status,StatusProto * status_proto)279 void TransformStatus(const libtextclassifier3::Status& internal_status,
280 StatusProto* status_proto) {
281 StatusProto::Code code;
282 if (!internal_status.ok()) {
283 ICING_LOG(WARNING) << "Error: " << internal_status.error_code()
284 << ", Message: " << internal_status.error_message();
285 }
286 switch (internal_status.CanonicalCode()) {
287 case libtextclassifier3::StatusCode::OK:
288 code = StatusProto::OK;
289 break;
290 case libtextclassifier3::StatusCode::DATA_LOSS:
291 code = StatusProto::WARNING_DATA_LOSS;
292 break;
293 case libtextclassifier3::StatusCode::INVALID_ARGUMENT:
294 code = StatusProto::INVALID_ARGUMENT;
295 break;
296 case libtextclassifier3::StatusCode::NOT_FOUND:
297 code = StatusProto::NOT_FOUND;
298 break;
299 case libtextclassifier3::StatusCode::FAILED_PRECONDITION:
300 code = StatusProto::FAILED_PRECONDITION;
301 break;
302 case libtextclassifier3::StatusCode::ABORTED:
303 code = StatusProto::ABORTED;
304 break;
305 case libtextclassifier3::StatusCode::INTERNAL:
306 // TODO(b/147699081): Cleanup our internal use of INTERNAL since it
307 // doesn't match with what it *should* indicate as described in
308 // go/icing-library-apis.
309 code = StatusProto::INTERNAL;
310 break;
311 case libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED:
312 // TODO(b/147699081): Note that we don't detect all cases of OUT_OF_SPACE
313 // (e.g. if the document log is full). And we use RESOURCE_EXHAUSTED
314 // internally to indicate other resources are exhausted (e.g.
315 // DocHitInfos) - although none of these are exposed through the API.
316 // Consider separating the two cases out more clearly.
317 code = StatusProto::OUT_OF_SPACE;
318 break;
319 case libtextclassifier3::StatusCode::ALREADY_EXISTS:
320 code = StatusProto::ALREADY_EXISTS;
321 break;
322 case libtextclassifier3::StatusCode::CANCELLED:
323 [[fallthrough]];
324 case libtextclassifier3::StatusCode::UNKNOWN:
325 [[fallthrough]];
326 case libtextclassifier3::StatusCode::DEADLINE_EXCEEDED:
327 [[fallthrough]];
328 case libtextclassifier3::StatusCode::PERMISSION_DENIED:
329 [[fallthrough]];
330 case libtextclassifier3::StatusCode::OUT_OF_RANGE:
331 [[fallthrough]];
332 case libtextclassifier3::StatusCode::UNIMPLEMENTED:
333 [[fallthrough]];
334 case libtextclassifier3::StatusCode::UNAVAILABLE:
335 [[fallthrough]];
336 case libtextclassifier3::StatusCode::UNAUTHENTICATED:
337 // Other internal status codes aren't supported externally yet. If it
338 // should be supported, add another switch-case above.
339 ICING_LOG(ERROR) << "Internal status code "
340 << internal_status.error_code()
341 << " not supported in the external API";
342 code = StatusProto::UNKNOWN;
343 break;
344 }
345 status_proto->set_code(code);
346 status_proto->set_message(internal_status.error_message());
347 }
348
RetrieveAndAddDocumentInfo(const DocumentStore * document_store,DeleteByQueryResultProto & result_proto,std::unordered_map<NamespaceTypePair,DeleteByQueryResultProto::DocumentGroupInfo *,NamespaceTypePairHasher> & info_map,DocumentId document_id)349 libtextclassifier3::Status RetrieveAndAddDocumentInfo(
350 const DocumentStore* document_store, DeleteByQueryResultProto& result_proto,
351 std::unordered_map<NamespaceTypePair,
352 DeleteByQueryResultProto::DocumentGroupInfo*,
353 NamespaceTypePairHasher>& info_map,
354 DocumentId document_id) {
355 ICING_ASSIGN_OR_RETURN(DocumentProto document,
356 document_store->Get(document_id));
357 NamespaceTypePair key = {document.namespace_(), document.schema()};
358 auto iter = info_map.find(key);
359 if (iter == info_map.end()) {
360 auto entry = result_proto.add_deleted_documents();
361 entry->set_namespace_(std::move(document.namespace_()));
362 entry->set_schema(std::move(document.schema()));
363 entry->add_uris(std::move(document.uri()));
364 info_map[key] = entry;
365 } else {
366 iter->second->add_uris(std::move(document.uri()));
367 }
368 return libtextclassifier3::Status::OK;
369 }
370
ShouldRebuildIndex(const OptimizeStatsProto & optimize_stats,float optimize_rebuild_index_threshold)371 bool ShouldRebuildIndex(const OptimizeStatsProto& optimize_stats,
372 float optimize_rebuild_index_threshold) {
373 int num_invalid_documents = optimize_stats.num_deleted_documents() +
374 optimize_stats.num_expired_documents();
375 return num_invalid_documents >= optimize_stats.num_original_documents() *
376 optimize_rebuild_index_threshold;
377 }
378
379 // Useful method to get RankingStrategy if advanced scoring is enabled. When the
380 // "RelevanceScore" function is used in the advanced scoring expression,
381 // RankingStrategy will be treated as RELEVANCE_SCORE in order to prepare the
382 // necessary information needed for calculating relevance score.
383 libtextclassifier3::StatusOr<ScoringSpecProto::RankingStrategy::Code>
GetRankingStrategyFromScoringSpec(const ScoringSpecProto & scoring_spec)384 GetRankingStrategyFromScoringSpec(const ScoringSpecProto& scoring_spec) {
385 if (scoring_spec.advanced_scoring_expression().empty()) {
386 return scoring_spec.rank_by();
387 }
388 // TODO(b/261474063) The Lexer will be called again when creating the
389 // AdvancedScorer instance. Consider refactoring the code to allow the Lexer
390 // to be called only once.
391 Lexer lexer(scoring_spec.advanced_scoring_expression(),
392 Lexer::Language::SCORING);
393 ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
394 lexer.ExtractTokens());
395 for (const Lexer::LexerToken& token : lexer_tokens) {
396 if (token.type == Lexer::TokenType::FUNCTION_NAME &&
397 token.text == RelevanceScoreFunctionScoreExpression::kFunctionName) {
398 return ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE;
399 }
400 }
401 return ScoringSpecProto::RankingStrategy::NONE;
402 }
403
404 } // namespace
405
IcingSearchEngine(const IcingSearchEngineOptions & options,std::unique_ptr<const JniCache> jni_cache)406 IcingSearchEngine::IcingSearchEngine(const IcingSearchEngineOptions& options,
407 std::unique_ptr<const JniCache> jni_cache)
408 : IcingSearchEngine(options, std::make_unique<Filesystem>(),
409 std::make_unique<IcingFilesystem>(),
410 std::make_unique<Clock>(), std::move(jni_cache)) {}
411
IcingSearchEngine(IcingSearchEngineOptions options,std::unique_ptr<const Filesystem> filesystem,std::unique_ptr<const IcingFilesystem> icing_filesystem,std::unique_ptr<Clock> clock,std::unique_ptr<const JniCache> jni_cache)412 IcingSearchEngine::IcingSearchEngine(
413 IcingSearchEngineOptions options,
414 std::unique_ptr<const Filesystem> filesystem,
415 std::unique_ptr<const IcingFilesystem> icing_filesystem,
416 std::unique_ptr<Clock> clock, std::unique_ptr<const JniCache> jni_cache)
417 : options_(std::move(options)),
418 filesystem_(std::move(filesystem)),
419 icing_filesystem_(std::move(icing_filesystem)),
420 clock_(std::move(clock)),
421 jni_cache_(std::move(jni_cache)) {
422 ICING_VLOG(1) << "Creating IcingSearchEngine in dir: " << options_.base_dir();
423 }
424
~IcingSearchEngine()425 IcingSearchEngine::~IcingSearchEngine() {
426 if (initialized_) {
427 if (PersistToDisk(PersistType::FULL).status().code() != StatusProto::OK) {
428 ICING_LOG(ERROR)
429 << "Error persisting to disk in IcingSearchEngine destructor";
430 }
431 }
432 }
433
Initialize()434 InitializeResultProto IcingSearchEngine::Initialize() {
435 // This method does both read and write so we need a writer lock. Using two
436 // locks (reader and writer) has the chance to be interrupted during
437 // switching.
438 absl_ports::unique_lock l(&mutex_);
439 return InternalInitialize();
440 }
441
ResetMembers()442 void IcingSearchEngine::ResetMembers() {
443 schema_store_.reset();
444 document_store_.reset();
445 language_segmenter_.reset();
446 normalizer_.reset();
447 index_.reset();
448 integer_index_.reset();
449 qualified_id_join_index_.reset();
450 }
451
CheckInitMarkerFile(InitializeStatsProto * initialize_stats)452 libtextclassifier3::Status IcingSearchEngine::CheckInitMarkerFile(
453 InitializeStatsProto* initialize_stats) {
454 // Check to see if the marker file exists and if we've already passed our max
455 // number of init attempts.
456 std::string marker_filepath = MakeInitMarkerFilePath(options_.base_dir());
457 bool file_exists = filesystem_->FileExists(marker_filepath.c_str());
458 int network_init_attempts = 0;
459 int host_init_attempts = 0;
460
461 // Read the number of previous failed init attempts from the file. If it
462 // fails, then just assume the value is zero (the most likely reason for
463 // failure would be non-existence because the last init was successful
464 // anyways).
465 std::unique_ptr<ScopedFd> marker_file_fd = std::make_unique<ScopedFd>(
466 filesystem_->OpenForWrite(marker_filepath.c_str()));
467 libtextclassifier3::Status status;
468 if (file_exists &&
469 filesystem_->PRead(marker_file_fd->get(), &network_init_attempts,
470 sizeof(network_init_attempts), /*offset=*/0)) {
471 host_init_attempts = GNetworkToHostL(network_init_attempts);
472 if (host_init_attempts > kMaxUnsuccessfulInitAttempts) {
473 // We're tried and failed to init too many times. We need to throw
474 // everything out and start from scratch.
475 ResetMembers();
476 marker_file_fd.reset();
477
478 // Delete the entire base directory.
479 if (!filesystem_->DeleteDirectoryRecursively(
480 options_.base_dir().c_str())) {
481 return absl_ports::InternalError("Failed to delete icing base dir!");
482 }
483
484 // Create the base directory again and reopen marker file.
485 if (!filesystem_->CreateDirectoryRecursively(
486 options_.base_dir().c_str())) {
487 return absl_ports::InternalError("Failed to create icing base dir!");
488 }
489
490 marker_file_fd = std::make_unique<ScopedFd>(
491 filesystem_->OpenForWrite(marker_filepath.c_str()));
492
493 status = absl_ports::DataLossError(
494 "Encountered failed initialization limit. Cleared all data.");
495 host_init_attempts = 0;
496 }
497 }
498
499 // Use network_init_attempts here because we might have set host_init_attempts
500 // to 0 if it exceeded the max threshold.
501 initialize_stats->set_num_previous_init_failures(
502 GNetworkToHostL(network_init_attempts));
503
504 ++host_init_attempts;
505 network_init_attempts = GHostToNetworkL(host_init_attempts);
506 // Write the updated number of attempts before we get started.
507 if (!filesystem_->PWrite(marker_file_fd->get(), /*offset=*/0,
508 &network_init_attempts,
509 sizeof(network_init_attempts)) ||
510 !filesystem_->DataSync(marker_file_fd->get())) {
511 return absl_ports::InternalError(
512 "Failed to write and sync init marker file");
513 }
514
515 return status;
516 }
517
InternalInitialize()518 InitializeResultProto IcingSearchEngine::InternalInitialize() {
519 ICING_VLOG(1) << "Initializing IcingSearchEngine in dir: "
520 << options_.base_dir();
521
522 // Measure the latency of the initialization process.
523 std::unique_ptr<Timer> initialize_timer = clock_->GetNewTimer();
524
525 InitializeResultProto result_proto;
526 StatusProto* result_status = result_proto.mutable_status();
527 InitializeStatsProto* initialize_stats =
528 result_proto.mutable_initialize_stats();
529 if (initialized_) {
530 // Already initialized.
531 result_status->set_code(StatusProto::OK);
532 initialize_stats->set_latency_ms(
533 initialize_timer->GetElapsedMilliseconds());
534 initialize_stats->set_num_documents(document_store_->num_documents());
535 return result_proto;
536 }
537
538 // Now go ahead and try to initialize.
539 libtextclassifier3::Status status = InitializeMembers(initialize_stats);
540 if (status.ok() || absl_ports::IsDataLoss(status)) {
541 // We successfully initialized. We should delete the init marker file to
542 // indicate a successful init.
543 std::string marker_filepath = MakeInitMarkerFilePath(options_.base_dir());
544 if (!filesystem_->DeleteFile(marker_filepath.c_str())) {
545 status = absl_ports::InternalError("Failed to delete init marker file!");
546 } else {
547 initialized_ = true;
548 }
549 }
550 TransformStatus(status, result_status);
551 initialize_stats->set_latency_ms(initialize_timer->GetElapsedMilliseconds());
552 return result_proto;
553 }
554
InitializeMembers(InitializeStatsProto * initialize_stats)555 libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
556 InitializeStatsProto* initialize_stats) {
557 ICING_RETURN_ERROR_IF_NULL(initialize_stats);
558
559 // Make sure the base directory exists
560 if (!filesystem_->CreateDirectoryRecursively(options_.base_dir().c_str())) {
561 return absl_ports::InternalError(absl_ports::StrCat(
562 "Could not create directory: ", options_.base_dir()));
563 }
564
565 // Check to see if the marker file exists and if we've already passed our max
566 // number of init attempts.
567 libtextclassifier3::Status status = CheckInitMarkerFile(initialize_stats);
568 if (!status.ok() && !absl_ports::IsDataLoss(status)) {
569 return status;
570 }
571
572 // Read version file and determine the state change.
573 const std::string version_filepath = MakeVersionFilePath(options_.base_dir());
574 const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
575 ICING_ASSIGN_OR_RETURN(
576 version_util::VersionInfo version_info,
577 version_util::ReadVersion(*filesystem_, version_filepath, index_dir));
578 version_util::StateChange version_state_change =
579 version_util::GetVersionStateChange(version_info);
580 if (version_state_change != version_util::StateChange::kCompatible) {
581 // Step 1: migrate schema according to the version state change.
582 ICING_RETURN_IF_ERROR(SchemaStore::MigrateSchema(
583 filesystem_.get(), MakeSchemaDirectoryPath(options_.base_dir()),
584 version_state_change, version_util::kVersion));
585
586 // Step 2: discard all derived data
587 ICING_RETURN_IF_ERROR(DiscardDerivedFiles());
588
589 // Step 3: update version file
590 version_util::VersionInfo new_version_info(
591 version_util::kVersion,
592 std::max(version_info.max_version, version_util::kVersion));
593 ICING_RETURN_IF_ERROR(version_util::WriteVersion(
594 *filesystem_, version_filepath, new_version_info));
595 }
596
597 ICING_RETURN_IF_ERROR(InitializeSchemaStore(initialize_stats));
598
599 // TODO(b/156383798) : Resolve how to specify the locale.
600 language_segmenter_factory::SegmenterOptions segmenter_options(
601 ULOC_US, jni_cache_.get());
602 TC3_ASSIGN_OR_RETURN(language_segmenter_, language_segmenter_factory::Create(
603 std::move(segmenter_options)));
604
605 TC3_ASSIGN_OR_RETURN(normalizer_,
606 normalizer_factory::Create(options_.max_token_length()));
607
608 std::string marker_filepath =
609 MakeSetSchemaMarkerFilePath(options_.base_dir());
610
611 libtextclassifier3::Status index_init_status;
612 if (absl_ports::IsNotFound(schema_store_->GetSchema().status())) {
613 // The schema was either lost or never set before. Wipe out the doc store
614 // and index directories and initialize them from scratch.
615 const std::string doc_store_dir =
616 MakeDocumentDirectoryPath(options_.base_dir());
617 const std::string integer_index_dir =
618 MakeIntegerIndexWorkingPath(options_.base_dir());
619 const std::string qualified_id_join_index_dir =
620 MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
621 if (!filesystem_->DeleteDirectoryRecursively(doc_store_dir.c_str()) ||
622 !filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
623 !IntegerIndex::Discard(*filesystem_, integer_index_dir).ok() ||
624 !QualifiedIdTypeJoinableIndex::Discard(*filesystem_,
625 qualified_id_join_index_dir)
626 .ok()) {
627 return absl_ports::InternalError(absl_ports::StrCat(
628 "Could not delete directories: ", index_dir, ", ", integer_index_dir,
629 ", ", qualified_id_join_index_dir, " and ", doc_store_dir));
630 }
631 ICING_RETURN_IF_ERROR(InitializeDocumentStore(
632 /*force_recovery_and_revalidate_documents=*/false, initialize_stats));
633 index_init_status = InitializeIndex(initialize_stats);
634 if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
635 return index_init_status;
636 }
637 } else if (filesystem_->FileExists(marker_filepath.c_str())) {
638 // If the marker file is still around then something wonky happened when we
639 // last tried to set the schema.
640 ICING_RETURN_IF_ERROR(InitializeDocumentStore(
641 /*force_recovery_and_revalidate_documents=*/true, initialize_stats));
642
643 // We're going to need to build the index from scratch. So just delete its
644 // directory now.
645 // Discard index directory and instantiate a new one.
646 Index::Options index_options(index_dir, options_.index_merge_size());
647 if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
648 !filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
649 return absl_ports::InternalError(
650 absl_ports::StrCat("Could not recreate directory: ", index_dir));
651 }
652 ICING_ASSIGN_OR_RETURN(index_,
653 Index::Create(index_options, filesystem_.get(),
654 icing_filesystem_.get()));
655
656 // Discard integer index directory and instantiate a new one.
657 std::string integer_index_dir =
658 MakeIntegerIndexWorkingPath(options_.base_dir());
659 ICING_RETURN_IF_ERROR(
660 IntegerIndex::Discard(*filesystem_, integer_index_dir));
661 ICING_ASSIGN_OR_RETURN(
662 integer_index_,
663 IntegerIndex::Create(*filesystem_, std::move(integer_index_dir),
664 options_.pre_mapping_fbv()));
665
666 // Discard qualified id join index directory and instantiate a new one.
667 std::string qualified_id_join_index_dir =
668 MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
669 ICING_RETURN_IF_ERROR(QualifiedIdTypeJoinableIndex::Discard(
670 *filesystem_, qualified_id_join_index_dir));
671 ICING_ASSIGN_OR_RETURN(
672 qualified_id_join_index_,
673 QualifiedIdTypeJoinableIndex::Create(
674 *filesystem_, std::move(qualified_id_join_index_dir),
675 options_.pre_mapping_fbv(), options_.use_persistent_hash_map()));
676
677 std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
678 IndexRestorationResult restore_result = RestoreIndexIfNeeded();
679 index_init_status = std::move(restore_result.status);
680 // DATA_LOSS means that we have successfully initialized and re-added
681 // content to the index. Some indexed content was lost, but otherwise the
682 // index is in a valid state and can be queried.
683 if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
684 return index_init_status;
685 }
686
687 // Delete the marker file to indicate that everything is now in sync with
688 // whatever changes were made to the schema.
689 filesystem_->DeleteFile(marker_filepath.c_str());
690
691 initialize_stats->set_index_restoration_latency_ms(
692 restore_timer->GetElapsedMilliseconds());
693 initialize_stats->set_index_restoration_cause(
694 InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
695 initialize_stats->set_integer_index_restoration_cause(
696 InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
697 initialize_stats->set_qualified_id_join_index_restoration_cause(
698 InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
699 } else if (version_state_change != version_util::StateChange::kCompatible) {
700 ICING_RETURN_IF_ERROR(InitializeDocumentStore(
701 /*force_recovery_and_revalidate_documents=*/true, initialize_stats));
702 index_init_status = InitializeIndex(initialize_stats);
703 if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
704 return index_init_status;
705 }
706
707 initialize_stats->set_schema_store_recovery_cause(
708 InitializeStatsProto::VERSION_CHANGED);
709 initialize_stats->set_document_store_recovery_cause(
710 InitializeStatsProto::VERSION_CHANGED);
711 initialize_stats->set_index_restoration_cause(
712 InitializeStatsProto::VERSION_CHANGED);
713 initialize_stats->set_integer_index_restoration_cause(
714 InitializeStatsProto::VERSION_CHANGED);
715 initialize_stats->set_qualified_id_join_index_restoration_cause(
716 InitializeStatsProto::VERSION_CHANGED);
717 } else {
718 ICING_RETURN_IF_ERROR(InitializeDocumentStore(
719 /*force_recovery_and_revalidate_documents=*/false, initialize_stats));
720 index_init_status = InitializeIndex(initialize_stats);
721 if (!index_init_status.ok() && !absl_ports::IsDataLoss(index_init_status)) {
722 return index_init_status;
723 }
724 }
725
726 if (status.ok()) {
727 status = index_init_status;
728 }
729
730 result_state_manager_ = std::make_unique<ResultStateManager>(
731 performance_configuration_.max_num_total_hits, *document_store_);
732
733 return status;
734 }
735
InitializeSchemaStore(InitializeStatsProto * initialize_stats)736 libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore(
737 InitializeStatsProto* initialize_stats) {
738 ICING_RETURN_ERROR_IF_NULL(initialize_stats);
739
740 const std::string schema_store_dir =
741 MakeSchemaDirectoryPath(options_.base_dir());
742 // Make sure the sub-directory exists
743 if (!filesystem_->CreateDirectoryRecursively(schema_store_dir.c_str())) {
744 return absl_ports::InternalError(
745 absl_ports::StrCat("Could not create directory: ", schema_store_dir));
746 }
747 ICING_ASSIGN_OR_RETURN(
748 schema_store_, SchemaStore::Create(filesystem_.get(), schema_store_dir,
749 clock_.get(), initialize_stats));
750
751 return libtextclassifier3::Status::OK;
752 }
753
InitializeDocumentStore(bool force_recovery_and_revalidate_documents,InitializeStatsProto * initialize_stats)754 libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore(
755 bool force_recovery_and_revalidate_documents,
756 InitializeStatsProto* initialize_stats) {
757 ICING_RETURN_ERROR_IF_NULL(initialize_stats);
758
759 const std::string document_dir =
760 MakeDocumentDirectoryPath(options_.base_dir());
761 // Make sure the sub-directory exists
762 if (!filesystem_->CreateDirectoryRecursively(document_dir.c_str())) {
763 return absl_ports::InternalError(
764 absl_ports::StrCat("Could not create directory: ", document_dir));
765 }
766 ICING_ASSIGN_OR_RETURN(
767 DocumentStore::CreateResult create_result,
768 DocumentStore::Create(filesystem_.get(), document_dir, clock_.get(),
769 schema_store_.get(),
770 force_recovery_and_revalidate_documents,
771 options_.document_store_namespace_id_fingerprint(),
772 options_.compression_level(), initialize_stats));
773 document_store_ = std::move(create_result.document_store);
774
775 return libtextclassifier3::Status::OK;
776 }
777
InitializeIndex(InitializeStatsProto * initialize_stats)778 libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
779 InitializeStatsProto* initialize_stats) {
780 ICING_RETURN_ERROR_IF_NULL(initialize_stats);
781
782 const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
783 // Make sure the sub-directory exists
784 if (!filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
785 return absl_ports::InternalError(
786 absl_ports::StrCat("Could not create directory: ", index_dir));
787 }
788 Index::Options index_options(index_dir, options_.index_merge_size());
789
790 // Term index
791 InitializeStatsProto::RecoveryCause index_recovery_cause;
792 auto index_or =
793 Index::Create(index_options, filesystem_.get(), icing_filesystem_.get());
794 if (!index_or.ok()) {
795 if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
796 !filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
797 return absl_ports::InternalError(
798 absl_ports::StrCat("Could not recreate directory: ", index_dir));
799 }
800
801 index_recovery_cause = InitializeStatsProto::IO_ERROR;
802
803 // Try recreating it from scratch and re-indexing everything.
804 ICING_ASSIGN_OR_RETURN(index_,
805 Index::Create(index_options, filesystem_.get(),
806 icing_filesystem_.get()));
807 } else {
808 // Index was created fine.
809 index_ = std::move(index_or).ValueOrDie();
810 // If a recover does have to happen, then it must be because the index is
811 // out of sync with the document store.
812 index_recovery_cause = InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
813 }
814
815 // Integer index
816 std::string integer_index_dir =
817 MakeIntegerIndexWorkingPath(options_.base_dir());
818 InitializeStatsProto::RecoveryCause integer_index_recovery_cause;
819 auto integer_index_or = IntegerIndex::Create(*filesystem_, integer_index_dir,
820 options_.pre_mapping_fbv());
821 if (!integer_index_or.ok()) {
822 ICING_RETURN_IF_ERROR(
823 IntegerIndex::Discard(*filesystem_, integer_index_dir));
824
825 integer_index_recovery_cause = InitializeStatsProto::IO_ERROR;
826
827 // Try recreating it from scratch and re-indexing everything.
828 ICING_ASSIGN_OR_RETURN(
829 integer_index_,
830 IntegerIndex::Create(*filesystem_, std::move(integer_index_dir),
831 options_.pre_mapping_fbv()));
832 } else {
833 // Integer index was created fine.
834 integer_index_ = std::move(integer_index_or).ValueOrDie();
835 // If a recover does have to happen, then it must be because the index is
836 // out of sync with the document store.
837 integer_index_recovery_cause =
838 InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
839 }
840
841 // Qualified id join index
842 std::string qualified_id_join_index_dir =
843 MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir());
844 InitializeStatsProto::RecoveryCause qualified_id_join_index_recovery_cause;
845 auto qualified_id_join_index_or = QualifiedIdTypeJoinableIndex::Create(
846 *filesystem_, qualified_id_join_index_dir, options_.pre_mapping_fbv(),
847 options_.use_persistent_hash_map());
848 if (!qualified_id_join_index_or.ok()) {
849 ICING_RETURN_IF_ERROR(QualifiedIdTypeJoinableIndex::Discard(
850 *filesystem_, qualified_id_join_index_dir));
851
852 qualified_id_join_index_recovery_cause = InitializeStatsProto::IO_ERROR;
853
854 // Try recreating it from scratch and rebuild everything.
855 ICING_ASSIGN_OR_RETURN(
856 qualified_id_join_index_,
857 QualifiedIdTypeJoinableIndex::Create(
858 *filesystem_, std::move(qualified_id_join_index_dir),
859 options_.pre_mapping_fbv(), options_.use_persistent_hash_map()));
860 } else {
861 // Qualified id join index was created fine.
862 qualified_id_join_index_ =
863 std::move(qualified_id_join_index_or).ValueOrDie();
864 // If a recover does have to happen, then it must be because the index is
865 // out of sync with the document store.
866 qualified_id_join_index_recovery_cause =
867 InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
868 }
869
870 std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
871 IndexRestorationResult restore_result = RestoreIndexIfNeeded();
872 if (restore_result.index_needed_restoration ||
873 restore_result.integer_index_needed_restoration ||
874 restore_result.qualified_id_join_index_needed_restoration) {
875 initialize_stats->set_index_restoration_latency_ms(
876 restore_timer->GetElapsedMilliseconds());
877
878 if (restore_result.index_needed_restoration) {
879 initialize_stats->set_index_restoration_cause(index_recovery_cause);
880 }
881 if (restore_result.integer_index_needed_restoration) {
882 initialize_stats->set_integer_index_restoration_cause(
883 integer_index_recovery_cause);
884 }
885 if (restore_result.qualified_id_join_index_needed_restoration) {
886 initialize_stats->set_qualified_id_join_index_restoration_cause(
887 qualified_id_join_index_recovery_cause);
888 }
889 }
890 return restore_result.status;
891 }
892
SetSchema(const SchemaProto & new_schema,bool ignore_errors_and_delete_documents)893 SetSchemaResultProto IcingSearchEngine::SetSchema(
894 const SchemaProto& new_schema, bool ignore_errors_and_delete_documents) {
895 return SetSchema(SchemaProto(new_schema), ignore_errors_and_delete_documents);
896 }
897
SetSchema(SchemaProto && new_schema,bool ignore_errors_and_delete_documents)898 SetSchemaResultProto IcingSearchEngine::SetSchema(
899 SchemaProto&& new_schema, bool ignore_errors_and_delete_documents) {
900 ICING_VLOG(1) << "Setting new Schema";
901
902 SetSchemaResultProto result_proto;
903 StatusProto* result_status = result_proto.mutable_status();
904
905 absl_ports::unique_lock l(&mutex_);
906 ScopedTimer timer(clock_->GetNewTimer(), [&result_proto](int64_t t) {
907 result_proto.set_latency_ms(t);
908 });
909 if (!initialized_) {
910 result_status->set_code(StatusProto::FAILED_PRECONDITION);
911 result_status->set_message("IcingSearchEngine has not been initialized!");
912 return result_proto;
913 }
914
915 auto lost_previous_schema_or = LostPreviousSchema();
916 if (!lost_previous_schema_or.ok()) {
917 TransformStatus(lost_previous_schema_or.status(), result_status);
918 return result_proto;
919 }
920 bool lost_previous_schema = lost_previous_schema_or.ValueOrDie();
921
922 std::string marker_filepath =
923 MakeSetSchemaMarkerFilePath(options_.base_dir());
924 // Create the marker file indicating that we are going to apply a schema
925 // change. No need to write anything to the marker file - its existence is the
926 // only thing that matters. The marker file is used to indicate if we
927 // encountered a crash or a power loss while updating the schema and other
928 // files. So set it up to be deleted as long as we return from this function.
929 DestructibleFile marker_file(marker_filepath, filesystem_.get());
930
931 auto set_schema_result_or = schema_store_->SetSchema(
932 std::move(new_schema), ignore_errors_and_delete_documents,
933 options_.allow_circular_schema_definitions());
934 if (!set_schema_result_or.ok()) {
935 TransformStatus(set_schema_result_or.status(), result_status);
936 return result_proto;
937 }
938 SchemaStore::SetSchemaResult set_schema_result =
939 std::move(set_schema_result_or).ValueOrDie();
940
941 for (const std::string& deleted_type :
942 set_schema_result.schema_types_deleted_by_name) {
943 result_proto.add_deleted_schema_types(deleted_type);
944 }
945
946 for (const std::string& incompatible_type :
947 set_schema_result.schema_types_incompatible_by_name) {
948 result_proto.add_incompatible_schema_types(incompatible_type);
949 }
950
951 for (const std::string& new_type :
952 set_schema_result.schema_types_new_by_name) {
953 result_proto.add_new_schema_types(std::move(new_type));
954 }
955
956 for (const std::string& compatible_type :
957 set_schema_result.schema_types_changed_fully_compatible_by_name) {
958 result_proto.add_fully_compatible_changed_schema_types(
959 std::move(compatible_type));
960 }
961
962 bool index_incompatible =
963 !set_schema_result.schema_types_index_incompatible_by_name.empty();
964 for (const std::string& index_incompatible_type :
965 set_schema_result.schema_types_index_incompatible_by_name) {
966 result_proto.add_index_incompatible_changed_schema_types(
967 std::move(index_incompatible_type));
968 }
969
970 bool join_incompatible =
971 !set_schema_result.schema_types_join_incompatible_by_name.empty();
972 for (const std::string& join_incompatible_type :
973 set_schema_result.schema_types_join_incompatible_by_name) {
974 result_proto.add_join_incompatible_changed_schema_types(
975 std::move(join_incompatible_type));
976 }
977
978 libtextclassifier3::Status status;
979 if (set_schema_result.success) {
980 if (lost_previous_schema) {
981 // No previous schema to calculate a diff against. We have to go through
982 // and revalidate all the Documents in the DocumentStore
983 status = document_store_->UpdateSchemaStore(schema_store_.get());
984 if (!status.ok()) {
985 TransformStatus(status, result_status);
986 return result_proto;
987 }
988 } else if (!set_schema_result.old_schema_type_ids_changed.empty() ||
989 !set_schema_result.schema_types_incompatible_by_id.empty() ||
990 !set_schema_result.schema_types_deleted_by_id.empty()) {
991 status = document_store_->OptimizedUpdateSchemaStore(schema_store_.get(),
992 set_schema_result);
993 if (!status.ok()) {
994 TransformStatus(status, result_status);
995 return result_proto;
996 }
997 }
998
999 if (lost_previous_schema || index_incompatible) {
1000 // Clears search indices
1001 status = ClearSearchIndices();
1002 if (!status.ok()) {
1003 TransformStatus(status, result_status);
1004 return result_proto;
1005 }
1006 }
1007
1008 if (lost_previous_schema || join_incompatible) {
1009 // Clears join indices
1010 status = ClearJoinIndices();
1011 if (!status.ok()) {
1012 TransformStatus(status, result_status);
1013 return result_proto;
1014 }
1015 }
1016
1017 if (lost_previous_schema || index_incompatible || join_incompatible) {
1018 IndexRestorationResult restore_result = RestoreIndexIfNeeded();
1019 // DATA_LOSS means that we have successfully re-added content to the
1020 // index. Some indexed content was lost, but otherwise the index is in a
1021 // valid state and can be queried.
1022 if (!restore_result.status.ok() &&
1023 !absl_ports::IsDataLoss(restore_result.status)) {
1024 TransformStatus(status, result_status);
1025 return result_proto;
1026 }
1027 }
1028
1029 result_status->set_code(StatusProto::OK);
1030 } else {
1031 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1032 result_status->set_message("Schema is incompatible.");
1033 }
1034
1035 return result_proto;
1036 }
1037
GetSchema()1038 GetSchemaResultProto IcingSearchEngine::GetSchema() {
1039 GetSchemaResultProto result_proto;
1040 StatusProto* result_status = result_proto.mutable_status();
1041
1042 absl_ports::shared_lock l(&mutex_);
1043 if (!initialized_) {
1044 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1045 result_status->set_message("IcingSearchEngine has not been initialized!");
1046 return result_proto;
1047 }
1048
1049 auto schema_or = schema_store_->GetSchema();
1050 if (!schema_or.ok()) {
1051 TransformStatus(schema_or.status(), result_status);
1052 return result_proto;
1053 }
1054
1055 result_status->set_code(StatusProto::OK);
1056 *result_proto.mutable_schema() = *std::move(schema_or).ValueOrDie();
1057 return result_proto;
1058 }
1059
GetSchemaType(std::string_view schema_type)1060 GetSchemaTypeResultProto IcingSearchEngine::GetSchemaType(
1061 std::string_view schema_type) {
1062 GetSchemaTypeResultProto result_proto;
1063 StatusProto* result_status = result_proto.mutable_status();
1064
1065 absl_ports::shared_lock l(&mutex_);
1066 if (!initialized_) {
1067 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1068 result_status->set_message("IcingSearchEngine has not been initialized!");
1069 return result_proto;
1070 }
1071
1072 auto type_config_or = schema_store_->GetSchemaTypeConfig(schema_type);
1073 if (!type_config_or.ok()) {
1074 TransformStatus(type_config_or.status(), result_status);
1075 return result_proto;
1076 }
1077
1078 result_status->set_code(StatusProto::OK);
1079 *result_proto.mutable_schema_type_config() = *(type_config_or.ValueOrDie());
1080 return result_proto;
1081 }
1082
Put(const DocumentProto & document)1083 PutResultProto IcingSearchEngine::Put(const DocumentProto& document) {
1084 return Put(DocumentProto(document));
1085 }
1086
Put(DocumentProto && document)1087 PutResultProto IcingSearchEngine::Put(DocumentProto&& document) {
1088 ICING_VLOG(1) << "Writing document to document store";
1089
1090 PutResultProto result_proto;
1091 StatusProto* result_status = result_proto.mutable_status();
1092 PutDocumentStatsProto* put_document_stats =
1093 result_proto.mutable_put_document_stats();
1094 ScopedTimer put_timer(clock_->GetNewTimer(), [put_document_stats](int64_t t) {
1095 put_document_stats->set_latency_ms(t);
1096 });
1097
1098 // Lock must be acquired before validation because the DocumentStore uses
1099 // the schema file to validate, and the schema could be changed in
1100 // SetSchema() which is protected by the same mutex.
1101 absl_ports::unique_lock l(&mutex_);
1102 if (!initialized_) {
1103 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1104 result_status->set_message("IcingSearchEngine has not been initialized!");
1105 return result_proto;
1106 }
1107
1108 auto tokenized_document_or = TokenizedDocument::Create(
1109 schema_store_.get(), language_segmenter_.get(), std::move(document));
1110 if (!tokenized_document_or.ok()) {
1111 TransformStatus(tokenized_document_or.status(), result_status);
1112 return result_proto;
1113 }
1114 TokenizedDocument tokenized_document(
1115 std::move(tokenized_document_or).ValueOrDie());
1116
1117 auto document_id_or = document_store_->Put(
1118 tokenized_document.document(), tokenized_document.num_string_tokens(),
1119 put_document_stats);
1120 if (!document_id_or.ok()) {
1121 TransformStatus(document_id_or.status(), result_status);
1122 return result_proto;
1123 }
1124 DocumentId document_id = document_id_or.ValueOrDie();
1125
1126 auto data_indexing_handlers_or = CreateDataIndexingHandlers();
1127 if (!data_indexing_handlers_or.ok()) {
1128 TransformStatus(data_indexing_handlers_or.status(), result_status);
1129 return result_proto;
1130 }
1131 IndexProcessor index_processor(
1132 std::move(data_indexing_handlers_or).ValueOrDie(), clock_.get());
1133
1134 auto index_status = index_processor.IndexDocument(
1135 tokenized_document, document_id, put_document_stats);
1136 // Getting an internal error from the index could possibly mean that the index
1137 // is broken. Try to rebuild them to recover.
1138 if (absl_ports::IsInternal(index_status)) {
1139 ICING_LOG(ERROR) << "Got an internal error from the index. Trying to "
1140 "rebuild the index!\n"
1141 << index_status.error_message();
1142 index_status = ClearAllIndices();
1143 if (index_status.ok()) {
1144 index_status = RestoreIndexIfNeeded().status;
1145 if (!index_status.ok()) {
1146 ICING_LOG(ERROR) << "Failed to reindex documents after a failure of "
1147 "indexing a document.";
1148 }
1149 } else {
1150 ICING_LOG(ERROR)
1151 << "Failed to clear indices after a failure of indexing a document.";
1152 }
1153 }
1154
1155 if (!index_status.ok()) {
1156 // If we encountered a failure or cannot resolve an internal error while
1157 // indexing this document, then mark it as deleted.
1158 int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
1159 libtextclassifier3::Status delete_status =
1160 document_store_->Delete(document_id, current_time_ms);
1161 if (!delete_status.ok()) {
1162 // This is pretty dire (and, hopefully, unlikely). We can't roll back the
1163 // document that we just added. Wipeout the whole index.
1164 ICING_LOG(ERROR) << "Cannot delete the document that is failed to index. "
1165 "Wiping out the whole Icing search engine.";
1166 ResetInternal();
1167 }
1168 }
1169
1170 TransformStatus(index_status, result_status);
1171 return result_proto;
1172 }
1173
Get(const std::string_view name_space,const std::string_view uri,const GetResultSpecProto & result_spec)1174 GetResultProto IcingSearchEngine::Get(const std::string_view name_space,
1175 const std::string_view uri,
1176 const GetResultSpecProto& result_spec) {
1177 GetResultProto result_proto;
1178 StatusProto* result_status = result_proto.mutable_status();
1179
1180 absl_ports::shared_lock l(&mutex_);
1181 if (!initialized_) {
1182 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1183 result_status->set_message("IcingSearchEngine has not been initialized!");
1184 return result_proto;
1185 }
1186
1187 auto document_or = document_store_->Get(name_space, uri);
1188 if (!document_or.ok()) {
1189 TransformStatus(document_or.status(), result_status);
1190 return result_proto;
1191 }
1192
1193 DocumentProto document = std::move(document_or).ValueOrDie();
1194 std::unique_ptr<ProjectionTree> type_projection_tree;
1195 std::unique_ptr<ProjectionTree> wildcard_projection_tree;
1196 for (const SchemaStore::ExpandedTypePropertyMask& type_field_mask :
1197 schema_store_->ExpandTypePropertyMasks(
1198 result_spec.type_property_masks())) {
1199 if (type_field_mask.schema_type == document.schema()) {
1200 type_projection_tree = std::make_unique<ProjectionTree>(type_field_mask);
1201 } else if (type_field_mask.schema_type ==
1202 SchemaStore::kSchemaTypeWildcard) {
1203 wildcard_projection_tree =
1204 std::make_unique<ProjectionTree>(type_field_mask);
1205 }
1206 }
1207
1208 // Apply projection
1209 if (type_projection_tree != nullptr) {
1210 projector::Project(type_projection_tree->root().children, &document);
1211 } else if (wildcard_projection_tree != nullptr) {
1212 projector::Project(wildcard_projection_tree->root().children, &document);
1213 }
1214
1215 result_status->set_code(StatusProto::OK);
1216 *result_proto.mutable_document() = std::move(document);
1217 return result_proto;
1218 }
1219
ReportUsage(const UsageReport & usage_report)1220 ReportUsageResultProto IcingSearchEngine::ReportUsage(
1221 const UsageReport& usage_report) {
1222 ReportUsageResultProto result_proto;
1223 StatusProto* result_status = result_proto.mutable_status();
1224
1225 absl_ports::unique_lock l(&mutex_);
1226 if (!initialized_) {
1227 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1228 result_status->set_message("IcingSearchEngine has not been initialized!");
1229 return result_proto;
1230 }
1231
1232 libtextclassifier3::Status status =
1233 document_store_->ReportUsage(usage_report);
1234 TransformStatus(status, result_status);
1235 return result_proto;
1236 }
1237
GetAllNamespaces()1238 GetAllNamespacesResultProto IcingSearchEngine::GetAllNamespaces() {
1239 GetAllNamespacesResultProto result_proto;
1240 StatusProto* result_status = result_proto.mutable_status();
1241
1242 absl_ports::shared_lock l(&mutex_);
1243 if (!initialized_) {
1244 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1245 result_status->set_message("IcingSearchEngine has not been initialized!");
1246 return result_proto;
1247 }
1248
1249 std::vector<std::string> namespaces = document_store_->GetAllNamespaces();
1250
1251 for (const std::string& namespace_ : namespaces) {
1252 result_proto.add_namespaces(namespace_);
1253 }
1254
1255 result_status->set_code(StatusProto::OK);
1256 return result_proto;
1257 }
1258
Delete(const std::string_view name_space,const std::string_view uri)1259 DeleteResultProto IcingSearchEngine::Delete(const std::string_view name_space,
1260 const std::string_view uri) {
1261 ICING_VLOG(1) << "Deleting document from doc store";
1262
1263 DeleteResultProto result_proto;
1264 StatusProto* result_status = result_proto.mutable_status();
1265
1266 absl_ports::unique_lock l(&mutex_);
1267 if (!initialized_) {
1268 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1269 result_status->set_message("IcingSearchEngine has not been initialized!");
1270 return result_proto;
1271 }
1272
1273 DeleteStatsProto* delete_stats = result_proto.mutable_delete_stats();
1274 delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SINGLE);
1275
1276 std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
1277 // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
1278 // that can support error logging.
1279 int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
1280 libtextclassifier3::Status status =
1281 document_store_->Delete(name_space, uri, current_time_ms);
1282 if (!status.ok()) {
1283 LogSeverity::Code severity = ERROR;
1284 if (absl_ports::IsNotFound(status)) {
1285 severity = DBG;
1286 }
1287 ICING_LOG(severity) << status.error_message()
1288 << "Failed to delete Document. namespace: "
1289 << name_space << ", uri: " << uri;
1290 TransformStatus(status, result_status);
1291 return result_proto;
1292 }
1293
1294 result_status->set_code(StatusProto::OK);
1295 delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
1296 delete_stats->set_num_documents_deleted(1);
1297 return result_proto;
1298 }
1299
DeleteByNamespace(const std::string_view name_space)1300 DeleteByNamespaceResultProto IcingSearchEngine::DeleteByNamespace(
1301 const std::string_view name_space) {
1302 ICING_VLOG(1) << "Deleting namespace from doc store";
1303
1304 DeleteByNamespaceResultProto delete_result;
1305 StatusProto* result_status = delete_result.mutable_status();
1306 absl_ports::unique_lock l(&mutex_);
1307 if (!initialized_) {
1308 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1309 result_status->set_message("IcingSearchEngine has not been initialized!");
1310 return delete_result;
1311 }
1312
1313 DeleteStatsProto* delete_stats = delete_result.mutable_delete_stats();
1314 delete_stats->set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE);
1315
1316 std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
1317 // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
1318 // that can support error logging.
1319 DocumentStore::DeleteByGroupResult doc_store_result =
1320 document_store_->DeleteByNamespace(name_space);
1321 if (!doc_store_result.status.ok()) {
1322 ICING_LOG(ERROR) << doc_store_result.status.error_message()
1323 << "Failed to delete Namespace: " << name_space;
1324 TransformStatus(doc_store_result.status, result_status);
1325 return delete_result;
1326 }
1327
1328 result_status->set_code(StatusProto::OK);
1329 delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
1330 delete_stats->set_num_documents_deleted(doc_store_result.num_docs_deleted);
1331 return delete_result;
1332 }
1333
DeleteBySchemaType(const std::string_view schema_type)1334 DeleteBySchemaTypeResultProto IcingSearchEngine::DeleteBySchemaType(
1335 const std::string_view schema_type) {
1336 ICING_VLOG(1) << "Deleting type from doc store";
1337
1338 DeleteBySchemaTypeResultProto delete_result;
1339 StatusProto* result_status = delete_result.mutable_status();
1340 absl_ports::unique_lock l(&mutex_);
1341 if (!initialized_) {
1342 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1343 result_status->set_message("IcingSearchEngine has not been initialized!");
1344 return delete_result;
1345 }
1346
1347 DeleteStatsProto* delete_stats = delete_result.mutable_delete_stats();
1348 delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE);
1349
1350 std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
1351 // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
1352 // that can support error logging.
1353 DocumentStore::DeleteByGroupResult doc_store_result =
1354 document_store_->DeleteBySchemaType(schema_type);
1355 if (!doc_store_result.status.ok()) {
1356 ICING_LOG(ERROR) << doc_store_result.status.error_message()
1357 << "Failed to delete SchemaType: " << schema_type;
1358 TransformStatus(doc_store_result.status, result_status);
1359 return delete_result;
1360 }
1361
1362 result_status->set_code(StatusProto::OK);
1363 delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
1364 delete_stats->set_num_documents_deleted(doc_store_result.num_docs_deleted);
1365 return delete_result;
1366 }
1367
DeleteByQuery(const SearchSpecProto & search_spec,bool return_deleted_document_info)1368 DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery(
1369 const SearchSpecProto& search_spec, bool return_deleted_document_info) {
1370 ICING_VLOG(1) << "Deleting documents for query " << search_spec.query()
1371 << " from doc store";
1372
1373 DeleteByQueryResultProto result_proto;
1374 StatusProto* result_status = result_proto.mutable_status();
1375
1376 absl_ports::unique_lock l(&mutex_);
1377 if (!initialized_) {
1378 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1379 result_status->set_message("IcingSearchEngine has not been initialized!");
1380 return result_proto;
1381 }
1382
1383 DeleteByQueryStatsProto* delete_stats =
1384 result_proto.mutable_delete_by_query_stats();
1385 delete_stats->set_query_length(search_spec.query().length());
1386 delete_stats->set_num_namespaces_filtered(
1387 search_spec.namespace_filters_size());
1388 delete_stats->set_num_schema_types_filtered(
1389 search_spec.schema_type_filters_size());
1390
1391 ScopedTimer delete_timer(clock_->GetNewTimer(), [delete_stats](int64_t t) {
1392 delete_stats->set_latency_ms(t);
1393 });
1394 libtextclassifier3::Status status =
1395 ValidateSearchSpec(search_spec, performance_configuration_);
1396 if (!status.ok()) {
1397 TransformStatus(status, result_status);
1398 return result_proto;
1399 }
1400
1401 std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
1402 // Gets unordered results from query processor
1403 auto query_processor_or = QueryProcessor::Create(
1404 index_.get(), integer_index_.get(), language_segmenter_.get(),
1405 normalizer_.get(), document_store_.get(), schema_store_.get());
1406 if (!query_processor_or.ok()) {
1407 TransformStatus(query_processor_or.status(), result_status);
1408 delete_stats->set_parse_query_latency_ms(
1409 component_timer->GetElapsedMilliseconds());
1410 return result_proto;
1411 }
1412 std::unique_ptr<QueryProcessor> query_processor =
1413 std::move(query_processor_or).ValueOrDie();
1414
1415 int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
1416 auto query_results_or = query_processor->ParseSearch(
1417 search_spec, ScoringSpecProto::RankingStrategy::NONE, current_time_ms);
1418 if (!query_results_or.ok()) {
1419 TransformStatus(query_results_or.status(), result_status);
1420 delete_stats->set_parse_query_latency_ms(
1421 component_timer->GetElapsedMilliseconds());
1422 return result_proto;
1423 }
1424 QueryResults query_results = std::move(query_results_or).ValueOrDie();
1425 delete_stats->set_parse_query_latency_ms(
1426 component_timer->GetElapsedMilliseconds());
1427
1428 ICING_VLOG(2) << "Deleting the docs that matched the query.";
1429 int num_deleted = 0;
1430 // A map used to group deleted documents.
1431 // From the (namespace, type) pair to a list of uris.
1432 std::unordered_map<NamespaceTypePair,
1433 DeleteByQueryResultProto::DocumentGroupInfo*,
1434 NamespaceTypePairHasher>
1435 deleted_info_map;
1436
1437 component_timer = clock_->GetNewTimer();
1438 while (query_results.root_iterator->Advance().ok()) {
1439 ICING_VLOG(3) << "Deleting doc "
1440 << query_results.root_iterator->doc_hit_info().document_id();
1441 ++num_deleted;
1442 if (return_deleted_document_info) {
1443 status = RetrieveAndAddDocumentInfo(
1444 document_store_.get(), result_proto, deleted_info_map,
1445 query_results.root_iterator->doc_hit_info().document_id());
1446 if (!status.ok()) {
1447 TransformStatus(status, result_status);
1448 delete_stats->set_document_removal_latency_ms(
1449 component_timer->GetElapsedMilliseconds());
1450 return result_proto;
1451 }
1452 }
1453 status = document_store_->Delete(
1454 query_results.root_iterator->doc_hit_info().document_id(),
1455 current_time_ms);
1456 if (!status.ok()) {
1457 TransformStatus(status, result_status);
1458 delete_stats->set_document_removal_latency_ms(
1459 component_timer->GetElapsedMilliseconds());
1460 return result_proto;
1461 }
1462 }
1463 delete_stats->set_document_removal_latency_ms(
1464 component_timer->GetElapsedMilliseconds());
1465 int term_count = 0;
1466 for (const auto& section_and_terms : query_results.query_terms) {
1467 term_count += section_and_terms.second.size();
1468 }
1469 delete_stats->set_num_terms(term_count);
1470
1471 if (num_deleted > 0) {
1472 result_proto.mutable_status()->set_code(StatusProto::OK);
1473 } else {
1474 result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
1475 result_proto.mutable_status()->set_message(
1476 "No documents matched the query to delete by!");
1477 }
1478 delete_stats->set_num_documents_deleted(num_deleted);
1479 return result_proto;
1480 }
1481
PersistToDisk(PersistType::Code persist_type)1482 PersistToDiskResultProto IcingSearchEngine::PersistToDisk(
1483 PersistType::Code persist_type) {
1484 ICING_VLOG(1) << "Persisting data to disk";
1485
1486 PersistToDiskResultProto result_proto;
1487 StatusProto* result_status = result_proto.mutable_status();
1488
1489 absl_ports::unique_lock l(&mutex_);
1490 if (!initialized_) {
1491 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1492 result_status->set_message("IcingSearchEngine has not been initialized!");
1493 return result_proto;
1494 }
1495
1496 auto status = InternalPersistToDisk(persist_type);
1497 TransformStatus(status, result_status);
1498 return result_proto;
1499 }
1500
1501 // Optimizes Icing's storage
1502 //
1503 // Steps:
1504 // 1. Flush data to disk.
1505 // 2. Copy data needed to a tmp directory.
1506 // 3. Swap current directory and tmp directory.
Optimize()1507 OptimizeResultProto IcingSearchEngine::Optimize() {
1508 ICING_VLOG(1) << "Optimizing icing storage";
1509
1510 OptimizeResultProto result_proto;
1511 StatusProto* result_status = result_proto.mutable_status();
1512
1513 absl_ports::unique_lock l(&mutex_);
1514 if (!initialized_) {
1515 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1516 result_status->set_message("IcingSearchEngine has not been initialized!");
1517 return result_proto;
1518 }
1519
1520 OptimizeStatsProto* optimize_stats = result_proto.mutable_optimize_stats();
1521 ScopedTimer optimize_timer(
1522 clock_->GetNewTimer(),
1523 [optimize_stats](int64_t t) { optimize_stats->set_latency_ms(t); });
1524
1525 // Flushes data to disk before doing optimization
1526 auto status = InternalPersistToDisk(PersistType::FULL);
1527 if (!status.ok()) {
1528 TransformStatus(status, result_status);
1529 return result_proto;
1530 }
1531
1532 int64_t before_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
1533 optimize_stats->set_storage_size_before(
1534 Filesystem::SanitizeFileSize(before_size));
1535
1536 // TODO(b/143646633): figure out if we need to optimize index and doc store
1537 // at the same time.
1538 std::unique_ptr<Timer> optimize_doc_store_timer = clock_->GetNewTimer();
1539 libtextclassifier3::StatusOr<std::vector<DocumentId>>
1540 document_id_old_to_new_or = OptimizeDocumentStore(optimize_stats);
1541 optimize_stats->set_document_store_optimize_latency_ms(
1542 optimize_doc_store_timer->GetElapsedMilliseconds());
1543
1544 if (!document_id_old_to_new_or.ok() &&
1545 !absl_ports::IsDataLoss(document_id_old_to_new_or.status())) {
1546 // The status now is either ABORTED_ERROR or INTERNAL_ERROR.
1547 // If ABORTED_ERROR, Icing should still be working.
1548 // If INTERNAL_ERROR, we're having IO errors or other errors that we can't
1549 // recover from.
1550 TransformStatus(document_id_old_to_new_or.status(), result_status);
1551 return result_proto;
1552 }
1553
1554 // The status is either OK or DATA_LOSS. The optimized document store is
1555 // guaranteed to work, so we update index according to the new document store.
1556 std::unique_ptr<Timer> optimize_index_timer = clock_->GetNewTimer();
1557 bool should_rebuild_index =
1558 !document_id_old_to_new_or.ok() ||
1559 ShouldRebuildIndex(*optimize_stats,
1560 options_.optimize_rebuild_index_threshold());
1561 if (!should_rebuild_index) {
1562 optimize_stats->set_index_restoration_mode(
1563 OptimizeStatsProto::INDEX_TRANSLATION);
1564 libtextclassifier3::Status index_optimize_status =
1565 index_->Optimize(document_id_old_to_new_or.ValueOrDie(),
1566 document_store_->last_added_document_id());
1567 if (!index_optimize_status.ok()) {
1568 ICING_LOG(WARNING) << "Failed to optimize index. Error: "
1569 << index_optimize_status.error_message();
1570 should_rebuild_index = true;
1571 }
1572
1573 libtextclassifier3::Status integer_index_optimize_status =
1574 integer_index_->Optimize(document_id_old_to_new_or.ValueOrDie(),
1575 document_store_->last_added_document_id());
1576 if (!integer_index_optimize_status.ok()) {
1577 ICING_LOG(WARNING) << "Failed to optimize integer index. Error: "
1578 << integer_index_optimize_status.error_message();
1579 should_rebuild_index = true;
1580 }
1581
1582 libtextclassifier3::Status qualified_id_join_index_optimize_status =
1583 qualified_id_join_index_->Optimize(
1584 document_id_old_to_new_or.ValueOrDie(),
1585 document_store_->last_added_document_id());
1586 if (!qualified_id_join_index_optimize_status.ok()) {
1587 ICING_LOG(WARNING)
1588 << "Failed to optimize qualified id join index. Error: "
1589 << qualified_id_join_index_optimize_status.error_message();
1590 should_rebuild_index = true;
1591 }
1592 }
1593 // If we received a DATA_LOSS error from OptimizeDocumentStore, we have a
1594 // valid document store, but it might be the old one or the new one. So throw
1595 // out the index data and rebuild from scratch.
1596 // Likewise, if Index::Optimize failed, then attempt to recover the index by
1597 // rebuilding from scratch.
1598 // If ShouldRebuildIndex() returns true, we will also rebuild the index for
1599 // better performance.
1600 if (should_rebuild_index) {
1601 optimize_stats->set_index_restoration_mode(
1602 OptimizeStatsProto::FULL_INDEX_REBUILD);
1603 ICING_LOG(WARNING) << "Clearing the entire index!";
1604
1605 libtextclassifier3::Status index_clear_status = ClearAllIndices();
1606 if (!index_clear_status.ok()) {
1607 status = absl_ports::Annotate(
1608 absl_ports::InternalError("Failed to clear index."),
1609 index_clear_status.error_message());
1610 TransformStatus(status, result_status);
1611 optimize_stats->set_index_restoration_latency_ms(
1612 optimize_index_timer->GetElapsedMilliseconds());
1613 return result_proto;
1614 }
1615
1616 IndexRestorationResult index_restoration_status = RestoreIndexIfNeeded();
1617 // DATA_LOSS means that we have successfully re-added content to the index.
1618 // Some indexed content was lost, but otherwise the index is in a valid
1619 // state and can be queried.
1620 if (!index_restoration_status.status.ok() &&
1621 !absl_ports::IsDataLoss(index_restoration_status.status)) {
1622 status = absl_ports::Annotate(
1623 absl_ports::InternalError(
1624 "Failed to reindex documents after optimization."),
1625 index_restoration_status.status.error_message());
1626
1627 TransformStatus(status, result_status);
1628 optimize_stats->set_index_restoration_latency_ms(
1629 optimize_index_timer->GetElapsedMilliseconds());
1630 return result_proto;
1631 }
1632 }
1633 optimize_stats->set_index_restoration_latency_ms(
1634 optimize_index_timer->GetElapsedMilliseconds());
1635
1636 // Read the optimize status to get the time that we last ran.
1637 std::string optimize_status_filename =
1638 absl_ports::StrCat(options_.base_dir(), "/", kOptimizeStatusFilename);
1639 FileBackedProto<OptimizeStatusProto> optimize_status_file(
1640 *filesystem_, optimize_status_filename);
1641 auto optimize_status_or = optimize_status_file.Read();
1642 int64_t current_time = clock_->GetSystemTimeMilliseconds();
1643 if (optimize_status_or.ok()) {
1644 // If we have trouble reading the status or this is the first time that
1645 // we've ever run, don't set this field.
1646 optimize_stats->set_time_since_last_optimize_ms(
1647 current_time - optimize_status_or.ValueOrDie()
1648 ->last_successful_optimize_run_time_ms());
1649 }
1650
1651 // Update the status for this run and write it.
1652 auto optimize_status = std::make_unique<OptimizeStatusProto>();
1653 optimize_status->set_last_successful_optimize_run_time_ms(current_time);
1654 optimize_status_file.Write(std::move(optimize_status));
1655
1656 // Flushes data to disk after doing optimization
1657 status = InternalPersistToDisk(PersistType::FULL);
1658 if (!status.ok()) {
1659 TransformStatus(status, result_status);
1660 return result_proto;
1661 }
1662
1663 int64_t after_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
1664 optimize_stats->set_storage_size_after(
1665 Filesystem::SanitizeFileSize(after_size));
1666
1667 TransformStatus(document_id_old_to_new_or.status(), result_status);
1668 return result_proto;
1669 }
1670
GetOptimizeInfo()1671 GetOptimizeInfoResultProto IcingSearchEngine::GetOptimizeInfo() {
1672 ICING_VLOG(1) << "Getting optimize info from IcingSearchEngine";
1673
1674 GetOptimizeInfoResultProto result_proto;
1675 StatusProto* result_status = result_proto.mutable_status();
1676
1677 absl_ports::shared_lock l(&mutex_);
1678 if (!initialized_) {
1679 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1680 result_status->set_message("IcingSearchEngine has not been initialized!");
1681 return result_proto;
1682 }
1683
1684 // Read the optimize status to get the time that we last ran.
1685 std::string optimize_status_filename =
1686 absl_ports::StrCat(options_.base_dir(), "/", kOptimizeStatusFilename);
1687 FileBackedProto<OptimizeStatusProto> optimize_status_file(
1688 *filesystem_, optimize_status_filename);
1689 auto optimize_status_or = optimize_status_file.Read();
1690 int64_t current_time = clock_->GetSystemTimeMilliseconds();
1691
1692 if (optimize_status_or.ok()) {
1693 // If we have trouble reading the status or this is the first time that
1694 // we've ever run, don't set this field.
1695 result_proto.set_time_since_last_optimize_ms(
1696 current_time - optimize_status_or.ValueOrDie()
1697 ->last_successful_optimize_run_time_ms());
1698 }
1699
1700 // Get stats from DocumentStore
1701 auto doc_store_optimize_info_or = document_store_->GetOptimizeInfo();
1702 if (!doc_store_optimize_info_or.ok()) {
1703 TransformStatus(doc_store_optimize_info_or.status(), result_status);
1704 return result_proto;
1705 }
1706 DocumentStore::OptimizeInfo doc_store_optimize_info =
1707 doc_store_optimize_info_or.ValueOrDie();
1708 result_proto.set_optimizable_docs(doc_store_optimize_info.optimizable_docs);
1709
1710 if (doc_store_optimize_info.optimizable_docs == 0) {
1711 // Can return early since there's nothing to calculate on the index side
1712 result_proto.set_estimated_optimizable_bytes(0);
1713 result_status->set_code(StatusProto::OK);
1714 return result_proto;
1715 }
1716
1717 // Get stats from Index.
1718 auto index_elements_size_or = index_->GetElementsSize();
1719 if (!index_elements_size_or.ok()) {
1720 TransformStatus(index_elements_size_or.status(), result_status);
1721 return result_proto;
1722 }
1723 int64_t index_elements_size = index_elements_size_or.ValueOrDie();
1724
1725 // TODO(b/259744228): add stats for integer index
1726
1727 // Sum up the optimizable sizes from DocumentStore and Index
1728 result_proto.set_estimated_optimizable_bytes(
1729 index_elements_size * doc_store_optimize_info.optimizable_docs /
1730 doc_store_optimize_info.total_docs +
1731 doc_store_optimize_info.estimated_optimizable_bytes);
1732
1733 result_status->set_code(StatusProto::OK);
1734 return result_proto;
1735 }
1736
GetStorageInfo()1737 StorageInfoResultProto IcingSearchEngine::GetStorageInfo() {
1738 StorageInfoResultProto result;
1739 absl_ports::shared_lock l(&mutex_);
1740 if (!initialized_) {
1741 result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
1742 result.mutable_status()->set_message(
1743 "IcingSearchEngine has not been initialized!");
1744 return result;
1745 }
1746
1747 int64_t index_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
1748 result.mutable_storage_info()->set_total_storage_size(
1749 Filesystem::SanitizeFileSize(index_size));
1750 *result.mutable_storage_info()->mutable_document_storage_info() =
1751 document_store_->GetStorageInfo();
1752 *result.mutable_storage_info()->mutable_schema_store_storage_info() =
1753 schema_store_->GetStorageInfo();
1754 *result.mutable_storage_info()->mutable_index_storage_info() =
1755 index_->GetStorageInfo();
1756 // TODO(b/259744228): add stats for integer index
1757 result.mutable_status()->set_code(StatusProto::OK);
1758 return result;
1759 }
1760
GetDebugInfo(DebugInfoVerbosity::Code verbosity)1761 DebugInfoResultProto IcingSearchEngine::GetDebugInfo(
1762 DebugInfoVerbosity::Code verbosity) {
1763 DebugInfoResultProto debug_info;
1764 StatusProto* result_status = debug_info.mutable_status();
1765 absl_ports::shared_lock l(&mutex_);
1766 if (!initialized_) {
1767 debug_info.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
1768 debug_info.mutable_status()->set_message(
1769 "IcingSearchEngine has not been initialized!");
1770 return debug_info;
1771 }
1772
1773 // Index
1774 *debug_info.mutable_debug_info()->mutable_index_info() =
1775 index_->GetDebugInfo(verbosity);
1776
1777 // TODO(b/259744228): add debug info for integer index
1778
1779 // Document Store
1780 libtextclassifier3::StatusOr<DocumentDebugInfoProto> document_debug_info =
1781 document_store_->GetDebugInfo(verbosity);
1782 if (!document_debug_info.ok()) {
1783 TransformStatus(document_debug_info.status(), result_status);
1784 return debug_info;
1785 }
1786 *debug_info.mutable_debug_info()->mutable_document_info() =
1787 std::move(document_debug_info).ValueOrDie();
1788
1789 // Schema Store
1790 libtextclassifier3::StatusOr<SchemaDebugInfoProto> schema_debug_info =
1791 schema_store_->GetDebugInfo();
1792 if (!schema_debug_info.ok()) {
1793 TransformStatus(schema_debug_info.status(), result_status);
1794 return debug_info;
1795 }
1796 *debug_info.mutable_debug_info()->mutable_schema_info() =
1797 std::move(schema_debug_info).ValueOrDie();
1798
1799 result_status->set_code(StatusProto::OK);
1800 return debug_info;
1801 }
1802
InternalPersistToDisk(PersistType::Code persist_type)1803 libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk(
1804 PersistType::Code persist_type) {
1805 if (persist_type == PersistType::LITE) {
1806 return document_store_->PersistToDisk(persist_type);
1807 }
1808 ICING_RETURN_IF_ERROR(schema_store_->PersistToDisk());
1809 ICING_RETURN_IF_ERROR(document_store_->PersistToDisk(PersistType::FULL));
1810 ICING_RETURN_IF_ERROR(index_->PersistToDisk());
1811 ICING_RETURN_IF_ERROR(integer_index_->PersistToDisk());
1812 ICING_RETURN_IF_ERROR(qualified_id_join_index_->PersistToDisk());
1813
1814 return libtextclassifier3::Status::OK;
1815 }
1816
Search(const SearchSpecProto & search_spec,const ScoringSpecProto & scoring_spec,const ResultSpecProto & result_spec)1817 SearchResultProto IcingSearchEngine::Search(
1818 const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
1819 const ResultSpecProto& result_spec) {
1820 if (search_spec.use_read_only_search()) {
1821 return SearchLockedShared(search_spec, scoring_spec, result_spec);
1822 } else {
1823 return SearchLockedExclusive(search_spec, scoring_spec, result_spec);
1824 }
1825 }
1826
SearchLockedShared(const SearchSpecProto & search_spec,const ScoringSpecProto & scoring_spec,const ResultSpecProto & result_spec)1827 SearchResultProto IcingSearchEngine::SearchLockedShared(
1828 const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
1829 const ResultSpecProto& result_spec) {
1830 std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
1831
1832 // Only acquire an overall read-lock for this implementation. Finer-grained
1833 // locks are implemented around code paths that write changes to Icing's data
1834 // members.
1835 absl_ports::shared_lock l(&mutex_);
1836 int64_t lock_acquisition_latency = overall_timer->GetElapsedMilliseconds();
1837
1838 SearchResultProto result_proto =
1839 InternalSearch(search_spec, scoring_spec, result_spec);
1840
1841 result_proto.mutable_query_stats()->set_lock_acquisition_latency_ms(
1842 lock_acquisition_latency);
1843 result_proto.mutable_query_stats()->set_latency_ms(
1844 overall_timer->GetElapsedMilliseconds());
1845 return result_proto;
1846 }
1847
SearchLockedExclusive(const SearchSpecProto & search_spec,const ScoringSpecProto & scoring_spec,const ResultSpecProto & result_spec)1848 SearchResultProto IcingSearchEngine::SearchLockedExclusive(
1849 const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
1850 const ResultSpecProto& result_spec) {
1851 std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
1852
1853 // Acquire the overall write-lock for this locked implementation.
1854 absl_ports::unique_lock l(&mutex_);
1855 int64_t lock_acquisition_latency = overall_timer->GetElapsedMilliseconds();
1856
1857 SearchResultProto result_proto =
1858 InternalSearch(search_spec, scoring_spec, result_spec);
1859
1860 result_proto.mutable_query_stats()->set_lock_acquisition_latency_ms(
1861 lock_acquisition_latency);
1862 result_proto.mutable_query_stats()->set_latency_ms(
1863 overall_timer->GetElapsedMilliseconds());
1864 return result_proto;
1865 }
1866
InternalSearch(const SearchSpecProto & search_spec,const ScoringSpecProto & scoring_spec,const ResultSpecProto & result_spec)1867 SearchResultProto IcingSearchEngine::InternalSearch(
1868 const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
1869 const ResultSpecProto& result_spec) {
1870 SearchResultProto result_proto;
1871 StatusProto* result_status = result_proto.mutable_status();
1872
1873 QueryStatsProto* query_stats = result_proto.mutable_query_stats();
1874 query_stats->set_query_length(search_spec.query().length());
1875 if (!initialized_) {
1876 result_status->set_code(StatusProto::FAILED_PRECONDITION);
1877 result_status->set_message("IcingSearchEngine has not been initialized!");
1878 return result_proto;
1879 }
1880
1881 libtextclassifier3::Status status =
1882 ValidateResultSpec(document_store_.get(), result_spec);
1883 if (!status.ok()) {
1884 TransformStatus(status, result_status);
1885 return result_proto;
1886 }
1887 status = ValidateSearchSpec(search_spec, performance_configuration_);
1888 if (!status.ok()) {
1889 TransformStatus(status, result_status);
1890 return result_proto;
1891 }
1892
1893 query_stats->set_num_namespaces_filtered(
1894 search_spec.namespace_filters_size());
1895 query_stats->set_num_schema_types_filtered(
1896 search_spec.schema_type_filters_size());
1897 query_stats->set_ranking_strategy(scoring_spec.rank_by());
1898 query_stats->set_is_first_page(true);
1899 query_stats->set_requested_page_size(result_spec.num_per_page());
1900
1901 const JoinSpecProto& join_spec = search_spec.join_spec();
1902 std::unique_ptr<JoinChildrenFetcher> join_children_fetcher;
1903 std::unique_ptr<ResultAdjustmentInfo> child_result_adjustment_info;
1904 int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
1905 if (!join_spec.parent_property_expression().empty() &&
1906 !join_spec.child_property_expression().empty()) {
1907 // Process child query
1908 QueryScoringResults nested_query_scoring_results = ProcessQueryAndScore(
1909 join_spec.nested_spec().search_spec(),
1910 join_spec.nested_spec().scoring_spec(),
1911 join_spec.nested_spec().result_spec(),
1912 /*join_children_fetcher=*/nullptr, current_time_ms);
1913 // TOOD(b/256022027): set different kinds of latency for 2nd query.
1914 if (!nested_query_scoring_results.status.ok()) {
1915 TransformStatus(nested_query_scoring_results.status, result_status);
1916 return result_proto;
1917 }
1918
1919 JoinProcessor join_processor(document_store_.get(), schema_store_.get(),
1920 qualified_id_join_index_.get(),
1921 current_time_ms);
1922 // Building a JoinChildrenFetcher where child documents are grouped by
1923 // their joinable values.
1924 libtextclassifier3::StatusOr<JoinChildrenFetcher> join_children_fetcher_or =
1925 join_processor.GetChildrenFetcher(
1926 search_spec.join_spec(),
1927 std::move(nested_query_scoring_results.scored_document_hits));
1928 if (!join_children_fetcher_or.ok()) {
1929 TransformStatus(join_children_fetcher_or.status(), result_status);
1930 return result_proto;
1931 }
1932 join_children_fetcher = std::make_unique<JoinChildrenFetcher>(
1933 std::move(join_children_fetcher_or).ValueOrDie());
1934
1935 // Assign child's ResultAdjustmentInfo.
1936 child_result_adjustment_info = std::make_unique<ResultAdjustmentInfo>(
1937 join_spec.nested_spec().search_spec(),
1938 join_spec.nested_spec().scoring_spec(),
1939 join_spec.nested_spec().result_spec(), schema_store_.get(),
1940 std::move(nested_query_scoring_results.query_terms));
1941 }
1942
1943 // Process parent query
1944 QueryScoringResults query_scoring_results =
1945 ProcessQueryAndScore(search_spec, scoring_spec, result_spec,
1946 join_children_fetcher.get(), current_time_ms);
1947 int term_count = 0;
1948 for (const auto& section_and_terms : query_scoring_results.query_terms) {
1949 term_count += section_and_terms.second.size();
1950 }
1951 query_stats->set_num_terms(term_count);
1952 query_stats->set_parse_query_latency_ms(
1953 query_scoring_results.parse_query_latency_ms);
1954 query_stats->set_scoring_latency_ms(query_scoring_results.scoring_latency_ms);
1955 if (!query_scoring_results.status.ok()) {
1956 TransformStatus(query_scoring_results.status, result_status);
1957 return result_proto;
1958 }
1959
1960 query_stats->set_num_documents_scored(
1961 query_scoring_results.scored_document_hits.size());
1962 // Returns early for empty result
1963 if (query_scoring_results.scored_document_hits.empty()) {
1964 result_status->set_code(StatusProto::OK);
1965 return result_proto;
1966 }
1967
1968 // Construct parent's result adjustment info.
1969 auto parent_result_adjustment_info = std::make_unique<ResultAdjustmentInfo>(
1970 search_spec, scoring_spec, result_spec, schema_store_.get(),
1971 std::move(query_scoring_results.query_terms));
1972
1973 std::unique_ptr<ScoredDocumentHitsRanker> ranker;
1974 if (join_children_fetcher != nullptr) {
1975 std::unique_ptr<Timer> join_timer = clock_->GetNewTimer();
1976 // Join 2 scored document hits
1977 JoinProcessor join_processor(document_store_.get(), schema_store_.get(),
1978 qualified_id_join_index_.get(),
1979 current_time_ms);
1980 libtextclassifier3::StatusOr<std::vector<JoinedScoredDocumentHit>>
1981 joined_result_document_hits_or = join_processor.Join(
1982 join_spec, std::move(query_scoring_results.scored_document_hits),
1983 *join_children_fetcher);
1984 if (!joined_result_document_hits_or.ok()) {
1985 TransformStatus(joined_result_document_hits_or.status(), result_status);
1986 return result_proto;
1987 }
1988 std::vector<JoinedScoredDocumentHit> joined_result_document_hits =
1989 std::move(joined_result_document_hits_or).ValueOrDie();
1990
1991 query_stats->set_join_latency_ms(join_timer->GetElapsedMilliseconds());
1992
1993 std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
1994 // Ranks results
1995 ranker = std::make_unique<
1996 PriorityQueueScoredDocumentHitsRanker<JoinedScoredDocumentHit>>(
1997 std::move(joined_result_document_hits),
1998 /*is_descending=*/scoring_spec.order_by() ==
1999 ScoringSpecProto::Order::DESC);
2000 query_stats->set_ranking_latency_ms(
2001 component_timer->GetElapsedMilliseconds());
2002 } else {
2003 // Non-join query
2004 std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
2005 // Ranks results
2006 ranker = std::make_unique<
2007 PriorityQueueScoredDocumentHitsRanker<ScoredDocumentHit>>(
2008 std::move(query_scoring_results.scored_document_hits),
2009 /*is_descending=*/scoring_spec.order_by() ==
2010 ScoringSpecProto::Order::DESC);
2011 query_stats->set_ranking_latency_ms(
2012 component_timer->GetElapsedMilliseconds());
2013 }
2014
2015 std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
2016 // CacheAndRetrieveFirstPage and retrieves the document protos and snippets if
2017 // requested
2018 auto result_retriever_or =
2019 ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
2020 language_segmenter_.get(), normalizer_.get());
2021 if (!result_retriever_or.ok()) {
2022 TransformStatus(result_retriever_or.status(), result_status);
2023 query_stats->set_document_retrieval_latency_ms(
2024 component_timer->GetElapsedMilliseconds());
2025 return result_proto;
2026 }
2027 std::unique_ptr<ResultRetrieverV2> result_retriever =
2028 std::move(result_retriever_or).ValueOrDie();
2029
2030 libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
2031 page_result_info_or = result_state_manager_->CacheAndRetrieveFirstPage(
2032 std::move(ranker), std::move(parent_result_adjustment_info),
2033 std::move(child_result_adjustment_info), result_spec,
2034 *document_store_, *result_retriever, current_time_ms);
2035 if (!page_result_info_or.ok()) {
2036 TransformStatus(page_result_info_or.status(), result_status);
2037 query_stats->set_document_retrieval_latency_ms(
2038 component_timer->GetElapsedMilliseconds());
2039 return result_proto;
2040 }
2041 std::pair<uint64_t, PageResult> page_result_info =
2042 std::move(page_result_info_or).ValueOrDie();
2043
2044 // Assembles the final search result proto
2045 result_proto.mutable_results()->Reserve(
2046 page_result_info.second.results.size());
2047
2048 int32_t child_count = 0;
2049 for (SearchResultProto::ResultProto& result :
2050 page_result_info.second.results) {
2051 child_count += result.joined_results_size();
2052 result_proto.mutable_results()->Add(std::move(result));
2053 }
2054
2055 result_status->set_code(StatusProto::OK);
2056 if (page_result_info.first != kInvalidNextPageToken) {
2057 result_proto.set_next_page_token(page_result_info.first);
2058 }
2059
2060 query_stats->set_document_retrieval_latency_ms(
2061 component_timer->GetElapsedMilliseconds());
2062 query_stats->set_num_results_returned_current_page(
2063 result_proto.results_size());
2064
2065 query_stats->set_num_joined_results_returned_current_page(child_count);
2066
2067 query_stats->set_num_results_with_snippets(
2068 page_result_info.second.num_results_with_snippets);
2069 return result_proto;
2070 }
2071
ProcessQueryAndScore(const SearchSpecProto & search_spec,const ScoringSpecProto & scoring_spec,const ResultSpecProto & result_spec,const JoinChildrenFetcher * join_children_fetcher,int64_t current_time_ms)2072 IcingSearchEngine::QueryScoringResults IcingSearchEngine::ProcessQueryAndScore(
2073 const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
2074 const ResultSpecProto& result_spec,
2075 const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms) {
2076 std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
2077
2078 // Gets unordered results from query processor
2079 auto query_processor_or = QueryProcessor::Create(
2080 index_.get(), integer_index_.get(), language_segmenter_.get(),
2081 normalizer_.get(), document_store_.get(), schema_store_.get());
2082 if (!query_processor_or.ok()) {
2083 return QueryScoringResults(
2084 std::move(query_processor_or).status(), /*query_terms_in=*/{},
2085 /*scored_document_hits_in=*/{},
2086 /*parse_query_latency_ms_in=*/component_timer->GetElapsedMilliseconds(),
2087 /*scoring_latency_ms_in=*/0);
2088 }
2089 std::unique_ptr<QueryProcessor> query_processor =
2090 std::move(query_processor_or).ValueOrDie();
2091
2092 auto ranking_strategy_or = GetRankingStrategyFromScoringSpec(scoring_spec);
2093 libtextclassifier3::StatusOr<QueryResults> query_results_or;
2094 if (ranking_strategy_or.ok()) {
2095 query_results_or = query_processor->ParseSearch(
2096 search_spec, ranking_strategy_or.ValueOrDie(), current_time_ms);
2097 } else {
2098 query_results_or = ranking_strategy_or.status();
2099 }
2100 if (!query_results_or.ok()) {
2101 return QueryScoringResults(
2102 std::move(query_results_or).status(), /*query_terms_in=*/{},
2103 /*scored_document_hits_in=*/{},
2104 /*parse_query_latency_ms_in=*/component_timer->GetElapsedMilliseconds(),
2105 /*scoring_latency_ms_in=*/0);
2106 }
2107 QueryResults query_results = std::move(query_results_or).ValueOrDie();
2108 int64_t parse_query_latency_ms = component_timer->GetElapsedMilliseconds();
2109
2110 component_timer = clock_->GetNewTimer();
2111 // Scores but does not rank the results.
2112 libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>>
2113 scoring_processor_or = ScoringProcessor::Create(
2114 scoring_spec, document_store_.get(), schema_store_.get(),
2115 current_time_ms, join_children_fetcher);
2116 if (!scoring_processor_or.ok()) {
2117 return QueryScoringResults(std::move(scoring_processor_or).status(),
2118 std::move(query_results.query_terms),
2119 /*scored_document_hits_in=*/{},
2120 parse_query_latency_ms,
2121 /*scoring_latency_ms_in=*/0);
2122 }
2123 std::unique_ptr<ScoringProcessor> scoring_processor =
2124 std::move(scoring_processor_or).ValueOrDie();
2125 std::vector<ScoredDocumentHit> scored_document_hits =
2126 scoring_processor->Score(std::move(query_results.root_iterator),
2127 performance_configuration_.num_to_score,
2128 &query_results.query_term_iterators);
2129 int64_t scoring_latency_ms = component_timer->GetElapsedMilliseconds();
2130
2131 return QueryScoringResults(libtextclassifier3::Status::OK,
2132 std::move(query_results.query_terms),
2133 std::move(scored_document_hits),
2134 parse_query_latency_ms, scoring_latency_ms);
2135 }
2136
GetNextPage(uint64_t next_page_token)2137 SearchResultProto IcingSearchEngine::GetNextPage(uint64_t next_page_token) {
2138 SearchResultProto result_proto;
2139 StatusProto* result_status = result_proto.mutable_status();
2140
2141 QueryStatsProto* query_stats = result_proto.mutable_query_stats();
2142 query_stats->set_is_first_page(false);
2143 std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
2144 // ResultStateManager has its own writer lock, so here we only need a reader
2145 // lock for other components.
2146 absl_ports::shared_lock l(&mutex_);
2147 query_stats->set_lock_acquisition_latency_ms(
2148 overall_timer->GetElapsedMilliseconds());
2149 if (!initialized_) {
2150 result_status->set_code(StatusProto::FAILED_PRECONDITION);
2151 result_status->set_message("IcingSearchEngine has not been initialized!");
2152 return result_proto;
2153 }
2154
2155 auto result_retriever_or =
2156 ResultRetrieverV2::Create(document_store_.get(), schema_store_.get(),
2157 language_segmenter_.get(), normalizer_.get());
2158 if (!result_retriever_or.ok()) {
2159 TransformStatus(result_retriever_or.status(), result_status);
2160 return result_proto;
2161 }
2162 std::unique_ptr<ResultRetrieverV2> result_retriever =
2163 std::move(result_retriever_or).ValueOrDie();
2164
2165 int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
2166 libtextclassifier3::StatusOr<std::pair<uint64_t, PageResult>>
2167 page_result_info_or = result_state_manager_->GetNextPage(
2168 next_page_token, *result_retriever, current_time_ms);
2169 if (!page_result_info_or.ok()) {
2170 if (absl_ports::IsNotFound(page_result_info_or.status())) {
2171 // NOT_FOUND means an empty result.
2172 result_status->set_code(StatusProto::OK);
2173 } else {
2174 // Real error, pass up.
2175 TransformStatus(page_result_info_or.status(), result_status);
2176 }
2177 return result_proto;
2178 }
2179
2180 std::pair<uint64_t, PageResult> page_result_info =
2181 std::move(page_result_info_or).ValueOrDie();
2182 query_stats->set_requested_page_size(
2183 page_result_info.second.requested_page_size);
2184
2185 // Assembles the final search result proto
2186 result_proto.mutable_results()->Reserve(
2187 page_result_info.second.results.size());
2188
2189 int32_t child_count = 0;
2190 for (SearchResultProto::ResultProto& result :
2191 page_result_info.second.results) {
2192 child_count += result.joined_results_size();
2193 result_proto.mutable_results()->Add(std::move(result));
2194 }
2195
2196 result_status->set_code(StatusProto::OK);
2197 if (page_result_info.first != kInvalidNextPageToken) {
2198 result_proto.set_next_page_token(page_result_info.first);
2199 }
2200
2201 // The only thing that we're doing is document retrieval. So document
2202 // retrieval latency and overall latency are the same and can use the same
2203 // timer.
2204 query_stats->set_document_retrieval_latency_ms(
2205 overall_timer->GetElapsedMilliseconds());
2206 query_stats->set_latency_ms(overall_timer->GetElapsedMilliseconds());
2207 query_stats->set_num_results_returned_current_page(
2208 result_proto.results_size());
2209 query_stats->set_num_results_with_snippets(
2210 page_result_info.second.num_results_with_snippets);
2211 query_stats->set_num_joined_results_returned_current_page(child_count);
2212
2213 return result_proto;
2214 }
2215
InvalidateNextPageToken(uint64_t next_page_token)2216 void IcingSearchEngine::InvalidateNextPageToken(uint64_t next_page_token) {
2217 absl_ports::shared_lock l(&mutex_);
2218 if (!initialized_) {
2219 ICING_LOG(ERROR) << "IcingSearchEngine has not been initialized!";
2220 return;
2221 }
2222 result_state_manager_->InvalidateResultState(next_page_token);
2223 }
2224
2225 libtextclassifier3::StatusOr<std::vector<DocumentId>>
OptimizeDocumentStore(OptimizeStatsProto * optimize_stats)2226 IcingSearchEngine::OptimizeDocumentStore(OptimizeStatsProto* optimize_stats) {
2227 // Gets the current directory path and an empty tmp directory path for
2228 // document store optimization.
2229 const std::string current_document_dir =
2230 MakeDocumentDirectoryPath(options_.base_dir());
2231 const std::string temporary_document_dir =
2232 MakeDocumentTemporaryDirectoryPath(options_.base_dir());
2233 if (!filesystem_->DeleteDirectoryRecursively(
2234 temporary_document_dir.c_str()) ||
2235 !filesystem_->CreateDirectoryRecursively(
2236 temporary_document_dir.c_str())) {
2237 return absl_ports::AbortedError(absl_ports::StrCat(
2238 "Failed to create a tmp directory: ", temporary_document_dir));
2239 }
2240
2241 // Copies valid document data to tmp directory
2242 libtextclassifier3::StatusOr<std::vector<DocumentId>>
2243 document_id_old_to_new_or = document_store_->OptimizeInto(
2244 temporary_document_dir, language_segmenter_.get(),
2245 options_.document_store_namespace_id_fingerprint(), optimize_stats);
2246
2247 // Handles error if any
2248 if (!document_id_old_to_new_or.ok()) {
2249 filesystem_->DeleteDirectoryRecursively(temporary_document_dir.c_str());
2250 return absl_ports::Annotate(
2251 absl_ports::AbortedError("Failed to optimize document store"),
2252 document_id_old_to_new_or.status().error_message());
2253 }
2254
2255 // result_state_manager_ depends on document_store_. So we need to reset it at
2256 // the same time that we reset the document_store_.
2257 result_state_manager_.reset();
2258 document_store_.reset();
2259
2260 // When swapping files, always put the current working directory at the
2261 // second place because it is renamed at the latter position so we're less
2262 // vulnerable to errors.
2263 if (!filesystem_->SwapFiles(temporary_document_dir.c_str(),
2264 current_document_dir.c_str())) {
2265 ICING_LOG(ERROR) << "Failed to swap files";
2266
2267 // Ensures that current directory is still present.
2268 if (!filesystem_->CreateDirectoryRecursively(
2269 current_document_dir.c_str())) {
2270 // Can't even create the old directory. Mark as uninitialized and return
2271 // INTERNAL.
2272 initialized_ = false;
2273 return absl_ports::InternalError(
2274 "Failed to create file directory for document store");
2275 }
2276
2277 // Tries to rebuild document store if swapping fails, to avoid leaving the
2278 // system in the broken state for future operations.
2279 auto create_result_or = DocumentStore::Create(
2280 filesystem_.get(), current_document_dir, clock_.get(),
2281 schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false,
2282 options_.document_store_namespace_id_fingerprint(),
2283 options_.compression_level(), /*initialize_stats=*/nullptr);
2284 // TODO(b/144458732): Implement a more robust version of
2285 // TC_ASSIGN_OR_RETURN that can support error logging.
2286 if (!create_result_or.ok()) {
2287 // Unable to create DocumentStore from the old file. Mark as uninitialized
2288 // and return INTERNAL.
2289 initialized_ = false;
2290 ICING_LOG(ERROR) << "Failed to create document store instance";
2291 return absl_ports::Annotate(
2292 absl_ports::InternalError("Failed to create document store instance"),
2293 create_result_or.status().error_message());
2294 }
2295 document_store_ = std::move(create_result_or.ValueOrDie().document_store);
2296 result_state_manager_ = std::make_unique<ResultStateManager>(
2297 performance_configuration_.max_num_total_hits, *document_store_);
2298
2299 // Potential data loss
2300 // TODO(b/147373249): Find a way to detect true data loss error
2301 return absl_ports::DataLossError(
2302 "Failed to optimize document store, there might be data loss");
2303 }
2304
2305 // Recreates the doc store instance
2306 auto create_result_or = DocumentStore::Create(
2307 filesystem_.get(), current_document_dir, clock_.get(),
2308 schema_store_.get(), /*force_recovery_and_revalidate_documents=*/false,
2309 options_.document_store_namespace_id_fingerprint(),
2310 options_.compression_level(), /*initialize_stats=*/nullptr);
2311 if (!create_result_or.ok()) {
2312 // Unable to create DocumentStore from the new file. Mark as uninitialized
2313 // and return INTERNAL.
2314 initialized_ = false;
2315 return absl_ports::InternalError(
2316 "Document store has been optimized, but a valid document store "
2317 "instance can't be created");
2318 }
2319 document_store_ = std::move(create_result_or.ValueOrDie().document_store);
2320 result_state_manager_ = std::make_unique<ResultStateManager>(
2321 performance_configuration_.max_num_total_hits, *document_store_);
2322
2323 // Deletes tmp directory
2324 if (!filesystem_->DeleteDirectoryRecursively(
2325 temporary_document_dir.c_str())) {
2326 ICING_LOG(ERROR) << "Document store has been optimized, but it failed to "
2327 "delete temporary file directory";
2328 }
2329 return document_id_old_to_new_or;
2330 }
2331
2332 IcingSearchEngine::IndexRestorationResult
RestoreIndexIfNeeded()2333 IcingSearchEngine::RestoreIndexIfNeeded() {
2334 DocumentId last_stored_document_id =
2335 document_store_->last_added_document_id();
2336 if (last_stored_document_id == index_->last_added_document_id() &&
2337 last_stored_document_id == integer_index_->last_added_document_id() &&
2338 last_stored_document_id ==
2339 qualified_id_join_index_->last_added_document_id()) {
2340 // No need to recover.
2341 return {libtextclassifier3::Status::OK, false, false, false};
2342 }
2343
2344 if (last_stored_document_id == kInvalidDocumentId) {
2345 // Document store is empty but index is not. Clear the index.
2346 return {ClearAllIndices(), false, false, false};
2347 }
2348
2349 // Truncate indices first.
2350 auto truncate_result_or = TruncateIndicesTo(last_stored_document_id);
2351 if (!truncate_result_or.ok()) {
2352 return {std::move(truncate_result_or).status(), false, false, false};
2353 }
2354 TruncateIndexResult truncate_result =
2355 std::move(truncate_result_or).ValueOrDie();
2356
2357 if (truncate_result.first_document_to_reindex > last_stored_document_id) {
2358 // Nothing to restore. Just return.
2359 return {libtextclassifier3::Status::OK, false, false, false};
2360 }
2361
2362 auto data_indexing_handlers_or = CreateDataIndexingHandlers();
2363 if (!data_indexing_handlers_or.ok()) {
2364 return {data_indexing_handlers_or.status(),
2365 truncate_result.index_needed_restoration,
2366 truncate_result.integer_index_needed_restoration,
2367 truncate_result.qualified_id_join_index_needed_restoration};
2368 }
2369 // By using recovery_mode for IndexProcessor, we're able to replay documents
2370 // from smaller document id and it will skip documents that are already been
2371 // indexed.
2372 IndexProcessor index_processor(
2373 std::move(data_indexing_handlers_or).ValueOrDie(), clock_.get(),
2374 /*recovery_mode=*/true);
2375
2376 ICING_VLOG(1) << "Restoring index by replaying documents from document id "
2377 << truncate_result.first_document_to_reindex
2378 << " to document id " << last_stored_document_id;
2379 libtextclassifier3::Status overall_status;
2380 for (DocumentId document_id = truncate_result.first_document_to_reindex;
2381 document_id <= last_stored_document_id; ++document_id) {
2382 libtextclassifier3::StatusOr<DocumentProto> document_or =
2383 document_store_->Get(document_id);
2384
2385 if (!document_or.ok()) {
2386 if (absl_ports::IsInvalidArgument(document_or.status()) ||
2387 absl_ports::IsNotFound(document_or.status())) {
2388 // Skips invalid and non-existing documents.
2389 continue;
2390 } else {
2391 // Returns other errors
2392 return {document_or.status(), truncate_result.index_needed_restoration,
2393 truncate_result.integer_index_needed_restoration,
2394 truncate_result.qualified_id_join_index_needed_restoration};
2395 }
2396 }
2397 DocumentProto document(std::move(document_or).ValueOrDie());
2398
2399 libtextclassifier3::StatusOr<TokenizedDocument> tokenized_document_or =
2400 TokenizedDocument::Create(schema_store_.get(),
2401 language_segmenter_.get(),
2402 std::move(document));
2403 if (!tokenized_document_or.ok()) {
2404 return {tokenized_document_or.status(),
2405 truncate_result.index_needed_restoration,
2406 truncate_result.integer_index_needed_restoration,
2407 truncate_result.qualified_id_join_index_needed_restoration};
2408 }
2409 TokenizedDocument tokenized_document(
2410 std::move(tokenized_document_or).ValueOrDie());
2411
2412 libtextclassifier3::Status status =
2413 index_processor.IndexDocument(tokenized_document, document_id);
2414 if (!status.ok()) {
2415 if (!absl_ports::IsDataLoss(status)) {
2416 // Real error. Stop recovering and pass it up.
2417 return {status, truncate_result.index_needed_restoration,
2418 truncate_result.integer_index_needed_restoration,
2419 truncate_result.qualified_id_join_index_needed_restoration};
2420 }
2421 // FIXME: why can we skip data loss error here?
2422 // Just a data loss. Keep trying to add the remaining docs, but report the
2423 // data loss when we're done.
2424 overall_status = status;
2425 }
2426 }
2427
2428 return {overall_status, truncate_result.index_needed_restoration,
2429 truncate_result.integer_index_needed_restoration,
2430 truncate_result.qualified_id_join_index_needed_restoration};
2431 }
2432
LostPreviousSchema()2433 libtextclassifier3::StatusOr<bool> IcingSearchEngine::LostPreviousSchema() {
2434 auto status_or = schema_store_->GetSchema();
2435 if (status_or.ok()) {
2436 // Found a schema.
2437 return false;
2438 }
2439
2440 if (!absl_ports::IsNotFound(status_or.status())) {
2441 // Any other type of error
2442 return status_or.status();
2443 }
2444
2445 // We know: We don't have a schema now.
2446 //
2447 // We know: If no documents have been added, then the last_added_document_id
2448 // will be invalid.
2449 //
2450 // So: If documents have been added before and we don't have a schema now,
2451 // then that means we must have had a schema at some point. Since we wouldn't
2452 // accept documents without a schema to validate them against.
2453 return document_store_->last_added_document_id() != kInvalidDocumentId;
2454 }
2455
2456 libtextclassifier3::StatusOr<std::vector<std::unique_ptr<DataIndexingHandler>>>
CreateDataIndexingHandlers()2457 IcingSearchEngine::CreateDataIndexingHandlers() {
2458 std::vector<std::unique_ptr<DataIndexingHandler>> handlers;
2459
2460 // Term index handler
2461 ICING_ASSIGN_OR_RETURN(std::unique_ptr<StringSectionIndexingHandler>
2462 string_section_indexing_handler,
2463 StringSectionIndexingHandler::Create(
2464 clock_.get(), normalizer_.get(), index_.get()));
2465 handlers.push_back(std::move(string_section_indexing_handler));
2466
2467 // Integer index handler
2468 ICING_ASSIGN_OR_RETURN(std::unique_ptr<IntegerSectionIndexingHandler>
2469 integer_section_indexing_handler,
2470 IntegerSectionIndexingHandler::Create(
2471 clock_.get(), integer_index_.get()));
2472 handlers.push_back(std::move(integer_section_indexing_handler));
2473
2474 // Qualified id joinable property index handler
2475 ICING_ASSIGN_OR_RETURN(std::unique_ptr<QualifiedIdJoinIndexingHandler>
2476 qualified_id_joinable_property_indexing_handler,
2477 QualifiedIdJoinIndexingHandler::Create(
2478 clock_.get(), qualified_id_join_index_.get()));
2479 handlers.push_back(
2480 std::move(qualified_id_joinable_property_indexing_handler));
2481
2482 return handlers;
2483 }
2484
2485 libtextclassifier3::StatusOr<IcingSearchEngine::TruncateIndexResult>
TruncateIndicesTo(DocumentId last_stored_document_id)2486 IcingSearchEngine::TruncateIndicesTo(DocumentId last_stored_document_id) {
2487 // Attempt to truncate term index.
2488 // TruncateTo ensures that the index does not hold any data that is not
2489 // present in the ground truth. If the document store lost some documents,
2490 // TruncateTo will ensure that the index does not contain any hits from those
2491 // lost documents. If the index does not contain any hits for documents with
2492 // document id greater than last_stored_document_id, then TruncateTo will have
2493 // no effect.
2494 ICING_RETURN_IF_ERROR(index_->TruncateTo(last_stored_document_id));
2495
2496 // Get last indexed document id for term index after truncating.
2497 DocumentId term_index_last_added_document_id =
2498 index_->last_added_document_id();
2499 DocumentId first_document_to_reindex =
2500 (term_index_last_added_document_id != kInvalidDocumentId)
2501 ? term_index_last_added_document_id + 1
2502 : kMinDocumentId;
2503 bool index_needed_restoration =
2504 (last_stored_document_id != term_index_last_added_document_id);
2505
2506 // Attempt to truncate integer index.
2507 bool integer_index_needed_restoration = false;
2508 DocumentId integer_index_last_added_document_id =
2509 integer_index_->last_added_document_id();
2510 if (integer_index_last_added_document_id == kInvalidDocumentId ||
2511 last_stored_document_id > integer_index_last_added_document_id) {
2512 // If last_stored_document_id is greater than
2513 // integer_index_last_added_document_id, then we only have to replay docs
2514 // starting from integer_index_last_added_document_id + 1. Also use std::min
2515 // since we might need to replay even smaller doc ids for term index.
2516 integer_index_needed_restoration = true;
2517 if (integer_index_last_added_document_id != kInvalidDocumentId) {
2518 first_document_to_reindex = std::min(
2519 first_document_to_reindex, integer_index_last_added_document_id + 1);
2520 } else {
2521 first_document_to_reindex = kMinDocumentId;
2522 }
2523 } else if (last_stored_document_id < integer_index_last_added_document_id) {
2524 // Clear the entire integer index if last_stored_document_id is smaller than
2525 // integer_index_last_added_document_id, because there is no way to remove
2526 // data with doc_id > last_stored_document_id from integer index and we have
2527 // to rebuild.
2528 ICING_RETURN_IF_ERROR(integer_index_->Clear());
2529
2530 // Since the entire integer index is discarded, we start to rebuild it by
2531 // setting first_document_to_reindex to kMinDocumentId.
2532 integer_index_needed_restoration = true;
2533 first_document_to_reindex = kMinDocumentId;
2534 }
2535
2536 // Attempt to truncate qualified id join index
2537 bool qualified_id_join_index_needed_restoration = false;
2538 DocumentId qualified_id_join_index_last_added_document_id =
2539 qualified_id_join_index_->last_added_document_id();
2540 if (qualified_id_join_index_last_added_document_id == kInvalidDocumentId ||
2541 last_stored_document_id >
2542 qualified_id_join_index_last_added_document_id) {
2543 // If last_stored_document_id is greater than
2544 // qualified_id_join_index_last_added_document_id, then we only have to
2545 // replay docs starting from (qualified_id_join_index_last_added_document_id
2546 // + 1). Also use std::min since we might need to replay even smaller doc
2547 // ids for other components.
2548 qualified_id_join_index_needed_restoration = true;
2549 if (qualified_id_join_index_last_added_document_id != kInvalidDocumentId) {
2550 first_document_to_reindex =
2551 std::min(first_document_to_reindex,
2552 qualified_id_join_index_last_added_document_id + 1);
2553 } else {
2554 first_document_to_reindex = kMinDocumentId;
2555 }
2556 } else if (last_stored_document_id <
2557 qualified_id_join_index_last_added_document_id) {
2558 // Clear the entire qualified id join index if last_stored_document_id is
2559 // smaller than qualified_id_join_index_last_added_document_id, because
2560 // there is no way to remove data with doc_id > last_stored_document_id from
2561 // join index efficiently and we have to rebuild.
2562 ICING_RETURN_IF_ERROR(qualified_id_join_index_->Clear());
2563
2564 // Since the entire qualified id join index is discarded, we start to
2565 // rebuild it by setting first_document_to_reindex to kMinDocumentId.
2566 qualified_id_join_index_needed_restoration = true;
2567 first_document_to_reindex = kMinDocumentId;
2568 }
2569
2570 return TruncateIndexResult(first_document_to_reindex,
2571 index_needed_restoration,
2572 integer_index_needed_restoration,
2573 qualified_id_join_index_needed_restoration);
2574 }
2575
DiscardDerivedFiles()2576 libtextclassifier3::Status IcingSearchEngine::DiscardDerivedFiles() {
2577 if (schema_store_ != nullptr || document_store_ != nullptr ||
2578 index_ != nullptr || integer_index_ != nullptr ||
2579 qualified_id_join_index_ != nullptr) {
2580 return absl_ports::FailedPreconditionError(
2581 "Cannot discard derived files while having valid instances");
2582 }
2583
2584 // Schema store
2585 ICING_RETURN_IF_ERROR(
2586 SchemaStore::DiscardDerivedFiles(filesystem_.get(), options_.base_dir()));
2587
2588 // Document store
2589 ICING_RETURN_IF_ERROR(DocumentStore::DiscardDerivedFiles(
2590 filesystem_.get(), options_.base_dir()));
2591
2592 // Term index
2593 if (!filesystem_->DeleteDirectoryRecursively(
2594 MakeIndexDirectoryPath(options_.base_dir()).c_str())) {
2595 return absl_ports::InternalError("Failed to discard index");
2596 }
2597
2598 // Integer index
2599 if (!filesystem_->DeleteDirectoryRecursively(
2600 MakeIntegerIndexWorkingPath(options_.base_dir()).c_str())) {
2601 return absl_ports::InternalError("Failed to discard integer index");
2602 }
2603
2604 // Qualified id join index
2605 if (!filesystem_->DeleteDirectoryRecursively(
2606 MakeQualifiedIdJoinIndexWorkingPath(options_.base_dir()).c_str())) {
2607 return absl_ports::InternalError(
2608 "Failed to discard qualified id join index");
2609 }
2610
2611 return libtextclassifier3::Status::OK;
2612 }
2613
ClearSearchIndices()2614 libtextclassifier3::Status IcingSearchEngine::ClearSearchIndices() {
2615 ICING_RETURN_IF_ERROR(index_->Reset());
2616 ICING_RETURN_IF_ERROR(integer_index_->Clear());
2617 return libtextclassifier3::Status::OK;
2618 }
2619
ClearJoinIndices()2620 libtextclassifier3::Status IcingSearchEngine::ClearJoinIndices() {
2621 return qualified_id_join_index_->Clear();
2622 }
2623
ClearAllIndices()2624 libtextclassifier3::Status IcingSearchEngine::ClearAllIndices() {
2625 ICING_RETURN_IF_ERROR(ClearSearchIndices());
2626 ICING_RETURN_IF_ERROR(ClearJoinIndices());
2627 return libtextclassifier3::Status::OK;
2628 }
2629
Reset()2630 ResetResultProto IcingSearchEngine::Reset() {
2631 absl_ports::unique_lock l(&mutex_);
2632 return ResetInternal();
2633 }
2634
ResetInternal()2635 ResetResultProto IcingSearchEngine::ResetInternal() {
2636 ICING_VLOG(1) << "Resetting IcingSearchEngine";
2637
2638 ResetResultProto result_proto;
2639 StatusProto* result_status = result_proto.mutable_status();
2640
2641 initialized_ = false;
2642 ResetMembers();
2643 if (!filesystem_->DeleteDirectoryRecursively(options_.base_dir().c_str())) {
2644 result_status->set_code(StatusProto::INTERNAL);
2645 return result_proto;
2646 }
2647
2648 if (InternalInitialize().status().code() != StatusProto::OK) {
2649 // We shouldn't hit the following Initialize errors:
2650 // NOT_FOUND: all data was cleared, we aren't expecting anything
2651 // DATA_LOSS: all data was cleared, we aren't expecting anything
2652 // RESOURCE_EXHAUSTED: just deleted files, shouldn't run out of space
2653 //
2654 // We can't tell if Initialize failed and left Icing in an inconsistent
2655 // state or if it was a temporary I/O error. Group everything under INTERNAL
2656 // to be safe.
2657 //
2658 // TODO(b/147699081): Once Initialize returns the proper ABORTED/INTERNAL
2659 // status code, we can just propagate it up from here.
2660 result_status->set_code(StatusProto::INTERNAL);
2661 return result_proto;
2662 }
2663
2664 result_status->set_code(StatusProto::OK);
2665 return result_proto;
2666 }
2667
SearchSuggestions(const SuggestionSpecProto & suggestion_spec)2668 SuggestionResponse IcingSearchEngine::SearchSuggestions(
2669 const SuggestionSpecProto& suggestion_spec) {
2670 // TODO(b/146008613) Explore ideas to make this function read-only.
2671 absl_ports::unique_lock l(&mutex_);
2672 SuggestionResponse response;
2673 StatusProto* response_status = response.mutable_status();
2674 if (!initialized_) {
2675 response_status->set_code(StatusProto::FAILED_PRECONDITION);
2676 response_status->set_message("IcingSearchEngine has not been initialized!");
2677 return response;
2678 }
2679
2680 libtextclassifier3::Status status =
2681 ValidateSuggestionSpec(suggestion_spec, performance_configuration_);
2682 if (!status.ok()) {
2683 TransformStatus(status, response_status);
2684 return response;
2685 }
2686
2687 // Create the suggestion processor.
2688 auto suggestion_processor_or = SuggestionProcessor::Create(
2689 index_.get(), integer_index_.get(), language_segmenter_.get(),
2690 normalizer_.get(), document_store_.get(), schema_store_.get());
2691 if (!suggestion_processor_or.ok()) {
2692 TransformStatus(suggestion_processor_or.status(), response_status);
2693 return response;
2694 }
2695 std::unique_ptr<SuggestionProcessor> suggestion_processor =
2696 std::move(suggestion_processor_or).ValueOrDie();
2697
2698 // Run suggestion based on given SuggestionSpec.
2699 int64_t current_time_ms = clock_->GetSystemTimeMilliseconds();
2700 libtextclassifier3::StatusOr<std::vector<TermMetadata>> terms_or =
2701 suggestion_processor->QuerySuggestions(suggestion_spec, current_time_ms);
2702 if (!terms_or.ok()) {
2703 TransformStatus(terms_or.status(), response_status);
2704 return response;
2705 }
2706
2707 // Convert vector<TermMetaData> into final SuggestionResponse proto.
2708 for (TermMetadata& term : terms_or.ValueOrDie()) {
2709 SuggestionResponse::Suggestion suggestion;
2710 suggestion.set_query(std::move(term.content));
2711 response.mutable_suggestions()->Add(std::move(suggestion));
2712 }
2713 response_status->set_code(StatusProto::OK);
2714 return response;
2715 }
2716
2717 } // namespace lib
2718 } // namespace icing
2719