1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_ICING_SEARCH_ENGINE_H_ 16 #define ICING_ICING_SEARCH_ENGINE_H_ 17 18 #include <cstdint> 19 #include <memory> 20 #include <string> 21 #include <string_view> 22 #include <vector> 23 24 #include "icing/text_classifier/lib3/utils/base/status.h" 25 #include "icing/text_classifier/lib3/utils/base/statusor.h" 26 #include "icing/absl_ports/mutex.h" 27 #include "icing/absl_ports/thread_annotations.h" 28 #include "icing/file/filesystem.h" 29 #include "icing/index/data-indexing-handler.h" 30 #include "icing/index/index.h" 31 #include "icing/index/numeric/numeric-index.h" 32 #include "icing/jni/jni-cache.h" 33 #include "icing/join/join-children-fetcher.h" 34 #include "icing/join/qualified-id-type-joinable-index.h" 35 #include "icing/legacy/index/icing-filesystem.h" 36 #include "icing/performance-configuration.h" 37 #include "icing/proto/debug.pb.h" 38 #include "icing/proto/document.pb.h" 39 #include "icing/proto/initialize.pb.h" 40 #include "icing/proto/logging.pb.h" 41 #include "icing/proto/optimize.pb.h" 42 #include "icing/proto/persist.pb.h" 43 #include "icing/proto/reset.pb.h" 44 #include "icing/proto/schema.pb.h" 45 #include "icing/proto/scoring.pb.h" 46 #include "icing/proto/search.pb.h" 47 #include "icing/proto/storage.pb.h" 48 #include "icing/proto/usage.pb.h" 49 #include "icing/query/query-terms.h" 50 #include "icing/result/result-state-manager.h" 51 #include "icing/schema/schema-store.h" 52 #include "icing/scoring/scored-document-hit.h" 53 #include "icing/store/document-store.h" 54 #include "icing/tokenization/language-segmenter.h" 55 #include "icing/transform/normalizer.h" 56 #include "icing/util/clock.h" 57 #include "icing/util/crc32.h" 58 59 namespace icing { 60 namespace lib { 61 62 // TODO(cassiewang) Top-level comments and links to design-doc. 63 class IcingSearchEngine { 64 public: 65 // Note: It is only required to provide a pointer to a valid instance of 66 // JniCache if this instance needs to perform reverse-jni calls. Users on 67 // Linux and iOS should always provide a nullptr. 68 explicit IcingSearchEngine( 69 const IcingSearchEngineOptions& options, 70 std::unique_ptr<const JniCache> jni_cache = nullptr); 71 72 // Calculates integrity checks and persists files to disk. 73 ~IcingSearchEngine(); 74 75 // Loads & verifies the contents previously indexed from disk and gets ready 76 // to handle read/write requests. 77 // 78 // WARNING: This is expected to be fast if Icing had a clean shutdown. 79 // Otherwise, it can take longer as it runs integrity checks and attempts 80 // to bring the index to a consistent state. If the data on disk is not 81 // consistent, it restores the state when PersistToDisk() was last called. 82 // 83 // TODO(cassiewang): We shouldn't return NOT_FOUND here, this is a symptom 84 // of some other error. We should return a broader error group, i.e. data 85 // inconsistency or something 86 // 87 // Returns: 88 // OK on success 89 // DATA_LOSS if encountered any inconsistencies in data and had to restore 90 // its state back to the last time PersistToDisk was called. Or if any 91 // persisted data was lost and could not be recovered. 92 // INTERNAL if any internal state was left in an inconsistent. The instance 93 // of IcingSearchEngine is unusable if this happens. It's recommended to 94 // clear the underlying directory provided in 95 // IcingSearchEngineOptions.base_dir and reinitialize. 96 // RESOURCE_EXHAUSTED if not enough storage space 97 // NOT_FOUND if missing some internal data 98 InitializeResultProto Initialize() ICING_LOCKS_EXCLUDED(mutex_); 99 100 // Specifies the schema to be applied on all Documents that are already 101 // stored as well as future documents. A schema can be 'invalid' and/or 102 // 'incompatible'. These are two independent concepts. 103 // 104 // An 'invalid' schema is one that is not constructed properly. For example, 105 // a PropertyConfigProto is missing the property name field. A schema can be 106 // 'invalid' even if there is no previously existing schema. 107 // 108 // An 'incompatible' schema is one that is incompatible with a previously 109 // existing schema. If there is no previously existing schema, then a new 110 // schema cannot be incompatible. An incompatible schema is one that 111 // invalidates pre-existing data. For example, a previously OPTIONAL field is 112 // now REQUIRED in the new schema, and pre-existing data is considered invalid 113 // against the new schema now. 114 // 115 // Default behavior will not allow a new schema to be set if it is invalid or 116 // incompatible. 117 // 118 // The argument 'ignore_errors_and_delete_documents' can be set to true to 119 // force set an incompatible schema. In that case, documents that are 120 // invalidated by the new schema would be deleted from Icing. This cannot be 121 // used to force set an invalid schema. 122 // 123 // This schema is persisted to disk and used across multiple instances. 124 // So, callers should only have to call this if the schema changed. 125 // However, calling it multiple times with the same schema is a no-op. 126 // 127 // On some errors, Icing will keep using the older schema, but on 128 // INTERNAL_ERROR, it is undefined to continue using Icing. 129 // 130 // Returns: 131 // OK on success 132 // ALREADY_EXISTS if 'new_schema' contains multiple definitions of the same 133 // type or contains a type that has multiple properties with the same 134 // name. 135 // INVALID_ARGUMENT if 'new_schema' is invalid 136 // FAILED_PRECONDITION if 'new_schema' is incompatible, or IcingSearchEngine 137 // has not been initialized yet. 138 // INTERNAL_ERROR if Icing failed to store the new schema or upgrade 139 // existing data based on the new schema. Using Icing beyond this error is 140 // undefined and may cause crashes. 141 // DATA_LOSS_ERROR if 'new_schema' requires the index to be rebuilt and an 142 // IO error leads to some documents being excluded from the index. These 143 // documents will still be retrievable via Get, but won't match queries. 144 // 145 // TODO(cassiewang) Figure out, document (and maybe even enforce) the best 146 // way ordering of calls between Initialize() and SetSchema(), both when 147 // the caller is creating an instance of IcingSearchEngine for the first 148 // time and when the caller is reinitializing an existing index on disk. 149 SetSchemaResultProto SetSchema( 150 SchemaProto&& new_schema, bool ignore_errors_and_delete_documents = false) 151 ICING_LOCKS_EXCLUDED(mutex_); 152 153 // This function makes a copy of the schema and calls SetSchema(SchemaProto&& 154 // new_schema, bool ignore_errors_and_delete_documents) 155 // 156 // NOTE: It's recommended to call SetSchema(SchemaProto&& new_schema, bool 157 // ignore_errors_and_delete_documents) directly to avoid a copy if the caller 158 // can make an rvalue SchemaProto. 159 SetSchemaResultProto SetSchema(const SchemaProto& new_schema, 160 bool ignore_errors_and_delete_documents = 161 false) ICING_LOCKS_EXCLUDED(mutex_); 162 163 // Get Icing's current copy of the schema. 164 // 165 // Returns: 166 // SchemaProto on success 167 // NOT_FOUND if a schema has not been set yet 168 // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet. 169 // INTERNAL_ERROR on IO error 170 GetSchemaResultProto GetSchema() ICING_LOCKS_EXCLUDED(mutex_); 171 172 // Get Icing's copy of the SchemaTypeConfigProto of name schema_type 173 // 174 // Returns: 175 // SchemaTypeConfigProto on success 176 // FAILED_PRECONDITION if a schema has not been set yet, IcingSearchEngine 177 // has not been initialized yet. 178 // NOT_FOUND if there is no SchemaTypeConfig of schema_type in the 179 // SchemaProto 180 // INTERNAL_ERROR on IO error 181 GetSchemaTypeResultProto GetSchemaType(std::string_view schema_type) 182 ICING_LOCKS_EXCLUDED(mutex_); 183 184 // Puts the document into icing search engine so that it's stored and 185 // indexed. Documents are automatically written to disk, callers can also 186 // call PersistToDisk() to flush changes immediately. 187 // 188 // Returns: 189 // OK on success 190 // OUT_OF_SPACE if exceeds maximum number of allowed documents 191 // FAILED_PRECONDITION if a schema has not been set yet, IcingSearchEngine 192 // has not been initialized yet. 193 // NOT_FOUND if there is no SchemaTypeConfig in the SchemaProto that matches 194 // the document's schema 195 // DATA_LOSS if an IO error occurs while merging document into the index and 196 // the index is lost. These documents will still be retrievable via Get, 197 // but won't match queries. 198 // INTERNAL_ERROR on IO error 199 PutResultProto Put(DocumentProto&& document) ICING_LOCKS_EXCLUDED(mutex_); 200 201 // This function makes a copy of document and calls Put(DocumentProto&& 202 // document). 203 // 204 // NOTE: It's recommended to call Put(DocumentProto&& document) directly to 205 // avoid a copy if the caller can make an rvalue DocumentProto. 206 PutResultProto Put(const DocumentProto& document) 207 ICING_LOCKS_EXCLUDED(mutex_); 208 209 // Finds and returns the document identified by the given key (namespace + 210 // uri) 211 // 212 // Returns: 213 // The document found on success 214 // NOT_FOUND if the key doesn't exist or doc has been deleted 215 // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet 216 // INTERNAL_ERROR on IO error 217 GetResultProto Get(std::string_view name_space, std::string_view uri, 218 const GetResultSpecProto& result_spec); 219 220 // Reports usage. The corresponding usage scores of the specified document in 221 // the report will be updated. 222 // 223 // Returns: 224 // OK on success 225 // NOT_FOUND if the [namesapce + uri] key in the report doesn't exist 226 // INTERNAL_ERROR on I/O errors. 227 ReportUsageResultProto ReportUsage(const UsageReport& usage_report); 228 229 // Returns all the namespaces that have at least one valid document in it. 230 // 231 // Returns: 232 // All namespaces on success 233 GetAllNamespacesResultProto GetAllNamespaces(); 234 235 // Deletes the Document specified by the given namespace / uri pair from the 236 // search engine. Delete changes are automatically applied to disk, callers 237 // can also call PersistToDisk() to flush changes immediately. 238 // 239 // NOTE: Space is not reclaimed for deleted documents until Optimize() is 240 // called. 241 // 242 // Returns: 243 // OK on success 244 // NOT_FOUND if no document exists with namespace, uri 245 // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet 246 // INTERNAL_ERROR on IO error 247 DeleteResultProto Delete(std::string_view name_space, std::string_view uri) 248 ICING_LOCKS_EXCLUDED(mutex_); 249 250 // Deletes all Documents belonging to the specified namespace from the search 251 // engine. Delete changes are automatically applied to disk, callers can also 252 // call PersistToDisk() to flush changes immediately. 253 // 254 // NOTE: Space is not reclaimed for deleted documents until Optimize() is 255 // called. 256 // 257 // Returns: 258 // OK on success 259 // NOT_FOUND if namespace doesn't exist 260 // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet 261 // INTERNAL_ERROR on IO error 262 DeleteByNamespaceResultProto DeleteByNamespace(std::string_view name_space) 263 ICING_LOCKS_EXCLUDED(mutex_); 264 265 // Deletes all Documents belonging to the specified type from the search 266 // engine. Delete changes are automatically applied to disk, callers can also 267 // call PersistToDisk() to flush changes immediately. 268 // 269 // NOTE: Space is not reclaimed for deleted documents until Optimize() is 270 // called. 271 // 272 // Returns: 273 // OK on success 274 // NOT_FOUND if schema type doesn't exist 275 // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet 276 // INTERNAL_ERROR on IO error 277 DeleteBySchemaTypeResultProto DeleteBySchemaType(std::string_view schema_type) 278 ICING_LOCKS_EXCLUDED(mutex_); 279 280 // Deletes all Documents that match the query specified in search_spec. Delete 281 // changes are automatically applied to disk, callers can also call 282 // PersistToDisk() to flush changes immediately. 283 // 284 // NOTE: Space is not reclaimed for deleted documents until Optimize() is 285 // called. 286 // 287 // Returns: 288 // OK on success 289 // NOT_FOUND if the query doesn't match any documents 290 // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet 291 // INTERNAL_ERROR on IO error 292 DeleteByQueryResultProto DeleteByQuery( 293 const SearchSpecProto& search_spec, 294 bool return_deleted_document_info = false) ICING_LOCKS_EXCLUDED(mutex_); 295 296 // Retrieves, scores, ranks, and returns the results according to the specs. 297 // Results can be empty. If there're multiple pages of results, 298 // SearchResultProto.next_page_token will be set to a non-zero token and can 299 // be used to fetch more pages via GetNextPage() method. Clients should call 300 // InvalidateNextPageToken() after they get the pages they need to release 301 // result cache in memory. Please refer to each proto file for spec 302 // definitions. 303 // 304 // Returns a SearchResultProto with status: 305 // OK with results on success 306 // INVALID_ARGUMENT if any of specs is invalid 307 // ABORTED if failed to perform search but existing data is not affected 308 // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet 309 // INTERNAL_ERROR on any other errors 310 SearchResultProto Search(const SearchSpecProto& search_spec, 311 const ScoringSpecProto& scoring_spec, 312 const ResultSpecProto& result_spec) 313 ICING_LOCKS_EXCLUDED(mutex_); 314 315 // Retrieves, scores, ranks and returns the suggested query string according 316 // to the specs. Results can be empty. 317 // 318 // Returns a SuggestionResponse with status: 319 // OK with results on success 320 // INVALID_ARGUMENT if any of specs is invalid 321 // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet 322 // INTERNAL_ERROR on any other errors 323 SuggestionResponse SearchSuggestions( 324 const SuggestionSpecProto& suggestion_spec) ICING_LOCKS_EXCLUDED(mutex_); 325 326 // Fetches the next page of results of a previously executed query. Results 327 // can be empty if next-page token is invalid. Invalid next page tokens are 328 // tokens that are either zero or were previously passed to 329 // InvalidateNextPageToken. If there are pages of results remaining after the 330 // one retrieved by this call, SearchResultProto.next_page_token will be 331 // set to a non-zero token and can be used to fetch more pages via 332 // GetNextPage() method. 333 // 334 // Returns a SearchResultProto with status: 335 // OK with results on success 336 // ABORTED if failed to get results but existing data is not affected 337 // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet 338 // INTERNAL_ERROR on any other errors 339 SearchResultProto GetNextPage(uint64_t next_page_token) 340 ICING_LOCKS_EXCLUDED(mutex_); 341 342 // Invalidates the next-page token so that no more results of the related 343 // query can be returned. 344 void InvalidateNextPageToken(uint64_t next_page_token) 345 ICING_LOCKS_EXCLUDED(mutex_); 346 347 // Makes sure that every update/delete received till this point is flushed 348 // to disk. If the app crashes after a call to PersistToDisk(), Icing 349 // would be able to fully recover all data written up to this point. 350 // 351 // If persist_type is PersistType::LITE, then only the ground truth will be 352 // synced. This should be relatively lightweight to do (order of microseconds) 353 // and ensures that there will be no data loss. At worst, Icing may need to 354 // recover internal data structures by replaying the document log upon the 355 // next startup. Clients should call PersistToDisk(LITE) after each batch of 356 // mutations. 357 // 358 // If persist_type is PersistType::FULL, then all internal data structures in 359 // Icing will be synced. This is a heavier operation (order of milliseconds). 360 // It ensures that Icing will not need to recover internal data structures 361 // upon the next startup. Clients should call PersistToDisk(FULL) before their 362 // process dies. 363 // 364 // NOTE: It is not necessary to call PersistToDisk() to read back data 365 // that was recently written. All read APIs will include the most recent 366 // updates/deletes regardless of the data being flushed to disk. 367 // 368 // Returns: 369 // OK on success 370 // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet 371 // INTERNAL on I/O error 372 PersistToDiskResultProto PersistToDisk(PersistType::Code persist_type) 373 ICING_LOCKS_EXCLUDED(mutex_); 374 375 // Allows Icing to run tasks that are too expensive and/or unnecessary to be 376 // executed in real-time, but are useful to keep it fast and be 377 // resource-efficient. This method purely optimizes the internal files and 378 // has no functional impact on what gets accepted/returned. 379 // 380 // WARNING: This method is CPU and IO intensive and depending on the 381 // contents stored, it can take from a few seconds to a few minutes. 382 // This call also blocks all read/write operations on Icing. 383 // 384 // SUGGESTION: Assuming the client has no restrictions on their side, it's 385 // recommended to call this method about once every 24 hours when the 386 // device is idle and charging. It can also be called when the system needs 387 // to free up extra disk-space. 388 // 389 // Returns: 390 // OK on success 391 // ABORTED_ERROR if optimization is aborted due to non-fatal errors before 392 // actual modifications are made. 393 // DATA_LOSS_ERROR on errors that could potentially cause data loss, 394 // IcingSearchEngine is still functioning. 395 // INTERNAL_ERROR on any IO errors or other unrecoverable errors. Continued 396 // use of Icing is undefined. 397 // Clients could clear and reinitialize IcingSearchEngine. 398 // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet 399 OptimizeResultProto Optimize() ICING_LOCKS_EXCLUDED(mutex_); 400 401 // Returns potential size and document savings if Optimize were called. 402 // 403 // Returns: 404 // OK on success 405 // FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet 406 // INTERNAL_ERROR on IO error 407 GetOptimizeInfoResultProto GetOptimizeInfo() ICING_LOCKS_EXCLUDED(mutex_); 408 409 // Calculates the StorageInfo for Icing. 410 // 411 // If an IO error occurs while trying to calculate the value for a field, then 412 // that field will be set to -1. 413 StorageInfoResultProto GetStorageInfo() ICING_LOCKS_EXCLUDED(mutex_); 414 415 // Get debug information for Icing. 416 DebugInfoResultProto GetDebugInfo(DebugInfoVerbosity::Code verbosity) 417 ICING_LOCKS_EXCLUDED(mutex_); 418 419 // Clears all data from Icing and re-initializes. Clients DO NOT need to call 420 // Initialize again. 421 // 422 // Returns: 423 // OK on success 424 // ABORTED_ERROR if failed to delete underlying files 425 // INTERNAL_ERROR if internal state is no longer consistent 426 ResetResultProto Reset() ICING_LOCKS_EXCLUDED(mutex_); 427 428 // Disallow copy and move. 429 IcingSearchEngine(const IcingSearchEngine&) = delete; 430 IcingSearchEngine& operator=(const IcingSearchEngine&) = delete; 431 432 protected: 433 IcingSearchEngine(IcingSearchEngineOptions options, 434 std::unique_ptr<const Filesystem> filesystem, 435 std::unique_ptr<const IcingFilesystem> icing_filesystem, 436 std::unique_ptr<Clock> clock, 437 std::unique_ptr<const JniCache> jni_cache = nullptr); 438 439 private: 440 const IcingSearchEngineOptions options_; 441 const std::unique_ptr<const Filesystem> filesystem_; 442 const std::unique_ptr<const IcingFilesystem> icing_filesystem_; 443 bool initialized_ ICING_GUARDED_BY(mutex_) = false; 444 445 // Abstraction for accessing time values. 446 const std::unique_ptr<const Clock> clock_; 447 448 // Provides key thresholds that affects the running time and memory of major 449 // components in Icing search engine. 450 const PerformanceConfiguration performance_configuration_; 451 452 // Used to manage pagination state of query results. Even though 453 // ResultStateManager has its own reader-writer lock, mutex_ must still be 454 // acquired first in order to adhere to the global lock ordering: 455 // 1. mutex_ 456 // 2. result_state_manager_.lock_ 457 std::unique_ptr<ResultStateManager> result_state_manager_ 458 ICING_GUARDED_BY(mutex_); 459 460 // Used to provide reader and writer locks 461 absl_ports::shared_mutex mutex_; 462 463 // Stores and processes the schema 464 std::unique_ptr<SchemaStore> schema_store_ ICING_GUARDED_BY(mutex_); 465 466 // Used to store all valid documents 467 std::unique_ptr<DocumentStore> document_store_ ICING_GUARDED_BY(mutex_); 468 469 std::unique_ptr<const LanguageSegmenter> language_segmenter_ 470 ICING_GUARDED_BY(mutex_); 471 472 std::unique_ptr<const Normalizer> normalizer_ ICING_GUARDED_BY(mutex_); 473 474 // Storage for all hits of string contents from the document store. 475 std::unique_ptr<Index> index_ ICING_GUARDED_BY(mutex_); 476 477 // Storage for all hits of numeric contents from the document store. 478 std::unique_ptr<NumericIndex<int64_t>> integer_index_ 479 ICING_GUARDED_BY(mutex_); 480 481 // Storage for all join qualified ids from the document store. 482 std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index_ 483 ICING_GUARDED_BY(mutex_); 484 485 // Pointer to JNI class references 486 const std::unique_ptr<const JniCache> jni_cache_; 487 488 // Resets all members that are created during Initialize. 489 void ResetMembers() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 490 491 // Resets all members that are created during Initialize, deletes all 492 // underlying files and initializes a fresh index. 493 ResetResultProto ResetInternal() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 494 495 // Checks for the existence of the init marker file. If the failed init count 496 // exceeds kMaxUnsuccessfulInitAttempts, all data is deleted and the index is 497 // initialized from scratch. The updated count (original failed init count + 1 498 // ) is written to the marker file. 499 // 500 // RETURNS 501 // OK on success 502 // INTERNAL if an IO error occurs while trying to update the marker file. 503 libtextclassifier3::Status CheckInitMarkerFile( 504 InitializeStatsProto* initialize_stats) 505 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 506 507 // Helper method to do the actual work to persist data to disk. We need this 508 // separate method so that other public methods don't need to call 509 // PersistToDisk(). Public methods calling each other may cause deadlock 510 // issues. 511 libtextclassifier3::Status InternalPersistToDisk( 512 PersistType::Code persist_type) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 513 514 // Helper method to the actual work to Initialize. We need this separate 515 // method so that other public methods don't need to call Initialize(). Public 516 // methods calling each other may cause deadlock issues. 517 InitializeResultProto InternalInitialize() 518 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 519 520 // Helper method to initialize member variables. 521 // 522 // Returns: 523 // OK on success 524 // FAILED_PRECONDITION if initialize_stats is null 525 // RESOURCE_EXHAUSTED if the index runs out of storage 526 // NOT_FOUND if some Document's schema type is not in the SchemaStore 527 // INTERNAL on any I/O errors 528 libtextclassifier3::Status InitializeMembers( 529 InitializeStatsProto* initialize_stats) 530 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 531 532 // Do any initialization/recovery necessary to create a SchemaStore instance. 533 // 534 // Returns: 535 // OK on success 536 // FAILED_PRECONDITION if initialize_stats is null 537 // INTERNAL on I/O error 538 libtextclassifier3::Status InitializeSchemaStore( 539 InitializeStatsProto* initialize_stats) 540 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 541 542 // Do any initialization/recovery necessary to create a DocumentStore 543 // instance. 544 // 545 // See comments on DocumentStore::Create for explanation of 546 // force_recovery_and_revalidate_documents. 547 // 548 // Returns: 549 // OK on success 550 // FAILED_PRECONDITION if initialize_stats is null 551 // INTERNAL on I/O error 552 libtextclassifier3::Status InitializeDocumentStore( 553 bool force_recovery_and_revalidate_documents, 554 InitializeStatsProto* initialize_stats) 555 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 556 557 // Do any initialization/recovery necessary to create term index, integer 558 // index, and qualified id join index instances. 559 // 560 // Returns: 561 // OK on success 562 // FAILED_PRECONDITION if initialize_stats is null 563 // RESOURCE_EXHAUSTED if the index runs out of storage 564 // NOT_FOUND if some Document's schema type is not in the SchemaStore 565 // INTERNAL on I/O error 566 libtextclassifier3::Status InitializeIndex( 567 InitializeStatsProto* initialize_stats) 568 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 569 570 // Implementation of IcingSearchEngine::Search that only grabs the overall 571 // read-lock, allowing for parallel non-exclusive operations. 572 // This implementation is used if search_spec.use_read_only_search is true. 573 SearchResultProto SearchLockedShared(const SearchSpecProto& search_spec, 574 const ScoringSpecProto& scoring_spec, 575 const ResultSpecProto& result_spec) 576 ICING_LOCKS_EXCLUDED(mutex_); 577 578 // Implementation of IcingSearchEngine::Search that requires the overall 579 // write lock. No other operations of any kind can be executed in parallel if 580 // this version is used. 581 // This implementation is used if search_spec.use_read_only_search is false. 582 SearchResultProto SearchLockedExclusive(const SearchSpecProto& search_spec, 583 const ScoringSpecProto& scoring_spec, 584 const ResultSpecProto& result_spec) 585 ICING_LOCKS_EXCLUDED(mutex_); 586 587 // Helper method for the actual work to Search. We need this separate 588 // method to manage locking for Search. 589 SearchResultProto InternalSearch(const SearchSpecProto& search_spec, 590 const ScoringSpecProto& scoring_spec, 591 const ResultSpecProto& result_spec) 592 ICING_SHARED_LOCKS_REQUIRED(mutex_); 593 594 // Processes query and scores according to the specs. It is a helper function 595 // (called by Search) to process and score normal query and the nested child 596 // query for join search. 597 // 598 // Returns a QueryScoringResults 599 // OK on success with a vector of ScoredDocumentHits, 600 // SectionRestrictQueryTermsMap, and other stats fields for logging. 601 // Any other errors when processing the query or scoring 602 struct QueryScoringResults { 603 libtextclassifier3::Status status; 604 SectionRestrictQueryTermsMap query_terms; 605 std::vector<ScoredDocumentHit> scored_document_hits; 606 int64_t parse_query_latency_ms; 607 int64_t scoring_latency_ms; 608 QueryScoringResultsQueryScoringResults609 explicit QueryScoringResults( 610 libtextclassifier3::Status status_in, 611 SectionRestrictQueryTermsMap&& query_terms_in, 612 std::vector<ScoredDocumentHit>&& scored_document_hits_in, 613 int64_t parse_query_latency_ms_in, int64_t scoring_latency_ms_in) 614 : status(std::move(status_in)), 615 query_terms(std::move(query_terms_in)), 616 scored_document_hits(std::move(scored_document_hits_in)), 617 parse_query_latency_ms(parse_query_latency_ms_in), 618 scoring_latency_ms(scoring_latency_ms_in) {} 619 }; 620 QueryScoringResults ProcessQueryAndScore( 621 const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec, 622 const ResultSpecProto& result_spec, 623 const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms) 624 ICING_SHARED_LOCKS_REQUIRED(mutex_); 625 626 // Many of the internal components rely on other components' derived data. 627 // Check that everything is consistent with each other so that we're not 628 // using outdated derived data in some parts of our system. 629 // 630 // NOTE: this method can be called only at startup time or after 631 // PersistToDisk(), otherwise the check could fail due to any changes that are 632 // not persisted. 633 // 634 // Returns: 635 // OK on success 636 // NOT_FOUND if missing header file 637 // INTERNAL_ERROR on any IO errors or if header is inconsistent 638 libtextclassifier3::Status CheckConsistency() 639 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 640 641 // Discards all derived data. 642 // 643 // Returns: 644 // OK on success 645 // FAILED_PRECONDITION_ERROR if those instances are valid (non nullptr) 646 // INTERNAL_ERROR on any I/O errors 647 libtextclassifier3::Status DiscardDerivedFiles() 648 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 649 650 // Repopulates derived data off our ground truths. 651 // 652 // Returns: 653 // OK on success 654 // INTERNAL_ERROR on any IO errors 655 libtextclassifier3::Status RegenerateDerivedFiles( 656 InitializeStatsProto* initialize_stats = nullptr, 657 bool log_document_store_stats = false) 658 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 659 660 // Optimizes the DocumentStore by removing any unneeded documents (i.e. 661 // deleted, expired, etc.) from the filesystem storage. 662 // 663 // NOTE: This may leave the DocumentStore in an invalid/uncreated state. Users 664 // would need call Initialize() to reinitialize everything into a valid state. 665 // 666 // Returns: 667 // On success, a vector that maps from old document id to new document id. A 668 // value of kInvalidDocumentId indicates that the old document id has been 669 // deleted. 670 // ABORTED_ERROR if any error happens before the actual optimization, the 671 // original document store should be still available 672 // DATA_LOSS_ERROR on errors that could potentially cause data loss, 673 // document store is still available 674 // INTERNAL_ERROR on any IO errors or other errors that we can't recover 675 // from 676 libtextclassifier3::StatusOr<std::vector<DocumentId>> OptimizeDocumentStore( 677 OptimizeStatsProto* optimize_stats) 678 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 679 680 // Helper method to restore missing document data in index_, integer_index_, 681 // and qualified_id_join_index_. All documents will be reindexed. This does 682 // not clear the index, so it is recommended to call ClearAllIndices, 683 // ClearSearchIndices, or ClearJoinIndices first if needed. 684 // 685 // Returns: 686 // On success, OK and a bool indicating whether or not restoration was 687 // needed. 688 // DATA_LOSS, if an error during index merging caused us to lose indexed 689 // data in the main index. Despite the data loss, this is still considered 690 // a successful run and needed_restoration will be set to true. 691 // RESOURCE_EXHAUSTED if the index fills up before finishing indexing 692 // NOT_FOUND if some Document's schema type is not in the SchemaStore 693 // INTERNAL_ERROR on any IO errors 694 struct IndexRestorationResult { 695 libtextclassifier3::Status status; 696 bool index_needed_restoration; 697 bool integer_index_needed_restoration; 698 bool qualified_id_join_index_needed_restoration; 699 }; 700 IndexRestorationResult RestoreIndexIfNeeded() 701 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 702 703 // If we lost the schema during a previous failure, it may "look" the same as 704 // not having a schema set before: we don't have a schema proto file. So do 705 // some extra checks to differentiate between having-lost the schema, and 706 // never having a schema before. This may determine if we need to do extra 707 // recovery steps. 708 // 709 // Returns: 710 // bool indicating if we had a schema and unintentionally lost it 711 // INTERNAL_ERROR on I/O error 712 libtextclassifier3::StatusOr<bool> LostPreviousSchema() 713 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 714 715 // Helper method to create all types of data indexing handlers to index term, 716 // integer, and join qualified ids. 717 libtextclassifier3::StatusOr< 718 std::vector<std::unique_ptr<DataIndexingHandler>>> 719 CreateDataIndexingHandlers() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 720 721 // Helper method to discard parts of (term, integer, qualified id join) 722 // indices if they contain data for document ids greater than 723 // last_stored_document_id. 724 // 725 // REQUIRES: last_stored_document_id is valid (!= kInvalidDocumentId). Note: 726 // if we want to truncate everything in the index, then please call 727 // ClearSearchIndices/ClearJoinIndices/ClearAllIndices instead. 728 // 729 // Returns: 730 // On success, a DocumentId indicating the first document to start for 731 // reindexing and 2 bool flags indicating whether term or integer index 732 // needs restoration. 733 // INTERNAL on any I/O errors 734 struct TruncateIndexResult { 735 DocumentId first_document_to_reindex; 736 bool index_needed_restoration; 737 bool integer_index_needed_restoration; 738 bool qualified_id_join_index_needed_restoration; 739 TruncateIndexResultTruncateIndexResult740 explicit TruncateIndexResult( 741 DocumentId first_document_to_reindex_in, 742 bool index_needed_restoration_in, 743 bool integer_index_needed_restoration_in, 744 bool qualified_id_join_index_needed_restoration_in) 745 : first_document_to_reindex(first_document_to_reindex_in), 746 index_needed_restoration(index_needed_restoration_in), 747 integer_index_needed_restoration(integer_index_needed_restoration_in), 748 qualified_id_join_index_needed_restoration( 749 qualified_id_join_index_needed_restoration_in) {} 750 }; 751 libtextclassifier3::StatusOr<TruncateIndexResult> TruncateIndicesTo( 752 DocumentId last_stored_document_id) 753 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 754 755 // Helper method to discard search (term, integer) indices. 756 // 757 // Returns: 758 // OK on success 759 // INTERNAL_ERROR on any I/O errors 760 libtextclassifier3::Status ClearSearchIndices() 761 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 762 763 // Helper method to discard join (qualified id) indices. 764 // 765 // Returns: 766 // OK on success 767 // INTERNAL_ERROR on any I/O errors 768 libtextclassifier3::Status ClearJoinIndices() 769 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 770 771 // Helper method to discard all search and join indices. 772 // 773 // Returns: 774 // OK on success 775 // INTERNAL_ERROR on any I/O errors 776 libtextclassifier3::Status ClearAllIndices() 777 ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 778 }; 779 780 } // namespace lib 781 } // namespace icing 782 783 #endif // ICING_ICING_SEARCH_ENGINE_H_ 784