• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_ICING_SEARCH_ENGINE_H_
16 #define ICING_ICING_SEARCH_ENGINE_H_
17 
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 #include <string_view>
22 #include <vector>
23 
24 #include "icing/text_classifier/lib3/utils/base/status.h"
25 #include "icing/text_classifier/lib3/utils/base/statusor.h"
26 #include "icing/absl_ports/mutex.h"
27 #include "icing/absl_ports/thread_annotations.h"
28 #include "icing/file/filesystem.h"
29 #include "icing/index/data-indexing-handler.h"
30 #include "icing/index/index.h"
31 #include "icing/index/numeric/numeric-index.h"
32 #include "icing/jni/jni-cache.h"
33 #include "icing/join/join-children-fetcher.h"
34 #include "icing/join/qualified-id-type-joinable-index.h"
35 #include "icing/legacy/index/icing-filesystem.h"
36 #include "icing/performance-configuration.h"
37 #include "icing/proto/debug.pb.h"
38 #include "icing/proto/document.pb.h"
39 #include "icing/proto/initialize.pb.h"
40 #include "icing/proto/logging.pb.h"
41 #include "icing/proto/optimize.pb.h"
42 #include "icing/proto/persist.pb.h"
43 #include "icing/proto/reset.pb.h"
44 #include "icing/proto/schema.pb.h"
45 #include "icing/proto/scoring.pb.h"
46 #include "icing/proto/search.pb.h"
47 #include "icing/proto/storage.pb.h"
48 #include "icing/proto/usage.pb.h"
49 #include "icing/query/query-terms.h"
50 #include "icing/result/result-state-manager.h"
51 #include "icing/schema/schema-store.h"
52 #include "icing/scoring/scored-document-hit.h"
53 #include "icing/store/document-store.h"
54 #include "icing/tokenization/language-segmenter.h"
55 #include "icing/transform/normalizer.h"
56 #include "icing/util/clock.h"
57 #include "icing/util/crc32.h"
58 
59 namespace icing {
60 namespace lib {
61 
62 // TODO(cassiewang) Top-level comments and links to design-doc.
63 class IcingSearchEngine {
64  public:
65   // Note: It is only required to provide a pointer to a valid instance of
66   // JniCache if this instance needs to perform reverse-jni calls. Users on
67   // Linux and iOS should always provide a nullptr.
68   explicit IcingSearchEngine(
69       const IcingSearchEngineOptions& options,
70       std::unique_ptr<const JniCache> jni_cache = nullptr);
71 
72   // Calculates integrity checks and persists files to disk.
73   ~IcingSearchEngine();
74 
75   // Loads & verifies the contents previously indexed from disk and gets ready
76   // to handle read/write requests.
77   //
78   // WARNING: This is expected to be fast if Icing had a clean shutdown.
79   // Otherwise, it can take longer as it runs integrity checks and attempts
80   // to bring the index to a consistent state. If the data on disk is not
81   // consistent, it restores the state as of the last PersistToDisk() call.
82   //
83   // TODO(cassiewang): We shouldn't return NOT_FOUND here; it is a symptom
84   // of some other error. We should return a broader error group, e.g. data
85   // inconsistency.
86   //
87   // Returns:
88   //   OK on success
89   //   DATA_LOSS if any inconsistencies were found in the data and the state
90   //     had to be restored to the last time PersistToDisk was called, or if
91   //     any persisted data was lost and could not be recovered.
92   //   INTERNAL if any internal state was left in an inconsistent state. The
93   //     instance of IcingSearchEngine is unusable if this happens. It's
94   //     recommended to clear the underlying directory provided in
95   //     IcingSearchEngineOptions.base_dir and reinitialize.
96   //   RESOURCE_EXHAUSTED if not enough storage space
97   //   NOT_FOUND if missing some internal data
98   InitializeResultProto Initialize() ICING_LOCKS_EXCLUDED(mutex_);
99 
100   // Specifies the schema to be applied on all Documents that are already
101   // stored as well as future documents. A schema can be 'invalid' and/or
102   // 'incompatible'. These are two independent concepts.
103   //
104   // An 'invalid' schema is one that is not constructed properly. For example,
105   // a PropertyConfigProto is missing the property name field. A schema can be
106   // 'invalid' even if there is no previously existing schema.
107   //
108   // An 'incompatible' schema is one that is incompatible with a previously
109   // existing schema. If there is no previously existing schema, then a new
110   // schema cannot be incompatible. An incompatible schema is one that
111   // invalidates pre-existing data. For example, a previously OPTIONAL field is
112   // now REQUIRED in the new schema, and pre-existing data is considered invalid
113   // against the new schema now.
114   //
115   // Default behavior will not allow a new schema to be set if it is invalid or
116   // incompatible.
117   //
118   // The argument 'ignore_errors_and_delete_documents' can be set to true to
119   // force set an incompatible schema. In that case, documents that are
120   // invalidated by the new schema would be deleted from Icing. This cannot be
121   // used to force set an invalid schema.
122   //
123   // This schema is persisted to disk and used across multiple instances.
124   // So, callers should only have to call this if the schema changed.
125   // However, calling it multiple times with the same schema is a no-op.
126   //
127   // On some errors, Icing will keep using the older schema, but on
128   // INTERNAL_ERROR, it is undefined to continue using Icing.
129   //
130   // Returns:
131   //   OK on success
132   //   ALREADY_EXISTS if 'new_schema' contains multiple definitions of the same
133   //     type or contains a type that has multiple properties with the same
134   //     name.
135   //   INVALID_ARGUMENT if 'new_schema' is invalid
136   //   FAILED_PRECONDITION if 'new_schema' is incompatible, or IcingSearchEngine
137   //     has not been initialized yet.
138   //   INTERNAL_ERROR if Icing failed to store the new schema or upgrade
139   //     existing data based on the new schema. Using Icing beyond this error is
140   //     undefined and may cause crashes.
141   //   DATA_LOSS_ERROR if 'new_schema' requires the index to be rebuilt and an
142   //     IO error leads to some documents being excluded from the index. These
143   //     documents will still be retrievable via Get, but won't match queries.
144   //
145   // TODO(cassiewang) Figure out, document (and maybe even enforce) the best
146   // ordering of calls between Initialize() and SetSchema(), both when
147   // the caller is creating an instance of IcingSearchEngine for the first
148   // time and when the caller is reinitializing an existing index on disk.
149   SetSchemaResultProto SetSchema(
150       SchemaProto&& new_schema, bool ignore_errors_and_delete_documents = false)
151       ICING_LOCKS_EXCLUDED(mutex_);
152 
153   // This function makes a copy of the schema and calls SetSchema(SchemaProto&&
154   // new_schema, bool ignore_errors_and_delete_documents)
155   //
156   // NOTE: It's recommended to call SetSchema(SchemaProto&& new_schema, bool
157   // ignore_errors_and_delete_documents) directly to avoid a copy if the caller
158   // can make an rvalue SchemaProto.
159   SetSchemaResultProto SetSchema(const SchemaProto& new_schema,
160                                  bool ignore_errors_and_delete_documents =
161                                      false) ICING_LOCKS_EXCLUDED(mutex_);
162 
163   // Gets Icing's current copy of the schema.
164   //
165   // Returns:
166   //   SchemaProto on success
167   //   NOT_FOUND if a schema has not been set yet
168   //   FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet
169   //   INTERNAL_ERROR on IO error
170   GetSchemaResultProto GetSchema() ICING_LOCKS_EXCLUDED(mutex_);
171 
172   // Gets Icing's copy of the SchemaTypeConfigProto named schema_type.
173   //
174   // Returns:
175   //   SchemaTypeConfigProto on success
176   //   FAILED_PRECONDITION if a schema has not been set yet or
177   //     IcingSearchEngine has not been initialized yet
178   //   NOT_FOUND if there is no SchemaTypeConfig of schema_type in the
179   //     SchemaProto
180   //   INTERNAL_ERROR on IO error
181   GetSchemaTypeResultProto GetSchemaType(std::string_view schema_type)
182       ICING_LOCKS_EXCLUDED(mutex_);
183 
184   // Puts the document into the Icing search engine so that it's stored and
185   // indexed. Documents are automatically written to disk; callers can also
186   // call PersistToDisk() to flush changes immediately.
187   //
188   // Returns:
189   //   OK on success
190   //   OUT_OF_SPACE if the maximum number of allowed documents is exceeded
191   //   FAILED_PRECONDITION if a schema has not been set yet or IcingSearchEngine
192   //     has not been initialized yet
193   //   NOT_FOUND if there is no SchemaTypeConfig in the SchemaProto that matches
194   //     the document's schema
195   //   DATA_LOSS if an IO error occurs while merging document into the index and
196   //     the index is lost. These documents will still be retrievable via Get,
197   //     but won't match queries.
198   //   INTERNAL_ERROR on IO error
199   PutResultProto Put(DocumentProto&& document) ICING_LOCKS_EXCLUDED(mutex_);
200 
201   // This function makes a copy of document and calls Put(DocumentProto&&
202   // document).
203   //
204   // NOTE: It's recommended to call Put(DocumentProto&& document) directly to
205   // avoid a copy if the caller can make an rvalue DocumentProto.
206   PutResultProto Put(const DocumentProto& document)
207       ICING_LOCKS_EXCLUDED(mutex_);
208 
209   // Finds and returns the document identified by the given key (namespace +
210   // uri).
211   //
212   // Returns:
213   //   The document found on success
214   //   NOT_FOUND if the key doesn't exist or the doc has been deleted
215   //   FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet
216   //   INTERNAL_ERROR on IO error
217   GetResultProto Get(std::string_view name_space, std::string_view uri,
218                      const GetResultSpecProto& result_spec);
219 
220   // Reports usage. The corresponding usage scores of the specified document in
221   // the report will be updated.
222   //
223   // Returns:
224   //   OK on success
225   //   NOT_FOUND if the [namespace + uri] key in the report doesn't exist
226   //   INTERNAL_ERROR on I/O errors
227   ReportUsageResultProto ReportUsage(const UsageReport& usage_report);
228 
229   // Returns all the namespaces that have at least one valid document in them.
230   //
231   // Returns:
232   //   All namespaces on success
233   GetAllNamespacesResultProto GetAllNamespaces();
234 
235   // Deletes the Document specified by the given namespace / uri pair from the
236   // search engine. Delete changes are automatically applied to disk; callers
237   // can also call PersistToDisk() to flush changes immediately.
238   //
239   // NOTE: Space is not reclaimed for deleted documents until Optimize() is
240   // called.
241   //
242   // Returns:
243   //   OK on success
244   //   NOT_FOUND if no document exists with the given namespace and uri
245   //   FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet
246   //   INTERNAL_ERROR on IO error
247   DeleteResultProto Delete(std::string_view name_space, std::string_view uri)
248       ICING_LOCKS_EXCLUDED(mutex_);
249 
250   // Deletes all Documents belonging to the specified namespace from the search
251   // engine. Delete changes are automatically applied to disk; callers can also
252   // call PersistToDisk() to flush changes immediately.
253   //
254   // NOTE: Space is not reclaimed for deleted documents until Optimize() is
255   // called.
256   //
257   // Returns:
258   //   OK on success
259   //   NOT_FOUND if the namespace doesn't exist
260   //   FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet
261   //   INTERNAL_ERROR on IO error
262   DeleteByNamespaceResultProto DeleteByNamespace(std::string_view name_space)
263       ICING_LOCKS_EXCLUDED(mutex_);
264 
265   // Deletes all Documents belonging to the specified type from the search
266   // engine. Delete changes are automatically applied to disk; callers can also
267   // call PersistToDisk() to flush changes immediately.
268   //
269   // NOTE: Space is not reclaimed for deleted documents until Optimize() is
270   // called.
271   //
272   // Returns:
273   //   OK on success
274   //   NOT_FOUND if the schema type doesn't exist
275   //   FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet
276   //   INTERNAL_ERROR on IO error
277   DeleteBySchemaTypeResultProto DeleteBySchemaType(std::string_view schema_type)
278       ICING_LOCKS_EXCLUDED(mutex_);
279 
280   // Deletes all Documents that match the query specified in search_spec. Delete
281   // changes are automatically applied to disk; callers can also call
282   // PersistToDisk() to flush changes immediately.
283   //
284   // NOTE: Space is not reclaimed for deleted documents until Optimize() is
285   // called.
286   //
287   // Returns:
288   //   OK on success
289   //   NOT_FOUND if the query doesn't match any documents
290   //   FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet
291   //   INTERNAL_ERROR on IO error
292   DeleteByQueryResultProto DeleteByQuery(
293       const SearchSpecProto& search_spec,
294       bool return_deleted_document_info = false) ICING_LOCKS_EXCLUDED(mutex_);
295 
296   // Retrieves, scores, ranks, and returns the results according to the specs.
297   // Results can be empty. If there are multiple pages of results,
298   // SearchResultProto.next_page_token will be set to a non-zero token and can
299   // be used to fetch more pages via the GetNextPage() method. Once clients
300   // have the pages they need, they should call InvalidateNextPageToken() to
301   // release the result cache in memory. Please refer to each proto file for
302   // spec definitions.
303   //
304   // Returns a SearchResultProto with status:
305   //   OK with results on success
306   //   INVALID_ARGUMENT if any of the specs is invalid
307   //   ABORTED if failed to perform search but existing data is not affected
308   //   FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet
309   //   INTERNAL_ERROR on any other errors
310   SearchResultProto Search(const SearchSpecProto& search_spec,
311                            const ScoringSpecProto& scoring_spec,
312                            const ResultSpecProto& result_spec)
313       ICING_LOCKS_EXCLUDED(mutex_);
314 
315   // Retrieves, scores, ranks and returns the suggested query string according
316   // to the specs. Results can be empty.
317   //
318   // Returns a SuggestionResponse with status:
319   //   OK with results on success
320   //   INVALID_ARGUMENT if any of the specs is invalid
321   //   FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet
322   //   INTERNAL_ERROR on any other errors
323   SuggestionResponse SearchSuggestions(
324       const SuggestionSpecProto& suggestion_spec) ICING_LOCKS_EXCLUDED(mutex_);
325 
326   // Fetches the next page of results of a previously executed query. Results
327   // can be empty if the next-page token is invalid. Invalid next-page tokens
328   // are tokens that are either zero or were previously passed to
329   // InvalidateNextPageToken. If there are pages of results remaining after the
330   // one retrieved by this call, SearchResultProto.next_page_token will be
331   // set to a non-zero token and can be used to fetch more pages via
332   // the GetNextPage() method.
333   //
334   // Returns a SearchResultProto with status:
335   //   OK with results on success
336   //   ABORTED if failed to get results but existing data is not affected
337   //   FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet
338   //   INTERNAL_ERROR on any other errors
339   SearchResultProto GetNextPage(uint64_t next_page_token)
340       ICING_LOCKS_EXCLUDED(mutex_);
341 
342   // Invalidates the next-page token so that no more results of the related
343   // query can be returned.
344   void InvalidateNextPageToken(uint64_t next_page_token)
345       ICING_LOCKS_EXCLUDED(mutex_);
346 
347   // Makes sure that every update/delete received till this point is flushed
348   // to disk. If the app crashes after a call to PersistToDisk(), Icing
349   // would be able to fully recover all data written up to this point.
350   //
351   // If persist_type is PersistType::LITE, then only the ground truth will be
352   // synced. This should be relatively lightweight to do (order of microseconds)
353   // and ensures that there will be no data loss. At worst, Icing may need to
354   // recover internal data structures by replaying the document log upon the
355   // next startup. Clients should call PersistToDisk(LITE) after each batch of
356   // mutations.
357   //
358   // If persist_type is PersistType::FULL, then all internal data structures in
359   // Icing will be synced. This is a heavier operation (order of milliseconds).
360   // It ensures that Icing will not need to recover internal data structures
361   // upon the next startup. Clients should call PersistToDisk(FULL) before their
362   // process dies.
363   //
364   // NOTE: It is not necessary to call PersistToDisk() to read back data
365   // that was recently written. All read APIs will include the most recent
366   // updates/deletes regardless of the data being flushed to disk.
367   //
368   // Returns:
369   //   OK on success
370   //   FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet
371   //   INTERNAL on I/O error
372   PersistToDiskResultProto PersistToDisk(PersistType::Code persist_type)
373       ICING_LOCKS_EXCLUDED(mutex_);
374 
375   // Allows Icing to run tasks that are too expensive and/or unnecessary to be
376   // executed in real-time, but are useful to keep it fast and
377   // resource-efficient. This method purely optimizes the internal files and
378   // has no functional impact on what gets accepted/returned.
379   //
380   // WARNING: This method is CPU and IO intensive and depending on the
381   // contents stored, it can take from a few seconds to a few minutes.
382   // This call also blocks all read/write operations on Icing.
383   //
384   // SUGGESTION: Assuming the client has no restrictions on their side, it's
385   // recommended to call this method about once every 24 hours when the
386   // device is idle and charging. It can also be called when the system needs
387   // to free up extra disk-space.
388   //
389   // Returns:
390   //   OK on success
391   //   ABORTED_ERROR if optimization is aborted due to non-fatal errors before
392   //                 actual modifications are made.
393   //   DATA_LOSS_ERROR on errors that could potentially cause data loss,
394   //                   IcingSearchEngine is still functioning.
395   //   INTERNAL_ERROR on any IO errors or other unrecoverable errors. Continued
396   //                  use of Icing is undefined.
397   //                  Clients could clear and reinitialize IcingSearchEngine.
398   //   FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet
399   OptimizeResultProto Optimize() ICING_LOCKS_EXCLUDED(mutex_);
400 
401   // Returns potential size and document savings if Optimize were called.
402   //
403   // Returns:
404   //   OK on success
405   //   FAILED_PRECONDITION if IcingSearchEngine has not been initialized yet
406   //   INTERNAL_ERROR on IO error
407   GetOptimizeInfoResultProto GetOptimizeInfo() ICING_LOCKS_EXCLUDED(mutex_);
408 
409   // Calculates the StorageInfo for Icing.
410   //
411   // If an IO error occurs while trying to calculate the value for a field, then
412   // that field will be set to -1.
413   StorageInfoResultProto GetStorageInfo() ICING_LOCKS_EXCLUDED(mutex_);
414 
415   // Get debug information for Icing.
416   DebugInfoResultProto GetDebugInfo(DebugInfoVerbosity::Code verbosity)
417       ICING_LOCKS_EXCLUDED(mutex_);
418 
419   // Clears all data from Icing and re-initializes. Clients DO NOT need to call
420   // Initialize again.
421   //
422   // Returns:
423   //   OK on success
424   //   ABORTED_ERROR if failed to delete underlying files
425   //   INTERNAL_ERROR if internal state is no longer consistent
426   ResetResultProto Reset() ICING_LOCKS_EXCLUDED(mutex_);
427 
428   // Disallow copy and move.
429   IcingSearchEngine(const IcingSearchEngine&) = delete;
430   IcingSearchEngine& operator=(const IcingSearchEngine&) = delete;
431 
432  protected:
433   IcingSearchEngine(IcingSearchEngineOptions options,
434                     std::unique_ptr<const Filesystem> filesystem,
435                     std::unique_ptr<const IcingFilesystem> icing_filesystem,
436                     std::unique_ptr<Clock> clock,
437                     std::unique_ptr<const JniCache> jni_cache = nullptr);
438 
439  private:
440   const IcingSearchEngineOptions options_;
441   const std::unique_ptr<const Filesystem> filesystem_;
442   const std::unique_ptr<const IcingFilesystem> icing_filesystem_;
443   bool initialized_ ICING_GUARDED_BY(mutex_) = false;
444 
445   // Abstraction for accessing time values.
446   const std::unique_ptr<const Clock> clock_;
447 
448   // Provides key thresholds that affect the running time and memory of major
449   // components in Icing search engine.
450   const PerformanceConfiguration performance_configuration_;
451 
452   // Used to manage pagination state of query results. Even though
453   // ResultStateManager has its own reader-writer lock, mutex_ must still be
454   // acquired first in order to adhere to the global lock ordering:
455   //   1. mutex_
456   //   2. result_state_manager_.lock_
457   std::unique_ptr<ResultStateManager> result_state_manager_
458       ICING_GUARDED_BY(mutex_);
459 
460   // Used to provide reader and writer locks
461   absl_ports::shared_mutex mutex_;
462 
463   // Stores and processes the schema
464   std::unique_ptr<SchemaStore> schema_store_ ICING_GUARDED_BY(mutex_);
465 
466   // Used to store all valid documents
467   std::unique_ptr<DocumentStore> document_store_ ICING_GUARDED_BY(mutex_);
468 
469   std::unique_ptr<const LanguageSegmenter> language_segmenter_
470       ICING_GUARDED_BY(mutex_);
471 
472   std::unique_ptr<const Normalizer> normalizer_ ICING_GUARDED_BY(mutex_);
473 
474   // Storage for all hits of string contents from the document store.
475   std::unique_ptr<Index> index_ ICING_GUARDED_BY(mutex_);
476 
477   // Storage for all hits of numeric contents from the document store.
478   std::unique_ptr<NumericIndex<int64_t>> integer_index_
479       ICING_GUARDED_BY(mutex_);
480 
481   // Storage for all join qualified ids from the document store.
482   std::unique_ptr<QualifiedIdTypeJoinableIndex> qualified_id_join_index_
483       ICING_GUARDED_BY(mutex_);
484 
485   // Pointer to JNI class references
486   const std::unique_ptr<const JniCache> jni_cache_;
487 
488   // Resets all members that are created during Initialize.
489   void ResetMembers() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
490 
491   // Resets all members that are created during Initialize, deletes all
492   // underlying files and initializes a fresh index.
493   ResetResultProto ResetInternal() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
494 
495   // Checks for the existence of the init marker file. If the failed init count
496   // exceeds kMaxUnsuccessfulInitAttempts, all data is deleted and the index is
497   // initialized from scratch. The updated count (original failed init count
498   // + 1) is written to the marker file.
499   //
500   // Returns:
501   //   OK on success
502   //   INTERNAL if an IO error occurs while trying to update the marker file
503   libtextclassifier3::Status CheckInitMarkerFile(
504       InitializeStatsProto* initialize_stats)
505       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
506 
507   // Helper method to do the actual work to persist data to disk. We need this
508   // separate method so that other public methods don't need to call
509   // PersistToDisk(). Public methods calling each other may cause deadlock
510   // issues.
511   libtextclassifier3::Status InternalPersistToDisk(
512       PersistType::Code persist_type) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
513 
514   // Helper method to do the actual work to Initialize. We need this separate
515   // method so that other public methods don't need to call Initialize(). Public
516   // methods calling each other may cause deadlock issues.
517   InitializeResultProto InternalInitialize()
518       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
519 
520   // Helper method to initialize member variables.
521   //
522   // Returns:
523   //   OK on success
524   //   FAILED_PRECONDITION if initialize_stats is null
525   //   RESOURCE_EXHAUSTED if the index runs out of storage
526   //   NOT_FOUND if some Document's schema type is not in the SchemaStore
527   //   INTERNAL on any I/O errors
528   libtextclassifier3::Status InitializeMembers(
529       InitializeStatsProto* initialize_stats)
530       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
531 
532   // Do any initialization/recovery necessary to create a SchemaStore instance.
533   //
534   // Returns:
535   //   OK on success
536   //   FAILED_PRECONDITION if initialize_stats is null
537   //   INTERNAL on I/O error
538   libtextclassifier3::Status InitializeSchemaStore(
539       InitializeStatsProto* initialize_stats)
540       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
541 
542   // Do any initialization/recovery necessary to create a DocumentStore
543   // instance.
544   //
545   // See comments on DocumentStore::Create for explanation of
546   // force_recovery_and_revalidate_documents.
547   //
548   // Returns:
549   //   OK on success
550   //   FAILED_PRECONDITION if initialize_stats is null
551   //   INTERNAL on I/O error
552   libtextclassifier3::Status InitializeDocumentStore(
553       bool force_recovery_and_revalidate_documents,
554       InitializeStatsProto* initialize_stats)
555       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
556 
557   // Do any initialization/recovery necessary to create term index, integer
558   // index, and qualified id join index instances.
559   //
560   // Returns:
561   //   OK on success
562   //   FAILED_PRECONDITION if initialize_stats is null
563   //   RESOURCE_EXHAUSTED if the index runs out of storage
564   //   NOT_FOUND if some Document's schema type is not in the SchemaStore
565   //   INTERNAL on I/O error
566   libtextclassifier3::Status InitializeIndex(
567       InitializeStatsProto* initialize_stats)
568       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
569 
570   // Implementation of IcingSearchEngine::Search that only grabs the overall
571   // read-lock, allowing for parallel non-exclusive operations.
572   // This implementation is used if search_spec.use_read_only_search is true.
573   SearchResultProto SearchLockedShared(const SearchSpecProto& search_spec,
574                                    const ScoringSpecProto& scoring_spec,
575                                    const ResultSpecProto& result_spec)
576       ICING_LOCKS_EXCLUDED(mutex_);
577 
578   // Implementation of IcingSearchEngine::Search that requires the overall
579   // write lock. No other operations of any kind can be executed in parallel if
580   // this version is used.
581   // This implementation is used if search_spec.use_read_only_search is false.
582   SearchResultProto SearchLockedExclusive(const SearchSpecProto& search_spec,
583                                  const ScoringSpecProto& scoring_spec,
584                                  const ResultSpecProto& result_spec)
585       ICING_LOCKS_EXCLUDED(mutex_);
586 
587   // Helper method for the actual work to Search. We need this separate
588   // method to manage locking for Search.
589   SearchResultProto InternalSearch(const SearchSpecProto& search_spec,
590                                    const ScoringSpecProto& scoring_spec,
591                                    const ResultSpecProto& result_spec)
592       ICING_SHARED_LOCKS_REQUIRED(mutex_);
593 
594   // Processes query and scores according to the specs. It is a helper function
595   // (called by Search) to process and score normal query and the nested child
596   // query for join search.
597   //
598   // Returns a QueryScoringResults
599   //   OK on success with a vector of ScoredDocumentHits,
600   //      SectionRestrictQueryTermsMap, and other stats fields for logging.
601   //   Any other errors when processing the query or scoring
602   struct QueryScoringResults {
603     libtextclassifier3::Status status;  // OK, or the processing/scoring error.
604     SectionRestrictQueryTermsMap query_terms;
605     std::vector<ScoredDocumentHit> scored_document_hits;
606     int64_t parse_query_latency_ms;  // Stats field for logging.
607     int64_t scoring_latency_ms;      // Stats field for logging.
608     // Takes every field explicitly; the non-integer arguments are moved in.
609     explicit QueryScoringResults(
610         libtextclassifier3::Status status_in,
611         SectionRestrictQueryTermsMap&& query_terms_in,
612         std::vector<ScoredDocumentHit>&& scored_document_hits_in,
613         int64_t parse_query_latency_ms_in, int64_t scoring_latency_ms_in)
614         : status(std::move(status_in)),
615           query_terms(std::move(query_terms_in)),
616           scored_document_hits(std::move(scored_document_hits_in)),
617           parse_query_latency_ms(parse_query_latency_ms_in),
618           scoring_latency_ms(scoring_latency_ms_in) {}
619   };
620   QueryScoringResults ProcessQueryAndScore(
621       const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
622       const ResultSpecProto& result_spec,
623       const JoinChildrenFetcher* join_children_fetcher, int64_t current_time_ms)
624       ICING_SHARED_LOCKS_REQUIRED(mutex_);
625 
626   // Many of the internal components rely on other components' derived data.
627   // Check that everything is consistent with each other so that we're not
628   // using outdated derived data in some parts of our system.
629   //
630   // NOTE: this method can be called only at startup time or after
631   // PersistToDisk(), otherwise the check could fail due to any changes that are
632   // not persisted.
633   //
634   // Returns:
635   //   OK on success
636   //   NOT_FOUND if missing header file
637   //   INTERNAL_ERROR on any IO errors or if header is inconsistent
638   libtextclassifier3::Status CheckConsistency()
639       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
640 
641   // Discards all derived data.
642   //
643   // Returns:
644   //   OK on success
645   //   FAILED_PRECONDITION_ERROR if those instances are valid (non nullptr)
646   //   INTERNAL_ERROR on any I/O errors
647   libtextclassifier3::Status DiscardDerivedFiles()
648       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
649 
650   // Repopulates derived data off our ground truths.
651   //
652   // Returns:
653   //   OK on success
654   //   INTERNAL_ERROR on any IO errors
655   libtextclassifier3::Status RegenerateDerivedFiles(
656       InitializeStatsProto* initialize_stats = nullptr,
657       bool log_document_store_stats = false)
658       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
659 
660   // Optimizes the DocumentStore by removing any unneeded documents (i.e.
661   // deleted, expired, etc.) from the filesystem storage.
662   //
663   // NOTE: This may leave the DocumentStore in an invalid/uncreated state;
664   // if so, call Initialize() to reinitialize everything into a valid state.
665   //
666   // Returns:
667   //   On success, a vector that maps from old document id to new document id. A
668   //   value of kInvalidDocumentId indicates that the old document id has been
669   //   deleted.
670   //   ABORTED_ERROR if any error happens before the actual optimization, the
671   //                 original document store should be still available
672   //   DATA_LOSS_ERROR on errors that could potentially cause data loss,
673   //                   document store is still available
674   //   INTERNAL_ERROR on any IO errors or other errors that we can't recover
675   //                  from
676   libtextclassifier3::StatusOr<std::vector<DocumentId>> OptimizeDocumentStore(
677       OptimizeStatsProto* optimize_stats)
678       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
679 
680   // Helper method to restore missing document data in index_, integer_index_,
681   // and qualified_id_join_index_. All documents will be reindexed. This does
682   // not clear the index, so it is recommended to call ClearAllIndices,
683   // ClearSearchIndices, or ClearJoinIndices first if needed.
684   //
685   // Returns an IndexRestorationResult whose status is:
686   //   OK on success; the three *_needed_restoration flags indicate, per index,
687   //     whether or not restoration was needed.
688   //   DATA_LOSS, if an error during index merging caused us to lose indexed
689   //     data in the main index. Despite the data loss, this is still considered
690   //     a successful run; presumably index_needed_restoration is set to true.
691   //   RESOURCE_EXHAUSTED if the index fills up before finishing indexing
692   //   NOT_FOUND if some Document's schema type is not in the SchemaStore
693   //   INTERNAL_ERROR on any I/O errors
694   struct IndexRestorationResult {
695     libtextclassifier3::Status status;
696     bool index_needed_restoration;
697     bool integer_index_needed_restoration;
698     bool qualified_id_join_index_needed_restoration;
699   };
700   IndexRestorationResult RestoreIndexIfNeeded()
701       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
702 
703   // If we lost the schema during a previous failure, it may "look" the same as
704   // never having set a schema: we don't have a schema proto file. So do some
705   // extra checks to differentiate between having lost the schema and never
706   // having had a schema before. This may determine if we need to do extra
707   // recovery steps.
708   //
709   // Returns:
710   //   true if we had a schema and unintentionally lost it, false otherwise
711   //   INTERNAL_ERROR on any I/O errors
712   libtextclassifier3::StatusOr<bool> LostPreviousSchema()
713       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
714 
715   // Helper method to create the data indexing handlers for every kind of data
716   // we index: terms, integers, and join qualified ids.
717   libtextclassifier3::StatusOr<
718       std::vector<std::unique_ptr<DataIndexingHandler>>>
719   CreateDataIndexingHandlers() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
720 
721   // Helper method to discard parts of (term, integer, qualified id join)
722   // indices if they contain data for document ids greater than
723   // last_stored_document_id.
724   //
725   // REQUIRES: last_stored_document_id is valid (!= kInvalidDocumentId). Note:
726   //   if we want to truncate everything in the index, then please call
727   //   ClearSearchIndices/ClearJoinIndices/ClearAllIndices instead.
728   //
729   // Returns:
730   //   On success, a DocumentId indicating the first document to start for
731   //     reindexing and 3 bool flags indicating whether the term, integer, or
732   //     qualified id join index needs restoration.
733   //   INTERNAL_ERROR on any I/O errors
734   struct TruncateIndexResult {
735     DocumentId first_document_to_reindex;
736     bool index_needed_restoration;
737     bool integer_index_needed_restoration;
738     bool qualified_id_join_index_needed_restoration;
739 
740     explicit TruncateIndexResult(
741         DocumentId first_document_to_reindex_in,
742         bool index_needed_restoration_in,
743         bool integer_index_needed_restoration_in,
744         bool qualified_id_join_index_needed_restoration_in)
745         : first_document_to_reindex(first_document_to_reindex_in),
746           index_needed_restoration(index_needed_restoration_in),
747           integer_index_needed_restoration(integer_index_needed_restoration_in),
748           qualified_id_join_index_needed_restoration(
749               qualified_id_join_index_needed_restoration_in) {}
750   };
751   libtextclassifier3::StatusOr<TruncateIndexResult> TruncateIndicesTo(
752       DocumentId last_stored_document_id)
753       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
754 
755   // Helper method to discard the search indices, i.e. the term and integer
756   // indices.
757   // Returns:
758   //   OK on success
759   //   INTERNAL_ERROR on any I/O errors
760   libtextclassifier3::Status ClearSearchIndices()
761       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
762 
763   // Helper method to discard the join indices, i.e. the qualified id indices.
764   //
765   // Returns:
766   //   OK on success
767   //   INTERNAL_ERROR on any I/O errors
768   libtextclassifier3::Status ClearJoinIndices()
769       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
770 
771   // Discards all indices: search (term, integer) and join (qualified id).
772   //
773   // Returns:
774   //   OK on success
775   //   INTERNAL_ERROR on any I/O errors
776   libtextclassifier3::Status ClearAllIndices()
777       ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
778 };
779 
780 }  // namespace lib
781 }  // namespace icing
782 
783 #endif  // ICING_ICING_SEARCH_ENGINE_H_
784