• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_QUERY_QUERY_PROCESSOR_H_
16 #define ICING_QUERY_QUERY_PROCESSOR_H_
17 
18 #include <cstdint>
19 #include <memory>
20 
21 #include "icing/text_classifier/lib3/utils/base/statusor.h"
22 #include "icing/index/embed/embedding-index.h"
23 #include "icing/index/index.h"
24 #include "icing/index/numeric/numeric-index.h"
25 #include "icing/proto/logging.pb.h"
26 #include "icing/proto/search.pb.h"
27 #include "icing/query/query-results.h"
28 #include "icing/schema/schema-store.h"
29 #include "icing/store/document-store.h"
30 #include "icing/tokenization/language-segmenter.h"
31 #include "icing/transform/normalizer.h"
32 #include "icing/util/clock.h"
33 
34 namespace icing {
35 namespace lib {
36 
37 // Processes SearchSpecProtos and retrieves the specified DocHitInfos that
38 // satisfies the query and its restrictions. This does not perform any scoring,
39 // and returns matched documents in a descending DocumentId order.
40 class QueryProcessor {
41  public:
42   // Factory function to create a QueryProcessor which does not take ownership
43   // of any input components, and all pointers must refer to valid objects that
44   // outlive the created QueryProcessor instance.
45   //
46   // Returns:
47   //   An QueryProcessor on success
48   //   FAILED_PRECONDITION if any of the pointers is null.
49   static libtextclassifier3::StatusOr<std::unique_ptr<QueryProcessor>> Create(
50       Index* index, const NumericIndex<int64_t>* numeric_index,
51       const EmbeddingIndex* embedding_index,
52       const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
53       const DocumentStore* document_store, const SchemaStore* schema_store,
54       const Clock* clock);
55 
56   // Parse the search configurations (including the query, any additional
57   // filters, etc.) in the SearchSpecProto into one DocHitInfoIterator.
58   //
59   // When ranking_strategy == RELEVANCE_SCORE, the root_iterator and the
60   // query_term_iterators returned will keep term frequency information
61   // internally, so that term frequency stats will be collected when calling
62   // PopulateMatchedTermsStats to the iterators.
63   //
64   // Returns:
65   //   On success,
66   //     - One iterator that represents the entire query
67   //     - A map representing the query terms and any section restrictions
68   //   INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized
69   //   INTERNAL_ERROR on all other errors
70   libtextclassifier3::StatusOr<QueryResults> ParseSearch(
71       const SearchSpecProto& search_spec,
72       ScoringSpecProto::RankingStrategy::Code ranking_strategy,
73       int64_t current_time_ms,
74       QueryStatsProto::SearchStats* search_stats = nullptr);
75 
76  private:
77   explicit QueryProcessor(Index* index,
78                           const NumericIndex<int64_t>* numeric_index,
79                           const EmbeddingIndex* embedding_index,
80                           const LanguageSegmenter* language_segmenter,
81                           const Normalizer* normalizer,
82                           const DocumentStore* document_store,
83                           const SchemaStore* schema_store, const Clock* clock);
84 
85   // Parse the query into a one DocHitInfoIterator that represents the root of a
86   // query tree in our new Advanced Query Language.
87   //
88   // Returns:
89   //   On success,
90   //     - One iterator that represents the entire query
91   //   INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized
92   libtextclassifier3::StatusOr<QueryResults> ParseAdvancedQuery(
93       const SearchSpecProto& search_spec,
94       ScoringSpecProto::RankingStrategy::Code ranking_strategy,
95       int64_t current_time_ms,
96       QueryStatsProto::SearchStats* search_stats) const;
97 
98   // Parse the query into a one DocHitInfoIterator that represents the root of a
99   // query tree.
100   //
101   // Returns:
102   //   On success,
103   //     - One iterator that represents the entire query
104   //     - A map representing the query terms and any section restrictions
105   //   INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized
106   //   INTERNAL_ERROR on all other errors
107   libtextclassifier3::StatusOr<QueryResults> ParseRawQuery(
108       const SearchSpecProto& search_spec,
109       ScoringSpecProto::RankingStrategy::Code ranking_strategy,
110       int64_t current_time_ms);
111 
112   // Not const because we could modify/sort the hit buffer in the lite index at
113   // query time.
114   Index& index_;                                 // Does not own.
115   const NumericIndex<int64_t>& numeric_index_;   // Does not own.
116   const EmbeddingIndex& embedding_index_;        // Does not own.
117   const LanguageSegmenter& language_segmenter_;  // Does not own.
118   const Normalizer& normalizer_;                 // Does not own.
119   const DocumentStore& document_store_;          // Does not own.
120   const SchemaStore& schema_store_;              // Does not own.
121   const Clock& clock_;                           // Does not own.
122 };
123 
124 }  // namespace lib
125 }  // namespace icing
126 
127 #endif  // ICING_QUERY_QUERY_PROCESSOR_H_
128