1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "icing/query/query-processor.h"
16
17 #include <cstdint>
18 #include <memory>
19 #include <unordered_set>
20 #include <utility>
21 #include <vector>
22
23 #include "icing/text_classifier/lib3/utils/base/statusor.h"
24 #include "icing/absl_ports/canonical_errors.h"
25 #include "icing/absl_ports/str_cat.h"
26 #include "icing/index/embed/embedding-index.h"
27 #include "icing/index/index.h"
28 #include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
29 #include "icing/index/iterator/doc-hit-info-iterator-filter.h"
30 #include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
31 #include "icing/index/numeric/numeric-index.h"
32 #include "icing/proto/logging.pb.h"
33 #include "icing/proto/search.pb.h"
34 #include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
35 #include "icing/query/advanced_query_parser/lexer.h"
36 #include "icing/query/advanced_query_parser/parser.h"
37 #include "icing/query/advanced_query_parser/query-visitor.h"
38 #include "icing/query/query-features.h"
39 #include "icing/query/query-results.h"
40 #include "icing/query/query-utils.h"
41 #include "icing/schema/schema-store.h"
42 #include "icing/store/document-store.h"
43 #include "icing/tokenization/language-segmenter.h"
44 #include "icing/tokenization/tokenizer-factory.h"
45 #include "icing/tokenization/tokenizer.h"
46 #include "icing/transform/normalizer.h"
47 #include "icing/util/clock.h"
48 #include "icing/util/status-macros.h"
49
50 namespace icing {
51 namespace lib {
52
53 libtextclassifier3::StatusOr<std::unique_ptr<QueryProcessor>>
Create(Index * index,const NumericIndex<int64_t> * numeric_index,const EmbeddingIndex * embedding_index,const LanguageSegmenter * language_segmenter,const Normalizer * normalizer,const DocumentStore * document_store,const SchemaStore * schema_store,const Clock * clock)54 QueryProcessor::Create(Index* index, const NumericIndex<int64_t>* numeric_index,
55 const EmbeddingIndex* embedding_index,
56 const LanguageSegmenter* language_segmenter,
57 const Normalizer* normalizer,
58 const DocumentStore* document_store,
59 const SchemaStore* schema_store, const Clock* clock) {
60 ICING_RETURN_ERROR_IF_NULL(index);
61 ICING_RETURN_ERROR_IF_NULL(numeric_index);
62 ICING_RETURN_ERROR_IF_NULL(embedding_index);
63 ICING_RETURN_ERROR_IF_NULL(language_segmenter);
64 ICING_RETURN_ERROR_IF_NULL(normalizer);
65 ICING_RETURN_ERROR_IF_NULL(document_store);
66 ICING_RETURN_ERROR_IF_NULL(schema_store);
67 ICING_RETURN_ERROR_IF_NULL(clock);
68
69 return std::unique_ptr<QueryProcessor>(new QueryProcessor(
70 index, numeric_index, embedding_index, language_segmenter, normalizer,
71 document_store, schema_store, clock));
72 }
73
QueryProcessor(Index * index,const NumericIndex<int64_t> * numeric_index,const EmbeddingIndex * embedding_index,const LanguageSegmenter * language_segmenter,const Normalizer * normalizer,const DocumentStore * document_store,const SchemaStore * schema_store,const Clock * clock)74 QueryProcessor::QueryProcessor(Index* index,
75 const NumericIndex<int64_t>* numeric_index,
76 const EmbeddingIndex* embedding_index,
77 const LanguageSegmenter* language_segmenter,
78 const Normalizer* normalizer,
79 const DocumentStore* document_store,
80 const SchemaStore* schema_store,
81 const Clock* clock)
82 : index_(*index),
83 numeric_index_(*numeric_index),
84 embedding_index_(*embedding_index),
85 language_segmenter_(*language_segmenter),
86 normalizer_(*normalizer),
87 document_store_(*document_store),
88 schema_store_(*schema_store),
89 clock_(*clock) {}
90
ParseSearch(const SearchSpecProto & search_spec,ScoringSpecProto::RankingStrategy::Code ranking_strategy,int64_t current_time_ms,QueryStatsProto::SearchStats * search_stats)91 libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
92 const SearchSpecProto& search_spec,
93 ScoringSpecProto::RankingStrategy::Code ranking_strategy,
94 int64_t current_time_ms, QueryStatsProto::SearchStats* search_stats) {
95 ICING_ASSIGN_OR_RETURN(QueryResults results,
96 ParseAdvancedQuery(search_spec, ranking_strategy,
97 current_time_ms, search_stats));
98
99 // Check that all new features used in the search have been enabled in the
100 // SearchSpec.
101 const std::unordered_set<Feature> enabled_features(
102 search_spec.enabled_features().begin(),
103 search_spec.enabled_features().end());
104 for (const Feature feature : results.features_in_use) {
105 if (enabled_features.find(feature) == enabled_features.end()) {
106 return absl_ports::InvalidArgumentError(absl_ports::StrCat(
107 "Attempted use of unenabled feature ", feature,
108 ". Please make sure that you have explicitly set all advanced query "
109 "features used in this query as enabled in the SearchSpec."));
110 }
111 }
112
113 DocHitInfoIteratorFilter::Options options =
114 GetFilterOptions(search_spec, document_store_, schema_store_);
115 results.root_iterator = std::make_unique<DocHitInfoIteratorFilter>(
116 std::move(results.root_iterator), &document_store_, &schema_store_,
117 options, current_time_ms);
118 if (!search_spec.type_property_filters().empty()) {
119 results.root_iterator =
120 DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
121 std::move(results.root_iterator), &document_store_, &schema_store_,
122 search_spec, current_time_ms);
123 }
124 return results;
125 }
126
ParseAdvancedQuery(const SearchSpecProto & search_spec,ScoringSpecProto::RankingStrategy::Code ranking_strategy,int64_t current_time_ms,QueryStatsProto::SearchStats * search_stats) const127 libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseAdvancedQuery(
128 const SearchSpecProto& search_spec,
129 ScoringSpecProto::RankingStrategy::Code ranking_strategy,
130 int64_t current_time_ms, QueryStatsProto::SearchStats* search_stats) const {
131 std::unique_ptr<Timer> lexer_timer = clock_.GetNewTimer();
132 Lexer lexer(search_spec.query(), Lexer::Language::QUERY);
133 ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
134 lexer.ExtractTokens());
135 if (search_stats != nullptr) {
136 search_stats->set_query_processor_lexer_extract_token_latency_ms(
137 lexer_timer->GetElapsedMilliseconds());
138 }
139
140 std::unique_ptr<Timer> parser_timer = clock_.GetNewTimer();
141 Parser parser = Parser::Create(std::move(lexer_tokens));
142 ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> tree_root,
143 parser.ConsumeQuery());
144 if (search_stats != nullptr) {
145 search_stats->set_query_processor_parser_consume_query_latency_ms(
146 parser_timer->GetElapsedMilliseconds());
147 }
148
149 if (tree_root == nullptr) {
150 QueryResults results;
151 results.root_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
152 document_store_.last_added_document_id());
153 return results;
154 }
155 ICING_ASSIGN_OR_RETURN(
156 std::unique_ptr<Tokenizer> plain_tokenizer,
157 tokenizer_factory::CreateIndexingTokenizer(
158 StringIndexingConfig::TokenizerType::PLAIN, &language_segmenter_));
159 DocHitInfoIteratorFilter::Options options =
160 GetFilterOptions(search_spec, document_store_, schema_store_);
161 bool needs_term_frequency_info =
162 ranking_strategy == ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE;
163
164 std::unique_ptr<Timer> query_visitor_timer = clock_.GetNewTimer();
165 QueryVisitor query_visitor(
166 &index_, &numeric_index_, &embedding_index_, &document_store_,
167 &schema_store_, &normalizer_, plain_tokenizer.get(), search_spec.query(),
168 &search_spec.embedding_query_vectors(), std::move(options),
169 search_spec.term_match_type(), search_spec.embedding_query_metric_type(),
170 needs_term_frequency_info, current_time_ms);
171 tree_root->Accept(&query_visitor);
172 ICING_ASSIGN_OR_RETURN(QueryResults results,
173 std::move(query_visitor).ConsumeResults());
174 if (search_stats != nullptr) {
175 search_stats->set_query_processor_query_visitor_latency_ms(
176 query_visitor_timer->GetElapsedMilliseconds());
177 }
178
179 return results;
180 }
181
182 } // namespace lib
183 } // namespace icing
184