// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "icing/query/query-processor.h"

#include <cstdint>
#include <memory>
#include <unordered_set>
#include <utility>
#include <vector>

#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/index/embed/embedding-index.h"
#include "icing/index/index.h"
#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
#include "icing/index/iterator/doc-hit-info-iterator-filter.h"
#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
#include "icing/index/numeric/numeric-index.h"
#include "icing/proto/logging.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/query/advanced_query_parser/abstract-syntax-tree.h"
#include "icing/query/advanced_query_parser/lexer.h"
#include "icing/query/advanced_query_parser/parser.h"
#include "icing/query/advanced_query_parser/query-visitor.h"
#include "icing/query/query-features.h"
#include "icing/query/query-results.h"
#include "icing/query/query-utils.h"
#include "icing/schema/schema-store.h"
#include "icing/store/document-store.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/tokenization/tokenizer-factory.h"
#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer.h"
#include "icing/util/clock.h"
#include "icing/util/status-macros.h"

namespace icing {
namespace lib {

libtextclassifier3::StatusOr<std::unique_ptr<QueryProcessor>>
QueryProcessor::Create(Index* index, const NumericIndex<int64_t>* numeric_index,
                       const EmbeddingIndex* embedding_index,
                       const LanguageSegmenter* language_segmenter,
                       const Normalizer* normalizer,
                       const DocumentStore* document_store,
                       const SchemaStore* schema_store, const Clock* clock) {
  ICING_RETURN_ERROR_IF_NULL(index);
  ICING_RETURN_ERROR_IF_NULL(numeric_index);
  ICING_RETURN_ERROR_IF_NULL(embedding_index);
  ICING_RETURN_ERROR_IF_NULL(language_segmenter);
  ICING_RETURN_ERROR_IF_NULL(normalizer);
  ICING_RETURN_ERROR_IF_NULL(document_store);
  ICING_RETURN_ERROR_IF_NULL(schema_store);
  ICING_RETURN_ERROR_IF_NULL(clock);

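  // std::make_unique is not used here, presumably because the QueryProcessor
  // constructor is not publicly accessible.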
  return std::unique_ptr<QueryProcessor>(new QueryProcessor(
      index, numeric_index, embedding_index, language_segmenter, normalizer,
      document_store, schema_store, clock));
}

QueryProcessor::QueryProcessor(Index* index,
                               const NumericIndex<int64_t>* numeric_index,
                               const EmbeddingIndex* embedding_index,
                               const LanguageSegmenter* language_segmenter,
                               const Normalizer* normalizer,
                               const DocumentStore* document_store,
                               const SchemaStore* schema_store,
                               const Clock* clock)
    : index_(*index),
      numeric_index_(*numeric_index),
      embedding_index_(*embedding_index),
      language_segmenter_(*language_segmenter),
      normalizer_(*normalizer),
      document_store_(*document_store),
      schema_store_(*schema_store),
      clock_(*clock) {}

libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseSearch(
    const SearchSpecProto& search_spec,
    ScoringSpecProto::RankingStrategy::Code ranking_strategy,
    int64_t current_time_ms, QueryStatsProto::SearchStats* search_stats) {
  ICING_ASSIGN_OR_RETURN(QueryResults results,
                         ParseAdvancedQuery(search_spec, ranking_strategy,
                                            current_time_ms, search_stats));

  // Check that all new features used in the search have been enabled in the
  // SearchSpec.
  const std::unordered_set<Feature> enabled_features(
      search_spec.enabled_features().begin(),
      search_spec.enabled_features().end());
  for (const Feature feature : results.features_in_use) {
    if (enabled_features.find(feature) == enabled_features.end()) {
      return absl_ports::InvalidArgumentError(absl_ports::StrCat(
          "Attempted use of unenabled feature ", feature,
          ". Please make sure that you have explicitly set all advanced query "
          "features used in this query as enabled in the SearchSpec."));
    }
  }

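  // Wrap the root iterator in a filter iterator that applies the
  // document-level filter options derived from the SearchSpec.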
  DocHitInfoIteratorFilter::Options options =
      GetFilterOptions(search_spec, document_store_, schema_store_);
  results.root_iterator = std::make_unique<DocHitInfoIteratorFilter>(
      std::move(results.root_iterator), &document_store_, &schema_store_,
      options, current_time_ms);
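  // If the SearchSpec specifies per-type property filters, restrict hits to
  // the allowed sections.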
  if (!search_spec.type_property_filters().empty()) {
    results.root_iterator =
        DocHitInfoIteratorSectionRestrict::ApplyRestrictions(
            std::move(results.root_iterator), &document_store_, &schema_store_,
            search_spec, current_time_ms);
  }
  return results;
}

libtextclassifier3::StatusOr<QueryResults> QueryProcessor::ParseAdvancedQuery(
    const SearchSpecProto& search_spec,
    ScoringSpecProto::RankingStrategy::Code ranking_strategy,
    int64_t current_time_ms, QueryStatsProto::SearchStats* search_stats) const {
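  // Lex the raw query string into tokens, recording the latency in the search
  // stats if they were provided.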
  std::unique_ptr<Timer> lexer_timer = clock_.GetNewTimer();
  Lexer lexer(search_spec.query(), Lexer::Language::QUERY);
  ICING_ASSIGN_OR_RETURN(std::vector<Lexer::LexerToken> lexer_tokens,
                         lexer.ExtractTokens());
  if (search_stats != nullptr) {
    search_stats->set_query_processor_lexer_extract_token_latency_ms(
        lexer_timer->GetElapsedMilliseconds());
  }

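  // Parse the lexer tokens into an abstract syntax tree.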
  std::unique_ptr<Timer> parser_timer = clock_.GetNewTimer();
  Parser parser = Parser::Create(std::move(lexer_tokens));
  ICING_ASSIGN_OR_RETURN(std::unique_ptr<Node> tree_root,
                         parser.ConsumeQuery());
  if (search_stats != nullptr) {
    search_stats->set_query_processor_parser_consume_query_latency_ms(
        parser_timer->GetElapsedMilliseconds());
  }

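  // An empty query produces a null tree root. Treat it as a match-all query by
  // returning an iterator over every document id.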
  if (tree_root == nullptr) {
    QueryResults results;
    results.root_iterator = std::make_unique<DocHitInfoIteratorAllDocumentId>(
        document_store_.last_added_document_id());
    return results;
  }
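  // Set up what is needed to evaluate the query tree: a plain tokenizer, the
  // document filter options, and whether term frequency info is needed for
  // relevance scoring.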
  ICING_ASSIGN_OR_RETURN(
      std::unique_ptr<Tokenizer> plain_tokenizer,
      tokenizer_factory::CreateIndexingTokenizer(
          StringIndexingConfig::TokenizerType::PLAIN, &language_segmenter_));
  DocHitInfoIteratorFilter::Options options =
      GetFilterOptions(search_spec, document_store_, schema_store_);
  bool needs_term_frequency_info =
      ranking_strategy == ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE;

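  // Walk the abstract syntax tree with the QueryVisitor to build the doc-hit
  // iterator tree, then consume the visitor to produce the query results.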
  std::unique_ptr<Timer> query_visitor_timer = clock_.GetNewTimer();
  QueryVisitor query_visitor(
      &index_, &numeric_index_, &embedding_index_, &document_store_,
      &schema_store_, &normalizer_, plain_tokenizer.get(), search_spec.query(),
      &search_spec.embedding_query_vectors(), std::move(options),
      search_spec.term_match_type(), search_spec.embedding_query_metric_type(),
      needs_term_frequency_info, current_time_ms);
  tree_root->Accept(&query_visitor);
  ICING_ASSIGN_OR_RETURN(QueryResults results,
                         std::move(query_visitor).ConsumeResults());
  if (search_stats != nullptr) {
    search_stats->set_query_processor_query_visitor_latency_ms(
        query_visitor_timer->GetElapsedMilliseconds());
  }

  return results;
}

}  // namespace lib
}  // namespace icing