1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_QUERY_QUERY_PROCESSOR_H_ 16 #define ICING_QUERY_QUERY_PROCESSOR_H_ 17 18 #include <cstdint> 19 #include <memory> 20 21 #include "icing/text_classifier/lib3/utils/base/statusor.h" 22 #include "icing/index/embed/embedding-index.h" 23 #include "icing/index/index.h" 24 #include "icing/index/numeric/numeric-index.h" 25 #include "icing/proto/logging.pb.h" 26 #include "icing/proto/search.pb.h" 27 #include "icing/query/query-results.h" 28 #include "icing/schema/schema-store.h" 29 #include "icing/store/document-store.h" 30 #include "icing/tokenization/language-segmenter.h" 31 #include "icing/transform/normalizer.h" 32 #include "icing/util/clock.h" 33 34 namespace icing { 35 namespace lib { 36 37 // Processes SearchSpecProtos and retrieves the specified DocHitInfos that 38 // satisfies the query and its restrictions. This does not perform any scoring, 39 // and returns matched documents in a descending DocumentId order. 40 class QueryProcessor { 41 public: 42 // Factory function to create a QueryProcessor which does not take ownership 43 // of any input components, and all pointers must refer to valid objects that 44 // outlive the created QueryProcessor instance. 45 // 46 // Returns: 47 // An QueryProcessor on success 48 // FAILED_PRECONDITION if any of the pointers is null. 49 static libtextclassifier3::StatusOr<std::unique_ptr<QueryProcessor>> Create( 50 Index* index, const NumericIndex<int64_t>* numeric_index, 51 const EmbeddingIndex* embedding_index, 52 const LanguageSegmenter* language_segmenter, const Normalizer* normalizer, 53 const DocumentStore* document_store, const SchemaStore* schema_store, 54 const Clock* clock); 55 56 // Parse the search configurations (including the query, any additional 57 // filters, etc.) in the SearchSpecProto into one DocHitInfoIterator. 58 // 59 // When ranking_strategy == RELEVANCE_SCORE, the root_iterator and the 60 // query_term_iterators returned will keep term frequency information 61 // internally, so that term frequency stats will be collected when calling 62 // PopulateMatchedTermsStats to the iterators. 63 // 64 // Returns: 65 // On success, 66 // - One iterator that represents the entire query 67 // - A map representing the query terms and any section restrictions 68 // INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized 69 // INTERNAL_ERROR on all other errors 70 libtextclassifier3::StatusOr<QueryResults> ParseSearch( 71 const SearchSpecProto& search_spec, 72 ScoringSpecProto::RankingStrategy::Code ranking_strategy, 73 int64_t current_time_ms, 74 QueryStatsProto::SearchStats* search_stats = nullptr); 75 76 private: 77 explicit QueryProcessor(Index* index, 78 const NumericIndex<int64_t>* numeric_index, 79 const EmbeddingIndex* embedding_index, 80 const LanguageSegmenter* language_segmenter, 81 const Normalizer* normalizer, 82 const DocumentStore* document_store, 83 const SchemaStore* schema_store, const Clock* clock); 84 85 // Parse the query into a one DocHitInfoIterator that represents the root of a 86 // query tree in our new Advanced Query Language. 87 // 88 // Returns: 89 // On success, 90 // - One iterator that represents the entire query 91 // INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized 92 libtextclassifier3::StatusOr<QueryResults> ParseAdvancedQuery( 93 const SearchSpecProto& search_spec, 94 ScoringSpecProto::RankingStrategy::Code ranking_strategy, 95 int64_t current_time_ms, 96 QueryStatsProto::SearchStats* search_stats) const; 97 98 // Parse the query into a one DocHitInfoIterator that represents the root of a 99 // query tree. 100 // 101 // Returns: 102 // On success, 103 // - One iterator that represents the entire query 104 // - A map representing the query terms and any section restrictions 105 // INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized 106 // INTERNAL_ERROR on all other errors 107 libtextclassifier3::StatusOr<QueryResults> ParseRawQuery( 108 const SearchSpecProto& search_spec, 109 ScoringSpecProto::RankingStrategy::Code ranking_strategy, 110 int64_t current_time_ms); 111 112 // Not const because we could modify/sort the hit buffer in the lite index at 113 // query time. 114 Index& index_; // Does not own. 115 const NumericIndex<int64_t>& numeric_index_; // Does not own. 116 const EmbeddingIndex& embedding_index_; // Does not own. 117 const LanguageSegmenter& language_segmenter_; // Does not own. 118 const Normalizer& normalizer_; // Does not own. 119 const DocumentStore& document_store_; // Does not own. 120 const SchemaStore& schema_store_; // Does not own. 121 const Clock& clock_; // Does not own. 122 }; 123 124 } // namespace lib 125 } // namespace icing 126 127 #endif // ICING_QUERY_QUERY_PROCESSOR_H_ 128