1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_RESULT_SNIPPET_CONTEXT_H_ 16 #define ICING_RESULT_SNIPPET_CONTEXT_H_ 17 18 #include <string> 19 #include <unordered_map> 20 #include <unordered_set> 21 #include <utility> 22 #include <vector> 23 24 #include "icing/proto/search.pb.h" 25 #include "icing/proto/term.pb.h" 26 #include "icing/query/query-terms.h" 27 #include "icing/schema/section.h" 28 #include "icing/store/document-id.h" 29 30 namespace icing { 31 namespace lib { 32 33 // Stores data needed for snippeting. With SnippetContext we can fetch snippets 34 // for queries with multiple pages. 35 struct SnippetContext { 36 // A struct to store the cache entry for embedding match info. 37 struct EmbeddingMatchInfoEntry { 38 double score; 39 SearchSpecProto::EmbeddingQueryMetricType::Code metric_type; 40 // The position of the matched embedding vector in a section relative to 41 // other vectors with the same (dimension, signature) combination. Note that 42 // this is not the universal position of the vector in the section. 43 // 44 // E.g. If a repeated vector property contains the following vectors: 45 // - vector1: [1, 2, 3] (signature = "signature1", dimension = 3) 46 // - vector2: [7, 8, 9] (signature = "signature1", dimension = 3) 47 // - vector3: [4, 5, 6, 8] (signature = "signature2", dimension = 4) 48 // - vector4: [10, 11, 12] (signature = "signature1", dimension = 3) 49 // 50 // Then the position values for each vector would be: 51 // - vector1: 0 52 // - vector2: 1 53 // - vector3: 0 54 // - vector4: 2 55 int position; 56 int query_vector_index; 57 SectionId section_id; 58 EmbeddingMatchInfoEntrySnippetContext::EmbeddingMatchInfoEntry59 explicit EmbeddingMatchInfoEntry( 60 double score_in, 61 SearchSpecProto::EmbeddingQueryMetricType::Code metric_type_in, 62 int position_in, int query_vector_index_in, SectionId section_id_in) { 63 score = score_in; 64 metric_type = metric_type_in; 65 position = position_in; 66 query_vector_index = query_vector_index_in; 67 section_id = section_id_in; 68 } 69 }; 70 71 // Maps from document_id to a vector of EmbeddingMatchInfoEntry. This 72 // is used to retrieve the full embedding match info for a given document 73 // during snippeting. 74 using DocumentEmbeddingMatchInfoMap = 75 std::unordered_map<DocumentId, std::vector<EmbeddingMatchInfoEntry>>; 76 77 // Map of 78 // (query_vector_dimension -> (model_signature -> set of query_vector_index)) 79 // for the embedding query vectors in the search spec. 80 using EmbeddingQueryVectorMetadataMap = std::unordered_map< 81 int, std::unordered_map<std::string, std::unordered_set<int>>>; 82 SnippetContextSnippetContext83 explicit SnippetContext( 84 SectionRestrictQueryTermsMap query_terms_in, 85 EmbeddingQueryVectorMetadataMap embedding_query_vector_metadata_map_in, 86 DocumentEmbeddingMatchInfoMap embedding_match_info_map_in, 87 ResultSpecProto::SnippetSpecProto snippet_spec_in, 88 TermMatchType::Code match_type_in) 89 : query_terms(std::move(query_terms_in)), 90 embedding_query_vector_metadata_map( 91 std::move(embedding_query_vector_metadata_map_in)), 92 embedding_match_info_map(std::move(embedding_match_info_map_in)), 93 snippet_spec(std::move(snippet_spec_in)), 94 match_type(match_type_in) {} 95 96 // Query terms that are used to find snippets 97 SectionRestrictQueryTermsMap query_terms; 98 99 // Query vector metadata map for finding the global section positions for 100 // each embedding match. 101 // 102 // Map of (query_vector_dimension -> (model_signature -> query_vector_index)) 103 // for the embedding query vectors in the search spec. 104 EmbeddingQueryVectorMetadataMap embedding_query_vector_metadata_map; 105 106 // Results retrieved from embedding queries. 107 DocumentEmbeddingMatchInfoMap embedding_match_info_map; 108 109 // Spec that defines some quantities of snippeting 110 ResultSpecProto::SnippetSpecProto snippet_spec; 111 112 // Defines how we match each term 113 TermMatchType::Code match_type; 114 }; 115 116 } // namespace lib 117 } // namespace icing 118 119 #endif // ICING_RESULT_SNIPPET_CONTEXT_H_ 120