• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_RESULT_SNIPPET_CONTEXT_H_
16 #define ICING_RESULT_SNIPPET_CONTEXT_H_
17 
18 #include <string>
19 #include <unordered_map>
20 #include <unordered_set>
21 #include <utility>
22 #include <vector>
23 
24 #include "icing/proto/search.pb.h"
25 #include "icing/proto/term.pb.h"
26 #include "icing/query/query-terms.h"
27 #include "icing/schema/section.h"
28 #include "icing/store/document-id.h"
29 
30 namespace icing {
31 namespace lib {
32 
33 // Stores data needed for snippeting. With SnippetContext we can fetch snippets
34 // for queries with multiple pages.
35 struct SnippetContext {
36   // A struct to store the cache entry for embedding match info.
37   struct EmbeddingMatchInfoEntry {
38     double score;
39     SearchSpecProto::EmbeddingQueryMetricType::Code metric_type;
40     // The position of the matched embedding vector in a section relative to
41     // other vectors with the same (dimension, signature) combination. Note that
42     // this is not the universal position of the vector in the section.
43     //
44     // E.g. If a repeated vector property contains the following vectors:
45     // - vector1: [1, 2, 3] (signature = "signature1", dimension = 3)
46     // - vector2: [7, 8, 9] (signature = "signature1", dimension = 3)
47     // - vector3: [4, 5, 6, 8] (signature = "signature2", dimension = 4)
48     // - vector4: [10, 11, 12] (signature = "signature1", dimension = 3)
49     //
50     // Then the position values for each vector would be:
51     // - vector1: 0
52     // - vector2: 1
53     // - vector3: 0
54     // - vector4: 2
55     int position;
56     int query_vector_index;
57     SectionId section_id;
58 
EmbeddingMatchInfoEntrySnippetContext::EmbeddingMatchInfoEntry59     explicit EmbeddingMatchInfoEntry(
60         double score_in,
61         SearchSpecProto::EmbeddingQueryMetricType::Code metric_type_in,
62         int position_in, int query_vector_index_in, SectionId section_id_in) {
63       score = score_in;
64       metric_type = metric_type_in;
65       position = position_in;
66       query_vector_index = query_vector_index_in;
67       section_id = section_id_in;
68     }
69   };
70 
71   // Maps from document_id to a vector of EmbeddingMatchInfoEntry. This
72   // is used to retrieve the full embedding match info for a given document
73   // during snippeting.
74   using DocumentEmbeddingMatchInfoMap =
75       std::unordered_map<DocumentId, std::vector<EmbeddingMatchInfoEntry>>;
76 
77   // Map of
78   // (query_vector_dimension -> (model_signature -> set of query_vector_index))
79   // for the embedding query vectors in the search spec.
80   using EmbeddingQueryVectorMetadataMap = std::unordered_map<
81       int, std::unordered_map<std::string, std::unordered_set<int>>>;
82 
SnippetContextSnippetContext83   explicit SnippetContext(
84       SectionRestrictQueryTermsMap query_terms_in,
85       EmbeddingQueryVectorMetadataMap embedding_query_vector_metadata_map_in,
86       DocumentEmbeddingMatchInfoMap embedding_match_info_map_in,
87       ResultSpecProto::SnippetSpecProto snippet_spec_in,
88       TermMatchType::Code match_type_in)
89       : query_terms(std::move(query_terms_in)),
90         embedding_query_vector_metadata_map(
91             std::move(embedding_query_vector_metadata_map_in)),
92         embedding_match_info_map(std::move(embedding_match_info_map_in)),
93         snippet_spec(std::move(snippet_spec_in)),
94         match_type(match_type_in) {}
95 
96   // Query terms that are used to find snippets
97   SectionRestrictQueryTermsMap query_terms;
98 
99   // Query vector metadata map for finding the global section positions for
100   // each embedding match.
101   //
102   // Map of (query_vector_dimension -> (model_signature -> query_vector_index))
103   // for the embedding query vectors in the search spec.
104   EmbeddingQueryVectorMetadataMap embedding_query_vector_metadata_map;
105 
106   // Results retrieved from embedding queries.
107   DocumentEmbeddingMatchInfoMap embedding_match_info_map;
108 
109   // Spec that defines some quantities of snippeting
110   ResultSpecProto::SnippetSpecProto snippet_spec;
111 
112   // Defines how we match each term
113   TermMatchType::Code match_type;
114 };
115 
116 }  // namespace lib
117 }  // namespace icing
118 
119 #endif  // ICING_RESULT_SNIPPET_CONTEXT_H_
120