// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package icing.lib;

import "icing/proto/document.proto";
import "icing/proto/logging.proto";
import "icing/proto/scoring.proto";
import "icing/proto/status.proto";
import "icing/proto/term.proto";

option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";

// Client-supplied specifications on what documents to retrieve.
// Next tag: 13
message SearchSpecProto {
  // REQUIRED: The "raw" query string that users may type. For example, "cat"
  // will search for documents with the term cat in it.
  optional string query = 1;

  // Indicates how the query terms should match terms in the index.
  //
  // TermMatchType.Code=UNKNOWN
  // Should never purposely be set and may lead to undefined behavior. This is
  // used for backwards compatibility reasons.
  //
  // TermMatchType.Code=EXACT_ONLY
  // Query terms will only match exact tokens in the index.
  // Ex. A query term "foo" will only match indexed token "foo", and not "foot"
  // or "football".
  //
  // TermMatchType.Code=PREFIX
  // Query terms will match indexed tokens when the query term is a prefix of
  // the token.
  // Ex. A query term "foo" will match indexed tokens like "foo", "foot", and
  // "football".
  optional TermMatchType.Code term_match_type = 2;

  // OPTIONAL: Only search for documents that have the specified namespaces. If
  // unset, the query will search over all namespaces. Note that this applies to
  // the entire 'query'. To issue different queries for different namespaces,
  // separate Search()'s will need to be made.
  repeated string namespace_filters = 3;

  // OPTIONAL: Only search for documents that have the specified schema types.
  // If unset, the query will search over all schema types. Note that this
  // applies to the entire 'query'. To issue different queries for different
  // schema types, separate Search()'s will need to be made. Also note that
  // schema filters will not be expanded for polymorphism.
  repeated string schema_type_filters = 4;

  // Timestamp, in milliseconds, taken just before sending the proto across the
  // JNI boundary from the Java side to the native side.
  optional int64 java_to_native_start_timestamp_ms = 5;

  // OPTIONAL: If this field is present, join documents based on a nested
  // SearchSpec.
  optional JoinSpecProto join_spec = 7;

  // Features enabled in this search spec.
  repeated string enabled_features = 8;

  // OPTIONAL: Whether to use the read-only implementation of
  // IcingSearchEngine::Search.
  // The read-only version enables multiple queries to be performed concurrently
  // as it only acquires the read lock at IcingSearchEngine's level.
  // Finer-grained locks are implemented around code paths that write changes to
  // Icing during Search.
  // Defaults to true when unset.
  optional bool use_read_only_search = 9 [default = true];

  // TODO(b/294266822): Handle multiple property filter lists for same schema
  // type.
  // How to specify a subset of properties to be searched. If no type property
  // filter has been specified for a schema type (no TypePropertyMask for the
  // given schema type), then *all* properties of that schema type will be
  // searched. If an empty property filter is specified for a given schema type
  // (TypePropertyMask for the given schema type has empty paths field), no
  // properties of that schema type will be searched.
  repeated TypePropertyMask type_property_filters = 10;

  // The vectors to be used in embedding queries.
  repeated PropertyProto.VectorProto embedding_query_vectors = 11;

  // Holder message for the embedding metric enum. Nesting the enum here keeps
  // value names such as UNKNOWN scoped to EmbeddingQueryMetricType.
  message EmbeddingQueryMetricType {
    enum Code {
      UNKNOWN = 0;
      COSINE = 1;
      DOT_PRODUCT = 2;
      EUCLIDEAN = 3;
    }
  }

  // The default metric type used to calculate the scores for embedding
  // queries.
  optional EmbeddingQueryMetricType.Code embedding_query_metric_type = 12;

  // Tag 6 is reserved and must not be assigned to a new field.
  reserved 6;
}

// Client-supplied specifications on what to include/how to format the search
// results.
// Next tag: 10
message ResultSpecProto {
  // The results will be returned in pages, and num_per_page specifies the
  // number of documents in one page. Defaults to 10 when unset.
  optional int32 num_per_page = 1 [default = 10];

  // Whether to collect and return debug_info in the SearchResultProto.
  optional bool debug_info = 2;

  // How to provide snippeting information in the SearchResultProto.
  // Next tag: 5
  // NOTE(review): the highest tag used below is 3; confirm whether tag 4 was
  // skipped on purpose or whether this should read "Next tag: 4".
  message SnippetSpecProto {
    // Only the first num_to_snippet documents will have snippet information
    // provided. If set to 0, snippeting is disabled.
    optional int32 num_to_snippet = 1;

    // Only the first num_matches_per_property matches for a single section will
    // have snippet information provided. If set to 0, snippeting is disabled.
    optional int32 num_matches_per_property = 2;

    // How large of a window to provide. Windows start at
    // max_window_utf32_length / 2 bytes before the middle of the matching token
    // and end at max_window_utf32_length / 2 bytes after the middle of the
    // matching token. Windowing respects token boundaries. Therefore, the
    // returned window may be smaller than requested. Setting
    // max_window_utf32_length to 0 will disable windowing information. If
    // matches enabled is also set to false, then snippeting is disabled.
    //
    // Ex. max_window_utf32_length = 16. "foo bar baz bat rat" with a query of
    // "baz" will return a window of "bar baz bat" which is only 11 bytes long.
    //
    // NOTE(review): the field name implies UTF-32 characters, yet this text
    // measures the window in bytes; confirm which unit applies. "matches
    // enabled" does not correspond to any field in this message; confirm what
    // it refers to.
    optional int32 max_window_utf32_length = 3;
  }
  optional SnippetSpecProto snippet_spec = 3;

  // How to specify a subset of properties to retrieve. If no type property mask
  // has been specified for a schema type, then *all* properties of that schema
  // type will be retrieved.
  repeated TypePropertyMask type_property_masks = 4;

  // Groupings of namespaces and schema types whose total returned results
  // should be limited together.
  // Next tag: 3
  message ResultGrouping {
    // Grouping of namespace and schema type.
    // Next tag: 3
    message Entry {
      // The namespace in this grouping that should be returned.
      // This field should be empty if ResultGroupingType is SCHEMA_TYPE.
      optional string namespace = 1;

      // The schema in this grouping that should be returned.
      // This field should be empty if ResultGroupingType is NAMESPACE.
      optional string schema = 2;
    }

    // Identifier for namespace and schema type pairs.
    repeated Entry entry_groupings = 1;

    // The maximum number of results in this grouping that should be returned.
    optional int32 max_results = 2;
  }

  // How to limit the number of results returned per set of namespaces or schema
  // type. If results match for a namespace or schema type that is not present
  // in any result groupings, then those results will be returned without limit.
  //
  // Non-existent namespaces and/or schema types will be ignored.
  //
  // Example: Suppose that there are four namespaces each with three results
  // matching the query for "foo". Without any result groupings, Icing would
  // return the following results:
  // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns0doc2", "ns3doc1",
  //  "ns2doc1", "ns3doc2", "ns2doc0", "ns1doc1", "ns2doc2", "ns1doc2"].
  //
  // Now suppose the following result groupings are supplied and the
  // ResultGroupingType is set to NAMESPACE:
  // [ { [ {"namespace0"} ], 2 }, { [ {"namespace1"}, {"namespace2"} ], 2} ]
  //
  // The following results will be returned:
  // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns3doc1", "ns2doc1",
  //  "ns3doc2"].
  repeated ResultGrouping result_groupings = 5;

  // The threshold of total bytes of all documents to cutoff, in order to limit
  // # of bytes in a single page.
  // Note that it doesn't guarantee the result # of bytes will be smaller, equal
  // to, or larger than the threshold. Instead, it is just a threshold to
  // cutoff, and only guarantees total bytes of search results will exceed the
  // threshold by less than the size of the final search result.
  optional int32 num_total_bytes_per_page_threshold = 6
      [default = 2147483647];  // INT_MAX

  // The value by which the search results will get grouped by.
  // Can get grouped by schema type, namespace (default), or by namespace and
  // schema type.
  // NOTE(review): result_group_type declares no explicit default, so an unset
  // field reads as NONE (the first enum value) in proto2; confirm what
  // "namespace (default)" is meant to describe.
  enum ResultGroupingType {
    NONE = 0;
    SCHEMA_TYPE = 1;
    NAMESPACE = 2;
    NAMESPACE_AND_SCHEMA_TYPE = 3;
  }
  optional ResultGroupingType result_group_type = 7;

  // The max # of child documents that will be attached and returned in the
  // result for each parent. It is only used for the join API.
  optional int32 max_joined_children_per_parent_to_return = 8;

  // The max # of results being scored and ranked.
  // Running time of ScoringProcessor and Ranker is O(num_to_score) according to
  // results of //icing/scoring:score-and-rank_benchmark. Note that
  // the process includes scoring, building a heap, and popping results from the
  // heap.
  //
  // 30000 results can be scored and ranked within 3 ms on a Pixel 3 XL
  // according to results of
  // //icing/scoring:score-and-rank_benchmark, so set it as the
  // default value.
  optional int32 num_to_score = 9 [default = 30000];
}

// The representation of a single match within a DocumentProto property.
//
// Example: A document whose content is "Necesito comprar comida mañana." and a
// query for "mana" with window=15.
// Next tag: 12
message SnippetMatchProto {
  // The index of the byte in the string at which the match begins and the
  // length in bytes of the match.
  //
  // For the example above, the values of these fields would be
  // exact_match_byte_position=24, exact_match_byte_length=7 "mañana"
  // ('ñ' occupies two bytes in UTF-8).
  optional int32 exact_match_byte_position = 2;
  optional int32 exact_match_byte_length = 3;

  // The length in bytes of the subterm that matches the query. The beginning of
  // the submatch is the same as exact_match_byte_position.
  //
  // For the example above, the value of this field would be 5. With
  // exact_match_byte_position=24 above, it would produce the substring "maña".
  optional int32 submatch_byte_length = 10;

  // The index of the UTF-16 code unit in the string at which the match begins
  // and the length in UTF-16 code units of the match. This is for use with
  // UTF-16 encoded strings like java.lang.String.
  //
  // For the example above, the values of these fields would be
  // exact_match_utf16_position=24, exact_match_utf16_length=6 "mañana"
  optional int32 exact_match_utf16_position = 6;
  optional int32 exact_match_utf16_length = 7;

  // The length in UTF-16 code units of the subterm that matches the query. The
  // beginning of the submatch is the same as exact_match_utf16_position. This
  // is for use with UTF-16 encoded strings like java.lang.String.
  //
  // For the example above, the value of this field would be 4. With
  // exact_match_utf16_position=24 above, it would produce the substring "maña".
  optional int32 submatch_utf16_length = 11;

  // The index of the byte in the string at which the suggested snippet window
  // begins and the length in bytes of the window.
  //
  // For the example above, the values of these fields would be
  // window_byte_position=17, window_byte_length=15 "comida mañana."
  optional int32 window_byte_position = 4;
  optional int32 window_byte_length = 5;

  // The index of the UTF-16 code unit in the string at which the suggested
  // snippet window begins and the length in UTF-16 code units of the window.
  // This is for use with UTF-16 encoded strings like java.lang.String.
  //
  // For the example above, the values of these fields would be
  // window_utf16_position=17, window_utf16_length=14 "comida mañana."
  optional int32 window_utf16_position = 8;
  optional int32 window_utf16_length = 9;

  // Tag 1 is reserved and must not be assigned to a new field.
  reserved 1;
}

// A Proto representing all snippets for a single DocumentProto.
// Next tag: 2
message SnippetProto {
  // A pair of property name and all snippet matches that correspond to the
  // property values in the corresponding DocumentProto.
  // Next tag: 3
  message EntryProto {
    // A property path indicating which property in the DocumentProto these
    // snippets correspond to. Property paths will contain 1) property names,
    // 2) the property separator character '.' used to represent nested property
    // and 3) indices surrounded by brackets to represent a specific value in
    // that property.
    //
    // Example properties:
    // - 'body'               : the first and only string value of a top-level
    //                          property called 'body'.
    // - 'sender.name'        : the first and only string value of a property
    //                          called 'name' that is a subproperty of a
    //                          property called 'sender'.
    // - 'bcc[1].emailaddress': the first and only string value of a property
    //                          called 'emailaddress' that is a subproperty of
    //                          the second document value of a property called
    //                          'bcc'.
    // - 'attachments[0]'     : the first (of more than one) string value of a
    //                          property called 'attachments'.
    // NOTE: If there is only a single value for a property (like
    // 'sender.name'), then no value index will be added to the property path.
    // An index of [0] is implied. If there is more than one value for a
    // property, then the value index will be added to the property path (like
    // 'attachments[0]').
    optional string property_name = 1;

    // The matches found within the property value identified by property_name.
    repeated SnippetMatchProto snippet_matches = 2;
  }
  // Properties that do not appear in entries do not contain any matches.
  repeated EntryProto entries = 1;
}

// Icing lib-supplied results of a search.
// Next tag: 6
message SearchResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   INVALID_ARGUMENT
  //   ABORTED
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED.
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // The Results that matched the query. Empty if there was an error.
  // Next tag: 6
  message ResultProto {
    // Document that matches the SearchSpecProto.
    optional DocumentProto document = 1;

    // Snippeting information for the document if requested in the
    // ResultSpecProto. A default instance, if not requested.
    optional SnippetProto snippet = 2;

    // The score that the document was ranked by. The meaning of this score is
    // determined by ScoringSpecProto.rank_by.
    optional double score = 3;

    // The child documents that were joined to a parent document.
    repeated ResultProto joined_results = 4;

    // Extra helpful scores as specified by
    // ScoringSpecProto.additional_advanced_scoring_expressions. The scores will
    // not be used for ranking.
    repeated double additional_scores = 5 [packed = true];
  }
  repeated ResultProto results = 2;

  // Various debug fields. Not populated if ResultSpecProto.debug_info = false.
  // Next tag: 4
  message DebugInfoProto {
    // The internal representation of the actual query string that was executed.
    // This may be different from the SearchSpecProto.query if the original
    // query was malformed.
    optional string executed_query = 3;

    // Tags 1 and 2 are reserved and must not be assigned to new fields.
    reserved 1, 2;
  }
  optional DebugInfoProto debug_info = 3;

  // An opaque token used internally to keep track of information needed for
  // pagination. A valid pagination token is required to fetch other pages of
  // results. A value 0 means that there're no more pages.
  // LINT.IfChange(next_page_token)
  optional uint64 next_page_token = 4;
  // LINT.ThenChange(//depot/google3/icing/result/result-state-manager.h:kInvalidNextPageToken)

  // Stats for query execution performance.
  optional QueryStatsProto query_stats = 5;
}

// Pairs a schema type with a list of property paths. Used in this file both to
// select which properties are retrieved (type_property_masks fields) and which
// properties are searched (type_property_filters fields).
// Next tag: 3
message TypePropertyMask {
  // The schema type to which these property masks should apply.
  // If the schema type is the wildcard ("*"), then the type property masks
  // will apply to all results of types that don't have their own, specific
  // type property mask entry.
  optional string schema_type = 1;

  // The property masks specifying the property to be retrieved. Property
  // masks must be composed only of property names and property separators (the
  // '.' character). For example, "subject", "recipients.name". Specifying no
  // property masks will result in *no* properties being retrieved.
  repeated string paths = 2;
}

// Result specification that carries only the property masks to apply to
// retrieved documents.
// Next tag: 2
message GetResultSpecProto {
  // How to specify a subset of properties to retrieve. If no type property mask
  // has been specified for a schema type, then *all* properties of that schema
  // type will be retrieved.
  repeated TypePropertyMask type_property_masks = 1;
}

// Client-supplied specifications on which query suggestions to return.
// Next tag: 8
message SuggestionSpecProto {
  // REQUIRED: The "raw" prefix string that users may type. For example, "f"
  // will search for suggested queries that start with "f" like "foo", "fool".
  optional string prefix = 1;

  // OPTIONAL: Only search for suggestions that are under the specified
  // namespaces. If unset, the suggestion will search over all namespaces. Note
  // that this applies to the entire 'prefix'. To issue different suggestions
  // for different namespaces, separate RunSuggestion()'s will need to be made.
  repeated string namespace_filters = 2;

  // REQUIRED: The number of suggestions to be returned.
  optional int32 num_to_return = 3;

  // Indicates how the suggestion terms should be scored and ranked.
  optional SuggestionScoringSpecProto scoring_spec = 4;

  // OPTIONAL: Only search for suggestions that are under the specified
  // DocumentUris. If unset, the suggestion will search over all Documents.
  //
  // Every namespace in the given NamespaceDocumentUriGroups should match the
  // namespace_filters, i.e. it appears in namespace_filters or
  // namespace_filters is empty.
  //
  // No given NamespaceDocumentUriGroup may be empty. Please use
  // namespace_filters to exclude a namespace.
  // NOTE(review): "empty" presumably means an empty document_uris list;
  // confirm.
  //
  // Note that this applies to the entire 'prefix'. To issue different
  // suggestions for different DocumentIds, separate RunSuggestion()'s will need
  // to be made.
  repeated NamespaceDocumentUriGroup document_uri_filters = 5;

  // OPTIONAL: Only search for suggestions that are under the specified schemas.
  // If unset, the suggestion will search over all schema types. Note that this
  // applies to the entire 'prefix'. To issue different suggestions for
  // different schema types, separate RunSuggestion()'s will need to be made.
  // Also note that schema filters will not be expanded for polymorphism.
  repeated string schema_type_filters = 6;

  // OPTIONAL: Only search for suggestions that are under the specified types
  // and properties.
  //
  // If unset, the suggestion will search over all types.
  // If the TypePropertyMask.paths is unset, the suggestion will search over all
  // properties under the TypePropertyMask.schema_type.
  //
  // Note that this applies to the entire 'prefix'. To issue different
  // suggestions for different types, separate RunSuggestion()'s will need to be
  // made.
  repeated TypePropertyMask type_property_filters = 7;
}

// A group that holds namespace and document_uris under it.
// Next tag: 3
message NamespaceDocumentUriGroup {
  // The namespace that the listed document URIs belong to. Note the trailing
  // underscore in the field name.
  optional string namespace_ = 1;

  // The document URIs within that namespace.
  repeated string document_uris = 2;
}

// Icing lib-supplied response to a suggestion request.
// Next tag: 3
message SuggestionResponse {
  // A single suggested query.
  // Next tag: 2
  message Suggestion {
    // The suggested query string for client to search for.
    optional string query = 1;
  }

  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   INTERNAL
  //
  // See status.proto for more details.
  optional StatusProto status = 1;

  // The suggestions produced for the request.
  repeated Suggestion suggestions = 2;
}

// Specification for a left outer join.
//
// Next tag: 7
// NOTE(review): the highest tag used below is 5; confirm whether tag 6 was
// skipped on purpose or whether this should read "Next tag: 6".
message JoinSpecProto {
  // Collection of several specs that will be used for searching and joining
  // child documents.
  //
  // Next tag: 4
  message NestedSpecProto {
    // A nested SearchSpec that will be used to retrieve child documents. If you
    // are only looking to join on a specific type of documents, you could set a
    // schema filter in this SearchSpec. This includes the nested search query.
    // See SearchSpecProto.
    optional SearchSpecProto search_spec = 1;

    // A nested ScoringSpec that will be used to score child documents.
    // See ScoringSpecProto.
    optional ScoringSpecProto scoring_spec = 2;

    // A nested ResultSpec that will be used to format child documents in the
    // result joined documents, e.g. snippeting, projection.
    // See ResultSpecProto.
    optional ResultSpecProto result_spec = 3;
  }
  optional NestedSpecProto nested_spec = 1;

  // The equivalent of a primary key in SQL. This is an expression that will be
  // used to match child documents from the nested search to this document. One
  // such expression is qualifiedId(). When used, it means the contents of
  // child_property_expression property in the child documents must be equal to
  // the qualified id.
  // TODO(b/256022027) allow for parent_property_expression to be any property
  // of the parent document.
  optional string parent_property_expression = 2;

  // The equivalent of a foreign key in SQL. This defines an equality constraint
  // between a property in a child document and a property in the parent
  // document. For example, if you want to join child documents which have an
  // entityId property containing a fully qualified document id,
  // child_property_expression can be set to "entityId".
  // TODO(b/256022027) figure out how to allow this to refer to documents
  // outside of same pkg+db+ns.
  optional string child_property_expression = 3;

  // The max number of child documents to join to a parent document.
  // DEPRECATED: use ResultSpecProto.max_joined_children_per_parent_to_return to
  // control the number of children that are returned. There is no supported
  // control for the number of children being scored at this time.
  optional int32 max_joined_child_count = 4 [deprecated = true];

  // The strategy by which to score the aggregation of child documents. For
  // example, you might want to know which entity document has the most actions
  // taken on it. If JOIN_AGGREGATE_SCORE is used in the base SearchSpecProto,
  // the COUNT value will rank entity documents based on the number of child
  // documents.
  message AggregationScoringStrategy {
    enum Code {
      // No aggregation strategy for child documents and use parent document
      // score.
      NONE = 0;
      COUNT = 1;
      MIN = 2;
      AVG = 3;
      MAX = 4;
      SUM = 5;
    }
  }
  optional AggregationScoringStrategy.Code aggregation_scoring_strategy = 5;
}
