// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package icing.lib;

import "icing/proto/document.proto";
import "icing/proto/logging.proto";
import "icing/proto/scoring.proto";
import "icing/proto/status.proto";
import "icing/proto/term.proto";

// Code-generation options for the Java and Objective-C targets.
option java_multiple_files = true;
option java_package = "com.google.android.icing.proto";
option objc_class_prefix = "ICNG";

// Client-supplied specification of which documents a search should retrieve.
// Next tag: 10
message SearchSpecProto {
  // REQUIRED: The "raw" query string as a user might type it. For example,
  // "cat" searches for documents that contain the term cat.
  optional string query = 1;

  // Controls how the query terms are matched against terms in the index.
  //
  // TermMatchType.Code=UNKNOWN
  // Should never be set on purpose and may lead to undefined behavior. It is
  // used for backwards compatibility reasons.
  //
  // TermMatchType.Code=EXACT_ONLY
  // Query terms only match exact tokens in the index.
  // Ex. A query term "foo" only matches the indexed token "foo", and not
  // "foot" or "football".
  //
  // TermMatchType.Code=PREFIX
  // Query terms match indexed tokens when the query term is a prefix of the
  // token.
  // Ex. A query term "foo" matches indexed tokens like "foo", "foot", and
  // "football".
  optional TermMatchType.Code term_match_type = 2;

  // OPTIONAL: Restricts the search to documents in the listed namespaces. If
  // unset, the query searches over all namespaces. The filter applies to the
  // entire 'query'; issuing different queries for different namespaces
  // requires separate Search() calls.
  repeated string namespace_filters = 3;

  // OPTIONAL: Restricts the search to documents of the listed schema types.
  // If unset, the query searches over all schema types. The filter applies to
  // the entire 'query'; issuing different queries for different schema types
  // requires separate Search() calls.
  repeated string schema_type_filters = 4;

  // Timestamp taken just before the proto is sent across the JNI boundary from
  // the Java side to the native side.
  optional int64 java_to_native_start_timestamp_ms = 5;

  // Wrapper message that scopes the query-parser selector enum.
  message SearchType {
    enum Code {
      UNDEFINED = 0;
      ICING_RAW_QUERY = 1;
      EXPERIMENTAL_ICING_ADVANCED_QUERY = 2;
    }
  }
  // Determines which type of query parsing Icing uses to fulfill the query.
  // ICING_RAW_QUERY is the current query language as released, which supports
  // basic ands, ors and nots as well as grouping and property restricts.
  // EXPERIMENTAL_ICING_ADVANCED_QUERY is a superset of ICING_RAW_QUERY that
  // will also support the use of functions defined by Icing Lib.
  // This field is only temporary. When fully complete, all queries will be
  // parsed by EXPERIMENTAL_ICING_ADVANCED_QUERY. This field only exists to
  // enable testing.
  //
  // The default is the released parser: callers that leave this field unset
  // must not be opted into the experimental parser implicitly.
  // TODO(b/208654892) Remove this field once EXPERIMENTAL_ICING_ADVANCED_QUERY
  // is fully supported.
  optional SearchType.Code search_type = 6 [default = ICING_RAW_QUERY];

  // OPTIONAL: If this field is present, join documents based on a nested
  // SearchSpec.
  optional JoinSpecProto join_spec = 7;

  // Features enabled in this search spec.
  repeated string enabled_features = 8;

  // OPTIONAL: Whether to use the read-only implementation of
  // IcingSearchEngine::Search.
  // The read-only version enables multiple queries to be performed concurrently
  // as it only acquires the read lock at IcingSearchEngine's level.
  // Finer-grained locks are implemented around code paths that write changes to
  // Icing during Search.
  optional bool use_read_only_search = 9 [default = true];
}

// Client-supplied specification of what the search results should include and
// how they should be formatted.
// Next tag: 9
message ResultSpecProto {
  // Results are returned in pages; num_per_page is the number of documents in
  // one page.
  optional int32 num_per_page = 1 [default = 10];

  // Whether to collect debug_info and return it in the SearchResultProto.
  optional bool debug_info = 2;

  // How to provide snippeting information in the SearchResultProto.
  // Next tag: 5
  // NOTE(review): the highest tag declared in this message is 3; confirm
  // whether tag 4 was used and removed before reusing it.
  message SnippetSpecProto {
    // Snippet information is provided only for the first num_to_snippet
    // documents. If set to 0, snippeting is disabled.
    optional int32 num_to_snippet = 1;

    // Snippet information is provided only for the first
    // num_matches_per_property matches for a single section. If set to 0,
    // snippeting is disabled.
    optional int32 num_matches_per_property = 2;

    // How large of a window to provide. Windows start at
    // max_window_utf32_length / 2 bytes before the middle of the matching token
    // and end at max_window_utf32_length / 2 bytes after the middle of the
    // matching token. Windowing respects token boundaries. Therefore, the
    // returned window may be smaller than requested. Setting
    // max_window_utf32_length to 0 will disable windowing information. If
    // matches enabled is also set to false, then snippeting is disabled. Ex.
    // max_window_utf32_length = 16. "foo bar baz bat rat" with a query of "baz"
    // will return a window of "bar baz bat" which is only 11 bytes long.
    // NOTE(review): the field name suggests the unit is UTF-32 characters
    // rather than bytes - confirm and align the wording above.
    optional int32 max_window_utf32_length = 3;
  }
  optional SnippetSpecProto snippet_spec = 3;

  // Selects a subset of properties to retrieve. If no type property mask has
  // been specified for a schema type, then *all* properties of that schema
  // type will be retrieved.
  repeated TypePropertyMask type_property_masks = 4;

  // A set of namespaces and schema types whose total number of returned
  // results should be limited together.
  // Next tag: 3
  message ResultGrouping {
    // One namespace / schema type member of a grouping.
    // Next tag: 3
    message Entry {
      // The namespace in this grouping that should be returned.
      // This field should be empty if ResultGroupingType is SCHEMA_TYPE.
      optional string namespace = 1;

      // The schema in this grouping that should be returned.
      // This field should be empty if ResultGroupingType is NAMESPACE.
      optional string schema = 2;
    }

    // Identifier for namespace and schema type pairs.
    repeated Entry entry_groupings = 1;

    // The maximum number of results in this grouping that should be returned.
    optional int32 max_results = 2;
  }

  // How to limit the number of results returned per set of namespaces or schema
  // types. If results match for a namespace or schema type that is not present
  // in any result grouping, then those results will be returned without limit.
  //
  // Non-existent namespaces and/or schema types will be ignored.
  //
  // Example : Suppose that there are four namespaces each with three results
  // matching the query for "foo". Without any result groupings, Icing would
  // return the following results:
  // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns0doc2", "ns3doc1",
  //  "ns2doc1", "ns3doc2", "ns2doc0", "ns1doc1", "ns2doc2", "ns1doc2"].
  //
  // Suppose the ResultGroupingType is set to NAMESPACE and the following
  // result groupings are supplied:
  // [ { [ {"namespace0"} ], 2 }, { [ {"namespace1"}, {"namespace2"} ], 2} ]
  //
  // The following results will be returned:
  // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns3doc1", "ns2doc1",
  //  "ns3doc2"].
  repeated ResultGrouping result_groupings = 5;

  // The threshold of total bytes of all documents to cutoff, in order to limit
  // # of bytes in a single page.
  // Note that it doesn't guarantee the result # of bytes will be smaller, equal
  // to, or larger than the threshold. Instead, it is just a threshold to
  // cutoff, and only guarantees total bytes of search results will exceed the
  // threshold by less than the size of the final search result.
  optional int32 num_total_bytes_per_page_threshold = 6
      [default = 2147483647];  // INT_MAX

  // What the search results are grouped by: schema type, namespace, or
  // namespace and schema type.
  // NOTE(review): this comment previously called namespace the default, but
  // result_group_type declares no [default], so an unset field reads as NONE
  // (the first enum value). Confirm the intended default.
  enum ResultGroupingType {
    NONE = 0;
    SCHEMA_TYPE = 1;
    NAMESPACE = 2;
    NAMESPACE_AND_SCHEMA_TYPE = 3;
  }
  optional ResultGroupingType result_group_type = 7;

  // The max # of child documents that will be attached and returned in the
  // result for each parent. It is only used for the join API.
  optional int32 max_joined_children_per_parent_to_return = 8;
}

// Describes a single match within a DocumentProto property.
//
// Example : A document whose content is "Necesito comprar comida mañana." and a
// query for "mana" with window=15
// Next tag: 12
message SnippetMatchProto {
  // Tag 1 is reserved and must not be reused.
  reserved 1;

  // Byte index in the string at which the match begins, and the length of the
  // match in bytes.
  //
  // For the example above, the values of these fields would be
  // exact_match_byte_position=24, exact_match_byte_length=7 "mañana"
  optional int32 exact_match_byte_position = 2;
  optional int32 exact_match_byte_length = 3;

  // Length in bytes of the subterm that matches the query. The submatch begins
  // at the same place as the exact match, i.e. exact_match_byte_position.
  //
  // For the example above, the value of this field would be 5. With
  // exact_match_byte_position=24 above, it would produce the substring "maña"
  optional int32 submatch_byte_length = 10;

  // Index of the UTF-16 code unit in the string at which the match begins, and
  // the length of the match in UTF-16 code units. This is for use with UTF-16
  // encoded strings like Java.lang.String.
  //
  // For the example above, the values of these fields would be
  // exact_match_utf16_position=24, exact_match_utf16_length=6 "mañana"
  optional int32 exact_match_utf16_position = 6;
  optional int32 exact_match_utf16_length = 7;

  // Length in UTF-16 code units of the subterm that matches the query. The
  // submatch begins at the same place as the exact match, i.e.
  // exact_match_utf16_position. This is for use with UTF-16 encoded strings
  // like Java.lang.String.
  //
  // For the example above, the value of this field would be 4. With
  // exact_match_utf16_position=24 above, it would produce the substring "maña"
  optional int32 submatch_utf16_length = 11;

  // Byte index in the string at which the suggested snippet window begins, and
  // the length of the window in bytes.
  //
  // For the example above, the values of these fields would be
  // window_byte_position=17, window_byte_length=15 "comida mañana."
  optional int32 window_byte_position = 4;
  optional int32 window_byte_length = 5;

  // Index of the UTF-16 code unit in the string at which the suggested snippet
  // window begins, and the length of the window in UTF-16 code units. This is
  // for use with UTF-16 encoded strings like Java.lang.String.
  //
  // For the example above, the values of these fields would be
  // window_utf16_position=17, window_utf16_length=14 "comida mañana."
  optional int32 window_utf16_position = 8;
  optional int32 window_utf16_length = 9;
}

// Holds all snippets for a single DocumentProto.
// Next tag: 2
message SnippetProto {
  // Pairs a property name with all snippet matches that correspond to the
  // property values in the corresponding DocumentProto.
  // Next tag: 3
  message EntryProto {
    // A property path indicating which property in the DocumentProto these
    // snippets correspond to. Property paths will contain 1) property names,
    // 2) the property separator character '.' used to represent nested
    // properties and 3) indices surrounded by brackets to represent a specific
    // value in that property.
    //
    // Example properties:
    // - 'body'               : the first and only string value of a top-level
    //                          property called 'body'.
    // - 'sender.name'        : the first and only string value of a property
    //                          called 'name' that is a subproperty of a
    //                          property called 'sender'.
    // - 'bcc[1].emailaddress': the first and only string value of a property
    //                          called 'emailaddress' that is a subproperty of
    //                          the second document value of a property called
    //                          'bcc'.
    // - 'attachments[0]'     : the first (of more than one) string value of a
    //                          property called 'attachments'.
    // NOTE: If there is only a single value for a property (like
    // 'sender.name'), then no value index will be added to the property path.
    // An index of [0] is implied. If there is more than one value for a
    // property, then the value index will be added to the property path (like
    // 'attachments[0]').
    optional string property_name = 1;

    // The matches found for property_name.
    repeated SnippetMatchProto snippet_matches = 2;
  }
  // Properties that do not appear in entries do not contain any matches.
  repeated EntryProto entries = 1;
}

// Results of a search, supplied by Icing lib.
// Next tag: 6
message SearchResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   INVALID_ARGUMENT
  //   ABORTED
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED.
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // A result that matched the query.
  // Next tag: 5
  message ResultProto {
    // Document that matches the SearchSpecProto.
    optional DocumentProto document = 1;

    // Snippeting information for the document, if it was requested in the
    // ResultSpecProto. A default instance if it was not requested.
    optional SnippetProto snippet = 2;

    // The score that the document was ranked by. The meaning of this score is
    // determined by ScoringSpecProto.rank_by.
    optional double score = 3;

    // The child documents that were joined to a parent document.
    repeated ResultProto joined_results = 4;
  }
  // The results that matched the query. Empty if there was an error.
  repeated ResultProto results = 2;

  // Various debug fields. Not populated if ResultSpecProto.debug_info = false.
  // Next tag: 4
  message DebugInfoProto {
    // Tags 1 and 2 are reserved and must not be reused.
    reserved 1, 2;

    // The internal representation of the actual query string that was executed.
    // This may be different from the SearchSpecProto.query if the original
    // query was malformed.
    optional string executed_query = 3;
  }
  optional DebugInfoProto debug_info = 3;

  // An opaque token used internally to keep track of information needed for
  // pagination. A valid pagination token is required to fetch other pages of
  // results. A value of 0 means that there are no more pages.
  // LINT.IfChange(next_page_token)
  optional uint64 next_page_token = 4;
  // LINT.ThenChange(//depot/google3/icing/result/result-state-manager.h:kInvalidNextPageToken)

  // Stats for query execution performance.
  optional QueryStatsProto query_stats = 5;
}

// Selects which properties of one schema type should be retrieved.
// Next tag: 3
message TypePropertyMask {
  // The schema type to which these property masks should apply.
  // If the schema type is the wildcard ("*"), then the type property masks
  // apply to all results of types that don't have their own, specific type
  // property mask entry.
  optional string schema_type = 1;

  // The property masks specifying the properties to be retrieved. Property
  // masks must be composed only of property names and property separators
  // (the '.' character). For example, "subject", "recipients.name".
  // Specifying no property masks will result in *no* properties being
  // retrieved.
  repeated string paths = 2;
}

// Next tag: 2
message GetResultSpecProto {
  // Selects a subset of properties to retrieve. If no type property mask has
  // been specified for a schema type, then *all* properties of that schema
  // type will be retrieved.
  repeated TypePropertyMask type_property_masks = 1;
}

// Next tag: 8
message SuggestionSpecProto {
  // REQUIRED: The "raw" prefix string as a user might type it. For example,
  // "f" searches for suggested queries that start with "f", like "foo" or
  // "fool".
  optional string prefix = 1;

  // OPTIONAL: Only search for suggestions under the specified namespaces.
  // If unset, the suggestion will search over all namespaces. Note that this
  // applies to the entire 'prefix'. To issue different suggestions for
  // different namespaces, separate RunSuggestion()'s will need to be made.
  repeated string namespace_filters = 2;

  // REQUIRED: The number of suggestions to be returned.
  optional int32 num_to_return = 3;

  // Indicates how the suggestion terms should be scored and ranked.
  optional SuggestionScoringSpecProto scoring_spec = 4;

  // OPTIONAL: Only search for suggestions under the specified DocumentUris.
  // If unset, the suggestion will search over all Documents.
  //
  // Every namespace in the given NamespaceDocumentUriGroup should match the
  // namespace_filters, i.e. it appears in namespace_filters or
  // namespace_filters is empty.
  //
  // All given NamespaceDocumentUriGroup cannot have empty. Please use the
  // namespace_filter to exclude a namespace.
  //
  // Note that this applies to the entire 'prefix'. To issue different
  // suggestions for different DocumentIds, separate RunSuggestion()'s will need
  // to be made.
  repeated NamespaceDocumentUriGroup document_uri_filters = 5;

  // OPTIONAL: Only search for suggestions under the specified schemas.
  // If unset, the suggestion will search over all schema types. Note that this
  // applies to the entire 'prefix'. To issue different suggestions for
  // different schema types, separate RunSuggestion()'s will need to be made.
  repeated string schema_type_filters = 6;

  // OPTIONAL: Only search for suggestions under the specified types and
  // properties.
  //
  // If unset, the suggestion will search over all types.
  // If the TypePropertyMask.paths is unset, the suggestion will search over all
  // properties under the TypePropertyMask.schema_type.
  //
  // Note that this applies to the entire 'prefix'. To issue different
  // suggestions for different types, separate RunSuggestion()'s will need to be
  // made.
  repeated TypePropertyMask type_property_filters = 7;
}

// Groups a namespace with the document URIs under it.
message NamespaceDocumentUriGroup {
  // The namespace that document_uris belong to.
  optional string namespace_ = 1;
  // The document URIs under namespace_.
  repeated string document_uris = 2;
}

// Next tag: 3
message SuggestionResponse {
  // A single suggested query.
  message Suggestion {
    // The suggested query string for the client to search for.
    optional string query = 1;
  }

  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   INTERNAL
  //
  // See status.proto for more details.
  optional StatusProto status = 1;

  // The suggestions returned for the request.
  repeated Suggestion suggestions = 2;
}

// Specification for a left outer join.
//
// Next tag: 7
// NOTE(review): the highest tag declared in this message is 5; confirm whether
// tag 6 was used and removed before reusing it.
message JoinSpecProto {
  // The specs that are used together for searching and joining child
  // documents.
  //
  // Next tag: 4
  message NestedSpecProto {
    // A nested SearchSpec that will be used to retrieve child documents. If you
    // are only looking to join on documents of a specific type, you could set
    // a schema filter in this SearchSpec. This includes the nested search
    // query. See SearchSpecProto.
    optional SearchSpecProto search_spec = 1;

    // A nested ScoringSpec that will be used to score child documents.
    // See ScoringSpecProto.
    optional ScoringSpecProto scoring_spec = 2;

    // A nested ResultSpec that will be used to format child documents in the
    // result joined documents, e.g. snippeting, projection.
    // See ResultSpecProto.
    optional ResultSpecProto result_spec = 3;
  }
  optional NestedSpecProto nested_spec = 1;

  // The equivalent of a primary key in SQL. This is an expression that will be
  // used to match child documents from the nested search to this document. One
  // such expression is qualifiedId(). When used, it means the contents of
  // child_property_expression property in the child documents must be equal to
  // the qualified id.
  // TODO(b/256022027) allow for parent_property_expression to be any property
  // of the parent document.
  optional string parent_property_expression = 2;

  // The equivalent of a foreign key in SQL. This defines an equality constraint
  // between a property in a child document and a property in the parent
  // document. For example, if you want to join child documents which have an
  // entityId property containing a fully qualified document id,
  // child_property_expression can be set to "entityId".
  // TODO(b/256022027) figure out how to allow this to refer to documents
  // outside of same pkg+db+ns.
  optional string child_property_expression = 3;

  // The max number of child documents to join to a parent document.
  // DEPRECATED: use ResultSpecProto.max_joined_children_per_parent_to_return to
  // control the number of children that are returned. There is no supported
  // control for the number of children being scored at this time.
  optional int32 max_joined_child_count = 4 [deprecated = true];

  // The strategy by which to score the aggregation of child documents. For
  // example, you might want to know which entity document has the most actions
  // taken on it. If JOIN_AGGREGATE_SCORE is used in the base SearchSpecProto,
  // the COUNT value will rank entity documents based on the number of child
  // documents.
  message AggregationScoringStrategy {
    enum Code {
      // No aggregation strategy for child documents and use parent document
      // score.
      NONE = 0;
      COUNT = 1;
      MIN = 2;
      AVG = 3;
      MAX = 4;
      SUM = 5;
    }
  }
  optional AggregationScoringStrategy.Code aggregation_scoring_strategy = 5;
}
