// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package icing.lib;

import "icing/proto/document.proto";
import "icing/proto/logging.proto";
import "icing/proto/scoring.proto";
import "icing/proto/status.proto";
import "icing/proto/term.proto";

option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";

// Client-supplied specifications on what documents to retrieve.
// Next tag: 10
message SearchSpecProto {
  // REQUIRED: The "raw" query string that users may type. For example, "cat"
  // will search for documents with the term cat in it.
  optional string query = 1;

  // Indicates how the query terms should match terms in the index.
  //
  // TermMatchType.Code=UNKNOWN
  // Should never purposely be set and may lead to undefined behavior. This is
  // used for backwards compatibility reasons.
  //
  // TermMatchType.Code=EXACT_ONLY
  // Query terms will only match exact tokens in the index.
  // Ex. A query term "foo" will only match indexed token "foo", and not "foot"
  // or "football".
  //
  // TermMatchType.Code=PREFIX
  // Query terms will match indexed tokens when the query term is a prefix of
  // the token.
  // Ex. A query term "foo" will match indexed tokens like "foo", "foot", and
  // "football".
  optional TermMatchType.Code term_match_type = 2;

  // OPTIONAL: Only search for documents that have the specified namespaces. If
  // unset, the query will search over all namespaces. Note that this applies to
  // the entire 'query'. To issue different queries for different namespaces,
  // separate Search()'s will need to be made.
  repeated string namespace_filters = 3;

  // OPTIONAL: Only search for documents that have the specified schema types.
  // If unset, the query will search over all schema types. Note that this
  // applies to the entire 'query'. To issue different queries for different
  // schema types, separate Search()'s will need to be made.
  repeated string schema_type_filters = 4;

  // Timestamp taken just before sending proto across the JNI boundary from java
  // to native side.
  optional int64 java_to_native_start_timestamp_ms = 5;

  // Wrapper message that scopes the query-parsing mode enum.
  message SearchType {
    enum Code {
      UNDEFINED = 0;
      // The current query language as released; see search_type below.
      ICING_RAW_QUERY = 1;
      // Superset of ICING_RAW_QUERY that also supports functions defined by
      // Icing Lib; see search_type below.
      EXPERIMENTAL_ICING_ADVANCED_QUERY = 2;
    }
  }
  // This field determines which type of query parsing Icing will use to fulfill
  // the query.
  // ICING_RAW_QUERY is the current query language as released, which supports
  // basic ands, ors and nots as well as grouping and property restricts.
  // EXPERIMENTAL_ICING_ADVANCED_QUERY is a superset of ICING_RAW_QUERY that
  // will also support the use of functions defined by Icing Lib.
  // This field is only temporary. When fully complete, all queries will be
  // parsed by EXPERIMENTAL_ICING_ADVANCED_QUERY. This field only exists to
  // enable testing.
  // TODO(b/208654892) Remove this field once EXPERIMENTAL_ICING_ADVANCED_QUERY
  // is fully supported.
  optional SearchType.Code search_type = 6
      [default = EXPERIMENTAL_ICING_ADVANCED_QUERY];

  // OPTIONAL: If this field is present, join documents based on a nested
  // SearchSpec.
  optional JoinSpecProto join_spec = 7;

  // Features enabled in this search spec.
  repeated string enabled_features = 8;

  // OPTIONAL: Whether to use the read-only implementation of
  // IcingSearchEngine::Search.
  // The read-only version enables multiple queries to be performed concurrently
  // as it only acquires the read lock at IcingSearchEngine's level.
  // Finer-grained locks are implemented around code paths that write changes to
  // Icing during Search.
  optional bool use_read_only_search = 9 [default = true];
}

// Client-supplied specifications on what to include/how to format the search
// results.
// Next tag: 9
message ResultSpecProto {
  // The results will be returned in pages, and num_per_page specifies the
  // number of documents in one page.
  optional int32 num_per_page = 1 [default = 10];

  // Whether to collect and return debug_info in the SearchResultProto.
  optional bool debug_info = 2;

  // How to provide snippeting information in the SearchResultProto.
  // Next tag: 5
  // NOTE(review): only tags 1-3 are declared here and tag 4 is neither
  // declared nor reserved - confirm whether it should be reserved.
  message SnippetSpecProto {
    // Only the first num_to_snippet documents will have snippet information
    // provided. If set to 0, snippeting is disabled.
    optional int32 num_to_snippet = 1;

    // Only the first num_matches_per_property matches for a single section will
    // have snippet information provided. If set to 0, snippeting is disabled.
    optional int32 num_matches_per_property = 2;

    // How large of a window to provide. Windows start at
    // max_window_utf32_length / 2 bytes before the middle of the matching token
    // and end at max_window_utf32_length / 2 bytes after the middle of the
    // matching token. Windowing respects token boundaries. Therefore, the
    // returned window may be smaller than requested. Setting
    // max_window_utf32_length to 0 will disable windowing information. If
    // matches enabled is also set to false, then snippeting is disabled.
    //
    // Ex. max_window_utf32_length = 16. "foo bar baz bat rat" with a query of
    // "baz" will return a window of "bar baz bat" which is only 11 bytes long.
    //
    // NOTE(review): the field name measures the window in UTF-32 characters
    // while this description speaks of bytes; the two coincide for the ASCII
    // example above - confirm the intended unit.
    optional int32 max_window_utf32_length = 3;
  }
  optional SnippetSpecProto snippet_spec = 3;

  // How to specify a subset of properties to retrieve. If no type property mask
  // has been specified for a schema type, then *all* properties of that schema
  // type will be retrieved.
  repeated TypePropertyMask type_property_masks = 4;

  // Groupings of namespaces and schema types whose total returned results
  // should be limited together.
  // Next tag: 3
  message ResultGrouping {
    // Grouping of namespace and schema type.
    // Next tag: 3
    message Entry {
      // The namespace in this grouping that should be returned.
      // This field should be empty if ResultGroupingType is SCHEMA_TYPE.
      optional string namespace = 1;

      // The schema in this grouping that should be returned.
      // This field should be empty if ResultGroupingType is NAMESPACE.
      optional string schema = 2;
    }

    // Identifier for namespace and schema type pairs.
    repeated Entry entry_groupings = 1;

    // The maximum number of results in this grouping that should be returned.
    optional int32 max_results = 2;
  }

  // How to limit the number of results returned per set of namespaces or schema
  // type. If results match for a namespace or schema type that is not present
  // in any result groupings, then those results will be returned without limit.
  //
  // Non-existent namespaces and/or schema types will be ignored.
  //
  // Example : Suppose that there are four namespaces each with three results
  // matching the query for "foo". Without any result groupings, Icing would
  // return the following results:
  // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns0doc2", "ns3doc1",
  //  "ns2doc1", "ns3doc2", "ns2doc0", "ns1doc1", "ns2doc2", "ns1doc2"].
  //
  // Suppose the following result groupings are supplied and the
  // ResultGroupingType is set to NAMESPACE:
  // [ { [ {"namespace0"} ], 2 }, { [ {"namespace1"}, {"namespace2"} ], 2} ]
  //
  // The following results will be returned:
  // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns3doc1", "ns2doc1",
  //  "ns3doc2"].
  repeated ResultGrouping result_groupings = 5;

  // The threshold of total bytes of all documents to cutoff, in order to limit
  // # of bytes in a single page.
  // Note that it doesn't guarantee the result # of bytes will be smaller, equal
  // to, or larger than the threshold. Instead, it is just a threshold to
  // cutoff, and only guarantees total bytes of search results will exceed the
  // threshold by less than the size of the final search result.
  optional int32 num_total_bytes_per_page_threshold = 6
      [default = 2147483647];  // INT_MAX

  // The value by which the search results will get grouped by.
  // Can get grouped by schema type, namespace (default), or by namespace and
  // schema type.
  // NOTE(review): result_group_type declares no explicit [default], so the
  // proto2 default is the first value, NONE. The "(default)" remark above
  // presumably describes behavior implemented elsewhere - confirm.
  enum ResultGroupingType {
    NONE = 0;
    SCHEMA_TYPE = 1;
    NAMESPACE = 2;
    NAMESPACE_AND_SCHEMA_TYPE = 3;
  }
  optional ResultGroupingType result_group_type = 7;

  // The max # of child documents that will be attached and returned in the
  // result for each parent. It is only used for the join API.
  optional int32 max_joined_children_per_parent_to_return = 8;
}

// The representation of a single match within a DocumentProto property.
//
// Example : A document whose content is "Necesito comprar comida mañana." and a
// query for "mana" with window=15
// Next tag: 12
message SnippetMatchProto {
  // The index of the byte in the string at which the match begins and the
  // length in bytes of the match.
  //
  // For the example above, the values of these fields would be
  // exact_match_byte_position=24, exact_match_byte_length=7 "mañana"
  optional int32 exact_match_byte_position = 2;
  optional int32 exact_match_byte_length = 3;

  // The length in bytes of the subterm that matches the query. The beginning of
  // the submatch is the same as exact_match_byte_position.
  //
  // For the example above, the value of this field would be 5. With
  // exact_match_byte_position=24 above, it would produce the substring "maña"
  optional int32 submatch_byte_length = 10;

  // The index of the UTF-16 code unit in the string at which the match begins
  // and the length in UTF-16 code units of the match. This is for use with
  // UTF-16 encoded strings like java.lang.String.
  //
  // For the example above, the values of these fields would be
  // exact_match_utf16_position=24, exact_match_utf16_length=6 "mañana"
  optional int32 exact_match_utf16_position = 6;
  optional int32 exact_match_utf16_length = 7;

  // The length in UTF-16 code units of the subterm that matches the query. The
  // beginning of the submatch is the same as exact_match_utf16_position. This
  // is for use with UTF-16 encoded strings like java.lang.String.
  //
  // For the example above, the value of this field would be 4. With
  // exact_match_utf16_position=24 above, it would produce the substring "maña"
  optional int32 submatch_utf16_length = 11;

  // The index of the byte in the string at which the suggested snippet window
  // begins and the length in bytes of the window.
  //
  // For the example above, the values of these fields would be
  // window_byte_position=17, window_byte_length=15 "comida mañana."
  optional int32 window_byte_position = 4;
  optional int32 window_byte_length = 5;

  // The index of the UTF-16 code unit in the string at which the suggested
  // snippet window begins and the length in UTF-16 code units of the window.
  // This is for use with UTF-16 encoded strings like java.lang.String.
  //
  // For the example above, the values of these fields would be
  // window_utf16_position=17, window_utf16_length=14 "comida mañana."
  optional int32 window_utf16_position = 8;
  optional int32 window_utf16_length = 9;

  // Tag 1 is reserved and must not be reused.
  reserved 1;
}

// A Proto representing all snippets for a single DocumentProto.
// Next tag: 2
message SnippetProto {
  // A pair of property name and all snippet matches that correspond to the
  // property values in the corresponding DocumentProto.
  // Next tag: 3
  message EntryProto {
    // A property path indicating which property in the DocumentProto these
    // snippets correspond to. Property paths will contain 1) property names,
    // 2) the property separator character '.' used to represent nested property
    // and 3) indices surrounded by brackets to represent a specific value in
    // that property.
    //
    // Example properties:
    // - 'body'               : the first and only string value of a top-level
    //                          property called 'body'.
    // - 'sender.name'        : the first and only string value of a property
    //                          called 'name' that is a subproperty of a
    //                          property called 'sender'.
    // - 'bcc[1].emailaddress': the first and only string value of a property
    //                          called 'emailaddress' that is a subproperty of
    //                          the second document value of a property called
    //                          'bcc'.
    // - 'attachments[0]'     : the first (of more than one) string value of a
    //                          property called 'attachments'.
    // NOTE: If there is only a single value for a property (like
    // 'sender.name'), then no value index will be added to the property path.
    // An index of [0] is implied. If there is more than one value for a
    // property, then the value index will be added to the property path (like
    // 'attachments[0]').
    optional string property_name = 1;

    // All snippet matches found for the property identified by property_name.
    repeated SnippetMatchProto snippet_matches = 2;
  }
  // Properties that do not appear in entries do not contain any matches.
  repeated EntryProto entries = 1;
}

// Icing lib-supplied results from a search.
// Next tag: 6
message SearchResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   INVALID_ARGUMENT
  //   ABORTED
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED.
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // The Results that matched the query. Empty if there was an error.
  // Next tag: 5
  message ResultProto {
    // Document that matches the SearchSpecProto.
    optional DocumentProto document = 1;

    // Snippeting information for the document if requested in the
    // ResultSpecProto. A default instance, if not requested.
    optional SnippetProto snippet = 2;

    // The score that the document was ranked by. The meaning of this score is
    // determined by ScoringSpecProto.rank_by.
    optional double score = 3;

    // The child documents that were joined to a parent document.
    repeated ResultProto joined_results = 4;
  }
  repeated ResultProto results = 2;

  // Various debug fields. Not populated if ResultSpecProto.debug_info = false.
  // Next tag: 4
  message DebugInfoProto {
    // The internal representation of the actual query string that was executed.
    // This may be different from the SearchSpecProto.query if the original
    // query was malformed.
    optional string executed_query = 3;

    // Tags 1 and 2 are reserved and must not be reused.
    reserved 1, 2;
  }
  optional DebugInfoProto debug_info = 3;

  // An opaque token used internally to keep track of information needed for
  // pagination. A valid pagination token is required to fetch other pages of
  // results. A value 0 means that there are no more pages.
  // LINT.IfChange(next_page_token)
  optional uint64 next_page_token = 4;
  // LINT.ThenChange(//depot/google3/icing/result/result-state-manager.h:kInvalidNextPageToken)

  // Stats for query execution performance.
  optional QueryStatsProto query_stats = 5;
}

// Selects, for one schema type, which properties are to be retrieved.
// Next tag: 3
message TypePropertyMask {
  // The schema type to which these property masks should apply.
  // If the schema type is the wildcard ("*"), then the type property masks
  // will apply to all results of types that don't have their own, specific
  // type property mask entry.
  optional string schema_type = 1;

  // The property masks specifying the property to be retrieved. Property
  // masks must be composed only of property names and property separators (the
  // '.' character). For example, "subject", "recipients.name". Specifying no
  // property masks will result in *no* properties being retrieved.
  repeated string paths = 2;
}

// Specification of which properties to retrieve; see type_property_masks.
// Next tag: 2
message GetResultSpecProto {
  // How to specify a subset of properties to retrieve. If no type property mask
  // has been specified for a schema type, then *all* properties of that schema
  // type will be retrieved.
  repeated TypePropertyMask type_property_masks = 1;
}

// Client-supplied specifications on what query suggestions to retrieve.
// Next tag: 8
message SuggestionSpecProto {
  // REQUIRED: The "raw" prefix string that users may type. For example, "f"
  // will search for suggested queries that start with "f" like "foo", "fool".
  optional string prefix = 1;

  // OPTIONAL: Only search for suggestions that are under the specified
  // namespaces. If unset, the suggestion will search over all namespaces. Note
  // that this applies to the entire 'prefix'. To issue different suggestions
  // for different namespaces, separate RunSuggestion()'s will need to be made.
  repeated string namespace_filters = 2;

  // REQUIRED: The number of suggestions to be returned.
  optional int32 num_to_return = 3;

  // Indicates how the suggestion terms should be scored and ranked.
  optional SuggestionScoringSpecProto scoring_spec = 4;

  // OPTIONAL: Only search for suggestions that are under the specified
  // DocumentUris. If unset, the suggestion will search over all Documents.
  //
  // Every namespace in the given NamespaceDocumentUriGroups should match the
  // namespace_filters, i.e. it appears in namespace_filters or
  // namespace_filters is empty.
  //
  // No given NamespaceDocumentUriGroup may be empty (presumably meaning an
  // empty document_uris list - confirm). Please use namespace_filters to
  // exclude a namespace.
  //
  // Note that this applies to the entire 'prefix'. To issue different
  // suggestions for different DocumentIds, separate RunSuggestion()'s will need
  // to be made.
  repeated NamespaceDocumentUriGroup document_uri_filters = 5;

  // OPTIONAL: Only search for suggestions that are under the specified schemas.
  // If unset, the suggestion will search over all schema types. Note that this
  // applies to the entire 'prefix'. To issue different suggestions for
  // different schema types, separate RunSuggestion()'s will need to be made.
  repeated string schema_type_filters = 6;

  // OPTIONAL: Only search for suggestions that are under the specified types
  // and properties.
  //
  // If unset, the suggestion will search over all types.
  // If the TypePropertyMask.paths is unset, the suggestion will search over all
  // properties under the TypePropertyMask.schema_type.
  //
  // Note that this applies to the entire 'prefix'. To issue different
  // suggestions for different types, separate RunSuggestion()'s will need to be
  // made.
  repeated TypePropertyMask type_property_filters = 7;
}

// A group that holds namespace and document_uris under it.
message NamespaceDocumentUriGroup {
  // The namespace that the document_uris below belong to.
  optional string namespace_ = 1;

  // The document URIs held under namespace_.
  repeated string document_uris = 2;
}

// The status and the suggested queries produced for a suggestion request.
// Next tag: 3
message SuggestionResponse {
  // A single suggested query.
  message Suggestion {
    // The suggested query string for client to search for.
    optional string query = 1;
  }

  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   INTERNAL
  //
  // See status.proto for more details.
  optional StatusProto status = 1;

  // The suggested queries.
  repeated Suggestion suggestions = 2;
}

// Specification for a left outer join.
//
// Next tag: 7
// NOTE(review): the highest tag declared below is 5 and tag 6 is neither
// declared nor reserved - confirm whether it should be reserved.
message JoinSpecProto {
  // Collection of several specs that will be used for searching and joining
  // child documents.
  //
  // Next tag: 4
  message NestedSpecProto {
    // A nested SearchSpec that will be used to retrieve child documents. If you
    // are only looking to join on a specific type of documents, you could set a
    // schema filter in this SearchSpec. This includes the nested search query.
    // See SearchSpecProto.
    optional SearchSpecProto search_spec = 1;

    // A nested ScoringSpec that will be used to score child documents.
    // See ScoringSpecProto.
    optional ScoringSpecProto scoring_spec = 2;

    // A nested ResultSpec that will be used to format child documents in the
    // result joined documents, e.g. snippeting, projection.
    // See ResultSpecProto.
    optional ResultSpecProto result_spec = 3;
  }
  optional NestedSpecProto nested_spec = 1;

  // The equivalent of a primary key in SQL. This is an expression that will be
  // used to match child documents from the nested search to this document. One
  // such expression is qualifiedId(). When used, it means the contents of
  // child_property_expression property in the child documents must be equal to
  // the qualified id.
  // TODO(b/256022027) allow for parent_property_expression to be any property
  // of the parent document.
  optional string parent_property_expression = 2;

  // The equivalent of a foreign key in SQL. This defines an equality constraint
  // between a property in a child document and a property in the parent
  // document. For example, if you want to join child documents which have an
  // entityId property containing a fully qualified document id,
  // child_property_expression can be set to "entityId".
  // TODO(b/256022027) figure out how to allow this to refer to documents
  // outside of same pkg+db+ns.
  optional string child_property_expression = 3;

  // The max number of child documents to join to a parent document.
  // DEPRECATED: use ResultSpecProto.max_joined_children_per_parent_to_return to
  // control the number of children that are returned. There is no supported
  // control for the number of children being scored at this time.
  optional int32 max_joined_child_count = 4 [deprecated = true];

  // The strategy by which to score the aggregation of child documents. For
  // example, you might want to know which entity document has the most actions
  // taken on it. If JOIN_AGGREGATE_SCORE is used in the base SearchSpecProto,
  // the COUNT value will rank entity documents based on the number of child
  // documents.
  message AggregationScoringStrategy {
    enum Code {
      NONE = 0;  // No aggregation strategy for child documents and use parent
                 // document score.
      COUNT = 1;
      MIN = 2;
      AVG = 3;
      MAX = 4;
      SUM = 5;
    }
  }
  optional AggregationScoringStrategy.Code aggregation_scoring_strategy = 5;
}