// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package icing.lib;

import "icing/proto/document.proto";
import "icing/proto/logging.proto";
import "icing/proto/scoring.proto";
import "icing/proto/status.proto";
import "icing/proto/term.proto";

option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";

// Client-supplied specifications on what documents to retrieve.
// Next tag: 13
message SearchSpecProto {
  // REQUIRED: The "raw" query string that users may type. For example, "cat"
  // will search for documents with the term cat in it.
  optional string query = 1;

  // Indicates how the query terms should match terms in the index.
  //
  // TermMatchType.Code=UNKNOWN
  // Should never purposely be set and may lead to undefined behavior. This is
  // used for backwards compatibility reasons.
  //
  // TermMatchType.Code=EXACT_ONLY
  // Query terms will only match exact tokens in the index.
  // Ex. A query term "foo" will only match indexed token "foo", and not "foot"
  // or "football"
  //
  // TermMatchType.Code=PREFIX
  // Query terms will match indexed tokens when the query term is a prefix of
  // the token.
  // Ex. A query term "foo" will match indexed tokens like "foo", "foot", and
  // "football".
  optional TermMatchType.Code term_match_type = 2;

  // OPTIONAL: Only search for documents that have the specified namespaces. If
  // unset, the query will search over all namespaces. Note that this applies to
  // the entire 'query'. To issue different queries for different namespaces,
  // separate Search()'s will need to be made.
  repeated string namespace_filters = 3;

  // OPTIONAL: Only search for documents that have the specified schema types.
  // If unset, the query will search over all schema types. Note that this
  // applies to the entire 'query'. To issue different queries for different
  // schema types, separate Search()'s will need to be made. Also note that
  // schema filters will not be expanded for polymorphism.
  repeated string schema_type_filters = 4;

  // Timestamp taken just before sending proto across the JNI boundary from java
  // to native side.
  optional int64 java_to_native_start_timestamp_ms = 5;

  // OPTIONAL: If this field is present, join documents based on a nested
  // SearchSpec.
  optional JoinSpecProto join_spec = 7;

  // Features enabled in this search spec.
  repeated string enabled_features = 8;

  // OPTIONAL: Whether to use the read-only implementation of
  // IcingSearchEngine::Search.
  // The read-only version enables multiple queries to be performed concurrently
  // as it only acquires the read lock at IcingSearchEngine's level.
  // Finer-grained locks are implemented around code paths that write changes to
  // Icing during Search.
  optional bool use_read_only_search = 9 [default = true];

  // TODO(b/294266822): Handle multiple property filter lists for same schema
  // type.
  // How to specify a subset of properties to be searched. If no type property
  // filter has been specified for a schema type (no TypePropertyMask for the
  // given schema type), then *all* properties of that schema type will be
  // searched. If an empty property filter is specified for a given schema type
  // (TypePropertyMask for the given schema type has empty paths field), no
  // properties of that schema type will be searched.
  repeated TypePropertyMask type_property_filters = 10;

  // The vectors to be used in embedding queries.
  repeated PropertyProto.VectorProto embedding_query_vectors = 11;

  // Wrapper message that scopes the Code enum naming the metric types
  // available for embedding queries.
  message EmbeddingQueryMetricType {
    enum Code {
      UNKNOWN = 0;
      COSINE = 1;
      DOT_PRODUCT = 2;
      EUCLIDEAN = 3;
    }
  }

  // The default metric type used to calculate the scores for embedding
  // queries.
  optional EmbeddingQueryMetricType.Code embedding_query_metric_type = 12;

  // Tag 6 is reserved; do not reuse it for a new field.
  reserved 6;
}

// Client-supplied specifications on what to include/how to format the search
// results.
// Next tag: 10
message ResultSpecProto {
  // The results will be returned in pages, and num_per_page specifies the
  // number of documents in one page.
  optional int32 num_per_page = 1 [default = 10];

  // Whether to collect and return debug_info in the SearchResultProto.
  optional bool debug_info = 2;

  // How to provide snippeting information in the SearchResultProto.
  // Next tag: 5
  message SnippetSpecProto {
    // Only the first num_to_snippet documents will have snippet information
    // provided. If set to 0, snippeting is disabled.
    optional int32 num_to_snippet = 1;

    // Only the first num_matches_per_property matches for a single section will
    // have snippet information provided. If set to 0, snippeting is disabled.
    optional int32 num_matches_per_property = 2;

    // How large of a window to provide. Windows start at
    // max_window_utf32_length / 2 bytes before the middle of the matching token
    // and end at max_window_utf32_length / 2 bytes after the middle of the
    // matching token. Windowing respects token boundaries. Therefore, the
    // returned window may be smaller than requested. Setting
    // max_window_utf32_length to 0 will disable windowing information. If
    // matches enabled is also set to false, then snippeting is disabled. Ex.
    // max_window_utf32_length = 16. "foo bar baz bat rat" with a query of "baz"
    // will return a window of "bar baz bat" which is only 11 bytes long.
    optional int32 max_window_utf32_length = 3;
  }
  optional SnippetSpecProto snippet_spec = 3;

  // How to specify a subset of properties to retrieve. If no type property mask
  // has been specified for a schema type, then *all* properties of that schema
  // type will be retrieved.
  repeated TypePropertyMask type_property_masks = 4;

  // Groupings of namespaces and schema types whose total returned results
  // should be limited together.
  // Next tag: 3
  message ResultGrouping {
    // Grouping of namespace and schema type.
    // Next tag: 3
    message Entry {
      // The namespace in this grouping that should be returned.
      // This field should be empty if ResultGroupingType is SCHEMA_TYPE
      optional string namespace = 1;

      // The schema in this grouping that should be returned.
      // This field should be empty if ResultGroupingType is NAMESPACE
      optional string schema = 2;
    }

    // Identifier for namespace and schema type pairs.
    repeated Entry entry_groupings = 1;

    // The maximum number of results in this grouping that should be returned.
    optional int32 max_results = 2;
  }

  // How to limit the number of results returned per set of namespaces or schema
  // type. If results match for a namespace or schema type that is not present
  // in any result groupings, then those results will be returned without limit.
  //
  // Non-existent namespaces and/or schema type will be ignored.
  //
  // Example : Suppose that there are four namespaces each with three results
  // matching the query for "foo". Without any result groupings, Icing would
  // return the following results:
  // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns0doc2", "ns3doc1",
  //  "ns2doc1", "ns3doc2", "ns2doc0", "ns1doc1", "ns2doc2", "ns1doc2"].
  //
  // Suppose the following result groupings are supplied and that the
  // ResultGroupingType is set to NAMESPACE:
  // [ { [ {"namespace0"} ], 2 }, { [ {"namespace1"}, {"namespace2"} ], 2} ]
  //
  // The following results will be returned:
  // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns3doc1", "ns2doc1",
  //  "ns3doc2"].
  repeated ResultGrouping result_groupings = 5;

  // The threshold of total bytes of all documents to cutoff, in order to limit
  // # of bytes in a single page.
  // Note that it doesn't guarantee the result # of bytes will be smaller, equal
  // to, or larger than the threshold. Instead, it is just a threshold to
  // cutoff, and only guarantees total bytes of search results will exceed the
  // threshold by less than the size of the final search result.
  optional int32 num_total_bytes_per_page_threshold = 6
      [default = 2147483647];  // INT_MAX

  // The value by which the search results will be grouped.
  // Can get grouped by schema type, namespace (default), or by namespace and
  // schema type.
  enum ResultGroupingType {
    NONE = 0;
    SCHEMA_TYPE = 1;
    NAMESPACE = 2;
    NAMESPACE_AND_SCHEMA_TYPE = 3;
  }
  optional ResultGroupingType result_group_type = 7;

  // The max # of child documents that will be attached and returned in the
  // result for each parent. It is only used for join API.
  optional int32 max_joined_children_per_parent_to_return = 8;

  // The max # of results being scored and ranked.
  // Running time of ScoringProcessor and Ranker is O(num_to_score) according to
  // results of //icing/scoring:score-and-rank_benchmark. Note that
  // the process includes scoring, building a heap, and popping results from the
  // heap.
  //
  // 30000 results can be scored and ranked within 3 ms on a Pixel 3 XL
  // according to results of
  // //icing/scoring:score-and-rank_benchmark, so set it as the
  // default value.
  optional int32 num_to_score = 9 [default = 30000];
}

// The representation of a single match within a DocumentProto property.
//
// Example : A document whose content is "Necesito comprar comida mañana." and a
// query for "mana" with window=15
// Next tag: 12
message SnippetMatchProto {
  // The index of the byte in the string at which the match begins and the
  // length in bytes of the match.
  //
  // For the example above, the values of these fields would be
  // exact_match_byte_position=24, exact_match_byte_length=7 "mañana"
  optional int32 exact_match_byte_position = 2;
  optional int32 exact_match_byte_length = 3;

  // The length in bytes of the subterm that matches the query. The beginning of
  // the submatch is the same as exact_match_byte_position.
  //
  // For the example above, the value of this field would be 5. With
  // exact_match_byte_position=24 above, it would produce the substring "maña"
  optional int32 submatch_byte_length = 10;

  // The index of the UTF-16 code unit in the string at which the match begins
  // and the length in UTF-16 code units of the match. This is for use with
  // UTF-16 encoded strings like Java.lang.String.
  //
  // For the example above, the values of these fields would be
  // exact_match_utf16_position=24, exact_match_utf16_length=6 "mañana"
  optional int32 exact_match_utf16_position = 6;
  optional int32 exact_match_utf16_length = 7;

  // The length in UTF-16 code units of the subterm that matches the query. The
  // beginning of the submatch is the same as exact_match_utf16_position. This
  // is for use with UTF-16 encoded strings like Java.lang.String.
  //
  // For the example above, the value of this field would be 4. With
  // exact_match_utf16_position=24 above, it would produce the substring "maña"
  optional int32 submatch_utf16_length = 11;

  // The index of the byte in the string at which the suggested snippet window
  // begins and the length in bytes of the window.
  //
  // For the example above, the values of these fields would be
  // window_byte_position=17, window_byte_length=15 "comida mañana."
  optional int32 window_byte_position = 4;
  optional int32 window_byte_length = 5;

  // The index of the UTF-16 code unit in the string at which the suggested
  // snippet window begins and the length in UTF-16 code units of the window.
  // This is for use with UTF-16 encoded strings like Java.lang.String.
  //
  // For the example above, the values of these fields would be
  // window_utf16_position=17, window_utf16_length=14 "comida mañana."
  optional int32 window_utf16_position = 8;
  optional int32 window_utf16_length = 9;

  // Tag 1 is reserved; do not reuse it for a new field.
  reserved 1;
}

// A Proto representing all snippets for a single DocumentProto.
// Next tag: 2
message SnippetProto {
  // A pair of property name and all snippet matches that correspond to the
  // property values in the corresponding DocumentProto.
  // Next tag: 3
  message EntryProto {
    // A property path indicating which property in the DocumentProto these
    // snippets correspond to. Property paths will contain 1) property names,
    // 2) the property separator character '.' used to represent nested property
    // and 3) indices surrounded by brackets to represent a specific value in
    // that property.
    //
    // Example properties:
    // - 'body'               : the first and only string value of a top-level
    //                          property called 'body'.
    // - 'sender.name'        : the first and only string value of a property
    //                          called 'name' that is a subproperty of a
    //                          property called 'sender'.
    // - 'bcc[1].emailaddress': the first and only string value of a property
    //                          called 'emailaddress' that is a subproperty of
    //                          the second document value of a property called
    //                          'bcc'.
    // - 'attachments[0]'     : the first (of more than one) string value of a
    //                          property called 'attachments'.
    // NOTE: If there is only a single value for a property (like
    // 'sender.name'), then no value index will be added to the property path.
    // An index of [0] is implied. If there is more than one value for a
    // property, then the value index will be added to the property path (like
    // 'attachments[0]').
    optional string property_name = 1;

    // All snippet matches for the property value identified by property_name.
    repeated SnippetMatchProto snippet_matches = 2;
  }
  // Properties that do not appear in entries do not contain any matches.
  repeated EntryProto entries = 1;
}

// Icing lib-supplied results from a search.
// Next tag: 6
message SearchResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   INVALID_ARGUMENT
  //   ABORTED
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED.
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // The Results that matched the query. Empty if there was an error.
  // Next tag: 6
  message ResultProto {
    // Document that matches the SearchSpecProto.
    optional DocumentProto document = 1;

    // Snippeting information for the document if requested in the
    // ResultSpecProto. A default instance, if not requested.
    optional SnippetProto snippet = 2;

    // The score that the document was ranked by. The meaning of this score is
    // determined by ScoringSpecProto.rank_by.
    optional double score = 3;

    // The child documents that were joined to a parent document.
    repeated ResultProto joined_results = 4;

    // Extra helpful scores as specified by
    // ScoringSpecProto.additional_advanced_scoring_expressions. The scores will
    // not be used for ranking.
    repeated double additional_scores = 5 [packed = true];
  }
  repeated ResultProto results = 2;

  // Various debug fields. Not populated if ResultSpecProto.debug_info = false.
  // Next tag: 4
  message DebugInfoProto {
    // The internal representation of the actual query string that was executed.
    // This may be different from the SearchSpecProto.query if the original
    // query was malformed.
    optional string executed_query = 3;

    // Tags 1 and 2 are reserved; do not reuse them for new fields.
    reserved 1, 2;
  }
  optional DebugInfoProto debug_info = 3;

  // An opaque token used internally to keep track of information needed for
  // pagination. A valid pagination token is required to fetch other pages of
  // results. A value 0 means that there're no more pages.
  // LINT.IfChange(next_page_token)
  optional uint64 next_page_token = 4;
  // LINT.ThenChange(//depot/google3/icing/result/result-state-manager.h:kInvalidNextPageToken)

  // Stats for query execution performance.
  optional QueryStatsProto query_stats = 5;
}

// Associates a schema type with a list of property paths. In this file it is
// used both to select which properties are retrieved (type_property_masks)
// and which properties are searched (type_property_filters).
// Next tag: 3
message TypePropertyMask {
  // The schema type to which these property masks should apply.
  // If the schema type is the wildcard ("*"), then the type property masks
  // will apply to all results of types that don't have their own, specific
  // type property mask entry.
  optional string schema_type = 1;

  // The property masks specifying the property to be retrieved. Property
  // masks must be composed only of property names, property separators (the
  // '.' character). For example, "subject", "recipients.name". Specifying no
  // property masks will result in *no* properties being retrieved.
  repeated string paths = 2;
}

// Result specification that carries only a property projection
// (type_property_masks).
// Next tag: 2
message GetResultSpecProto {
  // How to specify a subset of properties to retrieve. If no type property mask
  // has been specified for a schema type, then *all* properties of that schema
  // type will be retrieved.
  repeated TypePropertyMask type_property_masks = 1;
}

// Client-supplied specifications on what query suggestions to retrieve.
// Next tag: 8
message SuggestionSpecProto {
  // REQUIRED: The "raw" prefix string that users may type. For example, "f"
  // will search for suggested query that start with "f" like "foo", "fool".
  optional string prefix = 1;

  // OPTIONAL: Only search for suggestions that are under the specified
  // namespaces. If unset, the suggestion will search over all namespaces. Note
  // that this applies to the entire 'prefix'. To issue different suggestions
  // for different namespaces, separate RunSuggestion()'s will need to be made.
  repeated string namespace_filters = 2;

  // REQUIRED: The number of suggestions to be returned.
  optional int32 num_to_return = 3;

  // Indicates how the suggestion terms should be scored and ranked.
  optional SuggestionScoringSpecProto scoring_spec = 4;

  // OPTIONAL: Only search for suggestions that are under the specified
  // DocumentUris. If unset, the suggestion will search over all Documents.
  //
  // All namespaces in the given NamespaceDocumentUriGroups should match the
  // namespace_filters, i.e. appear in namespace_filters, or namespace_filters
  // is empty.
  //
  // No given NamespaceDocumentUriGroup may be empty (presumably: have empty
  // document_uris -- TODO confirm). Please use namespace_filters to exclude a
  // namespace.
  //
  // Note that this applies to the entire 'prefix'. To issue different
  // suggestions for different DocumentIds, separate RunSuggestion()'s will need
  // to be made.
  repeated NamespaceDocumentUriGroup document_uri_filters = 5;

  // OPTIONAL: Only search for suggestions that are under the specified schemas.
  // If unset, the suggestion will search over all schema types. Note that this
  // applies to the entire 'prefix'. To issue different suggestions for
  // different schema types, separate RunSuggestion()'s will need to be made.
  // Also note that schema filters will not be expanded for polymorphism.
  repeated string schema_type_filters = 6;

  // OPTIONAL: Only search for suggestions that are under the specified types
  // and properties.
  //
  // If unset, the suggestion will search over all types.
  // If the TypePropertyMask.paths is unset, the suggestion will search over all
  // properties under the TypePropertyMask.schema_type.
  //
  // Note that this applies to the entire 'prefix'. To issue different
  // suggestions for different types, separate RunSuggestion()'s will need to be
  // made.
  repeated TypePropertyMask type_property_filters = 7;
}

// A group that holds namespace and document_uris under it.
message NamespaceDocumentUriGroup {
  // The namespace that the document_uris in this group are under.
  optional string namespace_ = 1;

  // The document URIs under namespace_.
  repeated string document_uris = 2;
}

// Response carrying a status and the list of suggested queries.
// Next tag: 3
message SuggestionResponse {
  message Suggestion {
    // The suggested query string for client to search for.
    optional string query = 1;
  }

  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   INTERNAL
  //
  // See status.proto for more details.
  optional StatusProto status = 1;

  repeated Suggestion suggestions = 2;
}

// Specification for a left outer join.
//
// Next tag: 7
message JoinSpecProto {
  // Collection of several specs that will be used for searching and joining
  // child documents.
  //
  // Next tag: 4
  message NestedSpecProto {
    // A nested SearchSpec that will be used to retrieve child documents. If you
    // are only looking to join on a specific type documents, you could set a
    // schema filter in this SearchSpec. This includes the nested search query.
    // See SearchSpecProto.
    optional SearchSpecProto search_spec = 1;

    // A nested ScoringSpec that will be used to score child documents.
    // See ScoringSpecProto.
    optional ScoringSpecProto scoring_spec = 2;

    // A nested ResultSpec that will be used to format child documents in the
    // result joined documents, e.g. snippeting, projection.
    // See ResultSpecProto.
    optional ResultSpecProto result_spec = 3;
  }
  optional NestedSpecProto nested_spec = 1;

  // The equivalent of a primary key in SQL. This is an expression that will be
  // used to match child documents from the nested search to this document. One
  // such expression is qualifiedId(). When used, it means the contents of
  // child_property_expression property in the child documents must be equal to
  // the qualified id.
  // TODO(b/256022027) allow for parent_property_expression to be any property
  // of the parent document.
  optional string parent_property_expression = 2;

  // The equivalent of a foreign key in SQL. This defines an equality constraint
  // between a property in a child document and a property in the parent
  // document. For example, if you want to join child documents which have an
  // entityId property containing a fully qualified document id,
  // child_property_expression can be set to "entityId".
  // TODO(b/256022027) figure out how to allow this to refer to documents
  // outside of same pkg+db+ns.
  optional string child_property_expression = 3;

  // The max number of child documents to join to a parent document.
  // DEPRECATED: use ResultSpecProto.max_joined_children_per_parent_to_return to
  // control the number of children that are returned. There is no supported
  // control for the number of children being scored at this time.
  optional int32 max_joined_child_count = 4 [deprecated = true];

  // The strategy by which to score the aggregation of child documents. For
  // example, you might want to know which entity document has the most actions
  // taken on it. If JOIN_AGGREGATE_SCORE is used in the base SearchSpecProto,
  // the COUNT value will rank entity documents based on the number of child
  // documents.
  message AggregationScoringStrategy {
    enum Code {
      NONE = 0;  // No aggregation strategy for child documents and use parent
                 // document score.
      COUNT = 1;
      MIN = 2;
      AVG = 3;
      MAX = 4;
      SUM = 5;
    }
  }
  optional AggregationScoringStrategy.Code aggregation_scoring_strategy = 5;
}