// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package icing.lib;

import "icing/proto/status.proto";
import "icing/proto/term.proto";

option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";

// Defines the schema that every Document of a specific "type" should adhere
// to. These can be considered as definitions of rich structured types for
// Documents accepted by IcingSearchEngine.
//
// NOTE: Instances of SchemaTypeConfigProto are strongly recommended to be
// based on types defined in schema.org. This makes the data/config/code more
// shareable and easier to extend in the future.
//
// TODO(cassiewang) Define a sample proto file that can be used by tests and for
// documentation.
//
// Next tag: 9
message SchemaTypeConfigProto {
  // REQUIRED: Named type that identifies the structured, logical schema being
  // defined.
  //
  // Recommended format: Human readable string that's one of the types defined
  // in http://schema.org. Eg: DigitalDocument, Message, Person, etc.
  optional string schema_type = 1;

  // OPTIONAL: A natural language description of the SchemaTypeConfigProto.
  //
  // This string is not used by Icing in any way. It simply exists to allow
  // users to store semantic information about the SchemaTypeConfigProto for
  // future retrieval.
  optional string description = 7;

  // OPTIONAL: Identifies a database that the schema type belongs to. This
  // groups schema types that are related to each other, which is useful for
  // setting or retrieving a subset of schema types. If unset, the schema type
  // will be considered as part of the default empty database group.
  //
  // NOTE: Only schemas from one database can be set in a single SetSchema call.
  // Please use multiple SetSchema calls if you want to set schemas across
  // multiple databases.
  optional string database = 8;

  // List of all properties that are supported by Documents of this type.
  // A Document should never have properties that are not listed here.
  //
  // TODO(cassiewang) Figure out if we should disallow, ignore or accept
  // unknown properties. Accepting them could make switching between versions
  // easier.
  repeated PropertyConfigProto properties = 4;

  // Version is an arbitrary number that the client may use to keep track of
  // different incarnations of the schema. Icing library imposes no requirements
  // on this field and will not validate it in any way. If a client calls
  // SetSchema with a schema that contains one or more new version numbers, then
  // those version numbers will be updated so long as the SetSchema call
  // succeeds. Clients are free to leave the version number unset, in which case
  // it will default to value == 0.
  optional int32 version = 5;

  // An experimental field to make the type a subtype of parent_types, which
  // enables parent_types to be interpreted as its subtypes in the context of
  // the Search APIs, including schema type filters and projections specified in
  // TypePropertyMask.
  repeated string parent_types = 6;

  reserved 2, 3;
}

// Describes how a string property should be indexed.
// Next tag: 3
message StringIndexingConfig {
  // Indicates how the content of this property should be matched in the index.
  //
  // TermMatchType.Code=UNKNOWN
  // Content in this property will not be tokenized or indexed. Useful if the
  // data type is not indexable. See schema-util for details.
  //
  // TermMatchType.Code=EXACT_ONLY
  // Content in this property should only be returned for queries matching the
  // exact tokens appearing in this property.
  // Ex. A property with "fool" should NOT match a query for "foo".
  //
  // TermMatchType.Code=PREFIX
  // Content in this property should be returned for queries that are either
  // exact matches or prefix matches of the tokens appearing in this property.
  // Ex. A property with "fool" *should* match a query for "foo".
  //
  // TermMatchType.Code=STEMMING
  // Content in this property should also be returned for queries that are stems
  // of the tokens appearing in this property.
  // Ex. A property with "running" *should* match a query for "run".
  //
  // TODO: b/344915547 - Refactor this to be a repeated field so that clients
  // can choose multiple fuzzy match types.
  optional TermMatchType.Code term_match_type = 1;

  // Wrapper message that scopes the tokenizer enum values, so that they are
  // referenced as TokenizerType.Code.
  message TokenizerType {
    enum Code {
      // It is only valid for tokenizer_type to be 'NONE' if the data type is
      // not indexed.
      NONE = 0;

      // Tokenization for plain text.
      PLAIN = 1;

      // Tokenizes text verbatim. This means no normalization or segmentation
      // is applied to string values that are tokenized using this type.
      // Therefore, the output token is equivalent to the raw string text. For
      // example, "Hello, world!" would be tokenized as "Hello, world!"
      // preserving punctuation and capitalization, and not creating separate
      // tokens between the space.
      VERBATIM = 2;

      // Tokenizes text as an email address. This means it will tokenize a
      // string into multiple emails, and further tokenize those into parts of
      // an email address. These parts include the local address, host
      // components, local components, as well as the name and comments. For
      // example, "User (comment) <user@domain.com>" would be tokenized into a
      // "User" name token, a "comment" comment token, a "user" local address, a
      // "user" local component token, a "domain" host component token, a "com"
      // host component token, a "user@domain.com" address token, and the entire
      // original string as an rfc822 token.
      // See more here: https://datatracker.ietf.org/doc/html/rfc822
      RFC822 = 3;

      // Tokenizes text as a url address. This tokenizes a url string into a
      // token for each component in the url, as well as any significant
      // url suffixes. For example,
      // https://www.google.com/path/subpath?query#ref would be tokenized into a
      // scheme token "https"; 3 host tokens "www", "google", "com"; 2 path
      // tokens "path", "subpath"; a query token "query"; a reference token
      // "ref"; and 3 suffix tokens
      // "https://www.google.com/path/subpath?query#ref",
      // "www.google.com/path/subpath?query#ref",
      // "google.com/path/subpath?query#ref".
      // Currently only supports tokenization of one url string at a time
      // i.e. the input string cannot have spaces in the middle, but can have
      // leading or trailing spaces.
      URL = 4;
    }
  }
  // Tokenizer applied to the content of this property. See TokenizerType.Code
  // for the meaning of each value.
  optional TokenizerType.Code tokenizer_type = 2;
}

// Describes how a document property should be indexed.
// Next tag: 3
message DocumentIndexingConfig {
  // OPTIONAL: Whether nested properties within the document property should be
  // indexed. If true, then all nested properties will be indexed according to
  // the property's own indexing configurations. If false, nested documents'
  // properties will not be indexed even if they have an indexing configuration.
  //
  // The default value is false.
  optional bool index_nested_properties = 1;

  // List of nested properties within the document to index. Only the
  // provided list of properties will be indexed according to the property's
  // indexing configurations.
  //
  // index_nested_properties must be false in order to use this feature.
  repeated string indexable_nested_properties_list = 2;
}

// Describes how an int64 property should be indexed.
// Next tag: 3
// NOTE(review): only tag 1 is declared below and no tag is reserved. Confirm
// whether tag 2 was retired before assigning it to a new field.
message IntegerIndexingConfig {
  // OPTIONAL: Indicates how the int64 contents of this property should be
  // matched.
  //
  // The default value is UNKNOWN.
  message NumericMatchType {
    enum Code {
      // Contents in this property will not be indexed. Useful if the int64
      // property type is not indexable.
      UNKNOWN = 0;

      // Contents in this property should only be returned for queries matching
      // the range.
      RANGE = 1;
    }
  }
  optional NumericMatchType.Code numeric_match_type = 1;
}

// Describes how a vector property should be indexed.
// Next tag: 3
message EmbeddingIndexingConfig {
  // OPTIONAL: Indicates how the vector contents of this property should be
  // matched.
  //
  // The default value is UNKNOWN.
  message EmbeddingIndexingType {
    enum Code {
      // Contents in this property will not be indexed. Useful if the vector
      // property type is not indexable.
      UNKNOWN = 0;

      // Contents in this property will be indexed for linear search.
      LINEAR_SEARCH = 1;
    }
  }
  optional EmbeddingIndexingType.Code embedding_indexing_type = 1;

  // OPTIONAL: Indicates whether the vector contents of this property should be
  // quantized. Quantization can reduce the size of the embedding search index,
  // potentially leading to faster embedding search due to lower I/O bandwidth.
  //
  // Quantization is usually very reliable and in most cases will have a
  // negligible impact on recall. Using quantization is strongly recommended.
  //
  // The default value is NONE.
  message QuantizationType {
    enum Code {
      // Contents in this property will not be quantized.
      NONE = 0;

      // Contents in this property will be quantized to 8 bits.
      QUANTIZE_8_BIT = 1;
    }
  }
  optional QuantizationType.Code quantization_type = 2;
}

// Describes how a property can be used to join this document with another
// document. See JoinSpecProto (in search.proto) for more details.
// Next tag: 4
message JoinableConfig {
  // OPTIONAL: Indicates what joinable type the content value of this property
  // is.
  //
  // The default value is NONE.
  message ValueType {
    enum Code {
      // Value in this property is not joinable.
      NONE = 0;

      // Value in this property is a joinable (string) qualified id, which is
      // composed of namespace and uri.
      // See JoinSpecProto (in search.proto) and DocumentProto (in
      // document.proto) for more details about qualified id, namespace and uri.
      QUALIFIED_ID = 1;
    }
  }
  optional ValueType.Code value_type = 1;

  // OPTIONAL: Indicates how to propagate the deletion between the document and
  // the (referenced) joinable document.
  //
  // The default value is NONE.
  //
  // If delete propagation is enabled (i.e. not NONE), then value_type must be
  // QUALIFIED_ID.
  message DeletePropagationType {
    enum Code {
      // No delete propagation.
      NONE = 0;

      // Propagate delete from the referenced document to the document.
      PROPAGATE_FROM = 1;
    }
  }
  optional DeletePropagationType.Code delete_propagation_type = 3;

  reserved 2;
}

// Describes the schema of a single property of Documents that belong to a
// specific SchemaTypeConfigProto. These can be considered as a rich, structured
// type for each property of Documents accepted by IcingSearchEngine.
// Next tag: 12
message PropertyConfigProto {
  // REQUIRED: Name that uniquely identifies a property within a Document of
  // a specific SchemaTypeConfigProto.
  //
  // Recommended format: Human readable string that's one of the properties
  // defined in schema.org for the parent SchemaTypeConfigProto.
  //   Eg: 'author' for http://schema.org/DigitalDocument.
  //   Eg: 'address' for http://schema.org/Place.
  optional string property_name = 1;

  // OPTIONAL: A natural language description of the property.
  //
  // This string is not used by Icing in any way. It simply exists to allow
  // users to store semantic information about the PropertyConfigProto for
  // future retrieval.
  optional string description = 9;

  // REQUIRED: Physical data-types of the contents of the property.
  message DataType {
    enum Code {
      // This value should never purposely be used. This is used for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      STRING = 1;
      INT64 = 2;
      DOUBLE = 3;
      BOOLEAN = 4;

      // Unstructured BLOB.
      BYTES = 5;

      // Indicates that the property itself is a Document, making it part of
      // a hierarchical Document schema. Any property using this data_type
      // MUST have a valid 'schema_type'.
      DOCUMENT = 6;

      // A list of floats. Vector type is used for embedding searches.
      VECTOR = 7;

      // A handle to uniquely identify a large blob of data.
      BLOB_HANDLE = 8;
    }
  }
  optional DataType.Code data_type = 2;

  // REQUIRED if (data_type == DOCUMENT). OPTIONAL otherwise.
  // Indicates the logical schema-type of the contents of this property.
  //
  // TODO(cassiewang): This could be useful for non-document properties, e.g.
  // to set this field as a schema.org/address for some string property.
  // Re-evaluate what recommendation we should give clients if we want to start
  // using this for non-document properties as well.
  //
  // Recommended format: Human readable string that is one of the types defined
  // in schema.org, matching the SchemaTypeConfigProto.schema_type of another
  // type.
  optional string schema_type = 3;

  // REQUIRED: The cardinality of the property.
  message Cardinality {
    // NOTE: The order of the cardinality is purposefully set to be from least
    // restrictive (REPEATED) to most restrictive (REQUIRED). This makes it
    // easier to check if a field is backwards compatible by doing a simple
    // greater-than/less-than check on the enum ints. Changing/adding new
    // cardinalities should be done cautiously.
    enum Code {
      // This should never purposely be set. This is used for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      // Any number of items (including zero) [0...*].
      REPEATED = 1;

      // Zero or one value [0,1].
      OPTIONAL = 2;

      // Exactly one value [1].
      REQUIRED = 3;
    }
  }
  optional Cardinality.Code cardinality = 4;

  // OPTIONAL: Describes how string properties should be indexed. String
  // properties that do not set the indexing config will not be indexed.
  optional StringIndexingConfig string_indexing_config = 5;

  // OPTIONAL: Describes how document properties should be indexed.
  optional DocumentIndexingConfig document_indexing_config = 6;

  // OPTIONAL: Describes how int64 properties should be indexed. Int64
  // properties that do not set the indexing config will not be indexed.
  optional IntegerIndexingConfig integer_indexing_config = 7;

  // OPTIONAL: Describes how string properties can be used as a document joining
  // matcher.
  //
  // Note: currently we only support STRING single joining, so if a property is
  // set as joinable (i.e. joinable_config.value_type is not NONE), then:
  // - DataType should be STRING. Otherwise joinable_config will be ignored.
  // - The property itself and any upper-level (nested doc) property should
  //   contain at most one element (i.e. Cardinality is OPTIONAL or REQUIRED).
  optional JoinableConfig joinable_config = 8;

  // OPTIONAL: Describes how vector properties should be indexed. Vector
  // properties that do not set the indexing config will not be indexed.
  optional EmbeddingIndexingConfig embedding_indexing_config = 10;

  // OPTIONAL: Describes how a property can be used for scoring.
  //
  // The ScorableType should only be enabled for the following data types:
  // - INT64
  // - DOUBLE
  // - BOOLEAN
  message ScorableType {
    enum Code {
      // This value should not be used on purpose.
      // It will be treated as DISABLED in icing.
      UNKNOWN = 0;

      // Property is disabled for scoring.
      DISABLED = 1;

      // Property is enabled for scoring.
      ENABLED = 2;
    }
  }
  optional ScorableType.Code scorable_type = 11;
}

// List of all supported types constitutes the schema used by Icing.
// Next tag: 2
message SchemaProto {
  // Every schema type that makes up this schema.
  repeated SchemaTypeConfigProto types = 1;
}

// Request for a call to IcingSearchEngine.SetSchema
// Next tag: 4
message SetSchemaRequestProto {
  // REQUIRED: The new schema to set. This will replace the existing schema
  // stored in IcingSearchEngine.
  //
  // schema.types is allowed to be empty. In this case, the SetSchema call will
  // try to delete all types and indexed documents for the provided database,
  // which is only allowed if ignore_errors_and_delete_documents=true.
  optional SchemaProto schema = 1;

  // OPTIONAL: The database for the set schema request. Only schema types for
  // this database will be modified.
  //
  // For a valid set schema request, this must match the database fields of
  // schema.types.
  //
  // If unset, the default empty database is assumed for the set schema request.
  optional string database = 2;

  // OPTIONAL: Whether to ignore errors and delete documents when setting the
  // schema.
  //
  // If true, then Icing will try to set the schema even if it is incompatible.
  // In that case, documents that are invalidated by the new schema would be
  // deleted from Icing. This cannot be used to force set an invalid schema.
  //
  // The default value is false.
  optional bool ignore_errors_and_delete_documents = 3;
}

// Result of a call to IcingSearchEngine.SetSchema
// Next tag: 9
message SetSchemaResultProto {
  // Status code can be one of:
  //   OK
  //   INVALID_ARGUMENT
  //   FAILED_PRECONDITION
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, +WARNING_DATA_LOSS,
  // -INTERNAL. go/icing-library-apis.
  optional StatusProto status = 1;

  // Schema types that existed in the previous schema, but were deleted from the
  // new schema. If ignore_errors_and_delete_documents=true, then all documents
  // of these types were also deleted.
  repeated string deleted_schema_types = 2;

  // Schema types that existed in the previous schema and were incompatible with
  // the new schema type. If ignore_errors_and_delete_documents=true, then any
  // documents that fail validation against the new schema types would also be
  // deleted.
  repeated string incompatible_schema_types = 3;

  // Schema types that did not exist in the previous schema and were added with
  // the new schema type.
  repeated string new_schema_types = 4;

  // Schema types that were changed in a way that was backwards compatible and
  // didn't invalidate the index.
  repeated string fully_compatible_changed_schema_types = 5;

  // Schema types that were changed in a way that was backwards compatible, but
  // invalidated the index.
  repeated string index_incompatible_changed_schema_types = 6;

  // Overall time used for the function call.
  optional int32 latency_ms = 7;

  // Schema types that were changed in a way that was backwards compatible, but
  // invalidated the joinable cache.
  //
  // For example, a property was set non joinable in the old schema definition,
  // but changed to joinable in the new definition. In this case, this property
  // will be considered join incompatible when setting new schema.
  repeated string join_incompatible_changed_schema_types = 8;
}

// Result of a call to IcingSearchEngine.GetSchema
// Next tag: 3
message GetSchemaResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Copy of the Schema proto. Modifying this does not affect the Schema that
  // IcingSearchEngine holds.
  optional SchemaProto schema = 2;
}

// Result of a call to IcingSearchEngine.GetSchemaType
// Next tag: 3
message GetSchemaTypeResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Copy of the SchemaTypeConfig proto with the specified schema_type.
  // Modifying this does not affect the SchemaTypeConfig that IcingSearchEngine
  // holds.
  optional SchemaTypeConfigProto schema_type_config = 2;
}