// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package icing.lib;

import "icing/proto/status.proto";
import "icing/proto/term.proto";

option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";

// Defines the schema that every Document of a specific "type" should adhere
// to. These can be considered as definitions of rich structured types for
// Documents accepted by IcingSearchEngine.
//
// NOTE: Instances of SchemaTypeConfigProto are strongly recommended to be
// based on types defined in schema.org. This makes the data/config/code more
// shareable and easier to extend in the future.
//
// TODO(cassiewang) Define a sample proto file that can be used by tests and for
// documentation.
//
// Next tag: 8
message SchemaTypeConfigProto {
  // REQUIRED: Named type that uniquely identifies the structured, logical
  // schema being defined.
  //
  // Recommended format: Human readable string that's one of the types defined
  // in http://schema.org. Eg: DigitalDocument, Message, Person, etc.
  optional string schema_type = 1;

  // OPTIONAL: A natural language description of the SchemaTypeConfigProto.
  //
  // This string is not used by Icing in any way. It simply exists to allow
  // users to store semantic information about the SchemaTypeConfigProto for
  // future retrieval.
  optional string description = 7;

  // List of all properties that are supported by Documents of this type.
  // A Document should never have properties that are not listed here.
  //
  // TODO(cassiewang) Figure out if we should disallow, ignore or accept
  // unknown properties. Accepting them could make switching between versions
  // easier.
  repeated PropertyConfigProto properties = 4;

  // Version is an arbitrary number that the client may use to keep track of
  // different incarnations of the schema. Icing library imposes no requirements
  // on this field and will not validate it in any way. If a client calls
  // SetSchema with a schema that contains one or more new version numbers, then
  // those version numbers will be updated so long as the SetSchema call
  // succeeds. Clients are free to leave the version number unset, in which case
  // it will default to value == 0.
  optional int32 version = 5;

  // An experimental field to mark this type as a subtype of parent_types, which
  // enables parent_types to be interpreted as its subtypes in the context of
  // the Search APIs, including schema type filters and projections specified in
  // TypePropertyMask.
  repeated string parent_types = 6;

  // Reserved tags; these numbers must not be reused for new fields.
  reserved 2, 3;
}

// Describes how a string property should be indexed.
// Next tag: 3
message StringIndexingConfig {
  // Indicates how the content of this property should be matched in the index.
  //
  // TermMatchType.Code=UNKNOWN
  // Content in this property will not be tokenized or indexed. Useful if the
  // data type is not indexable. See schema-util for details.
  //
  // TermMatchType.Code=EXACT_ONLY
  // Content in this property should only be returned for queries matching the
  // exact tokens appearing in this property.
  // Ex. A property with "fool" should NOT match a query for "foo".
  //
  // TermMatchType.Code=PREFIX
  // Content in this property should be returned for queries that are either
  // exact matches or prefix matches of the tokens appearing in this property.
  // Ex. A property with "fool" *should* match a query for "foo".
  optional TermMatchType.Code term_match_type = 1;

  message TokenizerType {
    enum Code {
      // It is only valid for tokenizer_type to be 'NONE' if the data type is
      // not indexed.
      NONE = 0;

      // Tokenization for plain text.
      PLAIN = 1;

      // Tokenizes text verbatim. This means no normalization or segmentation
      // is applied to string values that are tokenized using this type.
      // Therefore, the output token is equivalent to the raw string text. For
      // example, "Hello, world!" would be tokenized as "Hello, world!"
      // preserving punctuation and capitalization, and not creating separate
      // tokens between the space.
      VERBATIM = 2;

      // Tokenizes text as an email address. This means it will tokenize a
      // string into multiple emails, and further tokenize those into parts of
      // an email address. These parts include the local address, host
      // components, local components, as well as the name and comments. For
      // example, "User (comment) <user@domain.com>" would be tokenized into a
      // "User" name token, a "comment" comment token, a "user" local address, a
      // "user" local component token, a "domain" host component token, a "com"
      // host component token, a "user@domain.com" address token, and the entire
      // original string as an rfc822 token.
      // See more here: https://datatracker.ietf.org/doc/html/rfc822
      RFC822 = 3;

      // Tokenizes text as a URL. This tokenizes a URL string into a
      // token for each component in the URL, as well as any significant
      // URL suffixes. For example,
      // https://www.google.com/path/subpath?query#ref would be tokenized into a
      // scheme token "https"; 3 host tokens "www", "google", "com"; 2 path
      // tokens "path", "subpath"; a query token "query"; a reference token
      // "ref"; and 3 suffix tokens
      // "https://www.google.com/path/subpath?query#ref",
      // "www.google.com/path/subpath?query#ref",
      // "google.com/path/subpath?query#ref".
      // Currently only supports tokenization of one URL string at a time
      // i.e. the input string cannot have spaces in the middle, but can have
      // leading or trailing spaces.
      URL = 4;
    }
  }
  // Indicates which tokenizer (see TokenizerType.Code above) is applied to the
  // content of this property.
  optional TokenizerType.Code tokenizer_type = 2;
}

// Describes how a document property should be indexed.
// Next tag: 3
message DocumentIndexingConfig {
  // OPTIONAL: Whether nested properties within the document property should be
  // indexed. If true, then all nested properties will be indexed according to
  // the property's own indexing configurations. If false, nested documents'
  // properties will not be indexed even if they have an indexing configuration.
  //
  // The default value is false.
  optional bool index_nested_properties = 1;

  // List of nested properties within the document to index. Only the
  // provided list of properties will be indexed according to the property's
  // indexing configurations.
  //
  // index_nested_properties must be false in order to use this feature.
  repeated string indexable_nested_properties_list = 2;
}

// Describes how an int64 property should be indexed.
// Next tag: 3
// NOTE(review): only tag 1 is declared below; confirm whether tag 2 was
// previously used (and should be `reserved`) or whether this should read 2.
message IntegerIndexingConfig {
  // OPTIONAL: Indicates how the int64 contents of this property should be
  // matched.
  //
  // The default value is UNKNOWN.
  message NumericMatchType {
    enum Code {
      // Contents in this property will not be indexed. Useful if the int64
      // property type is not indexable.
      UNKNOWN = 0;

      // Contents in this property should only be returned for queries matching
      // the range.
      RANGE = 1;
    }
  }
  optional NumericMatchType.Code numeric_match_type = 1;
}

// Describes how a vector property should be indexed.
// Next tag: 3
// NOTE(review): only tag 1 is declared below; confirm whether tag 2 was
// previously used (and should be `reserved`) or whether this should read 2.
message EmbeddingIndexingConfig {
  // OPTIONAL: Indicates how the vector contents of this property should be
  // matched.
  //
  // The default value is UNKNOWN.
  message EmbeddingIndexingType {
    enum Code {
      // Contents in this property will not be indexed. Useful if the vector
      // property type is not indexable.
      UNKNOWN = 0;

      // Contents in this property will be indexed for linear search.
      LINEAR_SEARCH = 1;
    }
  }
  optional EmbeddingIndexingType.Code embedding_indexing_type = 1;
}

// Describes how a property can be used to join this document with another
// document. See JoinSpecProto (in search.proto) for more details.
// Next tag: 3
message JoinableConfig {
  // OPTIONAL: Indicates what joinable type the content value of this property
  // is.
  //
  // The default value is NONE.
  message ValueType {
    enum Code {
      // Value in this property is not joinable.
      NONE = 0;

      // Value in this property is a joinable (string) qualified id, which is
      // composed of namespace and uri.
      // See JoinSpecProto (in search.proto) and DocumentProto (in
      // document.proto) for more details about qualified id, namespace and uri.
      QUALIFIED_ID = 1;
    }
  }
  optional ValueType.Code value_type = 1;

  // If the parent document a child document is joined to is deleted, delete the
  // child document as well. This will only apply to children joined through
  // QUALIFIED_ID, other (future) joinable value types won't use it.
  optional bool propagate_delete = 2 [default = false];
}

// Describes the schema of a single property of Documents that belong to a
// specific SchemaTypeConfigProto. These can be considered as a rich, structured
// type for each property of Documents accepted by IcingSearchEngine.
// Next tag: 11
message PropertyConfigProto {
  // REQUIRED: Name that uniquely identifies a property within a Document of
  // a specific SchemaTypeConfigProto.
  //
  // Recommended format: Human readable string that's one of the properties
  // defined in schema.org for the parent SchemaTypeConfigProto.
  // Eg: 'author' for http://schema.org/DigitalDocument.
  // Eg: 'address' for http://schema.org/Place.
  optional string property_name = 1;

  // OPTIONAL: A natural language description of the property.
  //
  // This string is not used by Icing in any way. It simply exists to allow
  // users to store semantic information about the PropertyConfigProto for
  // future retrieval.
  optional string description = 9;

  // REQUIRED: Physical data-types of the contents of the property.
  message DataType {
    enum Code {
      // This value should never purposely be used. This is used for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      // Scalar value types.
      STRING = 1;
      INT64 = 2;
      DOUBLE = 3;
      BOOLEAN = 4;

      // Unstructured BLOB.
      BYTES = 5;

      // Indicates that the property itself is a Document, making it part of
      // a hierarchical Document schema. Any property using this data_type
      // MUST have a valid 'schema_type'.
      DOCUMENT = 6;

      // A list of floats. Vector type is used for embedding searches.
      VECTOR = 7;
    }
  }
  optional DataType.Code data_type = 2;

  // REQUIRED if (data_type == DOCUMENT). OPTIONAL otherwise.
  // Indicates the logical schema-type of the contents of this property.
  //
  // TODO(cassiewang): This could be useful for non-document properties, e.g.
  // to set this field as a schema.org/address for some string property.
  // Re-evaluate what recommendation we should give clients if we want to start
  // using this for non-document properties as well.
  //
  // Recommended format: Human readable string that is one of the types defined
  // in schema.org, matching the SchemaTypeConfigProto.schema_type of another
  // type.
  optional string schema_type = 3;

  // REQUIRED: The cardinality of the property.
  message Cardinality {
    // NOTE: The order of the cardinality is purposefully set to be from least
    // restrictive (REPEATED) to most restrictive (REQUIRED). This makes it
    // easier to check if a field is backwards compatible by doing a simple
    // greater-than/less-than check on the enum ints. Changing/adding new
    // cardinalities should be done cautiously.
    enum Code {
      // This should never purposely be set. This is used for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      // Any number of items (including zero) [0...*].
      REPEATED = 1;

      // Zero or one value [0,1].
      OPTIONAL = 2;

      // Exactly one value [1].
      REQUIRED = 3;
    }
  }
  optional Cardinality.Code cardinality = 4;

  // OPTIONAL: Describes how string properties should be indexed. String
  // properties that do not set the indexing config will not be indexed.
  optional StringIndexingConfig string_indexing_config = 5;

  // OPTIONAL: Describes how document properties should be indexed.
  optional DocumentIndexingConfig document_indexing_config = 6;

  // OPTIONAL: Describes how int64 properties should be indexed. Int64
  // properties that do not set the indexing config will not be indexed.
  optional IntegerIndexingConfig integer_indexing_config = 7;

  // OPTIONAL: Describes how string properties can be used as a document joining
  // matcher.
  //
  // Note: currently we only support STRING single joining, so if a property is
  // set as joinable (i.e. joinable_config.value_type is not NONE), then:
  // - DataType should be STRING. Otherwise joinable_config will be ignored.
  // - The property itself and any upper-level (nested doc) property should
  //   contain at most one element (i.e. Cardinality is OPTIONAL or REQUIRED).
  optional JoinableConfig joinable_config = 8;

  // OPTIONAL: Describes how vector properties should be indexed. Vector
  // properties that do not set the indexing config will not be indexed.
  optional EmbeddingIndexingConfig embedding_indexing_config = 10;
}

// List of all supported types constitutes the schema used by Icing.
// Next tag: 2
message SchemaProto {
  // The type definitions that make up this schema.
  repeated SchemaTypeConfigProto types = 1;
}

// Result of a call to IcingSearchEngine.SetSchema
// Next tag: 9
message SetSchemaResultProto {
  // Status code can be one of:
  //   OK
  //   INVALID_ARGUMENT
  //   FAILED_PRECONDITION
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, +WARNING_DATA_LOSS,
  // -INTERNAL. go/icing-library-apis.
  optional StatusProto status = 1;

  // Schema types that existed in the previous schema, but were deleted from the
  // new schema. If ignore_errors_and_delete_documents=true, then all documents
  // of these types were also deleted.
  repeated string deleted_schema_types = 2;

  // Schema types that existed in the previous schema and were incompatible with
  // the new schema type. If ignore_errors_and_delete_documents=true, then any
  // documents that fail validation against the new schema types would also be
  // deleted.
  repeated string incompatible_schema_types = 3;

  // Schema types that did not exist in the previous schema and were added with
  // the new schema type.
  repeated string new_schema_types = 4;

  // Schema types that were changed in a way that was backwards compatible and
  // didn't invalidate the index.
  repeated string fully_compatible_changed_schema_types = 5;

  // Schema types that were changed in a way that was backwards compatible, but
  // invalidated the index.
  repeated string index_incompatible_changed_schema_types = 6;

  // Overall time used for the function call.
  optional int32 latency_ms = 7;

  // Schema types that were changed in a way that was backwards compatible, but
  // invalidated the joinable cache.
  //
  // For example, a property was set non joinable in the old schema definition,
  // but changed to joinable in the new definition. In this case, this property
  // will be considered join incompatible when setting new schema.
  repeated string join_incompatible_changed_schema_types = 8;
}

// Result of a call to IcingSearchEngine.GetSchema
// Next tag: 3
message GetSchemaResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Copy of the Schema proto. Modifying this does not affect the Schema that
  // IcingSearchEngine holds.
  optional SchemaProto schema = 2;
}

// Result of a call to IcingSearchEngine.GetSchemaType
// Next tag: 3
message GetSchemaTypeResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Copy of the SchemaTypeConfig proto with the specified schema_type.
  // Modifying this does not affect the SchemaTypeConfig that IcingSearchEngine
  // holds.
  optional SchemaTypeConfigProto schema_type_config = 2;
}