// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package icing.lib;

import "icing/proto/status.proto";
import "icing/proto/term.proto";

option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";

// Defines the schema that every Document of a specific "type" should adhere
// to. These can be considered as definitions of rich structured types for
// Documents accepted by IcingSearchEngine.
//
// NOTE: Instances of SchemaTypeConfigProto are strongly recommended to be
// based on types defined in schema.org. This makes the data/config/code more
// shareable and easier to extend in the future.
//
// TODO(cassiewang) Define a sample proto file that can be used by tests and for
// documentation.
//
// Next tag: 7
message SchemaTypeConfigProto {
  // REQUIRED: Named type that uniquely identifies the structured, logical
  // schema being defined.
  //
  // Recommended format: Human readable string that's one of the types defined
  // in http://schema.org. Eg: DigitalDocument, Message, Person, etc.
  optional string schema_type = 1;

  // List of all properties that are supported by Documents of this type.
  // A Document should never have properties that are not listed here.
  //
  // TODO(cassiewang) Figure out if we should disallow, ignore or accept
  // unknown properties. Accepting them could make switching between versions
  // easier.
  repeated PropertyConfigProto properties = 4;

  // Version is an arbitrary number that the client may use to keep track of
  // different incarnations of the schema. Icing library imposes no requirements
  // on this field and will not validate it in any way. If a client calls
  // SetSchema with a schema that contains one or more new version numbers, then
  // those version numbers will be updated so long as the SetSchema call
  // succeeds. Clients are free to leave the version number unset, in which case
  // it will default to value == 0.
  optional int32 version = 5;

  // An experimental field to mark this type as a subtype of parent_types, which
  // enables parent_types to be interpreted as its subtypes in the context of
  // the Search APIs, including schema type filters and projections specified in
  // TypePropertyMask.
  repeated string parent_types = 6;

  // Tags 2 and 3 are retired and must not be reused.
  reserved 2, 3;
}

// Describes how a string property should be indexed.
// Next tag: 3
message StringIndexingConfig {
  // Indicates how the content of this property should be matched in the index.
  //
  // TermMatchType.Code=UNKNOWN
  // Content in this property will not be tokenized or indexed. Useful if the
  // data type is not indexable. See schema-util for details.
  //
  // TermMatchType.Code=EXACT_ONLY
  // Content in this property should only be returned for queries matching the
  // exact tokens appearing in this property.
  // Ex. A property with "fool" should NOT match a query for "foo".
  //
  // TermMatchType.Code=PREFIX
  // Content in this property should be returned for queries that are either
  // exact matches or prefix matches of the tokens appearing in this property.
  // Ex. A property with "fool" *should* match a query for "foo".
  optional TermMatchType.Code term_match_type = 1;

  // Wrapper message that scopes the tokenizer Code enum.
  message TokenizerType {
    enum Code {
      // It is only valid for tokenizer_type to be 'NONE' if the data type is
      // not indexed.
      NONE = 0;

      // Tokenization for plain text.
      PLAIN = 1;

      // Tokenizes text verbatim. This means no normalization or segmentation
      // is applied to string values that are tokenized using this type.
      // Therefore, the output token is equivalent to the raw string text. For
      // example, "Hello, world!" would be tokenized as "Hello, world!"
      // preserving punctuation and capitalization, and not creating separate
      // tokens between the space.
      VERBATIM = 2;

      // Tokenizes text as an email address. This means it will tokenize a
      // string into multiple emails, and further tokenize those into parts of
      // an email address. These parts include the local address, host
      // components, local components, as well as the name and comments. For
      // example, "User (comment) <user@domain.com>" would be tokenized into a
      // "User" name token, a "comment" comment token, a "user" local address, a
      // "user" local component token, a "domain" host component token, a "com"
      // host component token, a "user@domain.com" address token, and the entire
      // original string as an rfc822 token.
      // See more here: https://datatracker.ietf.org/doc/html/rfc822
      RFC822 = 3;

      // Tokenizes text as a URL. This tokenizes a url string into a token for
      // each component in the url, as well as any significant url suffixes.
      // For example,
      // https://www.google.com/path/subpath?query#ref would be tokenized into a
      // scheme token "https"; 3 host tokens "www", "google", "com"; 2 path
      // tokens "path", "subpath"; a query token "query"; a reference token
      // "ref"; and 3 suffix tokens
      // "https://www.google.com/path/subpath?query#ref",
      // "www.google.com/path/subpath?query#ref",
      // "google.com/path/subpath?query#ref".
      // Currently only supports tokenization of one url string at a time
      // i.e. the input string cannot have spaces in the middle, but can have
      // leading or trailing spaces.
      URL = 4;
    }
  }

  // Selects which tokenizer (see TokenizerType.Code) is applied to the
  // content of this property.
  optional TokenizerType.Code tokenizer_type = 2;
}

// Describes how a document property should be indexed.
// Next tag: 2
message DocumentIndexingConfig {
  // OPTIONAL: Whether nested properties within the document property should be
  // indexed. If true, then the nested properties will be indexed according to
  // the property's own indexing configurations. If false, nested documents'
  // properties will not be indexed even if they have an indexing configuration.
  //
  // The default value is false.
  optional bool index_nested_properties = 1;
}

// Describes how an int64 property should be indexed.
// Next tag: 3
// NOTE(review): only tag 1 is defined in this message. Confirm whether tag 2
// was retired (and should be `reserved`) or whether this should read 2.
message IntegerIndexingConfig {
  // OPTIONAL: Indicates how the int64 contents of this property should be
  // matched.
  //
  // The default value is UNKNOWN.
  message NumericMatchType {
    enum Code {
      // Contents in this property will not be indexed. Useful if the int64
      // property type is not indexable.
      UNKNOWN = 0;

      // Contents in this property should only be returned for queries matching
      // the range.
      RANGE = 1;
    }
  }
  optional NumericMatchType.Code numeric_match_type = 1;
}

// Describes how a property can be used to join this document with another
// document. See JoinSpecProto (in search.proto) for more details.
// Next tag: 3
message JoinableConfig {
  // OPTIONAL: Indicates what joinable type the content value of this property
  // is.
  //
  // The default value is NONE.
  message ValueType {
    enum Code {
      // Value in this property is not joinable.
      NONE = 0;

      // Value in this property is a joinable (string) qualified id, which is
      // composed of namespace and uri.
      // See JoinSpecProto (in search.proto) and DocumentProto (in
      // document.proto) for more details about qualified id, namespace and uri.
      QUALIFIED_ID = 1;
    }
  }
  optional ValueType.Code value_type = 1;

  // If the parent document a child document is joined to is deleted, delete the
  // child document as well. This will only apply to children joined through
  // QUALIFIED_ID, other (future) joinable value types won't use it.
  optional bool propagate_delete = 2 [default = false];
}

// Describes the schema of a single property of Documents that belong to a
// specific SchemaTypeConfigProto. These can be considered as a rich, structured
// type for each property of Documents accepted by IcingSearchEngine.
// Next tag: 9
message PropertyConfigProto {
  // REQUIRED: Name that uniquely identifies a property within a Document of
  // a specific SchemaTypeConfigProto.
  //
  // Recommended format: Human readable string that's one of the properties
  // defined in schema.org for the parent SchemaTypeConfigProto.
  // Eg: 'author' for http://schema.org/DigitalDocument.
  // Eg: 'address' for http://schema.org/Place.
  optional string property_name = 1;

  // REQUIRED: Physical data-types of the contents of the property.
  message DataType {
    enum Code {
      // This value should never purposely be used. This is used for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      STRING = 1;
      INT64 = 2;
      DOUBLE = 3;
      BOOLEAN = 4;

      // Unstructured BLOB.
      BYTES = 5;

      // Indicates that the property itself is a Document, making it part of
      // a hierarchical Document schema. Any property using this data_type
      // MUST have a valid 'schema_type'.
      DOCUMENT = 6;
    }
  }
  optional DataType.Code data_type = 2;

  // REQUIRED if (data_type == DOCUMENT). OPTIONAL otherwise.
  // Indicates the logical schema-type of the contents of this property.
  //
  // TODO(cassiewang): This could be useful for non-document properties, e.g.
  // to set this field as a schema.org/address for some string property.
  // Re-evaluate what recommendation we should give clients if we want to start
  // using this for non-document properties as well.
  //
  // Recommended format: Human readable string that is one of the types defined
  // in schema.org, matching the SchemaTypeConfigProto.schema_type of another
  // type.
  optional string schema_type = 3;

  // REQUIRED: The cardinality of the property.
  message Cardinality {
    // NOTE: The order of the cardinality is purposefully set to be from least
    // restrictive (REPEATED) to most restrictive (REQUIRED). This makes it
    // easier to check if a field is backwards compatible by doing a simple
    // greater-than/less-than check on the enum ints. Changing/adding new
    // cardinalities should be done cautiously.
    enum Code {
      // This should never purposely be set. This is used for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      // Any number of items (including zero) [0...*].
      REPEATED = 1;

      // Zero or one value [0,1].
      OPTIONAL = 2;

      // Exactly one value [1].
      REQUIRED = 3;
    }
  }
  optional Cardinality.Code cardinality = 4;

  // OPTIONAL: Describes how string properties should be indexed. String
  // properties that do not set the indexing config will not be indexed.
  optional StringIndexingConfig string_indexing_config = 5;

  // OPTIONAL: Describes how document properties should be indexed.
  optional DocumentIndexingConfig document_indexing_config = 6;

  // OPTIONAL: Describes how int64 properties should be indexed. Int64
  // properties that do not set the indexing config will not be indexed.
  optional IntegerIndexingConfig integer_indexing_config = 7;

  // OPTIONAL: Describes how string properties can be used as a document joining
  // matcher.
  //
  // Note: currently we only support STRING single joining, so if a property is
  // set as joinable (i.e. joinable_config.value_type is not NONE), then:
  // - DataType should be STRING. Otherwise joinable_config will be ignored.
  // - The property itself and any upper-level (nested doc) property should
  //   contain at most one element (i.e. Cardinality is OPTIONAL or REQUIRED).
  optional JoinableConfig joinable_config = 8;
}

// List of all supported types constitutes the schema used by Icing.
// Next tag: 2
message SchemaProto {
  // The schema type definitions that together make up the schema.
  repeated SchemaTypeConfigProto types = 1;
}

// Result of a call to IcingSearchEngine.SetSchema
// Next tag: 9
message SetSchemaResultProto {
  // Status code can be one of:
  //   OK
  //   INVALID_ARGUMENT
  //   FAILED_PRECONDITION
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, +WARNING_DATA_LOSS,
  // -INTERNAL. go/icing-library-apis.
  optional StatusProto status = 1;

  // Schema types that existed in the previous schema, but were deleted from the
  // new schema. If ignore_errors_and_delete_documents=true, then all documents
  // of these types were also deleted.
  repeated string deleted_schema_types = 2;

  // Schema types that existed in the previous schema and were incompatible with
  // the new schema type. If ignore_errors_and_delete_documents=true, then any
  // documents that fail validation against the new schema types would also be
  // deleted.
  repeated string incompatible_schema_types = 3;

  // Schema types that did not exist in the previous schema and were added with
  // the new schema type.
  repeated string new_schema_types = 4;

  // Schema types that were changed in a way that was backwards compatible and
  // didn't invalidate the index.
  repeated string fully_compatible_changed_schema_types = 5;

  // Schema types that were changed in a way that was backwards compatible, but
  // invalidated the index.
  repeated string index_incompatible_changed_schema_types = 6;

  // Overall time used for the function call.
  optional int32 latency_ms = 7;

  // Schema types that were changed in a way that was backwards compatible, but
  // invalidated the joinable cache.
  //
  // For example, a property was set non joinable in the old schema definition,
  // but changed to joinable in the new definition. In this case, this property
  // will be considered join incompatible when setting new schema.
  repeated string join_incompatible_changed_schema_types = 8;
}

// Result of a call to IcingSearchEngine.GetSchema
// Next tag: 3
message GetSchemaResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Copy of the Schema proto. Modifying this does not affect the Schema that
  // IcingSearchEngine holds.
  optional SchemaProto schema = 2;
}

// Result of a call to IcingSearchEngine.GetSchemaType
// Next tag: 3
message GetSchemaTypeResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Copy of the SchemaTypeConfig proto with the specified schema_type.
  // Modifying this does not affect the SchemaTypeConfig that IcingSearchEngine
  // holds.
  optional SchemaTypeConfigProto schema_type_config = 2;
}