• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2019 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto2";
16
17package icing.lib;
18
19import "icing/proto/status.proto";
20import "icing/proto/term.proto";
21
22option java_package = "com.google.android.icing.proto";
23option java_multiple_files = true;
24option objc_class_prefix = "ICNG";
25
26// Defines the schema that every Document of a specific "type" should adhere
27// to. These can be considered as definitions of rich structured types for
28// Documents accepted by IcingSearchEngine.
29//
30// NOTE: Instances of SchemaTypeConfigProto are strongly recommended to be
31// based on types defined in schema.org. This makes the data/config/code more
32// shareable and easier to extend in the future.
33//
34// TODO(cassiewang) Define a sample proto file that can be used by tests and for
35// documentation.
36//
37// Next tag: 9
38message SchemaTypeConfigProto {
39  // REQUIRED: Named type that identifies the structured, logical schema being
40  // defined. Declared `optional`, so this is not enforced by proto parsing.
41  //
42  // Recommended format: Human readable string that's one of the types defined
43  // in http://schema.org. Eg: DigitalDocument, Message, Person, etc.
44  optional string schema_type = 1;
45
46  // OPTIONAL: A natural language description of the SchemaTypeConfigProto.
47  //
48  // This string is not used by Icing in any way. It simply exists to allow
49  // users to store semantic information about the SchemaTypeConfigProto for
50  // future retrieval.
51  optional string description = 7;
52
53  // OPTIONAL: Identifies a database that the schema type belongs to. This
54  // groups schema types that are related to each other, which is useful for
55  // setting or retrieving a subset of schema types. If unset, the schema type
56  // will be considered as part of the default empty database group.
57  //
58  // NOTE: Only schemas from one database can be set in a single SetSchema call.
59  // Please use multiple SetSchema calls if you want to set schemas across
60  // multiple databases.
61  optional string database = 8;
62
63  // List of all properties that are supported by Documents of this type.
64  // A Document should never have properties that are not listed here.
65  //
66  // TODO(cassiewang) Figure out if we should disallow, ignore or accept
67  // unknown properties. Accepting them could make switching between versions
68  // easier.
69  repeated PropertyConfigProto properties = 4;
70
71  // Version is an arbitrary number that the client may use to keep track of
72  // different incarnations of the schema. Icing library imposes no requirements
73  // on this field and will not validate it in any way. If a client calls
74  // SetSchema with a schema that contains one or more new version numbers, then
75  // those version numbers will be updated so long as the SetSchema call
76  // succeeds. Clients are free to leave the version number unset, in which case
77  // it will default to value == 0.
78  optional int32 version = 5;
79
80  // An experimental field that marks this type as a subtype of parent_types,
81  // which enables parent_types to be interpreted as its subtypes in the context
82  // of the Search APIs, including schema type filters and projections specified
83  // in TypePropertyMask.
84  repeated string parent_types = 6;
85
86  reserved 2, 3;  // Tags 2 and 3 are reserved and must not be reused.
87}
88
89// Describes how a string property should be indexed.
// Referenced by PropertyConfigProto.string_indexing_config.
90// Next tag: 3
91message StringIndexingConfig {
92  // Indicates how the content of this property should be matched in the index.
93  //
94  // TermMatchType.Code=UNKNOWN
95  // Content in this property will not be tokenized or indexed. Useful if the
96  // data type is not indexable. See schema-util for details.
97  //
98  // TermMatchType.Code=EXACT_ONLY
99  // Content in this property should only be returned for queries matching the
100  // exact tokens appearing in this property.
101  // Ex. A property with "fool" should NOT match a query for "foo".
102  //
103  // TermMatchType.Code=PREFIX
104  // Content in this property should be returned for queries that are either
105  // exact matches or prefixes of the tokens appearing in this property.
106  // Ex. A property with "fool" *should* match a query for "foo".
107  //
108  // TermMatchType.Code=STEMMING
109  // Content in this property should also be returned for queries that are stems
110  // of the tokens appearing in this property.
111  // Ex. A property with "running" *should* match a query for "run".
112  //
113  // TODO: b/344915547 - Refactor this to be a repeated field so that clients
114  // can choose multiple fuzzy match types.
115  optional TermMatchType.Code term_match_type = 1;
116
  // Indicates how the content of this property should be tokenized.
117  message TokenizerType {
118    enum Code {
119      // It is only valid for tokenizer_type to be 'NONE' if the data type is
120      // not indexed.
121      NONE = 0;
122
123      // Tokenization for plain text.
124      PLAIN = 1;
125
126      // Tokenizes text verbatim. This means no normalization or segmentation
127      // is applied to string values that are tokenized using this type.
128      // Therefore, the output token is equivalent to the raw string text. For
129      // example, "Hello, world!" would be tokenized as "Hello, world!"
130      // preserving punctuation and capitalization, and not creating separate
131      // tokens at the space.
132      VERBATIM = 2;
133
134      // Tokenizes text as an email address. This means it will tokenize a
135      // string into multiple emails, and further tokenize those into parts of
136      // an email address. These parts include the local address, host
137      // components, local components, as well as the name and comments. For
138      // example, "User (comment) <user@domain.com>" would be tokenized into a
139      // "User" name token, a "comment" comment token, a "user" local address, a
140      // "user" local component token, a "domain" host component token, a "com"
141      // host component token, a "user@domain.com" address token, and the entire
142      // original string as an rfc822 token.
143      // See more here: https://datatracker.ietf.org/doc/html/rfc822
144      RFC822 = 3;
145
146      // Tokenizes text as a URL. This tokenizes a url string into a
147      // token for each component in the url, as well as any significant
148      // url suffixes. For example,
149      // https://www.google.com/path/subpath?query#ref would be tokenized into a
150      // scheme token "https"; 3 host tokens "www", "google", "com"; 2 path
151      // tokens "path", "subpath"; a query token "query"; a reference token
152      // "ref"; and 3 suffix tokens
153      // "https://www.google.com/path/subpath?query#ref",
154      // "www.google.com/path/subpath?query#ref",
155      // "google.com/path/subpath?query#ref".
156      // Currently only supports tokenization of one url string at a time
157      // i.e. the input string cannot have spaces in the middle, but can have
158      // leading or trailing spaces.
159      URL = 4;
160    }
161  }
162  optional TokenizerType.Code tokenizer_type = 2;
163}
164
165// Describes how a document property should be indexed.
// Referenced by PropertyConfigProto.document_indexing_config.
166// Next tag: 3
167message DocumentIndexingConfig {
168  // OPTIONAL: Whether nested properties within the document property should be
169  // indexed. If true, then all nested properties will be indexed according to
170  // each nested property's own indexing configuration. If false, nested
171  // documents' properties will not be indexed even if they have an indexing
  // configuration (unless listed in indexable_nested_properties_list).
172  //
173  // The default value is false.
174  optional bool index_nested_properties = 1;
175
176  // List of nested properties within the document to index. Only the
177  // provided list of properties will be indexed, each according to its own
178  // indexing configuration.
179  //
180  // index_nested_properties must be false in order to use this feature.
181  repeated string indexable_nested_properties_list = 2;
182}
183
184// Describes how an int64 property should be indexed.
// Referenced by PropertyConfigProto.integer_indexing_config.
185// Next tag: 3
// NOTE(review): only tag 1 is defined below and no tag is reserved; confirm
// whether tag 2 was used historically or whether this should read 2.
186message IntegerIndexingConfig {
187  // OPTIONAL: Indicates how the int64 contents of this property should be
188  // matched.
189  //
190  // The default value is UNKNOWN.
191  message NumericMatchType {
192    enum Code {
193      // Contents in this property will not be indexed. Useful if the int64
194      // property type is not indexable.
195      UNKNOWN = 0;
196
197      // Contents in this property should only be returned for queries matching
198      // the range.
199      RANGE = 1;
200    }
201  }
202  optional NumericMatchType.Code numeric_match_type = 1;
203}
204
205// Describes how a vector property should be indexed.
// Referenced by PropertyConfigProto.embedding_indexing_config.
206// Next tag: 3
207message EmbeddingIndexingConfig {
208  // OPTIONAL: Indicates how the vector contents of this property should be
209  // matched.
210  //
211  // The default value is UNKNOWN.
212  message EmbeddingIndexingType {
213    enum Code {
214      // Contents in this property will not be indexed. Useful if the vector
215      // property type is not indexable.
216      UNKNOWN = 0;
217
218      // Contents in this property will be indexed for linear search.
219      LINEAR_SEARCH = 1;
220    }
221  }
222  optional EmbeddingIndexingType.Code embedding_indexing_type = 1;
223
224  // OPTIONAL: Indicates whether the vector contents of this property should be
225  // quantized. Quantization can reduce the size of the embedding search index,
226  // potentially leading to faster embedding search due to lower I/O bandwidth.
227  //
228  // Quantization is usually very reliable and in most cases will have a
229  // negligible impact on recall. Using quantization is strongly recommended.
230  //
231  // The default value is NONE (no quantization).
232  message QuantizationType {
233    enum Code {
234      // Contents in this property will not be quantized.
235      NONE = 0;
236      // Contents in this property will be quantized to 8 bits.
237      QUANTIZE_8_BIT = 1;
238    }
239  }
240  optional QuantizationType.Code quantization_type = 2;
241}
242
243// Describes how a property can be used to join this document with another
244// document. See JoinSpecProto (in search.proto) for more details.
// Referenced by PropertyConfigProto.joinable_config.
245// Next tag: 4
246message JoinableConfig {
247  // OPTIONAL: Indicates what joinable type the content value of this property
248  // is.
249  //
250  // The default value is NONE.
251  message ValueType {
252    enum Code {
253      // Value in this property is not joinable.
254      NONE = 0;
255
256      // Value in this property is a joinable (string) qualified id, which is
257      // composed of namespace and uri.
258      // See JoinSpecProto (in search.proto) and DocumentProto (in
259      // document.proto) for more details about qualified id, namespace and uri.
260      QUALIFIED_ID = 1;
261    }
262  }
263  optional ValueType.Code value_type = 1;
264
265  // OPTIONAL: Indicates how to propagate the deletion between the document and
266  // the (referenced) joinable document.
267  //
268  // The default value is NONE.
269  //
270  // If delete propagation is enabled (i.e. not NONE), then value_type must be
271  // QUALIFIED_ID.
272  message DeletePropagationType {
273    enum Code {
274      // No delete propagation.
275      NONE = 0;
276
277      // Propagate delete from the referenced document to this document.
278      PROPAGATE_FROM = 1;
279    }
280  }
281  optional DeletePropagationType.Code delete_propagation_type = 3;
282
283  reserved 2;  // Tag 2 is reserved and must not be reused.
284}
285
286// Describes the schema of a single property of Documents that belong to a
287// specific SchemaTypeConfigProto. These can be considered as a rich, structured
288// type for each property of Documents accepted by IcingSearchEngine.
//
// NOTE: Fields marked REQUIRED below are declared `optional` in proto2, so the
// requirement is not enforced by proto parsing itself.
289// Next tag: 12
290message PropertyConfigProto {
291  // REQUIRED: Name that uniquely identifies a property within a Document of
292  // a specific SchemaTypeConfigProto.
293  //
294  // Recommended format: Human readable string that's one of the properties
295  // defined in schema.org for the parent SchemaTypeConfigProto.
296  // Eg: 'author' for http://schema.org/DigitalDocument.
297  // Eg: 'address' for http://schema.org/Place.
298  optional string property_name = 1;
299
300  // OPTIONAL: A natural language description of the property.
301  //
302  // This string is not used by Icing in any way. It simply exists to allow
303  // users to store semantic information about the PropertyConfigProto for
304  // future retrieval.
305  optional string description = 9;
306
307  // REQUIRED: Physical data-types of the contents of the property.
308  message DataType {
309    enum Code {
310      // This value should never purposely be used. This is used for backwards
311      // compatibility reasons.
312      UNKNOWN = 0;
313
314      STRING = 1;
315      INT64 = 2;
316      DOUBLE = 3;
317      BOOLEAN = 4;
318
319      // Unstructured BLOB.
320      BYTES = 5;
321
322      // Indicates that the property itself is a Document, making it part of
323      // a hierarchical Document schema. Any property using this data_type
324      // MUST have a valid 'schema_type'.
325      DOCUMENT = 6;
326
327      // A list of floats. Vector type is used for embedding searches.
328      VECTOR = 7;
329
330      // A handle to uniquely identify a large blob of data.
331      BLOB_HANDLE = 8;
332    }
333  }
334  optional DataType.Code data_type = 2;
335
336  // REQUIRED if (data_type == DOCUMENT). OPTIONAL otherwise.
337  // Indicates the logical schema-type of the contents of this property.
338  //
339  // TODO(cassiewang): This could be useful for non-document properties, e.g.
340  // to set this field as a schema.org/address for some string property.
341  // Re-evaluate what recommendation we should give clients if we want to start
342  // using this for non-document properties as well.
343  //
344  // Recommended format: Human readable string that is one of the types defined
345  // in schema.org, matching the SchemaTypeConfigProto.schema_type of another
346  // type.
347  optional string schema_type = 3;
348
349  // REQUIRED: The cardinality of the property.
350  message Cardinality {
351    // NOTE: The order of the cardinality is purposefully set to be from least
352    // restrictive (REPEATED) to most restrictive (REQUIRED). This makes it
353    // easier to check if a field is backwards compatible by doing a simple
354    // greater-than/less-than check on the enum ints. Changing/adding new
355    // cardinalities should be done cautiously.
356    enum Code {
357      // This should never purposely be set. This is used for backwards
358      // compatibility reasons.
359      UNKNOWN = 0;
360
361      // Any number of items (including zero) [0...*].
362      REPEATED = 1;
363
364      // Zero or one value [0,1].
365      OPTIONAL = 2;
366
367      // Exactly one value [1].
368      REQUIRED = 3;
369    }
370  }
371  optional Cardinality.Code cardinality = 4;
372
373  // OPTIONAL: Describes how string properties should be indexed. String
374  // properties that do not set the indexing config will not be indexed.
375  optional StringIndexingConfig string_indexing_config = 5;
376
377  // OPTIONAL: Describes how document properties should be indexed.
378  optional DocumentIndexingConfig document_indexing_config = 6;
379
380  // OPTIONAL: Describes how int64 properties should be indexed. Int64
381  // properties that do not set the indexing config will not be indexed.
382  optional IntegerIndexingConfig integer_indexing_config = 7;
383
384  // OPTIONAL: Describes how string properties can be used as a document joining
385  // matcher.
386  //
387  // Note: currently we only support STRING single joining, so if a property is
388  // set as joinable (i.e. joinable_config.value_type is not NONE), then:
389  // - DataType should be STRING. Otherwise joinable_config will be ignored.
390  // - The property itself and any upper-level (nested doc) property should
391  //   contain at most one element (i.e. Cardinality is OPTIONAL or REQUIRED).
392  optional JoinableConfig joinable_config = 8;
393
394  // OPTIONAL: Describes how vector properties should be indexed. Vector
395  // properties that do not set the indexing config will not be indexed.
396  optional EmbeddingIndexingConfig embedding_indexing_config = 10;
397
398  // OPTIONAL: Describes how a property can be used for scoring.
399  //
400  // The ScorableType should only be enabled for the following data types:
401  // - INT64
402  // - DOUBLE
403  // - BOOLEAN
404  message ScorableType {
405    enum Code {
406      // This value should not be used on purpose.
407      // It will be treated as DISABLED in icing.
408      UNKNOWN = 0;
409
410      // Property is disabled for scoring.
411      DISABLED = 1;
412
413      // Property is enabled for scoring.
414      ENABLED = 2;
415    }
416  }
417  optional ScorableType.Code scorable_type = 11;
418}
419
420// The list of all supported types, which constitutes the schema used by Icing.
421// Next tag: 2
422message SchemaProto {
  // Type definitions that make up the schema.
423  repeated SchemaTypeConfigProto types = 1;
424}
425
426// Request for a call to IcingSearchEngine.SetSchema
427// Next tag: 4
428message SetSchemaRequestProto {
429  // REQUIRED: The new schema to set. This will replace the existing schema
430  // stored in IcingSearchEngine. Declared `optional`, so this is not enforced
  // by proto parsing.
431  //
432  // schema.types is allowed to be empty. In this case, the SetSchema call will
433  // try to delete all types and indexed documents for the provided database,
434  // which is only allowed if ignore_errors_and_delete_documents=true.
435  optional SchemaProto schema = 1;
436
437  // OPTIONAL: The database for the set schema request. Only schema types for
438  // this database will be modified.
439  //
440  // For a valid set schema request, this must match the database fields of
441  // schema.types.
442  //
443  // If unset, the default empty database is assumed for the set schema request.
444  optional string database = 2;
445
446  // OPTIONAL: Whether to ignore errors and delete documents when setting the
447  // schema.
448  //
449  // If true, then Icing will try to set the schema even if it is incompatible.
450  // In that case, documents that are invalidated by the new schema would be
451  // deleted from Icing. This cannot be used to force set an invalid schema.
452  //
453  // The default value is false.
454  optional bool ignore_errors_and_delete_documents = 3;
455}
456
457// Result of a call to IcingSearchEngine.SetSchema
458// Next tag: 9
459message SetSchemaResultProto {
460  // Status code can be one of:
461  //   OK
462  //   INVALID_ARGUMENT
463  //   FAILED_PRECONDITION
464  //   INTERNAL
465  //
466  // See status.proto for more details.
467  //
468  // TODO(b/147699081): Fix error codes: +ABORTED, +WARNING_DATA_LOSS,
469  // -INTERNAL. go/icing-library-apis.
470  optional StatusProto status = 1;
471
472  // Schema types that existed in the previous schema, but were deleted from the
473  // new schema. If ignore_errors_and_delete_documents=true, then all documents
474  // of these types were also deleted.
475  repeated string deleted_schema_types = 2;
476
477  // Schema types that existed in the previous schema and were incompatible with
478  // the new schema type. If ignore_errors_and_delete_documents=true, then any
479  // documents that fail validation against the new schema types would also be
480  // deleted.
481  repeated string incompatible_schema_types = 3;
482
483  // Schema types that did not exist in the previous schema and were added with
484  // the new schema type.
485  repeated string new_schema_types = 4;
486
487  // Schema types that were changed in a way that was backwards compatible and
488  // didn't invalidate the index.
489  repeated string fully_compatible_changed_schema_types = 5;
490
491  // Schema types that were changed in a way that was backwards compatible, but
492  // invalidated the index.
493  repeated string index_incompatible_changed_schema_types = 6;
494
495  // Overall time used for the function call, in milliseconds.
496  optional int32 latency_ms = 7;
497
498  // Schema types that were changed in a way that was backwards compatible, but
499  // invalidated the joinable cache.
500  //
501  // For example, a property was set non joinable in the old schema definition,
502  // but changed to joinable in the new definition. In this case, this property
503  // will be considered join incompatible when setting new schema.
504  repeated string join_incompatible_changed_schema_types = 8;
505}
506
507// Result of a call to IcingSearchEngine.GetSchema
508// Next tag: 3
509message GetSchemaResultProto {
510  // Status code can be one of:
511  //   OK
512  //   FAILED_PRECONDITION
513  //   NOT_FOUND
514  //   INTERNAL
515  //
516  // See status.proto for more details.
517  //
518  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
519  // go/icing-library-apis.
520  optional StatusProto status = 1;
521
522  // Copy of the SchemaProto. Modifying this does not affect the schema that
523  // IcingSearchEngine holds.
524  optional SchemaProto schema = 2;
525}
526
527// Result of a call to IcingSearchEngine.GetSchemaType
528// Next tag: 3
529message GetSchemaTypeResultProto {
530  // Status code can be one of:
531  //   OK
532  //   FAILED_PRECONDITION
533  //   NOT_FOUND
534  //   INTERNAL
535  //
536  // See status.proto for more details.
537  //
538  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
539  // go/icing-library-apis.
540  optional StatusProto status = 1;
541
542  // Copy of the SchemaTypeConfigProto with the specified schema_type.
543  // Modifying this does not affect the SchemaTypeConfigProto that
544  // IcingSearchEngine holds.
545  optional SchemaTypeConfigProto schema_type_config = 2;
546}
547