• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2019 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto2";
16
17package icing.lib;
18
19import "icing/proto/status.proto";
20import "icing/proto/term.proto";
21
22option java_package = "com.google.android.icing.proto";
23option java_multiple_files = true;
24option objc_class_prefix = "ICNG";
25
// Schema definition that every Document of one particular "type" must conform
// to. Each instance can be viewed as the definition of a rich, structured type
// for Documents accepted by IcingSearchEngine.
//
// NOTE: It is strongly recommended that SchemaTypeConfigProto instances be
// modeled on types defined in schema.org, which keeps data/config/code more
// shareable and easier to extend in the future.
//
// TODO(cassiewang) Define a sample proto file that can be used by tests and for
// documentation.
//
// Next tag: 8
message SchemaTypeConfigProto {
  // Tags 2 and 3 are reserved and must not be assigned to any field.
  reserved 2, 3;

  // REQUIRED: The named type that uniquely identifies the structured, logical
  // schema being defined.
  //
  // Recommended format: a human-readable string that is one of the types
  // defined in http://schema.org, e.g. DigitalDocument, Message, Person.
  optional string schema_type = 1;

  // OPTIONAL: Natural-language description of this SchemaTypeConfigProto.
  //
  // Icing does not use this string in any way; it exists only so that users
  // can store semantic information about the SchemaTypeConfigProto and
  // retrieve it later.
  optional string description = 7;

  // All properties supported by Documents of this type. A Document should
  // never have a property that is not listed here.
  //
  // TODO(cassiewang) Figure out if we should disallow, ignore or accept
  // unknown properties. Accepting them could make switching between versions
  // easier.
  repeated PropertyConfigProto properties = 4;

  // An arbitrary number that the client may use to keep track of different
  // incarnations of the schema. The Icing library imposes no requirements on
  // this field and does not validate it in any way. When a client calls
  // SetSchema with a schema that contains one or more new version numbers,
  // those version numbers are updated as long as the SetSchema call succeeds.
  // Clients may leave the version unset, in which case it defaults to 0.
  optional int32 version = 5;

  // Experimental: marks this type as a subtype of parent_types, which enables
  // parent_types to be interpreted as its subtypes in the context of the
  // Search APIs, including schema type filters and projections specified in
  // TypePropertyMask.
  repeated string parent_types = 6;
}
78
// Indexing configuration for a string property.
// Next tag: 3
message StringIndexingConfig {
  // Wrapper message that scopes the tokenizer enum values.
  message TokenizerType {
    enum Code {
      // 'NONE' is only a valid tokenizer_type when the data type is not
      // indexed.
      NONE = 0;

      // Tokenization for plain text.
      PLAIN = 1;

      // Verbatim tokenization: no normalization or segmentation is applied to
      // string values tokenized with this type, so the output token is
      // equivalent to the raw string text. For example, "Hello, world!" is
      // tokenized as "Hello, world!", preserving punctuation and
      // capitalization and not creating separate tokens at the space.
      VERBATIM = 2;

      // Tokenizes text as an email address: the string is tokenized into
      // multiple emails, and those are further tokenized into the parts of an
      // email address. These parts include the local address, host
      // components, local components, as well as the name and comments. For
      // example, "User (comment) <user@domain.com>" is tokenized into a
      // "User" name token, a "comment" comment token, a "user" local address,
      // a "user" local component token, a "domain" host component token, a
      // "com" host component token, a "user@domain.com" address token, and
      // the entire original string as an rfc822 token.
      // See more here: https://datatracker.ietf.org/doc/html/rfc822
      RFC822 = 3;

      // Tokenizes text as a url address: a url string is tokenized into a
      // token for each component in the url, as well as any significant url
      // suffixes. For example,
      // https://www.google.com/path/subpath?query#ref is tokenized into a
      // scheme token "https"; 3 host tokens "www", "google", "com"; 2 path
      // tokens "path", "subpath"; a query token "query"; a reference token
      // "ref"; and 3 suffix tokens
      // "https://www.google.com/path/subpath?query#ref",
      // "www.google.com/path/subpath?query#ref",
      // "google.com/path/subpath?query#ref".
      // Currently only tokenization of one url string at a time is supported,
      // i.e. the input string cannot have spaces in the middle, but can have
      // leading or trailing spaces.
      URL = 4;
    }
  }

  // How the content of this property should be matched in the index.
  //
  // TermMatchType.Code=UNKNOWN
  // Content in this property will not be tokenized or indexed. Useful if the
  // data type is not indexable. See schema-util for details.
  //
  // TermMatchType.Code=EXACT_ONLY
  // Content in this property should only be returned for queries matching the
  // exact tokens appearing in this property.
  // Ex. A property with "fool" should NOT match a query for "foo".
  //
  // TermMatchType.Code=PREFIX
  // Content in this property should be returned for queries that either
  // exactly match, or are a prefix of, the tokens appearing in this property.
  // Ex. A property with "fool" *should* match a query for "foo".
  optional TermMatchType.Code term_match_type = 1;

  // The tokenizer for this property; see TokenizerType.Code for the choices.
  optional TokenizerType.Code tokenizer_type = 2;
}
146
// Indexing configuration for a document property.
// Next tag: 3
message DocumentIndexingConfig {
  // OPTIONAL: Whether the nested properties within the document property
  // should be indexed.
  //   true:  all nested properties are indexed according to the property's
  //          own indexing configurations.
  //   false: nested documents' properties are not indexed, even if they have
  //          an indexing configuration.
  //
  // The default value is false.
  optional bool index_nested_properties = 1;

  // Nested properties within the document that should be indexed. Only the
  // properties in this list are indexed, each according to the property's
  // indexing configurations.
  //
  // index_nested_properties must be false in order to use this feature.
  repeated string indexable_nested_properties_list = 2;
}
165
// Indexing configuration for an int64 property.
// Next tag: 3
// NOTE(review): only tag 1 is declared in this message; confirm whether tag 2
// was used previously before assigning it to a new field.
message IntegerIndexingConfig {
  // Wrapper message that scopes the numeric match type enum values.
  message NumericMatchType {
    enum Code {
      // Contents in this property will not be indexed. Useful if the int64
      // property type is not indexable.
      UNKNOWN = 0;

      // Contents in this property should only be returned for queries
      // matching the range.
      RANGE = 1;
    }
  }

  // OPTIONAL: How the int64 contents of this property should be matched.
  //
  // The default value is UNKNOWN.
  optional NumericMatchType.Code numeric_match_type = 1;
}
186
// Indexing configuration for a vector property.
// Next tag: 3
// NOTE(review): only tag 1 is declared in this message; confirm whether tag 2
// was used previously before assigning it to a new field.
message EmbeddingIndexingConfig {
  // Wrapper message that scopes the embedding indexing type enum values.
  message EmbeddingIndexingType {
    enum Code {
      // Contents in this property will not be indexed. Useful if the vector
      // property type is not indexable.
      UNKNOWN = 0;

      // Contents in this property will be indexed for linear search.
      LINEAR_SEARCH = 1;
    }
  }

  // OPTIONAL: How the vector contents of this property should be matched.
  //
  // The default value is UNKNOWN.
  optional EmbeddingIndexingType.Code embedding_indexing_type = 1;
}
206
// Configuration for using a property to join this document with another
// document. See JoinSpecProto (in search.proto) for more details.
// Next tag: 3
message JoinableConfig {
  // Wrapper message that scopes the joinable value type enum values.
  message ValueType {
    enum Code {
      // Value in this property is not joinable.
      NONE = 0;

      // Value in this property is a joinable (string) qualified id, which is
      // composed of namespace and uri.
      // See JoinSpecProto (in search.proto) and DocumentProto (in
      // document.proto) for more details about qualified id, namespace and
      // uri.
      QUALIFIED_ID = 1;
    }
  }

  // OPTIONAL: The joinable type of the content value of this property.
  //
  // The default value is NONE.
  optional ValueType.Code value_type = 1;

  // When the parent document that a child document is joined to is deleted,
  // delete the child document as well. This only applies to children joined
  // through QUALIFIED_ID; other (future) joinable value types won't use it.
  optional bool propagate_delete = 2 [default = false];
}
234
// Schema of a single property of Documents that belong to a specific
// SchemaTypeConfigProto. Each instance can be viewed as a rich, structured
// type for one property of Documents accepted by IcingSearchEngine.
// Next tag: 11
message PropertyConfigProto {
  // Physical data-types that the contents of a property can have.
  message DataType {
    enum Code {
      // This value should never purposely be used. This is used for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      STRING = 1;
      INT64 = 2;
      DOUBLE = 3;
      BOOLEAN = 4;

      // Unstructured BLOB.
      BYTES = 5;

      // Indicates that the property itself is a Document, making it part of
      // a hierarchical Document schema. Any property using this data_type
      // MUST have a valid 'schema_type'.
      DOCUMENT = 6;

      // A list of floats. Vector type is used for embedding searches.
      VECTOR = 7;
    }
  }

  // Cardinalities that a property can have.
  message Cardinality {
    // NOTE: The cardinalities are purposefully ordered from least restrictive
    // (REPEATED) to most restrictive (REQUIRED). This makes it easier to
    // check whether a field is backwards compatible by doing a simple
    // greater-than/less-than check on the enum ints. Changing/adding new
    // cardinalities should be done cautiously.
    enum Code {
      // This should never purposely be set. This is used for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      // Any number of items (including zero) [0...*].
      REPEATED = 1;

      // Zero or one value [0,1].
      OPTIONAL = 2;

      // Exactly one value [1].
      REQUIRED = 3;
    }
  }

  // REQUIRED: Name that uniquely identifies a property within a Document of
  // a specific SchemaTypeConfigProto.
  //
  // Recommended format: a human-readable string that is one of the properties
  // defined in schema.org for the parent SchemaTypeConfigProto.
  // Eg: 'author' for http://schema.org/DigitalDocument.
  // Eg: 'address' for http://schema.org/Place.
  optional string property_name = 1;

  // OPTIONAL: Natural-language description of the property.
  //
  // Icing does not use this string in any way; it exists only so that users
  // can store semantic information about the PropertyConfigProto and retrieve
  // it later.
  optional string description = 9;

  // REQUIRED: Physical data-type of the contents of the property.
  optional DataType.Code data_type = 2;

  // REQUIRED if (data_type == DOCUMENT). OPTIONAL otherwise.
  // Indicates the logical schema-type of the contents of this property.
  //
  // TODO(cassiewang): This could be useful for non-document properties, e.g.
  // to set this field as a schema.org/address for some string property.
  // Re-evaluate what recommendation we should give clients if we want to start
  // using this for non-document properties as well.
  //
  // Recommended format: a human-readable string that is one of the types
  // defined in schema.org, matching the SchemaTypeConfigProto.schema_type of
  // another type.
  optional string schema_type = 3;

  // REQUIRED: The cardinality of the property.
  optional Cardinality.Code cardinality = 4;

  // OPTIONAL: Describes how string properties should be indexed. String
  // properties that do not set the indexing config will not be indexed.
  optional StringIndexingConfig string_indexing_config = 5;

  // OPTIONAL: Describes how document properties should be indexed.
  optional DocumentIndexingConfig document_indexing_config = 6;

  // OPTIONAL: Describes how int64 properties should be indexed. Int64
  // properties that do not set the indexing config will not be indexed.
  optional IntegerIndexingConfig integer_indexing_config = 7;

  // OPTIONAL: Describes how string properties can be used as a document
  // joining matcher.
  //
  // Note: currently only STRING single joining is supported, so if a property
  // is set as joinable (i.e. joinable_config.value_type is not NONE), then:
  // - DataType should be STRING. Otherwise joinable_config will be ignored.
  // - The property itself and any upper-level (nested doc) property should
  //   contain at most one element (i.e. Cardinality is OPTIONAL or REQUIRED).
  optional JoinableConfig joinable_config = 8;

  // OPTIONAL: Describes how vector properties should be indexed. Vector
  // properties that do not set the indexing config will not be indexed.
  optional EmbeddingIndexingConfig embedding_indexing_config = 10;
}
344
// The schema used by Icing: the list of all supported types.
// Next tag: 2
message SchemaProto {
  // One entry per supported schema type.
  repeated SchemaTypeConfigProto types = 1;
}
350
// Result returned from a call to IcingSearchEngine.SetSchema
// Next tag: 9
message SetSchemaResultProto {
  // Status code can be one of:
  //   OK
  //   INVALID_ARGUMENT
  //   FAILED_PRECONDITION
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, +WARNING_DATA_LOSS,
  // -INTERNAL. go/icing-library-apis.
  optional StatusProto status = 1;

  // Schema types that existed in the previous schema but were deleted from
  // the new schema. If ignore_errors_and_delete_documents=true, then all
  // documents of these types were also deleted.
  repeated string deleted_schema_types = 2;

  // Schema types that existed in the previous schema and were incompatible
  // with the new schema type. If ignore_errors_and_delete_documents=true, then
  // any documents that fail validation against the new schema types would
  // also be deleted.
  repeated string incompatible_schema_types = 3;

  // Schema types that did not exist in the previous schema and were added
  // with the new schema type.
  repeated string new_schema_types = 4;

  // Schema types that were changed in a backwards-compatible way that did not
  // invalidate the index.
  repeated string fully_compatible_changed_schema_types = 5;

  // Schema types that were changed in a backwards-compatible way that
  // nevertheless invalidated the index.
  repeated string index_incompatible_changed_schema_types = 6;

  // Overall time used for the function call.
  optional int32 latency_ms = 7;

  // Schema types that were changed in a backwards-compatible way that
  // nevertheless invalidated the joinable cache.
  //
  // For example, a property was set non joinable in the old schema
  // definition, but changed to joinable in the new definition. In this case,
  // this property will be considered join incompatible when setting the new
  // schema.
  repeated string join_incompatible_changed_schema_types = 8;
}
400
// Result returned from a call to IcingSearchEngine.GetSchema
// Next tag: 3
message GetSchemaResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // A copy of the Schema proto. Modifying it does not affect the Schema that
  // IcingSearchEngine holds.
  optional SchemaProto schema = 2;
}
420
// Result returned from a call to IcingSearchEngine.GetSchemaType
// Next tag: 3
message GetSchemaTypeResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // A copy of the SchemaTypeConfig proto with the specified schema_type.
  // Modifying it does not affect the SchemaTypeConfig that IcingSearchEngine
  // holds.
  optional SchemaTypeConfigProto schema_type_config = 2;
}
441