// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
syntax = "proto2";

package icing.lib;

// status.proto supplies StatusProto; term.proto supplies TermMatchType.
import "icing/proto/status.proto";
import "icing/proto/term.proto";

// Java output goes to the package below, one file per top-level type.
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";
25
// Defines the schema that every Document of a specific "type" should adhere
// to. These can be considered as definitions of rich structured types for
// Documents accepted by IcingSearchEngine.
//
// NOTE: Instances of SchemaTypeConfigProto are strongly recommended to be
// based on types defined in schema.org. This makes the data/config/code more
// shareable and easier to extend in the future.
//
// TODO(cassiewang) Define a sample proto file that can be used by tests and for
// documentation.
//
// Next tag: 7
message SchemaTypeConfigProto {
  // REQUIRED: Named type that uniquely identifies the structured, logical
  // schema being defined.
  //
  // Recommended format: Human readable string that's one of the types defined
  // in http://schema.org. Eg: DigitalDocument, Message, Person, etc.
  optional string schema_type = 1;

  // List of all properties that are supported by Documents of this type.
  // A Document should never have properties that are not listed here.
  //
  // TODO(cassiewang) Figure out if we should disallow, ignore or accept
  // unknown properties. Accepting them could make switching between versions
  // easier.
  repeated PropertyConfigProto properties = 4;

  // Version is an arbitrary number that the client may use to keep track of
  // different incarnations of the schema. Icing library imposes no requirements
  // on this field and will not validate it in any way. If a client calls
  // SetSchema with a schema that contains one or more new version numbers, then
  // those version numbers will be updated so long as the SetSchema call
  // succeeds. Clients are free to leave the version number unset, in which case
  // it will default to value == 0.
  optional int32 version = 5;

  // An experimental field that marks this type as a subtype of each entry in
  // parent_types, which enables parent_types to be interpreted as its subtypes
  // in the context of the Search APIs, including schema type filters and
  // projections specified in TypePropertyMask.
  repeated string parent_types = 6;

  // Tags 2 and 3 are reserved and must not be assigned to new fields.
  reserved 2, 3;
}
71
// Describes how a string property should be indexed.
// Next tag: 3
message StringIndexingConfig {
  // Indicates how the content of this property should be matched in the index.
  //
  // TermMatchType.Code=UNKNOWN
  // Content in this property will not be tokenized or indexed. Useful if the
  // data type is not indexable. See schema-util for details.
  //
  // TermMatchType.Code=EXACT_ONLY
  // Content in this property should only be returned for queries matching the
  // exact tokens appearing in this property.
  // Ex. A property with "fool" should NOT match a query for "foo".
  //
  // TermMatchType.Code=PREFIX
  // Content in this property should be returned for queries that are either
  // exact matches or prefix matches of the tokens appearing in this property.
  // Ex. A property with "fool" *should* match a query for "foo".
  optional TermMatchType.Code term_match_type = 1;

  // Wrapper message that gives the tokenizer enum values their own scope.
  message TokenizerType {
    enum Code {
      // It is only valid for tokenizer_type to be 'NONE' if the data type is
      // not indexed.
      NONE = 0;

      // Tokenization for plain text.
      PLAIN = 1;

      // Tokenizes text verbatim. This means no normalization or segmentation
      // is applied to string values that are tokenized using this type.
      // Therefore, the output token is equivalent to the raw string text. For
      // example, "Hello, world!" would be tokenized as "Hello, world!"
      // preserving punctuation and capitalization, and not creating separate
      // tokens between the space.
      VERBATIM = 2;

      // Tokenizes text as an email address. This means it will tokenize a
      // string into multiple emails, and further tokenize those into parts of
      // an email address. These parts include the local address, host
      // components, local components, as well as the name and comments. For
      // example, "User (comment) <user@domain.com>" would be tokenized into a
      // "User" name token, a "comment" comment token, a "user" local address, a
      // "user" local component token, a "domain" host component token, a "com"
      // host component token, a "user@domain.com" address token, and the entire
      // original string as an rfc822 token.
      // See more here: https://datatracker.ietf.org/doc/html/rfc822
      RFC822 = 3;

      // Tokenizes text as a url address. This tokenizes a url string into a
      // token for each component in the url, as well as any significant
      // url suffixes. For example,
      // https://www.google.com/path/subpath?query#ref would be tokenized into a
      // scheme token "https"; 3 host tokens "www", "google", "com"; 2 path
      // tokens "path", "subpath"; a query token "query"; a reference token
      // "ref"; and 3 suffix tokens
      // "https://www.google.com/path/subpath?query#ref",
      // "www.google.com/path/subpath?query#ref",
      // "google.com/path/subpath?query#ref".
      // Currently only supports tokenization of one url string at a time
      // i.e. the input string cannot have spaces in the middle, but can have
      // leading or trailing spaces.
      URL = 4;
    }
  }

  // Indicates which tokenizer is applied to the content of this property. See
  // TokenizerType.Code for the meaning of each value.
  optional TokenizerType.Code tokenizer_type = 2;
}
139
// Describes how a document property should be indexed.
// Next tag: 2
message DocumentIndexingConfig {
  // OPTIONAL: Whether nested properties within the document property should be
  // indexed.
  //   true:  nested properties are indexed according to each nested property's
  //          own indexing configuration.
  //   false: nested documents' properties are not indexed, even if they have
  //          an indexing configuration.
  //
  // The default value is false.
  optional bool index_nested_properties = 1;
}
151
// Describes how an int64 property should be indexed.
// Next tag: 3
// NOTE(review): only tag 1 is declared below and no tags are reserved. Confirm
// whether tag 2 was used by a since-removed field (in which case it should be
// listed as reserved) or whether the counter above should read 2.
message IntegerIndexingConfig {
  // Wrapper message that gives the match-type enum values their own scope.
  message NumericMatchType {
    enum Code {
      // Contents in this property will not be indexed. Useful if the int64
      // property type is not indexable.
      UNKNOWN = 0;

      // Contents in this property should only be returned for queries matching
      // the range.
      RANGE = 1;
    }
  }

  // OPTIONAL: Indicates how the int64 contents of this property should be
  // matched.
  //
  // The default value is UNKNOWN.
  optional NumericMatchType.Code numeric_match_type = 1;
}
172
// Describes how a property can be used to join this document with another
// document. See JoinSpecProto (in search.proto) for more details.
// Next tag: 3
message JoinableConfig {
  // Wrapper message that gives the joinable value-type enum its own scope.
  message ValueType {
    enum Code {
      // Value in this property is not joinable.
      NONE = 0;

      // Value in this property is a joinable (string) qualified id, which is
      // composed of namespace and uri.
      // See JoinSpecProto (in search.proto) and DocumentProto (in
      // document.proto) for more details about qualified id, namespace and uri.
      QUALIFIED_ID = 1;
    }
  }

  // OPTIONAL: Indicates what joinable type the content value of this property
  // is.
  //
  // The default value is NONE.
  optional ValueType.Code value_type = 1;

  // If true, deleting the parent document that a child document is joined to
  // also deletes the child document. This only applies to children joined
  // through QUALIFIED_ID; other (future) joinable value types won't use it.
  optional bool propagate_delete = 2 [default = false];
}
200
// Describes the schema of a single property of Documents that belong to a
// specific SchemaTypeConfigProto. These can be considered as a rich, structured
// type for each property of Documents accepted by IcingSearchEngine.
// Next tag: 9
message PropertyConfigProto {
  // REQUIRED: Name that uniquely identifies a property within a Document of
  // a specific SchemaTypeConfigProto.
  //
  // Recommended format: Human readable string that's one of the properties
  // defined in schema.org for the parent SchemaTypeConfigProto.
  // Eg: 'author' for http://schema.org/DigitalDocument.
  // Eg: 'address' for http://schema.org/Place.
  optional string property_name = 1;

  // Physical data-types that the contents of a property can have.
  message DataType {
    enum Code {
      // This value should never purposely be used. This is used for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      STRING = 1;
      INT64 = 2;
      DOUBLE = 3;
      BOOLEAN = 4;

      // Unstructured BLOB.
      BYTES = 5;

      // Indicates that the property itself is a Document, making it part of
      // a hierarchical Document schema. Any property using this data_type
      // MUST have a valid 'schema_type'.
      DOCUMENT = 6;
    }
  }

  // REQUIRED: Physical data-type of the contents of the property.
  optional DataType.Code data_type = 2;

  // REQUIRED if (data_type == DOCUMENT). OPTIONAL otherwise.
  // Indicates the logical schema-type of the contents of this property.
  //
  // TODO(cassiewang): This could be useful for non-document properties, e.g.
  // to set this field as a schema.org/address for some string property.
  // Re-evaluate what recommendation we should give clients if we want to start
  // using this for non-document properties as well.
  //
  // Recommended format: Human readable string that is one of the types defined
  // in schema.org, matching the SchemaTypeConfigProto.schema_type of another
  // type.
  optional string schema_type = 3;

  // Cardinalities that a property can have.
  message Cardinality {
    // NOTE: The order of the cardinality is purposefully set to be from least
    // restrictive (REPEATED) to most restrictive (REQUIRED). This makes it
    // easier to check if a field is backwards compatible by doing a simple
    // greater-than/less-than check on the enum ints. Changing/adding new
    // cardinalities should be done cautiously.
    enum Code {
      // This should never purposely be set. This is used for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      // Any number of items (including zero) [0...*].
      REPEATED = 1;

      // Zero or one value [0,1].
      OPTIONAL = 2;

      // Exactly one value [1].
      REQUIRED = 3;
    }
  }

  // REQUIRED: The cardinality of the property.
  optional Cardinality.Code cardinality = 4;

  // OPTIONAL: Describes how string properties should be indexed. String
  // properties that do not set the indexing config will not be indexed.
  optional StringIndexingConfig string_indexing_config = 5;

  // OPTIONAL: Describes how document properties should be indexed.
  optional DocumentIndexingConfig document_indexing_config = 6;

  // OPTIONAL: Describes how int64 properties should be indexed. Int64
  // properties that do not set the indexing config will not be indexed.
  optional IntegerIndexingConfig integer_indexing_config = 7;

  // OPTIONAL: Describes how string properties can be used as a document joining
  // matcher.
  //
  // Note: currently we only support STRING single joining, so if a property is
  // set as joinable (i.e. joinable_config.value_type is not NONE), then:
  // - DataType should be STRING. Otherwise joinable_config will be ignored.
  // - The property itself and any upper-level (nested doc) property should
  //   contain at most one element (i.e. Cardinality is OPTIONAL or REQUIRED).
  optional JoinableConfig joinable_config = 8;
}
296
// The schema used by Icing, expressed as the list of all supported types.
// Next tag: 2
message SchemaProto {
  // The type definitions that together constitute the schema.
  repeated SchemaTypeConfigProto types = 1;
}
302
// Result of a call to IcingSearchEngine.SetSchema
// Next tag: 9
message SetSchemaResultProto {
  // Status code can be one of:
  //   OK
  //   INVALID_ARGUMENT
  //   FAILED_PRECONDITION
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, +WARNING_DATA_LOSS,
  // -INTERNAL. go/icing-library-apis.
  optional StatusProto status = 1;

  // Schema types that existed in the previous schema, but were deleted from the
  // new schema. If ignore_errors_and_delete_documents=true, then all documents
  // of these types were also deleted.
  repeated string deleted_schema_types = 2;

  // Schema types that existed in the previous schema and were incompatible with
  // the new schema type. If ignore_errors_and_delete_documents=true, then any
  // documents that fail validation against the new schema types would also be
  // deleted.
  repeated string incompatible_schema_types = 3;

  // Schema types that did not exist in the previous schema and were added with
  // the new schema type.
  repeated string new_schema_types = 4;

  // Schema types that were changed in a way that was backwards compatible and
  // didn't invalidate the index.
  repeated string fully_compatible_changed_schema_types = 5;

  // Schema types that were changed in a way that was backwards compatible, but
  // invalidated the index.
  repeated string index_incompatible_changed_schema_types = 6;

  // Overall time used for the function call, in milliseconds.
  optional int32 latency_ms = 7;

  // Schema types that were changed in a way that was backwards compatible, but
  // invalidated the joinable cache.
  //
  // For example, a property was set non joinable in the old schema definition,
  // but changed to joinable in the new definition. In this case, this property
  // will be considered join incompatible when setting new schema.
  repeated string join_incompatible_changed_schema_types = 8;
}
352
// Result of a call to IcingSearchEngine.GetSchema
// Next tag: 3
message GetSchemaResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Copy of the Schema proto. Because it is a copy, modifying it does not
  // affect the Schema that IcingSearchEngine holds.
  optional SchemaProto schema = 2;
}
372
// Result of a call to IcingSearchEngine.GetSchemaType
// Next tag: 3
message GetSchemaTypeResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Copy of the SchemaTypeConfig proto with the specified schema_type.
  // Because it is a copy, modifying it does not affect the SchemaTypeConfig
  // that IcingSearchEngine holds.
  optional SchemaTypeConfigProto schema_type_config = 2;
}
393