// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package icing.lib;

import "icing/proto/status.proto";
import "icing/proto/term.proto";

// Code-generation options for the Java and Objective-C bindings.
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";

// Defines the schema that every Document of a specific "type" should adhere
// to. These can be considered as definitions of rich structured types for
// Documents accepted by IcingSearchEngine.
//
// NOTE: Instances of SchemaTypeConfigProto are strongly recommended to be
// based on types defined in schema.org. This makes the data/config/code more
// shareable and easier to extend in the future.
//
// TODO(cassiewang) Define a sample proto file that can be used by tests and for
// documentation.
//
// Next tag: 6
message SchemaTypeConfigProto {
  // REQUIRED: Named type that uniquely identifies the structured, logical
  // schema being defined.
  //
  // Recommended format: Human readable string that's one of the types defined
  // in http://schema.org. Eg: DigitalDocument, Message, Person, etc.
  optional string schema_type = 1;

  // List of all properties that are supported by Documents of this type.
  // A Document should never have properties that are not listed here.
  //
  // TODO(cassiewang) Figure out if we should disallow, ignore or accept
  // unknown properties. Accepting them could make switching between versions
  // easier.
  repeated PropertyConfigProto properties = 4;

  // Version is an arbitrary number that the client may use to keep track of
  // different incarnations of the schema. Icing library imposes no requirements
  // on this field and will not validate it in any way. If a client calls
  // SetSchema with a schema that contains one or more new version numbers, then
  // those version numbers will be updated so long as the SetSchema call
  // succeeds. Clients are free to leave the version number unset, in which case
  // it will default to value == 0.
  optional int32 version = 5;

  // Tags 2 and 3 are reserved and must not be assigned to new fields.
  reserved 2, 3;
}

// Describes how a string property should be indexed.
// Next tag: 3
message StringIndexingConfig {
  // Indicates how the content of this property should be matched in the index.
  //
  // TermMatchType.Code=UNKNOWN
  // Content in this property will not be tokenized or indexed. Useful if the
  // data type is not indexable. See schema-util for details.
  //
  // TermMatchType.Code=EXACT_ONLY
  // Content in this property should only be returned for queries matching the
  // exact tokens appearing in this property.
  // Ex. A property with "fool" should NOT match a query for "foo".
  //
  // TermMatchType.Code=PREFIX
  // Content in this property should be returned for queries that are either
  // exact matches or prefix matches of the tokens appearing in this property.
  // Ex. A property with "fool" *should* match a query for "foo".
  optional TermMatchType.Code term_match_type = 1;

  message TokenizerType {
    enum Code {
      // It is only valid for tokenizer_type to be 'NONE' if the data type is
      // not indexed.
      NONE = 0;

      // Tokenization for plain text.
      PLAIN = 1;

      // Tokenizes text verbatim. This means no normalization or segmentation
      // is applied to string values that are tokenized using this type.
      // Therefore, the output token is equivalent to the raw string text. For
      // example, "Hello, world!" would be tokenized as "Hello, world!"
      // preserving punctuation and capitalization, and not creating separate
      // tokens between the space.
      VERBATIM = 2;
    }
  }

  // Indicates how the content of this property should be tokenized. See
  // TokenizerType.Code for the meaning of each value.
  optional TokenizerType.Code tokenizer_type = 2;
}

// Describes how a document property should be indexed.
// Next tag: 2
message DocumentIndexingConfig {
  // OPTIONAL: Whether nested properties within the document property should be
  // indexed. If true, then the nested properties will be indexed according to
  // the property's own indexing configurations. If false, nested documents'
  // properties will not be indexed even if they have an indexing configuration.
  //
  // The default value is false.
  optional bool index_nested_properties = 1;
}

// Describes the schema of a single property of Documents that belong to a
// specific SchemaTypeConfigProto. These can be considered as a rich, structured
// type for each property of Documents accepted by IcingSearchEngine.
// Next tag: 7
message PropertyConfigProto {
  // REQUIRED: Name that uniquely identifies a property within a Document of
  // a specific SchemaTypeConfigProto.
  //
  // Recommended format: Human readable string that's one of the properties
  // defined in schema.org for the parent SchemaTypeConfigProto.
  // Eg: 'author' for http://schema.org/DigitalDocument.
  // Eg: 'address' for http://schema.org/Place.
  optional string property_name = 1;

  // REQUIRED: Physical data-types of the contents of the property.
  message DataType {
    enum Code {
      // This value should never purposely be used. This is used for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      STRING = 1;
      INT64 = 2;
      DOUBLE = 3;
      BOOLEAN = 4;

      // Unstructured BLOB.
      BYTES = 5;

      // Indicates that the property itself is a Document, making it part of
      // a hierarchical Document schema. Any property using this data_type
      // MUST have a valid 'schema_type'.
      DOCUMENT = 6;
    }
  }
  optional DataType.Code data_type = 2;

  // REQUIRED if (data_type == DOCUMENT). OPTIONAL otherwise.
  // Indicates the logical schema-type of the contents of this property.
  //
  // TODO(cassiewang): This could be useful for non-document properties, e.g.
  // to set this field as a schema.org/address for some string property.
  // Re-evaluate what recommendation we should give clients if we want to start
  // using this for non-document properties as well.
  //
  // Recommended format: Human readable string that is one of the types defined
  // in schema.org, matching the SchemaTypeConfigProto.schema_type of another
  // type.
  optional string schema_type = 3;

  // REQUIRED: The cardinality of the property.
  message Cardinality {
    // NOTE: The order of the cardinality is purposefully set to be from least
    // restrictive (REPEATED) to most restrictive (REQUIRED). This makes it
    // easier to check if a field is backwards compatible by doing a simple
    // greater-than/less-than check on the enum ints. Changing/adding new
    // cardinalities should be done cautiously.
    enum Code {
      // This should never purposely be set. This is used for backwards
      // compatibility reasons.
      UNKNOWN = 0;

      // Any number of items (including zero) [0...*].
      REPEATED = 1;

      // Zero or one value [0,1].
      OPTIONAL = 2;

      // Exactly one value [1].
      REQUIRED = 3;
    }
  }
  optional Cardinality.Code cardinality = 4;

  // OPTIONAL: Describes how string properties should be indexed. String
  // properties that do not set the indexing config will not be indexed.
  optional StringIndexingConfig string_indexing_config = 5;

  // OPTIONAL: Describes how document properties should be indexed.
  optional DocumentIndexingConfig document_indexing_config = 6;
}

// The list of all supported types constitutes the schema used by Icing.
// Next tag: 2
message SchemaProto {
  // One entry per schema type that makes up the schema.
  repeated SchemaTypeConfigProto types = 1;
}

// Result of a call to IcingSearchEngine.SetSchema
// Next tag: 8
message SetSchemaResultProto {
  // Status code can be one of:
  //   OK
  //   INVALID_ARGUMENT
  //   FAILED_PRECONDITION
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, +WARNING_DATA_LOSS,
  // -INTERNAL. go/icing-library-apis.
  optional StatusProto status = 1;

  // Schema types that existed in the previous schema, but were deleted from the
  // new schema. If ignore_errors_and_delete_documents=true, then all documents
  // of these types were also deleted.
  repeated string deleted_schema_types = 2;

  // Schema types that existed in the previous schema and were incompatible with
  // the new schema type. If ignore_errors_and_delete_documents=true, then any
  // documents that fail validation against the new schema types would also be
  // deleted.
  repeated string incompatible_schema_types = 3;

  // Schema types that did not exist in the previous schema and were added with
  // the new schema type.
  repeated string new_schema_types = 4;

  // Schema types that were changed in a way that was backwards compatible and
  // didn't invalidate the index.
  repeated string fully_compatible_changed_schema_types = 5;

  // Schema types that were changed in a way that was backwards compatible, but
  // invalidated the index.
  repeated string index_incompatible_changed_schema_types = 6;

  // Overall time used for the function call, in milliseconds.
  optional int32 latency_ms = 7;
}

// Result of a call to IcingSearchEngine.GetSchema
// Next tag: 3
message GetSchemaResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Copy of the Schema proto. Modifying this does not affect the Schema that
  // IcingSearchEngine holds.
  optional SchemaProto schema = 2;
}

// Result of a call to IcingSearchEngine.GetSchemaType
// Next tag: 3
message GetSchemaTypeResultProto {
  // Status code can be one of:
  //   OK
  //   FAILED_PRECONDITION
  //   NOT_FOUND
  //   INTERNAL
  //
  // See status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -INTERNAL
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Copy of the SchemaTypeConfig proto with the specified schema_type.
  // Modifying this does not affect the SchemaTypeConfig that IcingSearchEngine
  // holds.
  optional SchemaTypeConfigProto schema_type_config = 2;
}
