• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// Copyright 2019 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
// This schema uses proto2 semantics (explicit `optional` labels and
// per-field `[default = ...]` values).
syntax = "proto2";

// All messages in this file live in the icing.lib proto package.
package icing.lib;

// logging.proto is referenced for InitializeStatsProto; status.proto is
// referenced for StatusProto (both used by InitializeResultProto).
import "icing/proto/logging.proto";
import "icing/proto/status.proto";

// Java: generated classes go into this package, one file per top-level
// message/enum.
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
// Objective-C: prefix applied to generated class names.
option objc_class_prefix = "ICNG";
25
// Describes a single flag-gated IcingLib feature and which derived
// stores/indices have to be rebuilt for it.
//
// Next tag: 8
message IcingSearchEngineFeatureInfoProto {
  // The set of IcingLib features that are gated by a flag in
  // IcingSearchEngineOptions.
  enum FlaggedFeatureType {
    // Placeholder kept for backwards compatibility. Never set this value on
    // purpose.
    UNKNOWN = 0;

    // Corresponds to the flag
    // IcingSearchEngineOptions::build_property_existence_metadata_hits.
    //
    // Covers the kHasPropertyFunctionFeature advanced query feature as well
    // as the indexing of the metadata hits that the property existence check
    // relies on.
    FEATURE_HAS_PROPERTY_OPERATOR = 1;

    // Corresponds to the flag
    // IcingSearchEngineOptions::enable_embedding_index.
    //
    // Covers the embedding index.
    FEATURE_EMBEDDING_INDEX = 2;

    // Corresponds to the flag
    // IcingSearchEngineOptions::enable_scorable_properties.
    //
    // Covers the scorable properties feature.
    FEATURE_SCORABLE_PROPERTIES = 3;

    // Corresponds to the flag
    // IcingSearchEngineOptions::enable_embedding_quantization.
    //
    // Covers whether quantization is enabled for embedding vectors.
    FEATURE_EMBEDDING_QUANTIZATION = 4;

    // Corresponds to the flag
    // IcingSearchEngineOptions::enable_schema_database.
    //
    // Covers whether the database field in the schema is enabled. When it
    // is, a single SetSchema call may only set schema types belonging to one
    // database field.
    FEATURE_SCHEMA_DATABASE = 5;

    // Corresponds to the flag
    // IcingSearchEngineOptions::enable_qualified_id_join_index_v3.
    //
    // Covers whether join index v3 is enabled. When it is, join index v3 is
    // rebuilt to replace v2.
    FEATURE_QUALIFIED_ID_JOIN_INDEX_V3 = 6;

    // TODO(b/384947619): decide whether need to add a feature type for delete
    // propagation.
  }

  // REQUIRED: The IcingLib feature (flagged via IcingSearchEngineOptions)
  // that this entry describes.
  optional FlaggedFeatureType feature_type = 1;

  // True if the feature requires a rebuild of the document store.
  // Defaults to false.
  optional bool needs_document_store_rebuild = 2;

  // True if the feature requires a rebuild of the schema store.
  // Defaults to false.
  optional bool needs_schema_store_rebuild = 3;

  // True if the feature requires a rebuild of the term index.
  // Defaults to false.
  optional bool needs_term_index_rebuild = 4;

  // True if the feature requires a rebuild of the integer index.
  // Defaults to false.
  optional bool needs_integer_index_rebuild = 5;

  // True if the feature requires a rebuild of the qualified id join index.
  // Defaults to false.
  optional bool needs_qualified_id_join_index_rebuild = 6;

  // True if the feature requires a rebuild of the embedding index.
  // Defaults to false.
  optional bool needs_embedding_index_rebuild = 7;
}
102
// Version information for an Icing instance, including the features that
// were enabled when it was initialized.
//
// Next tag: 4
message IcingSearchEngineVersionProto {
  // Carried over from the original version file.
  optional int32 version = 1;

  // Carried over from the original version file.
  optional int32 max_version = 2;

  // The features enabled in this icing version at initialization time.
  repeated IcingSearchEngineFeatureInfoProto enabled_features = 3;
}
111
// Payload of the marker file, which records information about the last
// operation on Icing that did not run to completion.
//
// How the marker file is used:
// - It is typically needed for complex operations that are sensitive to
//   power loss or a crash.
// - It is created, and flushed to disk with the essential information,
//   before the complex operation starts; it is deleted once the operation
//   has completed.
// - Finding the marker file during initialization therefore means that the
//   last operation was cut short by power loss or a crash.
//
// Next tag: 2
message IcingSearchEngineMarkerProto {
  // Wrapper message that holds the Code enum.
  //
  // Next tag: 3
  message OperationType {
    enum Code {
      // Placeholder kept for backwards compatibility. Never set this value
      // on purpose.
      UNKNOWN = 0;

      // The SetSchema operation.
      SET_SCHEMA = 1;

      // The Optimize operation.
      OPTIMIZE = 2;
    }
  }

  // Which kind of operation this marker was written for.
  optional OperationType.Code operation_type = 1;
}
139
// Options used to configure an IcingSearchEngine instance.
//
// Next tag: 35
message IcingSearchEngineOptions {
  // Directory to persist files for Icing. Required.
  // If Icing was previously initialized with this directory, it will reload
  // the index saved by the last instance.
  optional string base_dir = 1;

  // The maximum allowable token length. All tokens in excess of this size
  // will be truncated to max_token_length before being indexed.
  //
  // Clients may use this option to prevent unnecessary indexing of long tokens.
  // Depending on the use case, indexing all of
  // 'Supercalifragilisticexpialidocious' may be unnecessary - a user is
  // unlikely to type that entire query. So only indexing the first n bytes may
  // still provide the desired behavior without wasting resources.
  //
  // Valid values: [1, INT_MAX]
  // Optional.
  optional int32 max_token_length = 3 [default = 30];

  // The size (measured in bytes) at which Icing's internal indices should be
  // merged. Icing buffers changes together before merging them into a more
  // compact format. When the buffer exceeds index_merge_size during a Put
  // operation, the buffer is merged into the larger, more compact index.
  //
  // This more compact index is more efficient to search over as the index
  // grows larger and has smaller system health impact.
  //
  // Setting a low index_merge_size increases the frequency of merges -
  // increasing indexing-time latency and flash wear. Setting a high
  // index_merge_size leads to larger resource usage and higher query latency.
  //
  // Valid values: [1, INT_MAX]
  // Optional.
  optional int32 index_merge_size = 4 [default = 1048576];  // 1 MiB

  // Whether to use namespace id or namespace name to build up fingerprint for
  // document_key_mapper_ and corpus_mapper_ in document store.
  optional bool document_store_namespace_id_fingerprint = 5;

  // The threshold of the percentage of invalid documents to rebuild index
  // during optimize, i.e. we rebuild index if and only if
  // |invalid_documents| / |all_documents| >= optimize_rebuild_index_threshold
  //
  // Rebuilding the index could be faster than optimizing the index if we have
  // removed most of the documents.
  // Based on benchmarks, 85%~95% seems to be a good threshold for most cases.
  //
  // Default to 0 for better rollout of the new index optimize.
  optional float optimize_rebuild_index_threshold = 6 [default = 0.0];

  // Level of compression: NO_COMPRESSION = 0, BEST_SPEED = 1,
  // BEST_COMPRESSION = 9.
  //
  // Valid values: [0, 9]
  // Optional.
  optional int32 compression_level = 7 [default = 3];

  // OPTIONAL: Whether to allow circular references between schema types for
  // the schema definition.
  //
  // Even when set to true, circular references are still not allowed in the
  // following cases:
  //    1. All edges of a cycle have index_nested_properties=true
  //    2. One of the types in the cycle has a joinable property, or depends on
  //       a type with a joinable property.
  // This is because such a cycle would lead to an infinite number of
  // indexed/joinable properties.
  //
  // The default value is false.
  optional bool allow_circular_schema_definitions = 8;

  // Whether to memory-map the maximum possible file size for FileBackedVector
  // before growing the actual file size.
  optional bool pre_mapping_fbv = 9;

  // Whether to use the persistent hash map as the key mapper (if false, then
  // fall back to the dynamic trie key mapper).
  optional bool use_persistent_hash_map = 10;

  // Integer index bucket split threshold.
  optional int32 integer_index_bucket_split_threshold = 11 [default = 65536];

  // Whether Icing should sort and merge its lite index HitBuffer unsorted tail
  // at indexing time.
  //
  // If set to true, the HitBuffer will be sorted at indexing time after
  // exceeding the sort threshold. If false, the HitBuffer will be sorted at
  // querying time, before the first query after inserting new elements into the
  // HitBuffer.
  //
  // The default value is false.
  optional bool lite_index_sort_at_indexing = 12;

  // Size (in bytes) at which Icing's lite index should sort and merge the
  // HitBuffer's unsorted tail into the sorted head for sorting at indexing
  // time. Size specified here is the maximum byte size to allow for the
  // unsorted tail section.
  //
  // Setting a lower sort size reduces querying latency at the expense of
  // indexing latency.
  optional int32 lite_index_sort_size = 13 [default = 8192];  // 8 KiB

  // DEPRECATED: qualified id join index v2 is fully rolled out.
  //
  // Marked [deprecated = true] so that generated code flags remaining uses;
  // the tag and name are kept for wire and source compatibility.
  optional bool use_new_qualified_id_join_index = 14 [deprecated = true];

  // Whether to build the metadata hits used for property existence check, which
  // is required to support the hasProperty function in advanced query.
  optional bool build_property_existence_metadata_hits = 15;

  // Whether to enable blob store.
  // If set to true, the BlobStore will be created to store and retrieve blobs.
  optional bool enable_blob_store = 16;

  // The time to live for an orphan blob. If the blob has no reference document
  // for this amount of time, it will be deleted.
  // If set to 0, the blob will never be deleted.
  optional int64 orphan_blob_time_to_live_ms = 17;

  // Whether to enable schema database.
  //
  // If set to true, the schema database field will be used to store and
  // retrieve schemas, and SetSchema will only allow setting schema types from
  // a single database field at a time.
  //
  // Icing will automatically handle migrating the stored SchemaProto and
  // populate the database field when this flag is flipped from false to true.
  //
  // TODO - b/337913932: Remove this flag once the schema database is fully
  // rolled out.
  optional bool enable_schema_database = 18;

  // Whether to enable embedding index.
  // If set to false, the EmbeddingIndex will only be created with a header, and
  // embedding properties will not be indexed.
  // TODO(b/326656531): Change the default value to false when the flag is
  // propagated from AppSearch.
  optional bool enable_embedding_index = 19 [default = true];

  // Feature flag guarding the new scorable properties feature.
  // TODO - b/357105837: Remove this flag once the feature is fully rolled out.
  optional bool enable_scorable_properties = 21;

  // Whether to enable quantization for embedding vectors.
  // If set to false, all embedding vectors will not be quantized. Otherwise,
  // quantization will be controlled by the quantization type specified in the
  // schema.
  optional bool enable_embedding_quantization = 22;

  // Level of blob_info_file_log store compression in blob store:
  // NO_COMPRESSION = 0, BEST_SPEED = 1, BEST_COMPRESSION = 9.
  //
  // Valid values: [0, 9]
  // Optional.
  optional int32 blob_store_compression_level = 23 [default = 3];

  // Whether to allow repeated fields to have a joinable value type.
  optional bool enable_repeated_field_joins = 24;

  // DEPRECATED (separated into 2 flags, see
  // enable_qualified_id_join_index_v3 and enable_delete_propagation_from):
  // whether to use qualified id join index v3 and enable delete propagation
  // PROPAGATE_FROM.
  //
  // - If set to true, qualified id join index v3 will be created and delete
  //   propagation PROPAGATE_FROM will be enabled.
  // - Otherwise, qualified id join index v2 will be created and delete
  //   propagation will be disabled.
  //
  // The default value is false.
  //
  // Marked [deprecated = true] so that generated code flags remaining uses;
  // the tag and name are kept for wire and source compatibility.
  optional bool enable_qualified_id_join_index_v3_and_delete_propagate_from =
      25 [deprecated = true];

  // The absolute path to the ICU data file.
  // If set, ICU will be initialized using this data file.
  optional string icu_data_file_absolute_path = 26;

  // Whether a backup schema and document should be generated for documents with
  // embedding properties.
  optional bool enable_embedding_backup_generation = 27;

  // Whether to calculate time since last optimize using last attempted optimize
  // run time instead of last successful optimize run time.
  // - If set to true, time since last optimize is calculated using last
  //   attempted optimize run time, regardless of whether the optimize run was
  //   successful or not.
  // - Otherwise, time since last optimize is calculated using last successful
  //   optimize run time.
  //
  // The default value is false.
  optional bool calculate_time_since_last_attempted_optimize = 28;

  // Whether to use qualified id join index v3.
  //
  // If set to true, qualified id join index v3 will be created.
  // Otherwise, qualified id join index v2 will be created.
  optional bool enable_qualified_id_join_index_v3 = 29;

  // Whether to enable delete propagation PROPAGATE_FROM.
  //
  // If set to true, enable_qualified_id_join_index_v3 must be also true
  // (otherwise initialization will fail), and delete propagation PROPAGATE_FROM
  // will be enabled.
  // Otherwise, delete propagation will be disabled.
  optional bool enable_delete_propagation_from = 30;

  // Whether to enable soft index restoration.
  // If set to true, then any error that occurs during index restoration will be
  // ignored, and the failed document will be deleted.
  optional bool enable_soft_index_restoration = 31;

  // Whether to enable marker file for optimize API.
  // If set to true, then a general marker file will be created before any
  // optimize operation is performed, and deleted after the operation is
  // completed. This is to ensure that the optimize operation is not interrupted
  // by power loss or crash.
  optional bool enable_marker_file_for_optimize = 32;

  // Whether to manage blob files.
  // If set to true, then Icing will manage blob files and all APIs will return
  // valid FileDescriptors. If false, then Icing will only manage blob metadata
  // and will return the names of the files to be created and managed by the
  // caller.
  optional bool manage_blob_files = 33 [default = true];

  // Whether to release the backup schema file instance in the schema-store if
  // the overlay exists.
  optional bool release_backup_schema_file_if_overlay_present = 34;

  // Tags 2 and 20 are reserved and must not be assigned to new fields.
  reserved 2, 20;
}
366
// What a call to IcingSearchEngine.Initialize returns.
//
// Next tag: 3
message InitializeResultProto {
  // Outcome of the call. The status code is one of:
  //   OK
  //   WARNING_DATA_LOSS
  //   INVALID_ARGUMENT
  //   NOT_FOUND
  //   INTERNAL
  //
  // Refer to status.proto for more details.
  //
  // TODO(b/147699081): Fix error codes: +ABORTED, -NOT_FOUND.
  // go/icing-library-apis.
  optional StatusProto status = 1;

  // Statistics about the function call. Within InitializeStatsProto, the
  // call latency 'latency_ms' is always populated; the remaining fields are
  // accurate only if the status above is OK or WARNING_DATA_LOSS. Refer to
  // logging.proto for details.
  optional InitializeStatsProto initialize_stats = 2;

  // TODO(b/147699081): Add a field to indicate lost_schema and lost_documents.
  // go/icing-library-apis.
}
392