// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package icing.lib;

import "icing/proto/scoring.proto";
import "icing/proto/status.proto";

option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";

// Stats of the top-level function IcingSearchEngine::Initialize().
// Next tag: 17
message InitializeStatsProto {
  // Overall time used for the function call.
  optional int32 latency_ms = 1;

  // The cause of IcingSearchEngine recovering from a previous bad state during
  // initialization.
  enum RecoveryCause {
    // No recovery happened.
    NONE = 0;

    // Data loss in ground truth.
    DATA_LOSS = 1;

    // Data in index is inconsistent with ground truth.
    INCONSISTENT_WITH_GROUND_TRUTH = 2;

    // Changes were made to the schema, but the marker file remains in the
    // filesystem indicating that changes possibly were not fully applied to the
    // document store and the index - requiring a recovery.
    SCHEMA_CHANGES_OUT_OF_SYNC = 3;

    // Random I/O errors.
    IO_ERROR = 4;

    // The document log is using legacy format.
    LEGACY_DOCUMENT_LOG_FORMAT = 5;

    // The current code version is different from existing data version.
    VERSION_CHANGED = 6;

    // Any dependencies have changed.
    DEPENDENCIES_CHANGED = 7;

    // Change detected in Icing's feature flags since last initialization that
    // requires recovery.
    FEATURE_FLAG_CHANGED = 8;

    // Changes were made by an incomplete complex operation, which caused marker
    // file to remain in the filesystem - requiring a recovery.
    //
    // Note: Icing is unable to interpret the information from the marker file
    // due to some reasons, so the OUT_OF_SYNC reason is UNKNOWN.
    UNKNOWN_OUT_OF_SYNC = 9;

    // Changes were made by optimize, but the marker file remains in the
    // filesystem indicating that optimize possibly was not fully applied to the
    // document store and the index - requiring a recovery.
    OPTIMIZE_OUT_OF_SYNC = 10;
  }

  // Possible recovery causes for document store:
  // - DATA_LOSS
  // - SCHEMA_CHANGES_OUT_OF_SYNC
  // - IO_ERROR
  optional RecoveryCause document_store_recovery_cause = 2;

  // Possible recovery causes for index:
  // - INCONSISTENT_WITH_GROUND_TRUTH
  // - SCHEMA_CHANGES_OUT_OF_SYNC
  // - IO_ERROR
  optional RecoveryCause index_restoration_cause = 3;

  // Possible recovery causes for schema store:
  // - IO_ERROR
  optional RecoveryCause schema_store_recovery_cause = 4;

  // Time used to recover the document store.
  optional int32 document_store_recovery_latency_ms = 5;

  // Time used to restore the index.
  optional int32 index_restoration_latency_ms = 6;

  // Time used to restore the schema store.
  optional int32 schema_store_recovery_latency_ms = 7;

  // Status regarding how much data is lost during the initialization.
  enum DocumentStoreDataStatus {
    // Document store is successfully initialized or fully recovered.
    NO_DATA_LOSS = 0;

    // Ground truth data is partially lost.
    PARTIAL_LOSS = 1;

    // Ground truth data is completely lost.
    COMPLETE_LOSS = 2;
  }
  optional DocumentStoreDataStatus document_store_data_status = 8;

  // Number of documents currently in document store. Those may
  // include alive, deleted, and expired documents.
  optional int32 num_documents = 9;

  // Number of schema types currently in schema store.
  optional int32 num_schema_types = 10;

  // Number of consecutive initialization failures that immediately preceded
  // this initialization.
  optional int32 num_previous_init_failures = 11;

  // Possible recovery causes for integer index:
  // - INCONSISTENT_WITH_GROUND_TRUTH
  // - SCHEMA_CHANGES_OUT_OF_SYNC
  // - IO_ERROR
  optional RecoveryCause integer_index_restoration_cause = 12;

  // Possible recovery causes for qualified id join index:
  // - INCONSISTENT_WITH_GROUND_TRUTH
  // - SCHEMA_CHANGES_OUT_OF_SYNC
  // - IO_ERROR
  optional RecoveryCause qualified_id_join_index_restoration_cause = 13;

  // Possible recovery causes for embedding index:
  // - INCONSISTENT_WITH_GROUND_TRUTH
  // - SCHEMA_CHANGES_OUT_OF_SYNC
  // - IO_ERROR
  optional RecoveryCause embedding_index_restoration_cause = 14;

  // Possible status codes for ICU data initialization.
  // - OK
  // - INVALID_ARGUMENT
  // - INTERNAL
  optional StatusProto initialize_icu_data_status = 15;

  // Number of documents that failed to be reindexed during index restoration.
  optional int32 num_failed_reindexed_documents = 16;
}

// Stats of the top-level function IcingSearchEngine::Put().
// Next tag: 13
message PutDocumentStatsProto {
  // Overall time used for the function call.
  optional int32 latency_ms = 1;

  // Time used to store the document.
  optional int32 document_store_latency_ms = 2;

  // Time used to index the document.
  optional int32 index_latency_ms = 3;

  // Time used to merge the indices.
  optional int32 index_merge_latency_ms = 4;

  // Document size in bytes.
  optional int32 document_size = 5;

  // Token counts recorded while indexing the document.
  message TokenizationStats {
    // Number of tokens added to the index.
    optional int32 num_tokens_indexed = 1;

    // Number of metadata tokens added to the index, which can only be added by
    // PropertyExistenceIndexingHandler currently.
    optional int32 num_metadata_tokens_indexed = 3;

    // Tag 2 is reserved and must not be reused.
    reserved 2;
  }
  optional TokenizationStats tokenization_stats = 6;

  // Time used to index all indexable string terms and property existence
  // metadata terms in the document. It does not include the time to merge
  // indices or the time to sort the lite index.
  optional int32 term_index_latency_ms = 7;

  // Time used to index all indexable integers in the document.
  optional int32 integer_index_latency_ms = 8;

  // Time used to index all qualified id join strings in the document.
  optional int32 qualified_id_join_index_latency_ms = 9;

  // Time used to sort the LiteIndex's HitBuffer.
  optional int32 lite_index_sort_latency_ms = 10;

  // Time used to index all metadata terms in the document, which can only be
  // added by PropertyExistenceIndexingHandler currently.
  optional int32 metadata_term_index_latency_ms = 11;

  // Time used to index all embeddings in the document.
  optional int32 embedding_index_latency_ms = 12;
}

// Stats of the top-level function IcingSearchEngine::Search() and
// IcingSearchEngine::GetNextPage().
// Next tag: 28
message QueryStatsProto {
  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // The UTF-8 length of the query string
  optional int32 query_length = 16;

  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // Number of terms in the query string.
  optional int32 num_terms = 1;

  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // Number of namespaces filtered.
  optional int32 num_namespaces_filtered = 2;

  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // Number of schema types filtered.
  optional int32 num_schema_types_filtered = 3;

  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // Strategy of scoring and ranking.
  optional ScoringSpecProto.RankingStrategy.Code ranking_strategy = 4;

  // Whether the function call is querying the first page. If it's
  // not, Icing will fetch the results from cache so that some steps
  // may be skipped.
  optional bool is_first_page = 5;

  // The requested number of results in one page.
  optional int32 requested_page_size = 6;

  // The actual number of results returned in the current page.
  optional int32 num_results_returned_current_page = 7;

  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // Number of documents scored.
  optional int32 num_documents_scored = 8;

  // How many of the results in the page returned were snippeted.
  optional int32 num_results_with_snippets = 15;

  // Overall time used for the function call.
  optional int32 latency_ms = 10;

  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // Time used to parse the query, including 2 parts: tokenizing and
  // transforming tokens into an iterator tree.
  optional int32 parse_query_latency_ms = 11;

  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // Time used to score the raw results.
  optional int32 scoring_latency_ms = 12;

  // Time used to rank the scored results.
  optional int32 ranking_latency_ms = 13;

  // Time used to fetch the document protos. Note that it includes the
  // time to snippet if 'has_snippets' is true.
  optional int32 document_retrieval_latency_ms = 14;

  // Time passed while waiting to acquire the lock before query execution.
  optional int32 lock_acquisition_latency_ms = 17;

  // Timestamp taken just before sending proto across the JNI boundary from
  // native to java side.
  optional int64 native_to_java_start_timestamp_ms = 18;

  // Time used to send protos across the JNI boundary from java to native side.
  optional int32 java_to_native_jni_latency_ms = 19;

  // Time used to send protos across the JNI boundary from native to java side.
  optional int32 native_to_java_jni_latency_ms = 20;

  // The native latency due to the join operation.
  optional int32 join_latency_ms = 21;

  // Number of joined results returned in the current page.
  // NOTE(review): description inferred from the field name - confirm the
  // exact semantics against the code that populates this field.
  optional int32 num_joined_results_returned_current_page = 22;

  // Whether it contains join query or not.
  optional bool is_join_query = 23;

  // Stats of the search. Only valid for first page.
  // Next tag: 16
  message SearchStats {
    // The UTF-8 length of the query string
    optional int32 query_length = 1;

    // Number of terms in the query string.
    optional int32 num_terms = 2;

    // Number of namespaces filtered.
    optional int32 num_namespaces_filtered = 3;

    // Number of schema types filtered.
    optional int32 num_schema_types_filtered = 4;

    // Strategy of scoring and ranking.
    optional ScoringSpecProto.RankingStrategy.Code ranking_strategy = 5;

    // Number of documents scored.
    optional int32 num_documents_scored = 6;

    // Time used to parse the query, including 2 parts: tokenizing and
    // transforming tokens into an iterator tree.
    optional int32 parse_query_latency_ms = 7;

    // Time used to score the raw results.
    optional int32 scoring_latency_ms = 8;

    // Whether it contains numeric query or not.
    optional bool is_numeric_query = 9;

    // Number of hits fetched by lite index before applying any filters.
    optional int32 num_fetched_hits_lite_index = 10;

    // Number of hits fetched by main index before applying any filters.
    optional int32 num_fetched_hits_main_index = 11;

    // Number of hits fetched by integer index before applying any filters.
    optional int32 num_fetched_hits_integer_index = 12;

    // Time used in Lexer to extract lexer tokens from the query.
    optional int32 query_processor_lexer_extract_token_latency_ms = 13;

    // Time used in Parser to consume lexer tokens extracted from the query.
    optional int32 query_processor_parser_consume_query_latency_ms = 14;

    // Time used in QueryVisitor to visit and build (nested) DocHitInfoIterator.
    optional int32 query_processor_query_visitor_latency_ms = 15;
  }

  // Search stats for parent. Only valid for first page.
  optional SearchStats parent_search_stats = 24;

  // Search stats for child.
  optional SearchStats child_search_stats = 25;

  // Byte size of the lite index hit buffer.
  optional int64 lite_index_hit_buffer_byte_size = 26;

  // Byte size of the unsorted tail of the lite index hit buffer.
  optional int64 lite_index_hit_buffer_unsorted_byte_size = 27;

  // Tag 9 is reserved and must not be reused.
  reserved 9;
}

// Stats of the top-level functions IcingSearchEngine::Delete,
// IcingSearchEngine::DeleteByNamespace, IcingSearchEngine::DeleteBySchemaType.
// Next tag: 4
message DeleteStatsProto {
  // Overall time used for the function call.
  optional int32 latency_ms = 1;

  // Wrapper message that scopes the Code enum describing which kind of delete
  // call produced these stats.
  message DeleteType {
    enum Code {
      // Default. Should never be used.
      UNKNOWN = 0;

      // Delete one document.
      SINGLE = 1;

      // Delete by query. This value is deprecated.
      // IcingSearchEngine::DeleteByQuery will return a DeleteByQueryStatsProto
      // rather than a DeleteStatsProto.
      DEPRECATED_QUERY = 2 [deprecated = true];

      // Delete by namespace.
      NAMESPACE = 3;

      // Delete by schema type.
      SCHEMA_TYPE = 4;
    }
  }
  optional DeleteType.Code delete_type = 2;

  // Number of documents deleted by this call.
  optional int32 num_documents_deleted = 3;
}

// Stats of the top-level function IcingSearchEngine::DeleteByQuery.
// Next tag: 9
message DeleteByQueryStatsProto {
  // Overall time used for the function call.
  optional int32 latency_ms = 1;

  // Number of documents deleted by this call.
  optional int32 num_documents_deleted = 2;

  // The UTF-8 length of the query string
  optional int32 query_length = 3;

  // Number of terms in the query string.
  optional int32 num_terms = 4;

  // Number of namespaces filtered.
  optional int32 num_namespaces_filtered = 5;

  // Number of schema types filtered.
  optional int32 num_schema_types_filtered = 6;

  // Time used to parse the query, including 2 parts: tokenizing and
  // transforming tokens into an iterator tree.
  optional int32 parse_query_latency_ms = 7;

  // Time used to delete each document.
  optional int32 document_removal_latency_ms = 8;
}