// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package icing.lib;

import "icing/proto/scoring.proto";
import "icing/proto/status.proto";

option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
option objc_class_prefix = "ICNG";

// Stats of the top-level function IcingSearchEngine::Initialize().
// Next tag: 17
message InitializeStatsProto {
  // Overall time used for the function call.
  optional int32 latency_ms = 1;

  // The cause of IcingSearchEngine recovering from a previous bad state during
  // initialization.
  enum RecoveryCause {
    // No recovery happened.
    NONE = 0;

    // Data loss in ground truth.
    DATA_LOSS = 1;

    // Data in index is inconsistent with ground truth.
    INCONSISTENT_WITH_GROUND_TRUTH = 2;

    // Changes were made to the schema, but the marker file remains in the
    // filesystem indicating that changes possibly were not fully applied to the
    // document store and the index - requiring a recovery.
    SCHEMA_CHANGES_OUT_OF_SYNC = 3;

    // Random I/O errors.
    IO_ERROR = 4;

    // The document log is using legacy format.
    LEGACY_DOCUMENT_LOG_FORMAT = 5;

    // The current code version is different from existing data version.
    VERSION_CHANGED = 6;

    // Any dependencies have changed.
    DEPENDENCIES_CHANGED = 7;

    // Change detected in Icing's feature flags since last initialization that
    // requires recovery.
    FEATURE_FLAG_CHANGED = 8;

    // Changes were made by an incomplete complex operation, which caused marker
    // file to remain in the filesystem - requiring a recovery.
    //
    // Note: Icing is unable to interpret the information from the marker file
    // due to some reasons, so the OUT_OF_SYNC reason is UNKNOWN.
    UNKNOWN_OUT_OF_SYNC = 9;

    // Changes were made by optimize, but the marker file remains in the
    // filesystem indicating that optimize possibly was not fully applied to the
    // document store and the index - requiring a recovery.
    OPTIMIZE_OUT_OF_SYNC = 10;
  }

  // Possible recovery causes for document store:
  // - DATA_LOSS
  // - SCHEMA_CHANGES_OUT_OF_SYNC
  // - IO_ERROR
  optional RecoveryCause document_store_recovery_cause = 2;

  // Possible recovery causes for index:
  // - INCONSISTENT_WITH_GROUND_TRUTH
  // - SCHEMA_CHANGES_OUT_OF_SYNC
  // - IO_ERROR
  optional RecoveryCause index_restoration_cause = 3;

  // Possible recovery causes for schema store:
  // - IO_ERROR
  optional RecoveryCause schema_store_recovery_cause = 4;

  // Time used to recover the document store.
  optional int32 document_store_recovery_latency_ms = 5;

  // Time used to restore the index.
  optional int32 index_restoration_latency_ms = 6;

  // Time used to restore the schema store.
  optional int32 schema_store_recovery_latency_ms = 7;

  // Status regarding how much data is lost during the initialization.
  enum DocumentStoreDataStatus {
    // Document store is successfully initialized or fully recovered.
    NO_DATA_LOSS = 0;

    // Ground truth data is partially lost.
    PARTIAL_LOSS = 1;

    // Ground truth data is completely lost.
    COMPLETE_LOSS = 2;
  }
  optional DocumentStoreDataStatus document_store_data_status = 8;

  // Number of documents currently in document store. Those may
  // include alive, deleted, and expired documents.
  optional int32 num_documents = 9;

  // Number of schema types currently in schema store.
  optional int32 num_schema_types = 10;

  // Number of consecutive initialization failures that immediately preceded
  // this initialization.
  optional int32 num_previous_init_failures = 11;

  // Possible recovery causes for integer index:
  // - INCONSISTENT_WITH_GROUND_TRUTH
  // - SCHEMA_CHANGES_OUT_OF_SYNC
  // - IO_ERROR
  optional RecoveryCause integer_index_restoration_cause = 12;

  // Possible recovery causes for qualified id join index:
  // - INCONSISTENT_WITH_GROUND_TRUTH
  // - SCHEMA_CHANGES_OUT_OF_SYNC
  // - IO_ERROR
  optional RecoveryCause qualified_id_join_index_restoration_cause = 13;

  // Possible recovery causes for embedding index:
  // - INCONSISTENT_WITH_GROUND_TRUTH
  // - SCHEMA_CHANGES_OUT_OF_SYNC
  // - IO_ERROR
  optional RecoveryCause embedding_index_restoration_cause = 14;

  // Possible status codes for ICU data initialization.
  // - OK
  // - INVALID_ARGUMENT
  // - INTERNAL
  optional StatusProto initialize_icu_data_status = 15;

  // Number of documents that failed to be reindexed during index restoration.
  optional int32 num_failed_reindexed_documents = 16;
}

// Stats of the top-level function IcingSearchEngine::Put().
// Next tag: 13
message PutDocumentStatsProto {
  // Overall time used for the function call.
  optional int32 latency_ms = 1;

  // Time used to store the document.
  optional int32 document_store_latency_ms = 2;

  // Time used to index the document.
  optional int32 index_latency_ms = 3;

  // Time used to merge the indices.
  optional int32 index_merge_latency_ms = 4;

  // Document size in bytes.
  optional int32 document_size = 5;

  // Counts of tokens added to the index for the document.
  message TokenizationStats {
    // Number of tokens added to the index.
    optional int32 num_tokens_indexed = 1;

    // Number of metadata tokens added to the index, which can only be added by
    // PropertyExistenceIndexingHandler currently.
    optional int32 num_metadata_tokens_indexed = 3;

    // Tag 2 is reserved and must not be reused.
    reserved 2;
  }
  optional TokenizationStats tokenization_stats = 6;

  // Time used to index all indexable string terms and property existence
  // metadata terms in the document. It does not include the time to merge
  // indices or the time to sort the lite index.
  optional int32 term_index_latency_ms = 7;

  // Time used to index all indexable integers in the document.
  optional int32 integer_index_latency_ms = 8;

  // Time used to index all qualified id join strings in the document.
  optional int32 qualified_id_join_index_latency_ms = 9;

  // Time used to sort the LiteIndex's HitBuffer.
  optional int32 lite_index_sort_latency_ms = 10;

  // Time used to index all metadata terms in the document, which can only be
  // added by PropertyExistenceIndexingHandler currently.
  optional int32 metadata_term_index_latency_ms = 11;

  // Time used to index all embeddings in the document.
  optional int32 embedding_index_latency_ms = 12;
}

// Stats of the top-level function IcingSearchEngine::Search() and
// IcingSearchEngine::GetNextPage().
// Next tag: 28
message QueryStatsProto {
  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // The UTF-8 length of the query string.
  optional int32 query_length = 16;

  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // Number of terms in the query string.
  optional int32 num_terms = 1;

  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // Number of namespaces filtered.
  optional int32 num_namespaces_filtered = 2;

  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // Number of schema types filtered.
  optional int32 num_schema_types_filtered = 3;

  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // Strategy of scoring and ranking.
  optional ScoringSpecProto.RankingStrategy.Code ranking_strategy = 4;

  // Whether the function call is querying the first page. If it's
  // not, Icing will fetch the results from cache so that some steps
  // may be skipped.
  optional bool is_first_page = 5;

  // The requested number of results in one page.
  optional int32 requested_page_size = 6;

  // The actual number of results returned in the current page.
  optional int32 num_results_returned_current_page = 7;

  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // Number of documents scored.
  optional int32 num_documents_scored = 8;

  // How many of the results in the page returned were snippeted.
  optional int32 num_results_with_snippets = 15;

  // Overall time used for the function call.
  optional int32 latency_ms = 10;

  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // Time used to parse the query, including 2 parts: tokenizing and
  // transforming tokens into an iterator tree.
  optional int32 parse_query_latency_ms = 11;

  // TODO(b/305098009): deprecate. Use parent_search_stats instead.
  // Time used to score the raw results.
  optional int32 scoring_latency_ms = 12;

  // Time used to rank the scored results.
  optional int32 ranking_latency_ms = 13;

  // Time used to fetch the document protos. Note that it includes the
  // time to snippet if 'has_snippets' is true.
  optional int32 document_retrieval_latency_ms = 14;

  // Time passed while waiting to acquire the lock before query execution.
  optional int32 lock_acquisition_latency_ms = 17;

  // Timestamp taken just before sending proto across the JNI boundary from
  // native to java side.
  optional int64 native_to_java_start_timestamp_ms = 18;

  // Time used to send protos across the JNI boundary from java to native side.
  optional int32 java_to_native_jni_latency_ms = 19;

  // Time used to send protos across the JNI boundary from native to java side.
  optional int32 native_to_java_jni_latency_ms = 20;

  // The native latency due to the join operation.
  optional int32 join_latency_ms = 21;

  // Number of joined results returned in the current page.
  // NOTE(review): description inferred from the field name - confirm what
  // counts as a joined result against the implementation.
  optional int32 num_joined_results_returned_current_page = 22;

  // Whether it contains join query or not.
  optional bool is_join_query = 23;

  // Stats of the search. Only valid for first page.
  // Next tag: 16
  message SearchStats {
    // The UTF-8 length of the query string.
    optional int32 query_length = 1;

    // Number of terms in the query string.
    optional int32 num_terms = 2;

    // Number of namespaces filtered.
    optional int32 num_namespaces_filtered = 3;

    // Number of schema types filtered.
    optional int32 num_schema_types_filtered = 4;

    // Strategy of scoring and ranking.
    optional ScoringSpecProto.RankingStrategy.Code ranking_strategy = 5;

    // Number of documents scored.
    optional int32 num_documents_scored = 6;

    // Time used to parse the query, including 2 parts: tokenizing and
    // transforming tokens into an iterator tree.
    optional int32 parse_query_latency_ms = 7;

    // Time used to score the raw results.
    optional int32 scoring_latency_ms = 8;

    // Whether it contains numeric query or not.
    optional bool is_numeric_query = 9;

    // Number of hits fetched by lite index before applying any filters.
    optional int32 num_fetched_hits_lite_index = 10;

    // Number of hits fetched by main index before applying any filters.
    optional int32 num_fetched_hits_main_index = 11;

    // Number of hits fetched by integer index before applying any filters.
    optional int32 num_fetched_hits_integer_index = 12;

    // Time used in Lexer to extract lexer tokens from the query.
    optional int32 query_processor_lexer_extract_token_latency_ms = 13;

    // Time used in Parser to consume lexer tokens extracted from the query.
    optional int32 query_processor_parser_consume_query_latency_ms = 14;

    // Time used in QueryVisitor to visit and build (nested) DocHitInfoIterator.
    optional int32 query_processor_query_visitor_latency_ms = 15;
  }

  // Search stats for parent. Only valid for first page.
  optional SearchStats parent_search_stats = 24;

  // Search stats for child.
  optional SearchStats child_search_stats = 25;

  // Byte size of the lite index hit buffer.
  optional int64 lite_index_hit_buffer_byte_size = 26;

  // Byte size of the unsorted tail of the lite index hit buffer.
  optional int64 lite_index_hit_buffer_unsorted_byte_size = 27;

  // Tag 9 is reserved and must not be reused.
  reserved 9;
}

// Stats of the top-level functions IcingSearchEngine::Delete,
// IcingSearchEngine::DeleteByNamespace, IcingSearchEngine::DeleteBySchemaType.
// Next tag: 4
message DeleteStatsProto {
  // Overall time used for the function call.
  optional int32 latency_ms = 1;

  // Wrapper message that scopes the Code enum below.
  message DeleteType {
    enum Code {
      // Default. Should never be used.
      UNKNOWN = 0;

      // Delete one document.
      SINGLE = 1;

      // Delete by query. This value is deprecated.
      // IcingSearchEngine::DeleteByQuery will return a DeleteByQueryStatsProto
      // rather than a DeleteStatsProto.
      DEPRECATED_QUERY = 2 [deprecated = true];

      // Delete by namespace.
      NAMESPACE = 3;

      // Delete by schema type.
      SCHEMA_TYPE = 4;
    }
  }
  optional DeleteType.Code delete_type = 2;

  // Number of documents deleted by this call.
  optional int32 num_documents_deleted = 3;
}

// Stats of the top-level function IcingSearchEngine::DeleteByQuery.
// Next tag: 9
message DeleteByQueryStatsProto {
  // Overall time used for the function call.
  optional int32 latency_ms = 1;

  // Number of documents deleted by this call.
  optional int32 num_documents_deleted = 2;

  // The UTF-8 length of the query string.
  optional int32 query_length = 3;

  // Number of terms in the query string.
  optional int32 num_terms = 4;

  // Number of namespaces filtered.
  optional int32 num_namespaces_filtered = 5;

  // Number of schema types filtered.
  optional int32 num_schema_types_filtered = 6;

  // Time used to parse the query, including 2 parts: tokenizing and
  // transforming tokens into an iterator tree.
  optional int32 parse_query_latency_ms = 7;

  // Time used to delete each document.
  optional int32 document_removal_latency_ms = 8;
}