1 /* 2 * Copyright 2023 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package androidx.appsearch.localstorage; 18 19 import android.app.appsearch.SearchSpec; 20 21 import androidx.annotation.RestrictTo; 22 import androidx.appsearch.flags.Flags; 23 24 import com.google.android.icing.proto.IcingSearchEngineOptions; 25 26 import org.jspecify.annotations.NonNull; 27 28 /** 29 * An interface exposing the optional config flags in {@link IcingSearchEngineOptions} used to 30 * instantiate {@link com.google.android.icing.IcingSearchEngine}, as well as other additional 31 * config flags for IcingSearchEngine. 32 */ 33 @RestrictTo(RestrictTo.Scope.LIBRARY_GROUP) 34 public interface IcingOptionsConfig { 35 // Defaults from IcingSearchEngineOptions proto 36 int DEFAULT_MAX_TOKEN_LENGTH = 30; 37 38 int DEFAULT_INDEX_MERGE_SIZE = 1048576; // 1 MiB 39 40 boolean DEFAULT_DOCUMENT_STORE_NAMESPACE_ID_FINGERPRINT = false; 41 42 float DEFAULT_OPTIMIZE_REBUILD_INDEX_THRESHOLD = 0.9f; 43 44 /** 45 * The default compression level in IcingSearchEngineOptions proto matches the 46 * previously-hardcoded document compression level in Icing (which is 3). 47 */ 48 int DEFAULT_COMPRESSION_LEVEL = 3; 49 50 /** 51 * The default compression mem level in IcingSearchEngineOptions proto matches the 52 * previously-hardcoded document compression level in Icing (which is 8). 53 */ 54 int DEFAULT_COMPRESSION_MEM_LEVEL = 8; 55 56 boolean DEFAULT_USE_PREMAPPING_WITH_FILE_BACKED_VECTOR = false; 57 58 boolean DEFAULT_USE_PERSISTENT_HASH_MAP = false; 59 60 int DEFAULT_MAX_PAGE_BYTES_LIMIT = Integer.MAX_VALUE; 61 62 /** 63 * The default threshold for integer index bucket split. 65536 is picked based on 64 * benchmark (Icing integer-index-storage_benchmark.cc). 65 * <ul> 66 * <li>There will be only 16 buckets when indexing 1M integers, which improves the 67 * performance of numeric search range query. 68 * <li>It also increases # of hits to read for numeric search exact query, but the overall 69 * query latency is still reasonable. 70 * </ul> 71 */ 72 int DEFAULT_INTEGER_INDEX_BUCKET_SPLIT_THRESHOLD = 65536; 73 74 boolean DEFAULT_LITE_INDEX_SORT_AT_INDEXING = true; 75 76 /** 77 * The default sort threshold for the lite index when sort at indexing is enabled. 78 * 8192 is picked based on Icing microbenchmarks (icing-search-engine_benchmarks.cc). 79 */ 80 int DEFAULT_LITE_INDEX_SORT_SIZE = 8192; // 8Kib 81 82 boolean DEFAULT_USE_NEW_QUALIFIED_ID_JOIN_INDEX = false; 83 84 boolean DEFAULT_BUILD_PROPERTY_EXISTENCE_METADATA_HITS = false; 85 86 long DEFAULT_ORPHAN_BLOB_TIME_TO_LIVE_MS = 7 * 24 * 60 * 60 * 1000L; // 1 week. 87 88 String DEFAULT_ICU_DATA_FILE_ABSOLUTE_PATH = ""; 89 90 int DEFAULT_COMPRESSION_THRESHOLD_BYTES = 600; 91 92 /** 93 * The maximum allowable token length. All tokens in excess of this size will be truncated to 94 * max_token_length before being indexed. 95 * 96 * <p>Clients may use this option to prevent unnecessary indexing of long tokens. 97 * Depending on the use case, indexing all of 98 * 'Supercalifragilisticexpialidocious' may be unnecessary - a user is 99 * unlikely to type that entire query. So only indexing the first n bytes may 100 * still provide the desired behavior without wasting resources. 101 */ getMaxTokenLength()102 int getMaxTokenLength(); 103 104 /** 105 * The size (measured in bytes) at which Icing's internal indices should be 106 * merged. Icing buffers changes together before merging them into a more 107 * compact format. When the buffer exceeds index_merge_size during a Put 108 * operation, the buffer is merged into the larger, more compact index. 109 * 110 * <p>This more compact index is more efficient to search over as the index 111 * grows larger and has smaller system health impact. 112 * 113 * <p>Setting a low index_merge_size increases the frequency of merges - 114 * increasing indexing-time latency and flash wear. Setting a high 115 * index_merge_size leads to larger resource usage and higher query latency. 116 */ getIndexMergeSize()117 int getIndexMergeSize(); 118 119 /** 120 * Whether to use namespace id or namespace name to build up fingerprint for 121 * document_key_mapper_ and corpus_mapper_ in document store. 122 */ getDocumentStoreNamespaceIdFingerprint()123 boolean getDocumentStoreNamespaceIdFingerprint(); 124 125 /** 126 * The threshold of the percentage of invalid documents at which to rebuild index 127 * during optimize. 128 * 129 * <p>We rebuild index if and only if |invalid_documents| / |all_documents| >= threshold. 130 * 131 * <p>Rebuilding the index could be faster than optimizing the index if we have 132 * removed most of the documents. Based on benchmarks, 85%~95% seems to be a good threshold 133 * for most cases. 134 */ getOptimizeRebuildIndexThreshold()135 float getOptimizeRebuildIndexThreshold(); 136 137 /** 138 * The level of gzip compression for documents in the Icing document store. 139 * 140 * <p>NO_COMPRESSION = 0, BEST_SPEED = 1, BEST_COMPRESSION = 9 141 */ getCompressionLevel()142 int getCompressionLevel(); 143 144 145 /** 146 * The mem level for gzip compression for documents in the Icing document store. 147 * 148 * <p> 1 uses minimum memory but is slow and reduces compression ratio; 9 uses maximum memory 149 * for optimal speed and compression ratio. Icing historically used a memLevel of 8. 150 */ getCompressionMemLevel()151 int getCompressionMemLevel(); 152 153 /** 154 * Whether to allow circular references between schema types for the schema definition. 155 * 156 * <p>Even when set to true, circular references are still not allowed in the following cases: 157 * 1. All edges of a cycle have index_nested_properties=true 158 * 2. One of the types in the cycle has a joinable property, or depends on a type with a 159 * joinable property. 160 */ getAllowCircularSchemaDefinitions()161 boolean getAllowCircularSchemaDefinitions(); 162 163 /** 164 * Flag for {@link com.google.android.icing.proto.SearchSpecProto}. 165 * 166 * <p>Whether to use the read-only implementation of IcingSearchEngine::Search. 167 * 168 * <p>The read-only version enables multiple queries to be performed concurrently 169 * as it only acquires the read lock at IcingSearchEngine's level. Finer-grained locks are 170 * implemented around code paths that write changes to Icing during Search. 171 */ getUseReadOnlySearch()172 boolean getUseReadOnlySearch(); 173 174 /** 175 * Flag for {@link com.google.android.icing.proto.IcingSearchEngineOptions}. 176 * 177 * <p>Whether or not to pre-map the potential memory region used by the PersistentHashMap. 178 * This will avoid the need to re-map the mmapping used by PersistentHashMap whenever the 179 * underlying storage grows. 180 */ getUsePreMappingWithFileBackedVector()181 boolean getUsePreMappingWithFileBackedVector(); 182 183 /** 184 * Flag for {@link com.google.android.icing.proto.IcingSearchEngineOptions}. 185 * 186 * <p>Whether or not to use the PersistentHashMap in the QualifiedIdTypeJoinableIndex. If false, 187 * we will use the old IcingDynamicTrie to store key value pairs. 188 */ getUsePersistentHashMap()189 boolean getUsePersistentHashMap(); 190 191 /** 192 * Flag for {@link com.google.android.icing.proto.ResultSpecProto}. 193 * 194 * <p>The maximum byte size to allow in a single page. This limit is only loosely binding. 195 * AppSearch will add results to the page until either 1) AppSearch has retrieved 196 * {@link SearchSpec#getResultCountPerPage()} results or 2) total size of the page exceeds this 197 * value. Therefore, AppSearch will always retrieve at least a single result, even if that 198 * result exceeds this limit. 199 */ getMaxPageBytesLimit()200 int getMaxPageBytesLimit(); 201 202 /** 203 * Flag for {@link com.google.android.icing.proto.IcingSearchEngineOptions}. 204 * 205 * <p>Threshold for integer index bucket split. Integer index stores hits in several buckets, 206 * and splits if # of hits in a single bucket exceed the threshold. Splitting bucket accelerates 207 * numeric search exact query, but potentially downgrades the performance of range query. 208 * 209 * <p>This flag is for rolling out new threshold 65536. If identifying any issues, then change 210 * it back to 341 (the previous bucket split threshold, capacity of full max-sized posting 211 * list). 212 */ getIntegerIndexBucketSplitThreshold()213 int getIntegerIndexBucketSplitThreshold(); 214 215 /** 216 * Flag for {@link com.google.android.icing.proto.IcingSearchEngineOptions}. 217 * 218 * <p>Whether Icing should sort and merge its lite index HitBuffer unsorted tail at indexing 219 * time. 220 * 221 * <p>If set to true, the HitBuffer will be sorted at indexing time after exceeding the sort 222 * threshold. If false, the HifBuffer will be sorted at querying time, before the first query 223 * after inserting new elements into the HitBuffer. 224 */ getLiteIndexSortAtIndexing()225 boolean getLiteIndexSortAtIndexing(); 226 227 /** 228 * Flag for {@link com.google.android.icing.proto.IcingSearchEngineOptions}. 229 * 230 * <p>Size (in bytes) at which Icing's lite index should sort and merge the HitBuffer's 231 * unsorted tail into the sorted head for sorting at indexing time. Size specified here is 232 * unsorted tail section. 233 * 234 * <p>Setting a lower sort size reduces querying latency at the expense of indexing latency. 235 */ getLiteIndexSortSize()236 int getLiteIndexSortSize(); 237 238 /** 239 * Flag for {@link com.google.android.icing.proto.IcingSearchEngineOptions}. 240 * 241 * <p>Whether to use the new qualified Id join index. 242 */ getUseNewQualifiedIdJoinIndex()243 boolean getUseNewQualifiedIdJoinIndex(); 244 245 /** 246 * Flag for {@link com.google.android.icing.proto.IcingSearchEngineOptions}. 247 * 248 * <p>Whether to build the metadata hits used for property existence check, which is required 249 * to support the hasProperty function in advanced query. 250 */ getBuildPropertyExistenceMetadataHits()251 boolean getBuildPropertyExistenceMetadataHits(); 252 253 /** 254 * Config for {@link com.google.android.icing.proto.IcingSearchEngineOptions}. 255 * 256 * <p>The maximum time in millisecond for a orphan blob to get recycled and deleted if there is 257 * no reference document linked to it. 258 */ getOrphanBlobTimeToLiveMs()259 long getOrphanBlobTimeToLiveMs(); 260 261 /** 262 * Config for {@link com.google.android.icing.proto.IcingSearchEngineOptions}. 263 * 264 * <p>The absolute path to the ICU data file. If a valid path has been provided, it will be used 265 * to initialize ICU. The path is not available in Jetpack and Framework. This method is 266 * functionally no-op and returns an empty string. 267 */ getIcuDataFileAbsolutePath()268 @NonNull String getIcuDataFileAbsolutePath(); 269 270 /** 271 * The threshold in bytes for compressing documents. If a document is larger than or equal to 272 * this threshold, it will be compressed based on getCompressionLevel(). 0 means always 273 * compress. 274 */ getCompressionThresholdBytes()275 int getCompressionThresholdBytes(); 276 277 /** 278 * Converts to an {@link IcingSearchEngineOptions} instance. 279 * 280 * @param baseDir base directory of the icing instance. 281 */ toIcingSearchEngineOptions( @onNull String baseDir, boolean isVMEnabled)282 default @NonNull IcingSearchEngineOptions toIcingSearchEngineOptions( 283 @NonNull String baseDir, boolean isVMEnabled) { 284 return IcingSearchEngineOptions.newBuilder() 285 .setBaseDir(baseDir) 286 .setMaxTokenLength(getMaxTokenLength()) 287 .setIndexMergeSize(getIndexMergeSize()) 288 .setDocumentStoreNamespaceIdFingerprint( 289 getDocumentStoreNamespaceIdFingerprint()) 290 .setOptimizeRebuildIndexThreshold( 291 getOptimizeRebuildIndexThreshold()) 292 .setCompressionLevel(getCompressionLevel()) 293 .setAllowCircularSchemaDefinitions( 294 getAllowCircularSchemaDefinitions()) 295 .setPreMappingFbv(getUsePreMappingWithFileBackedVector()) 296 .setUsePersistentHashMap(getUsePersistentHashMap()) 297 .setIntegerIndexBucketSplitThreshold( 298 getIntegerIndexBucketSplitThreshold()) 299 .setLiteIndexSortAtIndexing(getLiteIndexSortAtIndexing()) 300 .setLiteIndexSortSize(getLiteIndexSortSize()) 301 .setUseNewQualifiedIdJoinIndex( 302 getUseNewQualifiedIdJoinIndex()) 303 .setBuildPropertyExistenceMetadataHits( 304 getBuildPropertyExistenceMetadataHits()) 305 .setEnableBlobStore(Flags.enableBlobStore()) 306 .setOrphanBlobTimeToLiveMs(getOrphanBlobTimeToLiveMs()) 307 .setEnableEmbeddingIndex( 308 Flags.enableSchemaEmbeddingPropertyConfig()) 309 .setEnableEmbeddingQuantization( 310 Flags.enableSchemaEmbeddingQuantization()) 311 .setEnableScorableProperties(Flags.enableScorableProperty()) 312 .setIcuDataFileAbsolutePath(getIcuDataFileAbsolutePath()) 313 .setManageBlobFiles(!Flags.enableAppSearchManageBlobFiles()) 314 // Join index v3 is a prerequisite for delete propagation. 315 .setEnableDeletePropagationFrom( 316 Flags.enableDeletePropagationType() && Flags.enableQualifiedIdJoinIndexV3()) 317 .setCalculateTimeSinceLastAttemptedOptimize( 318 Flags.enableCalculateTimeSinceLastAttemptedOptimize()) 319 .setEnableQualifiedIdJoinIndexV3(Flags.enableQualifiedIdJoinIndexV3()) 320 .setEnableSoftIndexRestoration(Flags.enableSoftIndexRestoration()) 321 .setEnableMarkerFileForOptimize(Flags.enableMarkerFileForOptimize()) 322 .setReleaseBackupSchemaFileIfOverlayPresent( 323 Flags.enableReleaseBackupSchemaFileIfOverlayPresent()) 324 // This is a necessary bug fix for the VMEnabled case. VMEnabled is guarded by its 325 // own trunk-stable flag, therefore this can be included there. Otherwise, we should 326 // use this trank-stable flag. 327 .setEnableStrictPageByteSizeLimit( 328 Flags.enableStrictPageByteSizeLimit() || isVMEnabled) 329 .setCompressionThresholdBytes( 330 (Flags.enableCompressionThreshold() || isVMEnabled) 331 ? Math.max(0, getCompressionThresholdBytes()) : 0) 332 .setCompressionMemLevel( 333 (Flags.enableCompressionMemLevelOne() || isVMEnabled) ? 1 334 : DEFAULT_COMPRESSION_MEM_LEVEL) 335 .build(); 336 } 337 } 338