1 /*
2  * Copyright 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package androidx.appsearch.localstorage;
18 
19 import android.app.appsearch.SearchSpec;
20 
21 import androidx.annotation.RestrictTo;
22 import androidx.appsearch.flags.Flags;
23 
24 import com.google.android.icing.proto.IcingSearchEngineOptions;
25 
26 import org.jspecify.annotations.NonNull;
27 
28 /**
29  * An interface exposing the optional config flags in {@link IcingSearchEngineOptions} used to
30  * instantiate {@link com.google.android.icing.IcingSearchEngine}, as well as other additional
31  * config flags for IcingSearchEngine.
32  */
33 @RestrictTo(RestrictTo.Scope.LIBRARY_GROUP)
34 public interface IcingOptionsConfig {
35     // Defaults from IcingSearchEngineOptions proto
36     int DEFAULT_MAX_TOKEN_LENGTH = 30;
37 
38     int DEFAULT_INDEX_MERGE_SIZE = 1048576; // 1 MiB
39 
40     boolean DEFAULT_DOCUMENT_STORE_NAMESPACE_ID_FINGERPRINT = false;
41 
42     float DEFAULT_OPTIMIZE_REBUILD_INDEX_THRESHOLD = 0.9f;
43 
44     /**
45      * The default compression level in IcingSearchEngineOptions proto matches the
46      * previously-hardcoded document compression level in Icing (which is 3).
47      */
48     int DEFAULT_COMPRESSION_LEVEL = 3;
49 
50     /**
51      * The default compression mem level in IcingSearchEngineOptions proto matches the
52      * previously-hardcoded document compression level in Icing (which is 8).
53      */
54     int DEFAULT_COMPRESSION_MEM_LEVEL = 8;
55 
56     boolean DEFAULT_USE_PREMAPPING_WITH_FILE_BACKED_VECTOR = false;
57 
58     boolean DEFAULT_USE_PERSISTENT_HASH_MAP = false;
59 
60     int DEFAULT_MAX_PAGE_BYTES_LIMIT = Integer.MAX_VALUE;
61 
62     /**
63      * The default threshold for integer index bucket split. 65536 is picked based on
64      * benchmark (Icing integer-index-storage_benchmark.cc).
65      * <ul>
66      *     <li>There will be only 16 buckets when indexing 1M integers, which improves the
67      *     performance of numeric search range query.
68      *     <li>It also increases # of hits to read for numeric search exact query, but the overall
69      *     query latency is still reasonable.
70      * </ul>
71      */
72     int DEFAULT_INTEGER_INDEX_BUCKET_SPLIT_THRESHOLD = 65536;
73 
74     boolean DEFAULT_LITE_INDEX_SORT_AT_INDEXING = true;
75 
76     /**
77      * The default sort threshold for the lite index when sort at indexing is enabled.
78      * 8192 is picked based on Icing microbenchmarks (icing-search-engine_benchmarks.cc).
79      */
80     int DEFAULT_LITE_INDEX_SORT_SIZE = 8192;   // 8Kib
81 
82     boolean DEFAULT_USE_NEW_QUALIFIED_ID_JOIN_INDEX = false;
83 
84     boolean DEFAULT_BUILD_PROPERTY_EXISTENCE_METADATA_HITS = false;
85 
86     long DEFAULT_ORPHAN_BLOB_TIME_TO_LIVE_MS = 7 * 24 * 60 * 60 * 1000L; // 1 week.
87 
88     String DEFAULT_ICU_DATA_FILE_ABSOLUTE_PATH = "";
89 
90     int DEFAULT_COMPRESSION_THRESHOLD_BYTES = 600;
91 
92     /**
93      * The maximum allowable token length. All tokens in excess of this size will be truncated to
94      * max_token_length before being indexed.
95      *
96      * <p>Clients may use this option to prevent unnecessary indexing of long tokens.
97      * Depending on the use case, indexing all of
98      * 'Supercalifragilisticexpialidocious' may be unnecessary - a user is
99      * unlikely to type that entire query. So only indexing the first n bytes may
100      * still provide the desired behavior without wasting resources.
101      */
getMaxTokenLength()102     int getMaxTokenLength();
103 
104     /**
105      * The size (measured in bytes) at which Icing's internal indices should be
106      * merged. Icing buffers changes together before merging them into a more
107      * compact format. When the buffer exceeds index_merge_size during a Put
108      * operation, the buffer is merged into the larger, more compact index.
109      *
110      * <p>This more compact index is more efficient to search over as the index
111      * grows larger and has smaller system health impact.
112      *
113      * <p>Setting a low index_merge_size increases the frequency of merges -
114      * increasing indexing-time latency and flash wear. Setting a high
115      * index_merge_size leads to larger resource usage and higher query latency.
116      */
getIndexMergeSize()117     int getIndexMergeSize();
118 
119     /**
120      * Whether to use namespace id or namespace name to build up fingerprint for
121      * document_key_mapper_ and corpus_mapper_ in document store.
122      */
getDocumentStoreNamespaceIdFingerprint()123     boolean getDocumentStoreNamespaceIdFingerprint();
124 
125     /**
126      * The threshold of the percentage of invalid documents at which to rebuild index
127      * during optimize.
128      *
129      * <p>We rebuild index if and only if |invalid_documents| / |all_documents| >= threshold.
130      *
131      * <p>Rebuilding the index could be faster than optimizing the index if we have
132      * removed most of the documents. Based on benchmarks, 85%~95% seems to be a good threshold
133      * for most cases.
134      */
getOptimizeRebuildIndexThreshold()135     float getOptimizeRebuildIndexThreshold();
136 
137     /**
138      * The level of gzip compression for documents in the Icing document store.
139      *
140      * <p>NO_COMPRESSION = 0, BEST_SPEED = 1, BEST_COMPRESSION = 9
141      */
getCompressionLevel()142     int getCompressionLevel();
143 
144 
145     /**
146      * The mem level for gzip compression for documents in the Icing document store.
147      *
148      * <p> 1 uses minimum memory but is slow and reduces compression ratio; 9 uses maximum memory
149      * for optimal speed and compression ratio. Icing historically used a memLevel of 8.
150      */
getCompressionMemLevel()151     int getCompressionMemLevel();
152 
153     /**
154      * Whether to allow circular references between schema types for the schema definition.
155      *
156      * <p>Even when set to true, circular references are still not allowed in the following cases:
157      *   1. All edges of a cycle have index_nested_properties=true
158      *   2. One of the types in the cycle has a joinable property, or depends on a type with a
159      *   joinable property.
160      */
getAllowCircularSchemaDefinitions()161     boolean getAllowCircularSchemaDefinitions();
162 
163     /**
164      * Flag for {@link com.google.android.icing.proto.SearchSpecProto}.
165      *
166      * <p>Whether to use the read-only implementation of IcingSearchEngine::Search.
167      *
168      * <p>The read-only version enables multiple queries to be performed concurrently
169      * as it only acquires the read lock at IcingSearchEngine's level. Finer-grained locks are
170      * implemented around code paths that write changes to Icing during Search.
171      */
getUseReadOnlySearch()172     boolean getUseReadOnlySearch();
173 
174     /**
175      * Flag for {@link com.google.android.icing.proto.IcingSearchEngineOptions}.
176      *
177      * <p>Whether or not to pre-map the potential memory region used by the PersistentHashMap.
178      * This will avoid the need to re-map the mmapping used by PersistentHashMap whenever the
179      * underlying storage grows.
180      */
getUsePreMappingWithFileBackedVector()181     boolean getUsePreMappingWithFileBackedVector();
182 
183     /**
184      * Flag for {@link com.google.android.icing.proto.IcingSearchEngineOptions}.
185      *
186      * <p>Whether or not to use the PersistentHashMap in the QualifiedIdTypeJoinableIndex. If false,
187      * we will use the old IcingDynamicTrie to store key value pairs.
188      */
getUsePersistentHashMap()189     boolean getUsePersistentHashMap();
190 
191     /**
192      * Flag for {@link com.google.android.icing.proto.ResultSpecProto}.
193      *
194      * <p>The maximum byte size to allow in a single page. This limit is only loosely binding.
195      * AppSearch will add results to the page until either 1) AppSearch has retrieved
196      * {@link SearchSpec#getResultCountPerPage()} results or 2) total size of the page exceeds this
197      * value. Therefore, AppSearch will always retrieve at least a single result, even if that
198      * result exceeds this limit.
199      */
getMaxPageBytesLimit()200     int getMaxPageBytesLimit();
201 
202     /**
203      * Flag for {@link com.google.android.icing.proto.IcingSearchEngineOptions}.
204      *
205      * <p>Threshold for integer index bucket split. Integer index stores hits in several buckets,
206      * and splits if # of hits in a single bucket exceed the threshold. Splitting bucket accelerates
207      * numeric search exact query, but potentially downgrades the performance of range query.
208      *
209      * <p>This flag is for rolling out new threshold 65536. If identifying any issues, then change
210      * it back to 341 (the previous bucket split threshold, capacity of full max-sized posting
211      * list).
212      */
getIntegerIndexBucketSplitThreshold()213     int getIntegerIndexBucketSplitThreshold();
214 
215     /**
216      * Flag for {@link com.google.android.icing.proto.IcingSearchEngineOptions}.
217      *
218      * <p>Whether Icing should sort and merge its lite index HitBuffer unsorted tail at indexing
219      * time.
220      *
221      * <p>If set to true, the HitBuffer will be sorted at indexing time after exceeding the sort
222      * threshold. If false, the HifBuffer will be sorted at querying time, before the first query
223      * after inserting new elements into the HitBuffer.
224      */
getLiteIndexSortAtIndexing()225     boolean getLiteIndexSortAtIndexing();
226 
227     /**
228      * Flag for {@link com.google.android.icing.proto.IcingSearchEngineOptions}.
229      *
230      * <p>Size (in bytes) at which Icing's lite index should sort and merge the HitBuffer's
231      * unsorted tail into the sorted head for sorting at indexing time. Size specified here is
232      * unsorted tail section.
233      *
234      * <p>Setting a lower sort size reduces querying latency at the expense of indexing latency.
235      */
getLiteIndexSortSize()236     int getLiteIndexSortSize();
237 
238     /**
239      * Flag for {@link com.google.android.icing.proto.IcingSearchEngineOptions}.
240      *
241      * <p>Whether to use the new qualified Id join index.
242      */
getUseNewQualifiedIdJoinIndex()243     boolean getUseNewQualifiedIdJoinIndex();
244 
245     /**
246      * Flag for {@link com.google.android.icing.proto.IcingSearchEngineOptions}.
247      *
248      * <p>Whether to build the metadata hits used for property existence check, which is required
249      * to support the hasProperty function in advanced query.
250      */
getBuildPropertyExistenceMetadataHits()251     boolean getBuildPropertyExistenceMetadataHits();
252 
253     /**
254      * Config for {@link com.google.android.icing.proto.IcingSearchEngineOptions}.
255      *
256      * <p>The maximum time in millisecond for a orphan blob to get recycled and deleted if there is
257      * no reference document linked to it.
258      */
getOrphanBlobTimeToLiveMs()259     long getOrphanBlobTimeToLiveMs();
260 
261     /**
262      * Config for {@link com.google.android.icing.proto.IcingSearchEngineOptions}.
263      *
264      * <p>The absolute path to the ICU data file. If a valid path has been provided, it will be used
265      * to initialize ICU. The path is not available in Jetpack and Framework. This method is
266      * functionally no-op and returns an empty string.
267      */
getIcuDataFileAbsolutePath()268     @NonNull String getIcuDataFileAbsolutePath();
269 
270     /**
271      * The threshold in bytes for compressing documents. If a document is larger than or equal to
272      * this threshold, it will be compressed based on getCompressionLevel(). 0 means always
273      * compress.
274      */
getCompressionThresholdBytes()275     int getCompressionThresholdBytes();
276 
277     /**
278      * Converts to an {@link IcingSearchEngineOptions} instance.
279      *
280      * @param baseDir base directory of the icing instance.
281      */
toIcingSearchEngineOptions( @onNull String baseDir, boolean isVMEnabled)282     default @NonNull IcingSearchEngineOptions toIcingSearchEngineOptions(
283             @NonNull String baseDir, boolean isVMEnabled) {
284         return IcingSearchEngineOptions.newBuilder()
285                 .setBaseDir(baseDir)
286                 .setMaxTokenLength(getMaxTokenLength())
287                 .setIndexMergeSize(getIndexMergeSize())
288                 .setDocumentStoreNamespaceIdFingerprint(
289                         getDocumentStoreNamespaceIdFingerprint())
290                 .setOptimizeRebuildIndexThreshold(
291                         getOptimizeRebuildIndexThreshold())
292                 .setCompressionLevel(getCompressionLevel())
293                 .setAllowCircularSchemaDefinitions(
294                         getAllowCircularSchemaDefinitions())
295                 .setPreMappingFbv(getUsePreMappingWithFileBackedVector())
296                 .setUsePersistentHashMap(getUsePersistentHashMap())
297                 .setIntegerIndexBucketSplitThreshold(
298                         getIntegerIndexBucketSplitThreshold())
299                 .setLiteIndexSortAtIndexing(getLiteIndexSortAtIndexing())
300                 .setLiteIndexSortSize(getLiteIndexSortSize())
301                 .setUseNewQualifiedIdJoinIndex(
302                         getUseNewQualifiedIdJoinIndex())
303                 .setBuildPropertyExistenceMetadataHits(
304                         getBuildPropertyExistenceMetadataHits())
305                 .setEnableBlobStore(Flags.enableBlobStore())
306                 .setOrphanBlobTimeToLiveMs(getOrphanBlobTimeToLiveMs())
307                 .setEnableEmbeddingIndex(
308                         Flags.enableSchemaEmbeddingPropertyConfig())
309                 .setEnableEmbeddingQuantization(
310                         Flags.enableSchemaEmbeddingQuantization())
311                 .setEnableScorableProperties(Flags.enableScorableProperty())
312                 .setIcuDataFileAbsolutePath(getIcuDataFileAbsolutePath())
313                 .setManageBlobFiles(!Flags.enableAppSearchManageBlobFiles())
314                 // Join index v3 is a prerequisite for delete propagation.
315                 .setEnableDeletePropagationFrom(
316                         Flags.enableDeletePropagationType() && Flags.enableQualifiedIdJoinIndexV3())
317                 .setCalculateTimeSinceLastAttemptedOptimize(
318                         Flags.enableCalculateTimeSinceLastAttemptedOptimize())
319                 .setEnableQualifiedIdJoinIndexV3(Flags.enableQualifiedIdJoinIndexV3())
320                 .setEnableSoftIndexRestoration(Flags.enableSoftIndexRestoration())
321                 .setEnableMarkerFileForOptimize(Flags.enableMarkerFileForOptimize())
322                 .setReleaseBackupSchemaFileIfOverlayPresent(
323                         Flags.enableReleaseBackupSchemaFileIfOverlayPresent())
324                 // This is a necessary bug fix for the VMEnabled case. VMEnabled is guarded by its
325                 // own trunk-stable flag, therefore this can be included there. Otherwise, we should
326                 // use this trank-stable flag.
327                 .setEnableStrictPageByteSizeLimit(
328                         Flags.enableStrictPageByteSizeLimit() || isVMEnabled)
329                 .setCompressionThresholdBytes(
330                         (Flags.enableCompressionThreshold() || isVMEnabled)
331                                 ? Math.max(0, getCompressionThresholdBytes()) : 0)
332                 .setCompressionMemLevel(
333                         (Flags.enableCompressionMemLevelOne() || isVMEnabled) ? 1
334                                 : DEFAULT_COMPRESSION_MEM_LEVEL)
335                 .build();
336     }
337 }
338