1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16 package com.android.providers.contacts; 17 18 import static com.android.providers.contacts.flags.Flags.cp2SyncSearchIndexFlag; 19 20 import android.content.ContentValues; 21 import android.database.Cursor; 22 import android.database.sqlite.SQLiteDatabase; 23 import android.os.SystemClock; 24 import android.provider.ContactsContract.CommonDataKinds.Email; 25 import android.provider.ContactsContract.CommonDataKinds.Nickname; 26 import android.provider.ContactsContract.CommonDataKinds.Organization; 27 import android.provider.ContactsContract.CommonDataKinds.StructuredPostal; 28 import android.provider.ContactsContract.Data; 29 import android.provider.ContactsContract.RawContacts; 30 import android.text.TextUtils; 31 import android.util.ArraySet; 32 import android.util.Log; 33 34 import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns; 35 import com.android.providers.contacts.ContactsDatabaseHelper.MimetypesColumns; 36 import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns; 37 import com.android.providers.contacts.ContactsDatabaseHelper.SearchIndexColumns; 38 import com.android.providers.contacts.ContactsDatabaseHelper.Tables; 39 import com.android.providers.contacts.util.CappedStringBuilder; 40 41 import com.google.android.collect.Lists; 42 import com.google.common.annotations.VisibleForTesting; 43 44 import java.util.ArrayList; 45 import java.util.List; 46 import java.util.Set; 47 import java.util.regex.Pattern; 48 49 /** 50 * Maintains a search index for comprehensive contact search. 51 */ 52 public class SearchIndexManager { 53 private static final String TAG = "ContactsFTS"; 54 55 private static final boolean VERBOSE_LOGGING = Log.isLoggable(TAG, Log.VERBOSE); 56 57 public static final int MAX_UPDATE_FILTER_CONTACTS = 5000; 58 private static final int MAX_STRING_BUILDER_SIZE = 1024 * 10; 59 60 public static final String PROPERTY_SEARCH_INDEX_VERSION = "search_index"; 61 private static final String ROW_ID_KEY = "rowid"; 62 private static final int SEARCH_INDEX_VERSION = 2; 63 64 private static final class ContactIndexQuery { 65 public static final String[] COLUMNS = { 66 Data.CONTACT_ID, 67 MimetypesColumns.MIMETYPE, 68 Data.DATA1, Data.DATA2, Data.DATA3, Data.DATA4, Data.DATA5, 69 Data.DATA6, Data.DATA7, Data.DATA8, Data.DATA9, Data.DATA10, Data.DATA11, 70 Data.DATA12, Data.DATA13, Data.DATA14 71 }; 72 73 public static final int MIMETYPE = 1; 74 } 75 76 public static class IndexBuilder { 77 public static final int SEPARATOR_SPACE = 0; 78 public static final int SEPARATOR_PARENTHESES = 1; 79 public static final int SEPARATOR_SLASH = 2; 80 public static final int SEPARATOR_COMMA = 3; 81 82 private CappedStringBuilder mSbContent = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE); 83 private CappedStringBuilder mSbName = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE); 84 private CappedStringBuilder mSbTokens = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE); 85 private CappedStringBuilder mSbElementContent = new CappedStringBuilder( 86 MAX_STRING_BUILDER_SIZE); 87 private ArraySet<String> mUniqueElements = new ArraySet<>(); 88 private Cursor mCursor; 89 setCursor(Cursor cursor)90 void setCursor(Cursor cursor) { 91 this.mCursor = cursor; 92 } 93 reset()94 void reset() { 95 mSbContent.clear(); 96 mSbTokens.clear(); 97 mSbName.clear(); 98 mSbElementContent.clear(); 99 mUniqueElements.clear(); 100 } 101 getContent()102 public String getContent() { 103 return mSbContent.length() == 0 ? null : mSbContent.toString(); 104 } 105 getName()106 public String getName() { 107 return mSbName.length() == 0 ? null : mSbName.toString(); 108 } 109 getTokens()110 public String getTokens() { 111 return mSbTokens.length() == 0 ? null : mSbTokens.toString(); 112 } 113 getString(String columnName)114 public String getString(String columnName) { 115 return mCursor.getString(mCursor.getColumnIndex(columnName)); 116 } 117 getInt(String columnName)118 public int getInt(String columnName) { 119 return mCursor.getInt(mCursor.getColumnIndex(columnName)); 120 } 121 122 @Override toString()123 public String toString() { 124 return "Content: " + mSbContent + "\n Name: " + mSbName + "\n Tokens: " + mSbTokens; 125 } 126 commit()127 public void commit() { 128 if (mSbElementContent.length() != 0) { 129 String content = mSbElementContent.toString().replace('\n', ' '); 130 if (!mUniqueElements.contains(content)) { 131 if (mSbContent.length() != 0) { 132 mSbContent.append('\n'); 133 } 134 mSbContent.append(content); 135 mUniqueElements.add(content); 136 } 137 mSbElementContent.clear(); 138 } 139 } 140 appendContentFromColumn(String columnName)141 public void appendContentFromColumn(String columnName) { 142 appendContentFromColumn(columnName, SEPARATOR_SPACE); 143 } 144 appendContentFromColumn(String columnName, int format)145 public void appendContentFromColumn(String columnName, int format) { 146 appendContent(getString(columnName), format); 147 } 148 appendContent(String value)149 public void appendContent(String value) { 150 appendContent(value, SEPARATOR_SPACE); 151 } 152 appendContent(String value, int format)153 private void appendContent(String value, int format) { 154 if (TextUtils.isEmpty(value)) { 155 return; 156 } 157 158 switch (format) { 159 case SEPARATOR_SPACE: 160 if (mSbElementContent.length() > 0) { 161 mSbElementContent.append(' '); 162 } 163 mSbElementContent.append(value); 164 break; 165 166 case SEPARATOR_SLASH: 167 mSbElementContent.append('/').append(value); 168 break; 169 170 case SEPARATOR_PARENTHESES: 171 if (mSbElementContent.length() > 0) { 172 mSbElementContent.append(' '); 173 } 174 mSbElementContent.append('(').append(value).append(')'); 175 break; 176 177 case SEPARATOR_COMMA: 178 if (mSbElementContent.length() > 0) { 179 mSbElementContent.append(", "); 180 } 181 mSbElementContent.append(value); 182 break; 183 } 184 } 185 appendToken(String token)186 public void appendToken(String token) { 187 if (TextUtils.isEmpty(token)) { 188 return; 189 } 190 191 if (mSbTokens.length() != 0) { 192 mSbTokens.append(' '); 193 } 194 mSbTokens.append(token); 195 } 196 appendNameFromColumn(String columnName)197 public void appendNameFromColumn(String columnName) { 198 appendName(getString(columnName)); 199 } 200 appendName(String name)201 public void appendName(String name) { 202 if (TextUtils.isEmpty(name)) { 203 return; 204 } 205 // First, put the original name. 206 appendNameInternal(name); 207 208 // Then, if the name contains more than one FTS token, put each token into the index 209 // too. 210 // 211 // This is to make names with special characters searchable, such as "double-barrelled" 212 // "L'Image". 213 // 214 // Here's how it works: 215 // Because we "normalize" names when putting into the index, if we only put 216 // "double-barrelled", the index will only contain "doublebarrelled". 217 // Now, if the user searches for "double-barrelled", the searcher tokenizes it into 218 // two tokens, "double" and "barrelled". The first one matches "doublebarrelled" 219 // but the second one doesn't (because we only do the prefix match), so 220 // "doublebarrelled" doesn't match. 221 // So, here, we put each token in a name into the index too. In the case above, 222 // we put also "double" and "barrelled". 223 // With this, queries such as "double-barrelled", "double barrelled", "doublebarrelled" 224 // will all match "double-barrelled". 225 final List<String> nameParts = splitIntoFtsTokens(name); 226 if (nameParts.size() > 1) { 227 for (String namePart : nameParts) { 228 if (!TextUtils.isEmpty(namePart)) { 229 appendNameInternal(namePart); 230 } 231 } 232 } 233 } 234 235 /** 236 * Normalize a name and add to {@link #mSbName} 237 */ appendNameInternal(String name)238 private void appendNameInternal(String name) { 239 if (mSbName.length() != 0) { 240 mSbName.append(' '); 241 } 242 mSbName.append(NameNormalizer.normalize(name)); 243 } 244 } 245 246 private final ContactsProvider2 mContactsProvider; 247 private final ContactsDatabaseHelper mDbHelper; 248 private StringBuilder mSb = new StringBuilder(); 249 private IndexBuilder mIndexBuilder = new IndexBuilder(); 250 private ContentValues mValues = new ContentValues(); 251 private String[] mSelectionArgs1 = new String[1]; 252 private int mMaxUpdateFilterContacts = MAX_UPDATE_FILTER_CONTACTS; 253 SearchIndexManager(ContactsProvider2 contactsProvider)254 public SearchIndexManager(ContactsProvider2 contactsProvider) { 255 this.mContactsProvider = contactsProvider; 256 mDbHelper = (ContactsDatabaseHelper) mContactsProvider.getDatabaseHelper(); 257 } 258 updateIndex(boolean force)259 public void updateIndex(boolean force) { 260 if (force) { 261 setSearchIndexVersion(0); 262 } else { 263 if (getSearchIndexVersion() == SEARCH_INDEX_VERSION) { 264 return; 265 } 266 } 267 SQLiteDatabase db = mDbHelper.getWritableDatabase(); 268 db.beginTransaction(); 269 try { 270 // We do a version check again, because the version might have been modified after 271 // the first check. We need to do the check again in a transaction to make sure. 272 if (getSearchIndexVersion() != SEARCH_INDEX_VERSION) { 273 rebuildIndex(db); 274 setSearchIndexVersion(SEARCH_INDEX_VERSION); 275 db.setTransactionSuccessful(); 276 } 277 } finally { 278 db.endTransaction(); 279 } 280 } 281 rebuildIndex(SQLiteDatabase db)282 private void rebuildIndex(SQLiteDatabase db) { 283 mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_UPGRADING); 284 final long start = SystemClock.elapsedRealtime(); 285 int count = 0; 286 try { 287 mDbHelper.createSearchIndexTable(db, true); 288 count = buildAndInsertIndex(db, null); 289 } finally { 290 mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_NORMAL); 291 292 final long end = SystemClock.elapsedRealtime(); 293 Log.i(TAG, "Rebuild contact search index in " + (end - start) + "ms, " 294 + count + " contacts"); 295 } 296 } 297 298 /** 299 * Updates the stale contact ids in the search index. 300 * 301 * <p> 302 * The stale contact ids used by this method are cached in the 303 * stale_search_index_contacts temp table. If the count of stale contacts 304 * is greater than the maximum amount of stale contacts, the search index 305 * is rebuilt completely. If not then only the stale contacts are updated. 306 * 307 * Stale contacts are contacts which have been either added, updated or deleted. 308 * Meaning the information in the search index for those contacts needs to be 309 * updated. 310 * </p> 311 * 312 * @param staleContactsCount The amount of cached stale contacts ids. Passing a 313 * negative value or a value greater than the max amount of allowed stale 314 * contacts will rebuild the entire search index. 315 */ updateIndexForRawContacts(long staleContactsCount)316 public void updateIndexForRawContacts(long staleContactsCount) { 317 if (VERBOSE_LOGGING) { 318 Log.v(TAG, "Updating search index for " + staleContactsCount + " contacts"); 319 } 320 321 final SQLiteDatabase db = mDbHelper.getWritableDatabase(); 322 323 String contactIdsSelection = null; 324 String whereClause = null; 325 326 // If the amount of contacts which need to be re-synced in the search index 327 // surpasses the limit, then simply clear the entire search index table and 328 // and rebuild it. 329 if (staleContactsCount > 0 && staleContactsCount <= mMaxUpdateFilterContacts) { 330 // Selects all raw_contacts which contain a stale contact id in search index 331 contactIdsSelection = 332 "raw_contacts.contact_id IN (SELECT id FROM stale_search_index_contacts)"; 333 // Only remove the provided contacts 334 whereClause = "rowid IN (SELECT id FROM stale_search_index_contacts)"; 335 } 336 db.delete(Tables.SEARCH_INDEX, whereClause, null); 337 338 // Rebuild search index. The selection is used to select raw_contacts. If the selection 339 // string is null the entire search index table will be rebuilt. 340 final int count = buildAndInsertIndex(db, contactIdsSelection); 341 342 if (VERBOSE_LOGGING) { 343 Log.v(TAG, "Updated search index for " + count + " contacts"); 344 } 345 } 346 updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds)347 public void updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds) { 348 if (cp2SyncSearchIndexFlag()) { 349 throw new UnsupportedOperationException(); 350 } 351 if (VERBOSE_LOGGING) { 352 Log.v(TAG, "Updating search index for " + contactIds.size() + 353 " contacts / " + rawContactIds.size() + " raw contacts"); 354 } 355 StringBuilder sb = new StringBuilder(); 356 sb.append("("); 357 if (!contactIds.isEmpty()) { 358 // Select all raw contacts that belong to all contacts in contactIds 359 sb.append(RawContacts.CONTACT_ID + " IN ("); 360 sb.append(TextUtils.join(",", contactIds)); 361 sb.append(')'); 362 } 363 if (!rawContactIds.isEmpty()) { 364 if (!contactIds.isEmpty()) { 365 sb.append(" OR "); 366 } 367 // Select all raw contacts that belong to the same contact as all raw contacts 368 // in rawContactIds. For every raw contact in rawContactIds that we are updating 369 // the index for, we need to rebuild the search index for all raw contacts belonging 370 // to the same contact, because we can only update the search index on a per-contact 371 // basis. 372 sb.append(RawContacts.CONTACT_ID + " IN " 373 + "(SELECT " + RawContacts.CONTACT_ID + " FROM " + Tables.RAW_CONTACTS 374 + " WHERE " + RawContactsColumns.CONCRETE_ID + " IN ("); 375 sb.append(TextUtils.join(",", rawContactIds)); 376 sb.append("))"); 377 } 378 379 sb.append(")"); 380 381 // The selection to select raw_contacts. 382 final String rawContactsSelection = sb.toString(); 383 384 // Remove affected search_index rows. 385 final SQLiteDatabase db = mDbHelper.getWritableDatabase(); 386 final int deleted = db.delete(Tables.SEARCH_INDEX, 387 ROW_ID_KEY + " IN (SELECT " 388 + RawContacts.CONTACT_ID 389 + " FROM " + Tables.RAW_CONTACTS 390 + " WHERE " + rawContactsSelection 391 + ")", 392 null); 393 394 // Then rebuild index for them. 395 final int count = buildAndInsertIndex(db, rawContactsSelection); 396 397 if (VERBOSE_LOGGING) { 398 Log.v(TAG, "Updated search index for " + count + " contacts"); 399 } 400 } 401 buildAndInsertIndex(SQLiteDatabase db, String selection)402 private int buildAndInsertIndex(SQLiteDatabase db, String selection) { 403 mSb.setLength(0); 404 mSb.append(Data.CONTACT_ID + ", "); 405 mSb.append("(CASE WHEN " + DataColumns.MIMETYPE_ID + "="); 406 mSb.append(mDbHelper.getMimeTypeId(Nickname.CONTENT_ITEM_TYPE)); 407 mSb.append(" THEN -4 "); 408 mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "="); 409 mSb.append(mDbHelper.getMimeTypeId(Organization.CONTENT_ITEM_TYPE)); 410 mSb.append(" THEN -3 "); 411 mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "="); 412 mSb.append(mDbHelper.getMimeTypeId(StructuredPostal.CONTENT_ITEM_TYPE)); 413 mSb.append(" THEN -2"); 414 mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "="); 415 mSb.append(mDbHelper.getMimeTypeId(Email.CONTENT_ITEM_TYPE)); 416 mSb.append(" THEN -1"); 417 mSb.append(" ELSE " + DataColumns.MIMETYPE_ID); 418 mSb.append(" END), " + Data.IS_SUPER_PRIMARY + ", " + DataColumns.CONCRETE_ID); 419 420 int count = 0; 421 Cursor cursor = db.query(Tables.DATA_JOIN_MIMETYPE_RAW_CONTACTS, ContactIndexQuery.COLUMNS, 422 selection, null, null, null, mSb.toString()); 423 mIndexBuilder.setCursor(cursor); 424 mIndexBuilder.reset(); 425 try { 426 long currentContactId = -1; 427 while (cursor.moveToNext()) { 428 long contactId = cursor.getLong(0); 429 if (contactId != currentContactId) { 430 if (currentContactId != -1) { 431 insertIndexRow(db, currentContactId, mIndexBuilder); 432 count++; 433 } 434 currentContactId = contactId; 435 mIndexBuilder.reset(); 436 } 437 String mimetype = cursor.getString(ContactIndexQuery.MIMETYPE); 438 DataRowHandler dataRowHandler = mContactsProvider.getDataRowHandler(mimetype); 439 if (dataRowHandler.hasSearchableData()) { 440 dataRowHandler.appendSearchableData(mIndexBuilder); 441 mIndexBuilder.commit(); 442 } 443 } 444 if (currentContactId != -1) { 445 insertIndexRow(db, currentContactId, mIndexBuilder); 446 count++; 447 } 448 } finally { 449 cursor.close(); 450 } 451 return count; 452 } 453 insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder)454 private void insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder) { 455 mValues.clear(); 456 mValues.put(SearchIndexColumns.CONTENT, builder.getContent()); 457 mValues.put(SearchIndexColumns.NAME, builder.getName()); 458 mValues.put(SearchIndexColumns.TOKENS, builder.getTokens()); 459 mValues.put(SearchIndexColumns.CONTACT_ID, contactId); 460 mValues.put(ROW_ID_KEY, contactId); 461 db.insert(Tables.SEARCH_INDEX, null, mValues); 462 } 463 getSearchIndexVersion()464 private int getSearchIndexVersion() { 465 return Integer.parseInt(mDbHelper.getProperty(PROPERTY_SEARCH_INDEX_VERSION, "0")); 466 } 467 setSearchIndexVersion(int version)468 private void setSearchIndexVersion(int version) { 469 mDbHelper.setProperty(PROPERTY_SEARCH_INDEX_VERSION, String.valueOf(version)); 470 } 471 472 @VisibleForTesting setMaxUpdateFilterContacts(int maxUpdateFilterContacts)473 void setMaxUpdateFilterContacts(int maxUpdateFilterContacts) { 474 mMaxUpdateFilterContacts = maxUpdateFilterContacts; 475 } 476 477 /** 478 * Token separator that matches SQLite's "simple" tokenizer. 479 * - Unicode codepoints >= 128: Everything 480 * - Unicode codepoints < 128: Alphanumeric and "_" 481 * - Everything else is a separator of tokens 482 */ 483 private static final Pattern FTS_TOKEN_SEPARATOR_RE = 484 Pattern.compile("[^\u0080-\uffff\\p{Alnum}_]"); 485 486 /** 487 * Tokenize a string in the way as that of SQLite's "simple" tokenizer. 488 */ 489 @VisibleForTesting splitIntoFtsTokens(String s)490 static List<String> splitIntoFtsTokens(String s) { 491 final ArrayList<String> ret = Lists.newArrayList(); 492 for (String token : FTS_TOKEN_SEPARATOR_RE.split(s)) { 493 if (!TextUtils.isEmpty(token)) { 494 ret.add(token); 495 } 496 } 497 return ret; 498 } 499 500 /** 501 * Tokenizes the query and normalizes/hex encodes each token. The tokenizer uses the same 502 * rules as SQLite's "simple" tokenizer. Each token is added to the retokenizer and then 503 * returned as a String. 504 * @see FtsQueryBuilder#UNSCOPED_NORMALIZING 505 * @see FtsQueryBuilder#SCOPED_NAME_NORMALIZING 506 */ getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder)507 public static String getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder) { 508 final StringBuilder result = new StringBuilder(); 509 for (String token : splitIntoFtsTokens(query)) { 510 ftsQueryBuilder.addToken(result, token); 511 } 512 return result.toString(); 513 } 514 515 public static abstract class FtsQueryBuilder { addToken(StringBuilder builder, String token)516 public abstract void addToken(StringBuilder builder, String token); 517 518 /** Normalizes and space-concatenates each token. Example: "a1b2c1* a2b3c2*" */ 519 public static final FtsQueryBuilder UNSCOPED_NORMALIZING = new UnscopedNormalizingBuilder(); 520 521 /** 522 * Scopes each token to a column and normalizes the name. 523 * Example: "content:foo* name:a1b2c1* tokens:foo* content:bar* name:a2b3c2* tokens:bar*" 524 */ 525 public static final FtsQueryBuilder SCOPED_NAME_NORMALIZING = 526 new ScopedNameNormalizingBuilder(); 527 528 /** 529 * Scopes each token to a the content column and also for name with normalization. 530 * Also adds a user-defined expression to each token. This allows common criteria to be 531 * concatenated to each token. 532 * Example (commonCriteria=" OR tokens:123*"): 533 * "content:650* OR name:1A1B1C* OR tokens:123* content:2A2B2C* OR name:foo* OR tokens:123*" 534 */ getDigitsQueryBuilder(final String commonCriteria)535 public static FtsQueryBuilder getDigitsQueryBuilder(final String commonCriteria) { 536 return new FtsQueryBuilder() { 537 @Override 538 public void addToken(StringBuilder builder, String token) { 539 if (builder.length() != 0) builder.append(' '); 540 541 builder.append("content:"); 542 builder.append(token); 543 builder.append("* "); 544 545 final String normalizedToken = NameNormalizer.normalize(token); 546 if (!TextUtils.isEmpty(normalizedToken)) { 547 builder.append(" OR name:"); 548 builder.append(normalizedToken); 549 builder.append('*'); 550 } 551 552 builder.append(commonCriteria); 553 } 554 }; 555 } 556 } 557 558 private static class UnscopedNormalizingBuilder extends FtsQueryBuilder { 559 @Override 560 public void addToken(StringBuilder builder, String token) { 561 if (builder.length() != 0) builder.append(' '); 562 563 // the token could be empty (if the search query was "_"). we should still emit it 564 // here, as we otherwise risk to end up with an empty MATCH-expression MATCH "" 565 builder.append(NameNormalizer.normalize(token)); 566 builder.append('*'); 567 } 568 } 569 570 private static class ScopedNameNormalizingBuilder extends FtsQueryBuilder { 571 @Override 572 public void addToken(StringBuilder builder, String token) { 573 if (builder.length() != 0) builder.append(' '); 574 575 builder.append("content:"); 576 builder.append(token); 577 builder.append('*'); 578 579 final String normalizedToken = NameNormalizer.normalize(token); 580 if (!TextUtils.isEmpty(normalizedToken)) { 581 builder.append(" OR name:"); 582 builder.append(normalizedToken); 583 builder.append('*'); 584 } 585 586 builder.append(" OR tokens:"); 587 builder.append(token); 588 builder.append("*"); 589 } 590 } 591 } 592