1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License 15 */ 16 package com.android.providers.contacts; 17 18 import android.content.ContentValues; 19 import android.database.Cursor; 20 import android.database.sqlite.SQLiteDatabase; 21 import android.os.SystemClock; 22 import android.provider.ContactsContract.CommonDataKinds.Email; 23 import android.provider.ContactsContract.CommonDataKinds.Nickname; 24 import android.provider.ContactsContract.CommonDataKinds.Organization; 25 import android.provider.ContactsContract.CommonDataKinds.StructuredPostal; 26 import android.provider.ContactsContract.Data; 27 import android.provider.ContactsContract.RawContacts; 28 import android.text.TextUtils; 29 import android.util.ArraySet; 30 import android.util.Log; 31 32 import com.android.providers.contacts.ContactsDatabaseHelper.DataColumns; 33 import com.android.providers.contacts.ContactsDatabaseHelper.MimetypesColumns; 34 import com.android.providers.contacts.ContactsDatabaseHelper.RawContactsColumns; 35 import com.android.providers.contacts.ContactsDatabaseHelper.SearchIndexColumns; 36 import com.android.providers.contacts.ContactsDatabaseHelper.Tables; 37 import com.android.providers.contacts.util.CappedStringBuilder; 38 39 import com.google.android.collect.Lists; 40 import com.google.common.annotations.VisibleForTesting; 41 42 import java.util.ArrayList; 43 import java.util.List; 44 import java.util.Set; 45 import java.util.regex.Pattern; 46 47 /** 48 * Maintains a search index for comprehensive contact search. 49 */ 50 public class SearchIndexManager { 51 private static final String TAG = "ContactsFTS"; 52 53 private static final boolean VERBOSE_LOGGING = Log.isLoggable(TAG, Log.VERBOSE); 54 55 private static final int MAX_STRING_BUILDER_SIZE = 1024 * 10; 56 57 public static final String PROPERTY_SEARCH_INDEX_VERSION = "search_index"; 58 private static final String ROW_ID_KEY = "rowid"; 59 private static final int SEARCH_INDEX_VERSION = 2; 60 61 private static final class ContactIndexQuery { 62 public static final String[] COLUMNS = { 63 Data.CONTACT_ID, 64 MimetypesColumns.MIMETYPE, 65 Data.DATA1, Data.DATA2, Data.DATA3, Data.DATA4, Data.DATA5, 66 Data.DATA6, Data.DATA7, Data.DATA8, Data.DATA9, Data.DATA10, Data.DATA11, 67 Data.DATA12, Data.DATA13, Data.DATA14 68 }; 69 70 public static final int MIMETYPE = 1; 71 } 72 73 public static class IndexBuilder { 74 public static final int SEPARATOR_SPACE = 0; 75 public static final int SEPARATOR_PARENTHESES = 1; 76 public static final int SEPARATOR_SLASH = 2; 77 public static final int SEPARATOR_COMMA = 3; 78 79 private CappedStringBuilder mSbContent = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE); 80 private CappedStringBuilder mSbName = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE); 81 private CappedStringBuilder mSbTokens = new CappedStringBuilder(MAX_STRING_BUILDER_SIZE); 82 private CappedStringBuilder mSbElementContent = new CappedStringBuilder( 83 MAX_STRING_BUILDER_SIZE); 84 private ArraySet<String> mUniqueElements = new ArraySet<>(); 85 private Cursor mCursor; 86 setCursor(Cursor cursor)87 void setCursor(Cursor cursor) { 88 this.mCursor = cursor; 89 } 90 reset()91 void reset() { 92 mSbContent.clear(); 93 mSbTokens.clear(); 94 mSbName.clear(); 95 mSbElementContent.clear(); 96 mUniqueElements.clear(); 97 } 98 getContent()99 public String getContent() { 100 return mSbContent.length() == 0 ? null : mSbContent.toString(); 101 } 102 getName()103 public String getName() { 104 return mSbName.length() == 0 ? null : mSbName.toString(); 105 } 106 getTokens()107 public String getTokens() { 108 return mSbTokens.length() == 0 ? null : mSbTokens.toString(); 109 } 110 getString(String columnName)111 public String getString(String columnName) { 112 return mCursor.getString(mCursor.getColumnIndex(columnName)); 113 } 114 getInt(String columnName)115 public int getInt(String columnName) { 116 return mCursor.getInt(mCursor.getColumnIndex(columnName)); 117 } 118 119 @Override toString()120 public String toString() { 121 return "Content: " + mSbContent + "\n Name: " + mSbName + "\n Tokens: " + mSbTokens; 122 } 123 commit()124 public void commit() { 125 if (mSbElementContent.length() != 0) { 126 String content = mSbElementContent.toString().replace('\n', ' '); 127 if (!mUniqueElements.contains(content)) { 128 if (mSbContent.length() != 0) { 129 mSbContent.append('\n'); 130 } 131 mSbContent.append(content); 132 mUniqueElements.add(content); 133 } 134 mSbElementContent.clear(); 135 } 136 } 137 appendContentFromColumn(String columnName)138 public void appendContentFromColumn(String columnName) { 139 appendContentFromColumn(columnName, SEPARATOR_SPACE); 140 } 141 appendContentFromColumn(String columnName, int format)142 public void appendContentFromColumn(String columnName, int format) { 143 appendContent(getString(columnName), format); 144 } 145 appendContent(String value)146 public void appendContent(String value) { 147 appendContent(value, SEPARATOR_SPACE); 148 } 149 appendContent(String value, int format)150 private void appendContent(String value, int format) { 151 if (TextUtils.isEmpty(value)) { 152 return; 153 } 154 155 switch (format) { 156 case SEPARATOR_SPACE: 157 if (mSbElementContent.length() > 0) { 158 mSbElementContent.append(' '); 159 } 160 mSbElementContent.append(value); 161 break; 162 163 case SEPARATOR_SLASH: 164 mSbElementContent.append('/').append(value); 165 break; 166 167 case SEPARATOR_PARENTHESES: 168 if (mSbElementContent.length() > 0) { 169 mSbElementContent.append(' '); 170 } 171 mSbElementContent.append('(').append(value).append(')'); 172 break; 173 174 case SEPARATOR_COMMA: 175 if (mSbElementContent.length() > 0) { 176 mSbElementContent.append(", "); 177 } 178 mSbElementContent.append(value); 179 break; 180 } 181 } 182 appendToken(String token)183 public void appendToken(String token) { 184 if (TextUtils.isEmpty(token)) { 185 return; 186 } 187 188 if (mSbTokens.length() != 0) { 189 mSbTokens.append(' '); 190 } 191 mSbTokens.append(token); 192 } 193 appendNameFromColumn(String columnName)194 public void appendNameFromColumn(String columnName) { 195 appendName(getString(columnName)); 196 } 197 appendName(String name)198 public void appendName(String name) { 199 if (TextUtils.isEmpty(name)) { 200 return; 201 } 202 // First, put the original name. 203 appendNameInternal(name); 204 205 // Then, if the name contains more than one FTS token, put each token into the index 206 // too. 207 // 208 // This is to make names with special characters searchable, such as "double-barrelled" 209 // "L'Image". 210 // 211 // Here's how it works: 212 // Because we "normalize" names when putting into the index, if we only put 213 // "double-barrelled", the index will only contain "doublebarrelled". 214 // Now, if the user searches for "double-barrelled", the searcher tokenizes it into 215 // two tokens, "double" and "barrelled". The first one matches "doublebarrelled" 216 // but the second one doesn't (because we only do the prefix match), so 217 // "doublebarrelled" doesn't match. 218 // So, here, we put each token in a name into the index too. In the case above, 219 // we put also "double" and "barrelled". 220 // With this, queries such as "double-barrelled", "double barrelled", "doublebarrelled" 221 // will all match "double-barrelled". 222 final List<String> nameParts = splitIntoFtsTokens(name); 223 if (nameParts.size() > 1) { 224 for (String namePart : nameParts) { 225 if (!TextUtils.isEmpty(namePart)) { 226 appendNameInternal(namePart); 227 } 228 } 229 } 230 } 231 232 /** 233 * Normalize a name and add to {@link #mSbName} 234 */ appendNameInternal(String name)235 private void appendNameInternal(String name) { 236 if (mSbName.length() != 0) { 237 mSbName.append(' '); 238 } 239 mSbName.append(NameNormalizer.normalize(name)); 240 } 241 } 242 243 private final ContactsProvider2 mContactsProvider; 244 private final ContactsDatabaseHelper mDbHelper; 245 private StringBuilder mSb = new StringBuilder(); 246 private IndexBuilder mIndexBuilder = new IndexBuilder(); 247 private ContentValues mValues = new ContentValues(); 248 private String[] mSelectionArgs1 = new String[1]; 249 SearchIndexManager(ContactsProvider2 contactsProvider)250 public SearchIndexManager(ContactsProvider2 contactsProvider) { 251 this.mContactsProvider = contactsProvider; 252 mDbHelper = (ContactsDatabaseHelper) mContactsProvider.getDatabaseHelper(); 253 } 254 updateIndex(boolean force)255 public void updateIndex(boolean force) { 256 if (force) { 257 setSearchIndexVersion(0); 258 } else { 259 if (getSearchIndexVersion() == SEARCH_INDEX_VERSION) { 260 return; 261 } 262 } 263 SQLiteDatabase db = mDbHelper.getWritableDatabase(); 264 db.beginTransaction(); 265 try { 266 // We do a version check again, because the version might have been modified after 267 // the first check. We need to do the check again in a transaction to make sure. 268 if (getSearchIndexVersion() != SEARCH_INDEX_VERSION) { 269 rebuildIndex(db); 270 setSearchIndexVersion(SEARCH_INDEX_VERSION); 271 db.setTransactionSuccessful(); 272 } 273 } finally { 274 db.endTransaction(); 275 } 276 } 277 rebuildIndex(SQLiteDatabase db)278 private void rebuildIndex(SQLiteDatabase db) { 279 mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_UPGRADING); 280 final long start = SystemClock.elapsedRealtime(); 281 int count = 0; 282 try { 283 mDbHelper.createSearchIndexTable(db, true); 284 count = buildAndInsertIndex(db, null); 285 } finally { 286 mContactsProvider.setProviderStatus(ContactsProvider2.STATUS_NORMAL); 287 288 final long end = SystemClock.elapsedRealtime(); 289 Log.i(TAG, "Rebuild contact search index in " + (end - start) + "ms, " 290 + count + " contacts"); 291 } 292 } 293 updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds)294 public void updateIndexForRawContacts(Set<Long> contactIds, Set<Long> rawContactIds) { 295 if (VERBOSE_LOGGING) { 296 Log.v(TAG, "Updating search index for " + contactIds.size() + 297 " contacts / " + rawContactIds.size() + " raw contacts"); 298 } 299 StringBuilder sb = new StringBuilder(); 300 sb.append("("); 301 if (!contactIds.isEmpty()) { 302 // Select all raw contacts that belong to all contacts in contactIds 303 sb.append(RawContacts.CONTACT_ID + " IN ("); 304 sb.append(TextUtils.join(",", contactIds)); 305 sb.append(')'); 306 } 307 if (!rawContactIds.isEmpty()) { 308 if (!contactIds.isEmpty()) { 309 sb.append(" OR "); 310 } 311 // Select all raw contacts that belong to the same contact as all raw contacts 312 // in rawContactIds. For every raw contact in rawContactIds that we are updating 313 // the index for, we need to rebuild the search index for all raw contacts belonging 314 // to the same contact, because we can only update the search index on a per-contact 315 // basis. 316 sb.append(RawContacts.CONTACT_ID + " IN " + 317 "(SELECT " + RawContacts.CONTACT_ID + " FROM " + Tables.RAW_CONTACTS + 318 " WHERE " + RawContactsColumns.CONCRETE_ID + " IN ("); 319 sb.append(TextUtils.join(",", rawContactIds)); 320 sb.append("))"); 321 } 322 323 sb.append(")"); 324 325 // The selection to select raw_contacts. 326 final String rawContactsSelection = sb.toString(); 327 328 // Remove affected search_index rows. 329 final SQLiteDatabase db = mDbHelper.getWritableDatabase(); 330 final int deleted = db.delete(Tables.SEARCH_INDEX, 331 ROW_ID_KEY + " IN (SELECT " + 332 RawContacts.CONTACT_ID + 333 " FROM " + Tables.RAW_CONTACTS + 334 " WHERE " + rawContactsSelection + 335 ")" 336 , null); 337 338 // Then rebuild index for them. 339 final int count = buildAndInsertIndex(db, rawContactsSelection); 340 341 if (VERBOSE_LOGGING) { 342 Log.v(TAG, "Updated search index for " + count + " contacts"); 343 } 344 } 345 buildAndInsertIndex(SQLiteDatabase db, String selection)346 private int buildAndInsertIndex(SQLiteDatabase db, String selection) { 347 mSb.setLength(0); 348 mSb.append(Data.CONTACT_ID + ", "); 349 mSb.append("(CASE WHEN " + DataColumns.MIMETYPE_ID + "="); 350 mSb.append(mDbHelper.getMimeTypeId(Nickname.CONTENT_ITEM_TYPE)); 351 mSb.append(" THEN -4 "); 352 mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "="); 353 mSb.append(mDbHelper.getMimeTypeId(Organization.CONTENT_ITEM_TYPE)); 354 mSb.append(" THEN -3 "); 355 mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "="); 356 mSb.append(mDbHelper.getMimeTypeId(StructuredPostal.CONTENT_ITEM_TYPE)); 357 mSb.append(" THEN -2"); 358 mSb.append(" WHEN " + DataColumns.MIMETYPE_ID + "="); 359 mSb.append(mDbHelper.getMimeTypeId(Email.CONTENT_ITEM_TYPE)); 360 mSb.append(" THEN -1"); 361 mSb.append(" ELSE " + DataColumns.MIMETYPE_ID); 362 mSb.append(" END), " + Data.IS_SUPER_PRIMARY + ", " + DataColumns.CONCRETE_ID); 363 364 int count = 0; 365 Cursor cursor = db.query(Tables.DATA_JOIN_MIMETYPE_RAW_CONTACTS, ContactIndexQuery.COLUMNS, 366 selection, null, null, null, mSb.toString()); 367 mIndexBuilder.setCursor(cursor); 368 mIndexBuilder.reset(); 369 try { 370 long currentContactId = -1; 371 while (cursor.moveToNext()) { 372 long contactId = cursor.getLong(0); 373 if (contactId != currentContactId) { 374 if (currentContactId != -1) { 375 insertIndexRow(db, currentContactId, mIndexBuilder); 376 count++; 377 } 378 currentContactId = contactId; 379 mIndexBuilder.reset(); 380 } 381 String mimetype = cursor.getString(ContactIndexQuery.MIMETYPE); 382 DataRowHandler dataRowHandler = mContactsProvider.getDataRowHandler(mimetype); 383 if (dataRowHandler.hasSearchableData()) { 384 dataRowHandler.appendSearchableData(mIndexBuilder); 385 mIndexBuilder.commit(); 386 } 387 } 388 if (currentContactId != -1) { 389 insertIndexRow(db, currentContactId, mIndexBuilder); 390 count++; 391 } 392 } finally { 393 cursor.close(); 394 } 395 return count; 396 } 397 insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder)398 private void insertIndexRow(SQLiteDatabase db, long contactId, IndexBuilder builder) { 399 mValues.clear(); 400 mValues.put(SearchIndexColumns.CONTENT, builder.getContent()); 401 mValues.put(SearchIndexColumns.NAME, builder.getName()); 402 mValues.put(SearchIndexColumns.TOKENS, builder.getTokens()); 403 mValues.put(SearchIndexColumns.CONTACT_ID, contactId); 404 mValues.put(ROW_ID_KEY, contactId); 405 db.insert(Tables.SEARCH_INDEX, null, mValues); 406 } getSearchIndexVersion()407 private int getSearchIndexVersion() { 408 return Integer.parseInt(mDbHelper.getProperty(PROPERTY_SEARCH_INDEX_VERSION, "0")); 409 } 410 setSearchIndexVersion(int version)411 private void setSearchIndexVersion(int version) { 412 mDbHelper.setProperty(PROPERTY_SEARCH_INDEX_VERSION, String.valueOf(version)); 413 } 414 415 /** 416 * Token separator that matches SQLite's "simple" tokenizer. 417 * - Unicode codepoints >= 128: Everything 418 * - Unicode codepoints < 128: Alphanumeric and "_" 419 * - Everything else is a separator of tokens 420 */ 421 private static final Pattern FTS_TOKEN_SEPARATOR_RE = 422 Pattern.compile("[^\u0080-\uffff\\p{Alnum}_]"); 423 424 /** 425 * Tokenize a string in the way as that of SQLite's "simple" tokenizer. 426 */ 427 @VisibleForTesting splitIntoFtsTokens(String s)428 static List<String> splitIntoFtsTokens(String s) { 429 final ArrayList<String> ret = Lists.newArrayList(); 430 for (String token : FTS_TOKEN_SEPARATOR_RE.split(s)) { 431 if (!TextUtils.isEmpty(token)) { 432 ret.add(token); 433 } 434 } 435 return ret; 436 } 437 438 /** 439 * Tokenizes the query and normalizes/hex encodes each token. The tokenizer uses the same 440 * rules as SQLite's "simple" tokenizer. Each token is added to the retokenizer and then 441 * returned as a String. 442 * @see FtsQueryBuilder#UNSCOPED_NORMALIZING 443 * @see FtsQueryBuilder#SCOPED_NAME_NORMALIZING 444 */ getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder)445 public static String getFtsMatchQuery(String query, FtsQueryBuilder ftsQueryBuilder) { 446 final StringBuilder result = new StringBuilder(); 447 for (String token : splitIntoFtsTokens(query)) { 448 ftsQueryBuilder.addToken(result, token); 449 } 450 return result.toString(); 451 } 452 453 public static abstract class FtsQueryBuilder { addToken(StringBuilder builder, String token)454 public abstract void addToken(StringBuilder builder, String token); 455 456 /** Normalizes and space-concatenates each token. Example: "a1b2c1* a2b3c2*" */ 457 public static final FtsQueryBuilder UNSCOPED_NORMALIZING = new UnscopedNormalizingBuilder(); 458 459 /** 460 * Scopes each token to a column and normalizes the name. 461 * Example: "content:foo* name:a1b2c1* tokens:foo* content:bar* name:a2b3c2* tokens:bar*" 462 */ 463 public static final FtsQueryBuilder SCOPED_NAME_NORMALIZING = 464 new ScopedNameNormalizingBuilder(); 465 466 /** 467 * Scopes each token to a the content column and also for name with normalization. 468 * Also adds a user-defined expression to each token. This allows common criteria to be 469 * concatenated to each token. 470 * Example (commonCriteria=" OR tokens:123*"): 471 * "content:650* OR name:1A1B1C* OR tokens:123* content:2A2B2C* OR name:foo* OR tokens:123*" 472 */ getDigitsQueryBuilder(final String commonCriteria)473 public static FtsQueryBuilder getDigitsQueryBuilder(final String commonCriteria) { 474 return new FtsQueryBuilder() { 475 @Override 476 public void addToken(StringBuilder builder, String token) { 477 if (builder.length() != 0) builder.append(' '); 478 479 builder.append("content:"); 480 builder.append(token); 481 builder.append("* "); 482 483 final String normalizedToken = NameNormalizer.normalize(token); 484 if (!TextUtils.isEmpty(normalizedToken)) { 485 builder.append(" OR name:"); 486 builder.append(normalizedToken); 487 builder.append('*'); 488 } 489 490 builder.append(commonCriteria); 491 } 492 }; 493 } 494 } 495 496 private static class UnscopedNormalizingBuilder extends FtsQueryBuilder { 497 @Override 498 public void addToken(StringBuilder builder, String token) { 499 if (builder.length() != 0) builder.append(' '); 500 501 // the token could be empty (if the search query was "_"). we should still emit it 502 // here, as we otherwise risk to end up with an empty MATCH-expression MATCH "" 503 builder.append(NameNormalizer.normalize(token)); 504 builder.append('*'); 505 } 506 } 507 508 private static class ScopedNameNormalizingBuilder extends FtsQueryBuilder { 509 @Override 510 public void addToken(StringBuilder builder, String token) { 511 if (builder.length() != 0) builder.append(' '); 512 513 builder.append("content:"); 514 builder.append(token); 515 builder.append('*'); 516 517 final String normalizedToken = NameNormalizer.normalize(token); 518 if (!TextUtils.isEmpty(normalizedToken)) { 519 builder.append(" OR name:"); 520 builder.append(normalizedToken); 521 builder.append('*'); 522 } 523 524 builder.append(" OR tokens:"); 525 builder.append(token); 526 builder.append("*"); 527 } 528 } 529 } 530