1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import android.content.ContentValues; 20 import android.content.Context; 21 import android.database.Cursor; 22 import android.database.sqlite.SQLiteDatabase; 23 import android.database.sqlite.SQLiteOpenHelper; 24 import android.database.sqlite.SQLiteQueryBuilder; 25 import android.os.AsyncTask; 26 import android.provider.BaseColumns; 27 import android.util.Log; 28 29 import java.util.HashMap; 30 import java.util.HashSet; 31 import java.util.Iterator; 32 33 /** 34 * Stores all the pairs user types in databases. Prune the database if the size 35 * gets too big. Unlike AutoDictionary, it even stores the pairs that are already 36 * in the dictionary. 37 */ 38 public class UserBigramDictionary extends ExpandableDictionary { 39 private static final String TAG = "UserBigramDictionary"; 40 41 /** Any pair being typed or picked */ 42 private static final int FREQUENCY_FOR_TYPED = 2; 43 44 /** Maximum frequency for all pairs */ 45 private static final int FREQUENCY_MAX = 127; 46 47 /** Maximum number of pairs. Pruning will start when databases goes above this number. */ 48 private static int sMaxUserBigrams = 10000; 49 50 /** 51 * When it hits maximum bigram pair, it will delete until you are left with 52 * only (sMaxUserBigrams - sDeleteUserBigrams) pairs. 53 * Do not keep this number small to avoid deleting too often. 54 */ 55 private static int sDeleteUserBigrams = 1000; 56 57 /** 58 * Database version should increase if the database structure changes 59 */ 60 private static final int DATABASE_VERSION = 1; 61 62 private static final String DATABASE_NAME = "userbigram_dict.db"; 63 64 /** Name of the words table in the database */ 65 private static final String MAIN_TABLE_NAME = "main"; 66 // TODO: Consume less space by using a unique id for locale instead of the whole 67 // 2-5 character string. (Same TODO from AutoDictionary) 68 private static final String MAIN_COLUMN_ID = BaseColumns._ID; 69 private static final String MAIN_COLUMN_WORD1 = "word1"; 70 private static final String MAIN_COLUMN_WORD2 = "word2"; 71 private static final String MAIN_COLUMN_LOCALE = "locale"; 72 73 /** Name of the frequency table in the database */ 74 private static final String FREQ_TABLE_NAME = "frequency"; 75 private static final String FREQ_COLUMN_ID = BaseColumns._ID; 76 private static final String FREQ_COLUMN_PAIR_ID = "pair_id"; 77 private static final String FREQ_COLUMN_FREQUENCY = "freq"; 78 79 private final LatinIME mIme; 80 81 /** Locale for which this auto dictionary is storing words */ 82 private String mLocale; 83 84 private HashSet<Bigram> mPendingWrites = new HashSet<Bigram>(); 85 private final Object mPendingWritesLock = new Object(); 86 private static volatile boolean sUpdatingDB = false; 87 88 private final static HashMap<String, String> sDictProjectionMap; 89 90 static { 91 sDictProjectionMap = new HashMap<String, String>(); sDictProjectionMap.put(MAIN_COLUMN_ID, MAIN_COLUMN_ID)92 sDictProjectionMap.put(MAIN_COLUMN_ID, MAIN_COLUMN_ID); sDictProjectionMap.put(MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD1)93 sDictProjectionMap.put(MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD1); sDictProjectionMap.put(MAIN_COLUMN_WORD2, MAIN_COLUMN_WORD2)94 sDictProjectionMap.put(MAIN_COLUMN_WORD2, MAIN_COLUMN_WORD2); sDictProjectionMap.put(MAIN_COLUMN_LOCALE, MAIN_COLUMN_LOCALE)95 sDictProjectionMap.put(MAIN_COLUMN_LOCALE, MAIN_COLUMN_LOCALE); 96 sDictProjectionMap.put(FREQ_COLUMN_ID, FREQ_COLUMN_ID)97 sDictProjectionMap.put(FREQ_COLUMN_ID, FREQ_COLUMN_ID); sDictProjectionMap.put(FREQ_COLUMN_PAIR_ID, FREQ_COLUMN_PAIR_ID)98 sDictProjectionMap.put(FREQ_COLUMN_PAIR_ID, FREQ_COLUMN_PAIR_ID); sDictProjectionMap.put(FREQ_COLUMN_FREQUENCY, FREQ_COLUMN_FREQUENCY)99 sDictProjectionMap.put(FREQ_COLUMN_FREQUENCY, FREQ_COLUMN_FREQUENCY); 100 } 101 102 private static DatabaseHelper sOpenHelper = null; 103 104 private static class Bigram { 105 public final String mWord1; 106 public final String mWord2; 107 public final int mFrequency; 108 Bigram(String word1, String word2, int frequency)109 Bigram(String word1, String word2, int frequency) { 110 this.mWord1 = word1; 111 this.mWord2 = word2; 112 this.mFrequency = frequency; 113 } 114 115 @Override equals(Object bigram)116 public boolean equals(Object bigram) { 117 Bigram bigram2 = (Bigram) bigram; 118 return (mWord1.equals(bigram2.mWord1) && mWord2.equals(bigram2.mWord2)); 119 } 120 121 @Override hashCode()122 public int hashCode() { 123 return (mWord1 + " " + mWord2).hashCode(); 124 } 125 } 126 setDatabaseMax(int maxUserBigram)127 public void setDatabaseMax(int maxUserBigram) { 128 sMaxUserBigrams = maxUserBigram; 129 } 130 setDatabaseDelete(int deleteUserBigram)131 public void setDatabaseDelete(int deleteUserBigram) { 132 sDeleteUserBigrams = deleteUserBigram; 133 } 134 UserBigramDictionary(Context context, LatinIME ime, String locale, int dicTypeId)135 public UserBigramDictionary(Context context, LatinIME ime, String locale, int dicTypeId) { 136 super(context, dicTypeId); 137 mIme = ime; 138 mLocale = locale; 139 if (sOpenHelper == null) { 140 sOpenHelper = new DatabaseHelper(getContext()); 141 } 142 if (mLocale != null && mLocale.length() > 1) { 143 loadDictionary(); 144 } 145 } 146 147 @Override close()148 public void close() { 149 flushPendingWrites(); 150 // Don't close the database as locale changes will require it to be reopened anyway 151 // Also, the database is written to somewhat frequently, so it needs to be kept alive 152 // throughout the life of the process. 153 // mOpenHelper.close(); 154 super.close(); 155 } 156 157 /** 158 * Pair will be added to the userbigram database. 159 */ addBigrams(String word1, String word2)160 public int addBigrams(String word1, String word2) { 161 // remove caps if second word is autocapitalized 162 if (mIme != null && mIme.getCurrentWord().isAutoCapitalized()) { 163 word2 = Character.toLowerCase(word2.charAt(0)) + word2.substring(1); 164 } 165 // Do not insert a word as a bigram of itself 166 if (word1.equals(word2)) { 167 return 0; 168 } 169 170 int freq = super.addBigram(word1, word2, FREQUENCY_FOR_TYPED); 171 if (freq > FREQUENCY_MAX) freq = FREQUENCY_MAX; 172 synchronized (mPendingWritesLock) { 173 if (freq == FREQUENCY_FOR_TYPED || mPendingWrites.isEmpty()) { 174 mPendingWrites.add(new Bigram(word1, word2, freq)); 175 } else { 176 Bigram bi = new Bigram(word1, word2, freq); 177 mPendingWrites.remove(bi); 178 mPendingWrites.add(bi); 179 } 180 } 181 182 return freq; 183 } 184 185 /** 186 * Schedules a background thread to write any pending words to the database. 187 */ flushPendingWrites()188 public void flushPendingWrites() { 189 synchronized (mPendingWritesLock) { 190 // Nothing pending? Return 191 if (mPendingWrites.isEmpty()) return; 192 // Create a background thread to write the pending entries 193 new UpdateDbTask(sOpenHelper, mPendingWrites, mLocale).execute(); 194 // Create a new map for writing new entries into while the old one is written to db 195 mPendingWrites = new HashSet<Bigram>(); 196 } 197 } 198 199 /** Used for testing purpose **/ waitUntilUpdateDBDone()200 void waitUntilUpdateDBDone() { 201 synchronized (mPendingWritesLock) { 202 while (sUpdatingDB) { 203 try { 204 Thread.sleep(100); 205 } catch (InterruptedException e) { 206 } 207 } 208 return; 209 } 210 } 211 212 @Override loadDictionaryAsync()213 public void loadDictionaryAsync() { 214 // Load the words that correspond to the current input locale 215 Cursor cursor = query(MAIN_COLUMN_LOCALE + "=?", new String[] { mLocale }); 216 try { 217 if (cursor.moveToFirst()) { 218 int word1Index = cursor.getColumnIndex(MAIN_COLUMN_WORD1); 219 int word2Index = cursor.getColumnIndex(MAIN_COLUMN_WORD2); 220 int frequencyIndex = cursor.getColumnIndex(FREQ_COLUMN_FREQUENCY); 221 while (!cursor.isAfterLast()) { 222 String word1 = cursor.getString(word1Index); 223 String word2 = cursor.getString(word2Index); 224 int frequency = cursor.getInt(frequencyIndex); 225 // Safeguard against adding really long words. Stack may overflow due 226 // to recursive lookup 227 if (word1.length() < MAX_WORD_LENGTH && word2.length() < MAX_WORD_LENGTH) { 228 super.setBigram(word1, word2, frequency); 229 } 230 cursor.moveToNext(); 231 } 232 } 233 } finally { 234 cursor.close(); 235 } 236 } 237 238 /** 239 * Query the database 240 */ query(String selection, String[] selectionArgs)241 private Cursor query(String selection, String[] selectionArgs) { 242 SQLiteQueryBuilder qb = new SQLiteQueryBuilder(); 243 244 // main INNER JOIN frequency ON (main._id=freq.pair_id) 245 qb.setTables(MAIN_TABLE_NAME + " INNER JOIN " + FREQ_TABLE_NAME + " ON (" 246 + MAIN_TABLE_NAME + "." + MAIN_COLUMN_ID + "=" + FREQ_TABLE_NAME + "." 247 + FREQ_COLUMN_PAIR_ID +")"); 248 249 qb.setProjectionMap(sDictProjectionMap); 250 251 // Get the database and run the query 252 SQLiteDatabase db = sOpenHelper.getReadableDatabase(); 253 Cursor c = qb.query(db, 254 new String[] { MAIN_COLUMN_WORD1, MAIN_COLUMN_WORD2, FREQ_COLUMN_FREQUENCY }, 255 selection, selectionArgs, null, null, null); 256 return c; 257 } 258 259 /** 260 * This class helps open, create, and upgrade the database file. 261 */ 262 private static class DatabaseHelper extends SQLiteOpenHelper { 263 DatabaseHelper(Context context)264 DatabaseHelper(Context context) { 265 super(context, DATABASE_NAME, null, DATABASE_VERSION); 266 } 267 268 @Override onCreate(SQLiteDatabase db)269 public void onCreate(SQLiteDatabase db) { 270 db.execSQL("PRAGMA foreign_keys = ON;"); 271 db.execSQL("CREATE TABLE " + MAIN_TABLE_NAME + " (" 272 + MAIN_COLUMN_ID + " INTEGER PRIMARY KEY," 273 + MAIN_COLUMN_WORD1 + " TEXT," 274 + MAIN_COLUMN_WORD2 + " TEXT," 275 + MAIN_COLUMN_LOCALE + " TEXT" 276 + ");"); 277 db.execSQL("CREATE TABLE " + FREQ_TABLE_NAME + " (" 278 + FREQ_COLUMN_ID + " INTEGER PRIMARY KEY," 279 + FREQ_COLUMN_PAIR_ID + " INTEGER," 280 + FREQ_COLUMN_FREQUENCY + " INTEGER," 281 + "FOREIGN KEY(" + FREQ_COLUMN_PAIR_ID + ") REFERENCES " + MAIN_TABLE_NAME 282 + "(" + MAIN_COLUMN_ID + ")" + " ON DELETE CASCADE" 283 + ");"); 284 } 285 286 @Override onUpgrade(SQLiteDatabase db, int oldVersion, int newVersion)287 public void onUpgrade(SQLiteDatabase db, int oldVersion, int newVersion) { 288 Log.w(TAG, "Upgrading database from version " + oldVersion + " to " 289 + newVersion + ", which will destroy all old data"); 290 db.execSQL("DROP TABLE IF EXISTS " + MAIN_TABLE_NAME); 291 db.execSQL("DROP TABLE IF EXISTS " + FREQ_TABLE_NAME); 292 onCreate(db); 293 } 294 } 295 296 /** 297 * Async task to write pending words to the database so that it stays in sync with 298 * the in-memory trie. 299 */ 300 private static class UpdateDbTask extends AsyncTask<Void, Void, Void> { 301 private final HashSet<Bigram> mMap; 302 private final DatabaseHelper mDbHelper; 303 private final String mLocale; 304 UpdateDbTask(DatabaseHelper openHelper, HashSet<Bigram> pendingWrites, String locale)305 public UpdateDbTask(DatabaseHelper openHelper, HashSet<Bigram> pendingWrites, 306 String locale) { 307 mMap = pendingWrites; 308 mLocale = locale; 309 mDbHelper = openHelper; 310 } 311 312 /** Prune any old data if the database is getting too big. */ checkPruneData(SQLiteDatabase db)313 private void checkPruneData(SQLiteDatabase db) { 314 db.execSQL("PRAGMA foreign_keys = ON;"); 315 Cursor c = db.query(FREQ_TABLE_NAME, new String[] { FREQ_COLUMN_PAIR_ID }, 316 null, null, null, null, null); 317 try { 318 int totalRowCount = c.getCount(); 319 // prune out old data if we have too much data 320 if (totalRowCount > sMaxUserBigrams) { 321 int numDeleteRows = (totalRowCount - sMaxUserBigrams) + sDeleteUserBigrams; 322 int pairIdColumnId = c.getColumnIndex(FREQ_COLUMN_PAIR_ID); 323 c.moveToFirst(); 324 int count = 0; 325 while (count < numDeleteRows && !c.isAfterLast()) { 326 String pairId = c.getString(pairIdColumnId); 327 // Deleting from MAIN table will delete the frequencies 328 // due to FOREIGN KEY .. ON DELETE CASCADE 329 db.delete(MAIN_TABLE_NAME, MAIN_COLUMN_ID + "=?", 330 new String[] { pairId }); 331 c.moveToNext(); 332 count++; 333 } 334 } 335 } finally { 336 c.close(); 337 } 338 } 339 340 @Override onPreExecute()341 protected void onPreExecute() { 342 sUpdatingDB = true; 343 } 344 345 @Override doInBackground(Void... v)346 protected Void doInBackground(Void... v) { 347 SQLiteDatabase db = mDbHelper.getWritableDatabase(); 348 db.execSQL("PRAGMA foreign_keys = ON;"); 349 // Write all the entries to the db 350 Iterator<Bigram> iterator = mMap.iterator(); 351 while (iterator.hasNext()) { 352 Bigram bi = iterator.next(); 353 354 // find pair id 355 Cursor c = db.query(MAIN_TABLE_NAME, new String[] { MAIN_COLUMN_ID }, 356 MAIN_COLUMN_WORD1 + "=? AND " + MAIN_COLUMN_WORD2 + "=? AND " 357 + MAIN_COLUMN_LOCALE + "=?", 358 new String[] { bi.mWord1, bi.mWord2, mLocale }, null, null, null); 359 360 int pairId; 361 if (c.moveToFirst()) { 362 // existing pair 363 pairId = c.getInt(c.getColumnIndex(MAIN_COLUMN_ID)); 364 db.delete(FREQ_TABLE_NAME, FREQ_COLUMN_PAIR_ID + "=?", 365 new String[] { Integer.toString(pairId) }); 366 } else { 367 // new pair 368 Long pairIdLong = db.insert(MAIN_TABLE_NAME, null, 369 getContentValues(bi.mWord1, bi.mWord2, mLocale)); 370 pairId = pairIdLong.intValue(); 371 } 372 c.close(); 373 374 // insert new frequency 375 db.insert(FREQ_TABLE_NAME, null, getFrequencyContentValues(pairId, bi.mFrequency)); 376 } 377 checkPruneData(db); 378 sUpdatingDB = false; 379 380 return null; 381 } 382 getContentValues(String word1, String word2, String locale)383 private ContentValues getContentValues(String word1, String word2, String locale) { 384 ContentValues values = new ContentValues(3); 385 values.put(MAIN_COLUMN_WORD1, word1); 386 values.put(MAIN_COLUMN_WORD2, word2); 387 values.put(MAIN_COLUMN_LOCALE, locale); 388 return values; 389 } 390 getFrequencyContentValues(int pairId, int frequency)391 private ContentValues getFrequencyContentValues(int pairId, int frequency) { 392 ContentValues values = new ContentValues(2); 393 values.put(FREQ_COLUMN_PAIR_ID, pairId); 394 values.put(FREQ_COLUMN_FREQUENCY, frequency); 395 return values; 396 } 397 } 398 399 } 400