/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin;

import android.content.Context;
import android.text.TextUtils;

import com.android.inputmethod.keyboard.Keyboard;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;

import java.io.File;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
 * This class loads a dictionary and provides a list of suggestions for a given sequence of
 * characters. This includes corrections and completions.
 */
public final class Suggest {
    public static final String TAG = Suggest.class.getSimpleName();

    // Session id for
    // {@link #getSuggestedWords(WordComposer,CharSequence,ProximityInfo,boolean,int)}.
    public static final int SESSION_TYPING = 0;
    public static final int SESSION_GESTURE = 1;

    // TODO: rename this to CORRECTION_OFF
    public static final int CORRECTION_NONE = 0;
    // TODO: rename this to CORRECTION_ON
    public static final int CORRECTION_FULL = 1;

    /**
     * Callback notified whenever the availability of the main dictionary may have changed.
     */
    public interface SuggestInitializationListener {
        /**
         * @param isMainDictionaryAvailable the value of {@link Suggest#hasMainDictionary()} at
         *        the time of the notification.
         */
        public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable);
    }

    private static final boolean DBG = LatinImeLogger.sDBG;

    // Written both by the caller's thread and by the loader thread started in resetMainDict().
    private Dictionary mMainDictionary;
    private ContactsBinaryDictionary mContactsDict;
    // All dictionaries consulted for suggestions, keyed by dictionary type.
    private final ConcurrentHashMap<String, Dictionary> mDictionaries =
            CollectionUtils.newConcurrentHashMap();

    public static final int MAX_SUGGESTIONS = 18;

    private float mAutoCorrectionThreshold;

    // Locale used for upper- and title-casing words
    private final Locale mLocale;

    /**
     * Creates a suggest engine and starts loading the main dictionary for {@code locale} on a
     * background thread.
     *
     * @param context the context handed to the dictionary factory.
     * @param locale the locale of the main dictionary; also used for casing suggestions.
     * @param listener notified about main dictionary availability; may be null.
     */
    public Suggest(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        initAsynchronously(context, locale, listener);
        mLocale = locale;
    }

    /* package for test */ Suggest(final Context context, final File dictionary,
            final long startOffset, final long length, final Locale locale) {
        final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(context, dictionary,
                startOffset, length, false /* useFullEditDistance */, locale);
        mLocale = locale;
        mMainDictionary = mainDict;
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict);
    }

    private void initAsynchronously(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        resetMainDict(context, locale, listener);
    }

    /**
     * Registers {@code dict} under {@code key}, or removes the entry for {@code key} when
     * {@code dict} is null. Any dictionary previously registered under {@code key} is closed,
     * unless it is the very same instance as {@code dict}.
     */
    private static void addOrReplaceDictionary(
            final ConcurrentHashMap<String, Dictionary> dictionaries,
            final String key, final Dictionary dict) {
        final Dictionary oldDict = (dict == null)
                ? dictionaries.remove(key)
                : dictionaries.put(key, dict);
        if (oldDict != null && dict != oldDict) {
            oldDict.close();
        }
    }

    /**
     * Drops the current main dictionary reference and reloads the main dictionary for
     * {@code locale} on a new background thread.
     *
     * The listener, if any, is notified twice: once synchronously right after the reference has
     * been cleared, and once from the loader thread after the new dictionary has been installed.
     *
     * @param context the context handed to the dictionary factory.
     * @param locale the locale to load the main dictionary for.
     * @param listener notified about main dictionary availability; may be null.
     */
    public void resetMainDict(final Context context, final Locale locale,
            final SuggestInitializationListener listener) {
        mMainDictionary = null;
        if (listener != null) {
            listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
        }
        new Thread("InitializeBinaryDictionary") {
            @Override
            public void run() {
                final DictionaryCollection newMainDict =
                        DictionaryFactory.createMainDictionaryFromManager(context, locale);
                addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict);
                mMainDictionary = newMainDict;
                if (listener != null) {
                    listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
                }
            }
        }.start();
    }

    // The main dictionary could have been loaded asynchronously. Don't cache the return value
    // of this method.
    public boolean hasMainDictionary() {
        // Read the field only once: resetMainDict() and close() may null it out from another
        // thread between the null check and the call to isInitialized().
        final Dictionary mainDictionary = mMainDictionary;
        return null != mainDictionary && mainDictionary.isInitialized();
    }

    /**
     * @return the current main dictionary, or null if none is currently set.
     */
    public Dictionary getMainDictionary() {
        return mMainDictionary;
    }

    /**
     * @return the contacts dictionary last passed to
     *         {@link #setContactsDictionary(ContactsBinaryDictionary)}, or null.
     */
    public ContactsBinaryDictionary getContactsDictionary() {
        return mContactsDict;
    }

    /**
     * @return the live map of dictionaries used for lookups, keyed by dictionary type. This is
     *         the internal map, not a copy.
     */
    public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() {
        return mDictionaries;
    }

    /**
     * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
     * before the main dictionary, if set. This refers to the system-managed user dictionary.
     */
    public void setUserDictionary(UserBinaryDictionary userDictionary) {
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary);
    }

    /**
     * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
     * the contacts dictionary by passing null to this method. In this case no contacts dictionary
     * will be used.
     */
    public void setContactsDictionary(ContactsBinaryDictionary contactsDictionary) {
        mContactsDict = contactsDictionary;
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary);
    }

    /**
     * Sets the user history dictionary, or removes it when passed null.
     */
    public void setUserHistoryDictionary(UserHistoryDictionary userHistoryDictionary) {
        addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
    }

    /**
     * Sets the threshold passed to
     * {@code AutoCorrection.suggestionExceedsAutoCorrectionThreshold} when deciding whether the
     * top suggestion will auto-correct.
     */
    public void setAutoCorrectionThreshold(float threshold) {
        mAutoCorrectionThreshold = threshold;
    }

    /**
     * Computes suggestions for the word being composed.
     *
     * Dispatches to the batch input path when {@code wordComposer.isBatchMode()} is true, and to
     * the typing path otherwise.
     *
     * @param wordComposer the word being composed.
     * @param prevWordForBigram the previous word, forwarded to the dictionaries.
     * @param proximityInfo the keyboard proximity info, forwarded to the dictionaries.
     * @param isCorrectionEnabled whether auto-correction may be flagged; only used for typing
     *        input.
     * @param sessionId the session id, e.g. {@link #SESSION_TYPING} or {@link #SESSION_GESTURE};
     *        only used for batch input.
     * @return the suggestions.
     */
    public SuggestedWords getSuggestedWords(
            final WordComposer wordComposer, CharSequence prevWordForBigram,
            final ProximityInfo proximityInfo, final boolean isCorrectionEnabled, int sessionId) {
        LatinImeLogger.onStartSuggestion(prevWordForBigram);
        if (wordComposer.isBatchMode()) {
            return getSuggestedWordsForBatchInput(
                    wordComposer, prevWordForBigram, proximityInfo, sessionId);
        } else {
            return getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo,
                    isCorrectionEnabled);
        }
    }

    // Retrieves suggestions for the typing input.
    private SuggestedWords getSuggestedWordsForTypingInput(
            final WordComposer wordComposer, CharSequence prevWordForBigram,
            final ProximityInfo proximityInfo, final boolean isCorrectionEnabled) {
        final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
                MAX_SUGGESTIONS);

        final String typedWord = wordComposer.getTypedWord();
        // The typed word stripped of its trailing single quotes.
        final String consideredWord = trailingSingleQuotesCount > 0
                ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
                : typedWord;
        LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);

        // Look up on a copy without the trailing single quotes so that the caller's composer
        // is left untouched.
        final WordComposer wordComposerForLookup;
        if (trailingSingleQuotesCount > 0) {
            wordComposerForLookup = new WordComposer(wordComposer);
            for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
                wordComposerForLookup.deleteLast();
            }
        } else {
            wordComposerForLookup = wordComposer;
        }

        // Iterate over the values directly rather than keySet() + get(): a dictionary may be
        // removed or replaced from another thread while we iterate, in which case get() would
        // return null.
        for (final Dictionary dictionary : mDictionaries.values()) {
            suggestionsSet.addAll(dictionary.getSuggestions(
                    wordComposerForLookup, prevWordForBigram, proximityInfo));
        }

        final CharSequence whitelistedWord;
        if (suggestionsSet.isEmpty()) {
            whitelistedWord = null;
        } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) {
            whitelistedWord = null;
        } else {
            whitelistedWord = suggestionsSet.first().mWord;
        }

        // The word can be auto-corrected if it has a whitelist entry that is not itself,
        // or if it's a 2+ characters non-word (i.e. it's not in the dictionary).
        // TextUtils.equals compares contents; CharSequence#equals(String) is only reliable when
        // the CharSequence happens to be a String.
        final boolean allowsToBeAutoCorrected = (null != whitelistedWord
                && !TextUtils.equals(whitelistedWord, consideredWord))
                || (consideredWord.length() > 1 && !AutoCorrection.isInTheDictionary(mDictionaries,
                        consideredWord, wordComposer.isFirstCharCapitalized()));

        final boolean hasAutoCorrection;
        // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
        // any attempt to do auto-correction is already shielded with a test for this flag; at the
        // same time, it feels wrong that the SuggestedWord object includes information about
        // the current settings. It may also be useful to know, when the setting is off, whether
        // the word *would* have been auto-corrected.
        if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord()
                || suggestionsSet.isEmpty() || wordComposer.hasDigits()
                || wordComposer.isMostlyCaps() || wordComposer.isResumed()
                || !hasMainDictionary()) {
            // If we don't have a main dictionary, we never want to auto-correct. The reason for
            // this is, the user may have a contact whose name happens to match a valid word in
            // their language, and it will unexpectedly auto-correct. For example, if the user
            // types in English with no dictionary and has a "Will" in their contact list, "will"
            // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
            // auto-correct.
            hasAutoCorrection = false;
        } else {
            hasAutoCorrection = AutoCorrection.suggestionExceedsAutoCorrectionThreshold(
                    suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold);
        }

        final ArrayList<SuggestedWordInfo> suggestionsContainer =
                CollectionUtils.newArrayList(suggestionsSet);
        final int suggestionsCount = suggestionsContainer.size();
        final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
        // Re-apply the capitalization and the trailing quotes of the typed word to suggestions.
        if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
            for (int i = 0; i < suggestionsCount; ++i) {
                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
                        trailingSingleQuotesCount);
                suggestionsContainer.set(i, transformedWordInfo);
            }
        }

        for (int i = 0; i < suggestionsCount; ++i) {
            final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict);
        }

        // The typed word always comes first in the returned list.
        if (!TextUtils.isEmpty(typedWord)) {
            suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
                    SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
                    Dictionary.TYPE_USER_TYPED));
        }
        SuggestedWordInfo.removeDups(suggestionsContainer);

        final ArrayList<SuggestedWordInfo> suggestionsList;
        if (DBG && !suggestionsContainer.isEmpty()) {
            suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
        } else {
            suggestionsList = suggestionsContainer;
        }

        return new SuggestedWords(suggestionsList,
                // TODO: this first argument is lying. If this is a whitelisted word which is an
                // actual word, it says typedWordValid = false, which looks wrong. We should either
                // rename the attribute or change the value.
                !allowsToBeAutoCorrected /* typedWordValid */,
                hasAutoCorrection, /* willAutoCorrect */
                false /* isPunctuationSuggestions */,
                false /* isObsoleteSuggestions */,
                !wordComposer.isComposingWord() /* isPrediction */);
    }

    // Retrieves suggestions for the batch input.
    private SuggestedWords getSuggestedWordsForBatchInput(
            final WordComposer wordComposer, CharSequence prevWordForBigram,
            final ProximityInfo proximityInfo, int sessionId) {
        final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
                MAX_SUGGESTIONS);

        // At second character typed, search the unigrams (scores being affected by bigrams)
        // Iterate over the entries rather than keySet() + get(): a dictionary may be removed or
        // replaced from another thread while we iterate, in which case get() would return null.
        for (final Map.Entry<String, Dictionary> entry : mDictionaries.entrySet()) {
            // Skip User history dictionary for lookup
            // TODO: The user history dictionary should just override getSuggestionsWithSessionId
            // to make sure it doesn't return anything and we should remove this test
            if (entry.getKey().equals(Dictionary.TYPE_USER_HISTORY)) {
                continue;
            }
            final Dictionary dictionary = entry.getValue();
            suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId(
                    wordComposer, prevWordForBigram, proximityInfo, sessionId));
        }

        for (SuggestedWordInfo wordInfo : suggestionsSet) {
            LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict);
        }

        final ArrayList<SuggestedWordInfo> suggestionsContainer =
                CollectionUtils.newArrayList(suggestionsSet);
        final int suggestionsCount = suggestionsContainer.size();
        final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
        final boolean isAllUpperCase = wordComposer.isAllUpperCase();
        if (isFirstCharCapitalized || isAllUpperCase) {
            for (int i = 0; i < suggestionsCount; ++i) {
                final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
                final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
                        wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
                        0 /* trailingSingleQuotesCount */);
                suggestionsContainer.set(i, transformedWordInfo);
            }
        }

        SuggestedWordInfo.removeDups(suggestionsContainer);
        // In the batch input mode, the most relevant suggested word should act as a "typed word"
        // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
        return new SuggestedWords(suggestionsContainer,
                true /* typedWordValid */,
                false /* willAutoCorrect */,
                false /* isPunctuationSuggestions */,
                false /* isObsoleteSuggestions */,
                false /* isPrediction */);
    }

    /**
     * Returns a copy of {@code suggestions} in which each entry carries a debug string: "+" for
     * the first entry (the typed word), and the score, followed by the normalized score when it
     * is positive, for every other entry.
     *
     * @param typedWord the typed word, used to compute normalized scores.
     * @param suggestions a non-empty list whose first element is the typed word.
     */
    private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
            final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
        final SuggestedWordInfo typedWordInfo = suggestions.get(0);
        typedWordInfo.setDebugString("+");
        final int suggestionsSize = suggestions.size();
        final ArrayList<SuggestedWordInfo> suggestionsList =
                CollectionUtils.newArrayList(suggestionsSize);
        suggestionsList.add(typedWordInfo);
        // Note: i here is the index in mScores[], but the index in mSuggestions is one more
        // than i because we added the typed word to mSuggestions without touching mScores.
        for (int i = 0; i < suggestionsSize - 1; ++i) {
            final SuggestedWordInfo cur = suggestions.get(i + 1);
            final float normalizedScore = BinaryDictionary.calcNormalizedScore(
                    typedWord, cur.toString(), cur.mScore);
            final String scoreInfoString;
            if (normalizedScore > 0) {
                scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore);
            } else {
                scoreInfoString = Integer.toString(cur.mScore);
            }
            cur.setDebugString(scoreInfoString);
            suggestionsList.add(cur);
        }
        return suggestionsList;
    }

    private static final class SuggestedWordInfoComparator
            implements Comparator<SuggestedWordInfo> {
        // This comparator ranks the word info with the higher frequency first. That's because
        // that's the order we want our elements in.
        // Ties on score are broken by the smaller code point count first, then by the natural
        // String ordering of the words.
        @Override
        public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
            if (o1.mScore > o2.mScore) return -1;
            if (o1.mScore < o2.mScore) return 1;
            if (o1.mCodePointCount < o2.mCodePointCount) return -1;
            if (o1.mCodePointCount > o2.mCodePointCount) return 1;
            return o1.mWord.toString().compareTo(o2.mWord.toString());
        }
    }
    private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
            new SuggestedWordInfoComparator();

    /**
     * Builds a new {@link SuggestedWordInfo} with the same score, kind and source dictionary as
     * {@code wordInfo}, whose word is upper-cased (if {@code isAllUpperCase}) or else title-cased
     * (if {@code isFirstCharCapitalized}) using {@code locale}, and then followed by
     * {@code trailingSingleQuotesCount} single quotes.
     */
    private static SuggestedWordInfo getTransformedSuggestedWordInfo(
            final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
            final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
        final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
        if (isAllUpperCase) {
            sb.append(wordInfo.mWord.toString().toUpperCase(locale));
        } else if (isFirstCharCapitalized) {
            sb.append(StringUtils.toTitleCase(wordInfo.mWord.toString(), locale));
        } else {
            sb.append(wordInfo.mWord);
        }
        for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
            sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE);
        }
        return new SuggestedWordInfo(sb, wordInfo.mScore, wordInfo.mKind, wordInfo.mSourceDict);
    }

    /**
     * Closes every registered dictionary and clears the main dictionary reference.
     */
    public void close() {
        // Collect into a set first so that a dictionary instance registered under several keys
        // is only closed once.
        final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet();
        dictionaries.addAll(mDictionaries.values());
        for (final Dictionary dictionary : dictionaries) {
            dictionary.close();
        }
        mMainDictionary = null;
    }
}