1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17 package com.android.inputmethod.latin.spellcheck; 18 19 import android.content.Intent; 20 import android.content.SharedPreferences; 21 import android.preference.PreferenceManager; 22 import android.service.textservice.SpellCheckerService; 23 import android.text.TextUtils; 24 import android.util.Log; 25 import android.util.LruCache; 26 import android.view.textservice.SentenceSuggestionsInfo; 27 import android.view.textservice.SuggestionsInfo; 28 import android.view.textservice.TextInfo; 29 30 import com.android.inputmethod.compat.SuggestionsInfoCompatUtils; 31 import com.android.inputmethod.keyboard.ProximityInfo; 32 import com.android.inputmethod.latin.BinaryDictionary; 33 import com.android.inputmethod.latin.Dictionary; 34 import com.android.inputmethod.latin.Dictionary.WordCallback; 35 import com.android.inputmethod.latin.DictionaryCollection; 36 import com.android.inputmethod.latin.DictionaryFactory; 37 import com.android.inputmethod.latin.LatinIME; 38 import com.android.inputmethod.latin.LocaleUtils; 39 import com.android.inputmethod.latin.R; 40 import com.android.inputmethod.latin.StringUtils; 41 import com.android.inputmethod.latin.SynchronouslyLoadedContactsBinaryDictionary; 42 import com.android.inputmethod.latin.SynchronouslyLoadedContactsDictionary; 43 import com.android.inputmethod.latin.SynchronouslyLoadedUserBinaryDictionary; 44 import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary; 45 import com.android.inputmethod.latin.WhitelistDictionary; 46 import com.android.inputmethod.latin.WordComposer; 47 48 import java.lang.ref.WeakReference; 49 import java.util.ArrayList; 50 import java.util.Arrays; 51 import java.util.Collections; 52 import java.util.HashSet; 53 import java.util.Iterator; 54 import java.util.Locale; 55 import java.util.Map; 56 import java.util.TreeMap; 57 58 /** 59 * Service for spell checking, using LatinIME's dictionaries and mechanisms. 60 */ 61 public class AndroidSpellCheckerService extends SpellCheckerService 62 implements SharedPreferences.OnSharedPreferenceChangeListener { 63 private static final String TAG = AndroidSpellCheckerService.class.getSimpleName(); 64 private static final boolean DBG = false; 65 private static final int POOL_SIZE = 2; 66 67 public static final String PREF_USE_CONTACTS_KEY = "pref_spellcheck_use_contacts"; 68 69 private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case 70 private static final int CAPITALIZE_FIRST = 1; // First only 71 private static final int CAPITALIZE_ALL = 2; // All caps 72 73 private final static String[] EMPTY_STRING_ARRAY = new String[0]; 74 private Map<String, DictionaryPool> mDictionaryPools = 75 Collections.synchronizedMap(new TreeMap<String, DictionaryPool>()); 76 private Map<String, Dictionary> mUserDictionaries = 77 Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 78 private Map<String, Dictionary> mWhitelistDictionaries = 79 Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 80 private Dictionary mContactsDictionary; 81 82 // The threshold for a candidate to be offered as a suggestion. 83 private float mSuggestionThreshold; 84 // The threshold for a suggestion to be considered "recommended". 85 private float mRecommendedThreshold; 86 // Whether to use the contacts dictionary 87 private boolean mUseContactsDictionary; 88 private final Object mUseContactsLock = new Object(); 89 90 private final HashSet<WeakReference<DictionaryCollection>> mDictionaryCollectionsList = 91 new HashSet<WeakReference<DictionaryCollection>>(); 92 93 public static final int SCRIPT_LATIN = 0; 94 public static final int SCRIPT_CYRILLIC = 1; 95 private static final String SINGLE_QUOTE = "\u0027"; 96 private static final String APOSTROPHE = "\u2019"; 97 private static final TreeMap<String, Integer> mLanguageToScript; 98 static { 99 // List of the supported languages and their associated script. We won't check 100 // words written in another script than the selected script, because we know we 101 // don't have those in our dictionary so we will underline everything and we 102 // will never have any suggestions, so it makes no sense checking them, and this 103 // is done in {@link #shouldFilterOut}. Also, the script is used to choose which 104 // proximity to pass to the dictionary descent algorithm. 105 // IMPORTANT: this only contains languages - do not write countries in there. 106 // Only the language is searched from the map. 107 mLanguageToScript = new TreeMap<String, Integer>(); 108 mLanguageToScript.put("en", SCRIPT_LATIN); 109 mLanguageToScript.put("fr", SCRIPT_LATIN); 110 mLanguageToScript.put("de", SCRIPT_LATIN); 111 mLanguageToScript.put("nl", SCRIPT_LATIN); 112 mLanguageToScript.put("cs", SCRIPT_LATIN); 113 mLanguageToScript.put("es", SCRIPT_LATIN); 114 mLanguageToScript.put("it", SCRIPT_LATIN); 115 mLanguageToScript.put("hr", SCRIPT_LATIN); 116 mLanguageToScript.put("pt", SCRIPT_LATIN); 117 mLanguageToScript.put("ru", SCRIPT_CYRILLIC); 118 // TODO: Make a persian proximity, and activate the Farsi subtype. 119 // mLanguageToScript.put("fa", SCRIPT_PERSIAN); 120 } 121 onCreate()122 @Override public void onCreate() { 123 super.onCreate(); 124 mSuggestionThreshold = 125 Float.parseFloat(getString(R.string.spellchecker_suggestion_threshold_value)); 126 mRecommendedThreshold = 127 Float.parseFloat(getString(R.string.spellchecker_recommended_threshold_value)); 128 final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this); 129 prefs.registerOnSharedPreferenceChangeListener(this); 130 onSharedPreferenceChanged(prefs, PREF_USE_CONTACTS_KEY); 131 } 132 getScriptFromLocale(final Locale locale)133 private static int getScriptFromLocale(final Locale locale) { 134 final Integer script = mLanguageToScript.get(locale.getLanguage()); 135 if (null == script) { 136 throw new RuntimeException("We have been called with an unsupported language: \"" 137 + locale.getLanguage() + "\". Framework bug?"); 138 } 139 return script; 140 } 141 142 @Override onSharedPreferenceChanged(final SharedPreferences prefs, final String key)143 public void onSharedPreferenceChanged(final SharedPreferences prefs, final String key) { 144 if (!PREF_USE_CONTACTS_KEY.equals(key)) return; 145 synchronized(mUseContactsLock) { 146 mUseContactsDictionary = prefs.getBoolean(PREF_USE_CONTACTS_KEY, true); 147 if (mUseContactsDictionary) { 148 startUsingContactsDictionaryLocked(); 149 } else { 150 stopUsingContactsDictionaryLocked(); 151 } 152 } 153 } 154 startUsingContactsDictionaryLocked()155 private void startUsingContactsDictionaryLocked() { 156 if (null == mContactsDictionary) { 157 if (LatinIME.USE_BINARY_CONTACTS_DICTIONARY) { 158 // TODO: use the right locale for each session 159 mContactsDictionary = 160 new SynchronouslyLoadedContactsBinaryDictionary(this, Locale.getDefault()); 161 } else { 162 mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this); 163 } 164 } 165 final Iterator<WeakReference<DictionaryCollection>> iterator = 166 mDictionaryCollectionsList.iterator(); 167 while (iterator.hasNext()) { 168 final WeakReference<DictionaryCollection> dictRef = iterator.next(); 169 final DictionaryCollection dict = dictRef.get(); 170 if (null == dict) { 171 iterator.remove(); 172 } else { 173 dict.addDictionary(mContactsDictionary); 174 } 175 } 176 } 177 stopUsingContactsDictionaryLocked()178 private void stopUsingContactsDictionaryLocked() { 179 if (null == mContactsDictionary) return; 180 final Dictionary contactsDict = mContactsDictionary; 181 // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY is no longer needed 182 mContactsDictionary = null; 183 final Iterator<WeakReference<DictionaryCollection>> iterator = 184 mDictionaryCollectionsList.iterator(); 185 while (iterator.hasNext()) { 186 final WeakReference<DictionaryCollection> dictRef = iterator.next(); 187 final DictionaryCollection dict = dictRef.get(); 188 if (null == dict) { 189 iterator.remove(); 190 } else { 191 dict.removeDictionary(contactsDict); 192 } 193 } 194 contactsDict.close(); 195 } 196 197 @Override createSession()198 public Session createSession() { 199 return new AndroidSpellCheckerSession(this); 200 } 201 getNotInDictEmptySuggestions()202 private static SuggestionsInfo getNotInDictEmptySuggestions() { 203 return new SuggestionsInfo(0, EMPTY_STRING_ARRAY); 204 } 205 getInDictEmptySuggestions()206 private static SuggestionsInfo getInDictEmptySuggestions() { 207 return new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY, 208 EMPTY_STRING_ARRAY); 209 } 210 211 private static class SuggestionsGatherer implements WordCallback { 212 public static class Result { 213 public final String[] mSuggestions; 214 public final boolean mHasRecommendedSuggestions; Result(final String[] gatheredSuggestions, final boolean hasRecommendedSuggestions)215 public Result(final String[] gatheredSuggestions, 216 final boolean hasRecommendedSuggestions) { 217 mSuggestions = gatheredSuggestions; 218 mHasRecommendedSuggestions = hasRecommendedSuggestions; 219 } 220 } 221 222 private final ArrayList<CharSequence> mSuggestions; 223 private final int[] mScores; 224 private final String mOriginalText; 225 private final float mSuggestionThreshold; 226 private final float mRecommendedThreshold; 227 private final int mMaxLength; 228 private int mLength = 0; 229 230 // The two following attributes are only ever filled if the requested max length 231 // is 0 (or less, which is treated the same). 232 private String mBestSuggestion = null; 233 private int mBestScore = Integer.MIN_VALUE; // As small as possible 234 SuggestionsGatherer(final String originalText, final float suggestionThreshold, final float recommendedThreshold, final int maxLength)235 SuggestionsGatherer(final String originalText, final float suggestionThreshold, 236 final float recommendedThreshold, final int maxLength) { 237 mOriginalText = originalText; 238 mSuggestionThreshold = suggestionThreshold; 239 mRecommendedThreshold = recommendedThreshold; 240 mMaxLength = maxLength; 241 mSuggestions = new ArrayList<CharSequence>(maxLength + 1); 242 mScores = new int[mMaxLength]; 243 } 244 245 @Override addWord(char[] word, int wordOffset, int wordLength, int score, int dicTypeId, int dataType)246 synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score, 247 int dicTypeId, int dataType) { 248 final int positionIndex = Arrays.binarySearch(mScores, 0, mLength, score); 249 // binarySearch returns the index if the element exists, and -<insertion index> - 1 250 // if it doesn't. See documentation for binarySearch. 251 final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1; 252 253 if (insertIndex == 0 && mLength >= mMaxLength) { 254 // In the future, we may want to keep track of the best suggestion score even if 255 // we are asked for 0 suggestions. In this case, we can use the following 256 // (tested) code to keep it: 257 // If the maxLength is 0 (should never be less, but if it is, it's treated as 0) 258 // then we need to keep track of the best suggestion in mBestScore and 259 // mBestSuggestion. This is so that we know whether the best suggestion makes 260 // the score cutoff, since we need to know that to return a meaningful 261 // looksLikeTypo. 262 // if (0 >= mMaxLength) { 263 // if (score > mBestScore) { 264 // mBestScore = score; 265 // mBestSuggestion = new String(word, wordOffset, wordLength); 266 // } 267 // } 268 return true; 269 } 270 if (insertIndex >= mMaxLength) { 271 // We found a suggestion, but its score is too weak to be kept considering 272 // the suggestion limit. 273 return true; 274 } 275 276 // Compute the normalized score and skip this word if it's normalized score does not 277 // make the threshold. 278 final String wordString = new String(word, wordOffset, wordLength); 279 final float normalizedScore = 280 BinaryDictionary.calcNormalizedScore(mOriginalText, wordString, score); 281 if (normalizedScore < mSuggestionThreshold) { 282 if (DBG) Log.i(TAG, wordString + " does not make the score threshold"); 283 return true; 284 } 285 286 if (mLength < mMaxLength) { 287 final int copyLen = mLength - insertIndex; 288 ++mLength; 289 System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen); 290 mSuggestions.add(insertIndex, wordString); 291 } else { 292 System.arraycopy(mScores, 1, mScores, 0, insertIndex); 293 mSuggestions.add(insertIndex, wordString); 294 mSuggestions.remove(0); 295 } 296 mScores[insertIndex] = score; 297 298 return true; 299 } 300 getResults(final int capitalizeType, final Locale locale)301 public Result getResults(final int capitalizeType, final Locale locale) { 302 final String[] gatheredSuggestions; 303 final boolean hasRecommendedSuggestions; 304 if (0 == mLength) { 305 // Either we found no suggestions, or we found some BUT the max length was 0. 306 // If we found some mBestSuggestion will not be null. If it is null, then 307 // we found none, regardless of the max length. 308 if (null == mBestSuggestion) { 309 gatheredSuggestions = null; 310 hasRecommendedSuggestions = false; 311 } else { 312 gatheredSuggestions = EMPTY_STRING_ARRAY; 313 final float normalizedScore = BinaryDictionary.calcNormalizedScore( 314 mOriginalText, mBestSuggestion, mBestScore); 315 hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); 316 } 317 } else { 318 if (DBG) { 319 if (mLength != mSuggestions.size()) { 320 Log.e(TAG, "Suggestion size is not the same as stored mLength"); 321 } 322 for (int i = mLength - 1; i >= 0; --i) { 323 Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i)); 324 } 325 } 326 Collections.reverse(mSuggestions); 327 StringUtils.removeDupes(mSuggestions); 328 if (CAPITALIZE_ALL == capitalizeType) { 329 for (int i = 0; i < mSuggestions.size(); ++i) { 330 // get(i) returns a CharSequence which is actually a String so .toString() 331 // should return the same object. 332 mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale)); 333 } 334 } else if (CAPITALIZE_FIRST == capitalizeType) { 335 for (int i = 0; i < mSuggestions.size(); ++i) { 336 // Likewise 337 mSuggestions.set(i, StringUtils.toTitleCase( 338 mSuggestions.get(i).toString(), locale)); 339 } 340 } 341 // This returns a String[], while toArray() returns an Object[] which cannot be cast 342 // into a String[]. 343 gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY); 344 345 final int bestScore = mScores[mLength - 1]; 346 final CharSequence bestSuggestion = mSuggestions.get(0); 347 final float normalizedScore = 348 BinaryDictionary.calcNormalizedScore( 349 mOriginalText, bestSuggestion.toString(), bestScore); 350 hasRecommendedSuggestions = (normalizedScore > mRecommendedThreshold); 351 if (DBG) { 352 Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore); 353 Log.i(TAG, "Normalized score = " + normalizedScore 354 + " (threshold " + mRecommendedThreshold 355 + ") => hasRecommendedSuggestions = " + hasRecommendedSuggestions); 356 } 357 } 358 return new Result(gatheredSuggestions, hasRecommendedSuggestions); 359 } 360 } 361 362 @Override onUnbind(final Intent intent)363 public boolean onUnbind(final Intent intent) { 364 closeAllDictionaries(); 365 return false; 366 } 367 closeAllDictionaries()368 private void closeAllDictionaries() { 369 final Map<String, DictionaryPool> oldPools = mDictionaryPools; 370 mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>()); 371 final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries; 372 mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 373 final Map<String, Dictionary> oldWhitelistDictionaries = mWhitelistDictionaries; 374 mWhitelistDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>()); 375 new Thread("spellchecker_close_dicts") { 376 @Override 377 public void run() { 378 for (DictionaryPool pool : oldPools.values()) { 379 pool.close(); 380 } 381 for (Dictionary dict : oldUserDictionaries.values()) { 382 dict.close(); 383 } 384 for (Dictionary dict : oldWhitelistDictionaries.values()) { 385 dict.close(); 386 } 387 synchronized (mUseContactsLock) { 388 if (null != mContactsDictionary) { 389 // The synchronously loaded contacts dictionary should have been in one 390 // or several pools, but it is shielded against multiple closing and it's 391 // safe to call it several times. 392 final Dictionary dictToClose = mContactsDictionary; 393 // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY 394 // is no longer needed 395 mContactsDictionary = null; 396 dictToClose.close(); 397 } 398 } 399 } 400 }.start(); 401 } 402 getDictionaryPool(final String locale)403 private DictionaryPool getDictionaryPool(final String locale) { 404 DictionaryPool pool = mDictionaryPools.get(locale); 405 if (null == pool) { 406 final Locale localeObject = LocaleUtils.constructLocaleFromString(locale); 407 pool = new DictionaryPool(POOL_SIZE, this, localeObject); 408 mDictionaryPools.put(locale, pool); 409 } 410 return pool; 411 } 412 createDictAndProximity(final Locale locale)413 public DictAndProximity createDictAndProximity(final Locale locale) { 414 final int script = getScriptFromLocale(locale); 415 final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo( 416 SpellCheckerProximityInfo.getProximityForScript(script), 417 SpellCheckerProximityInfo.ROW_SIZE, 418 SpellCheckerProximityInfo.PROXIMITY_GRID_WIDTH, 419 SpellCheckerProximityInfo.PROXIMITY_GRID_HEIGHT); 420 final DictionaryCollection dictionaryCollection = 421 DictionaryFactory.createMainDictionaryFromManager(this, locale, 422 true /* useFullEditDistance */); 423 final String localeStr = locale.toString(); 424 Dictionary userDictionary = mUserDictionaries.get(localeStr); 425 if (null == userDictionary) { 426 if (LatinIME.USE_BINARY_USER_DICTIONARY) { 427 userDictionary = new SynchronouslyLoadedUserBinaryDictionary(this, localeStr, true); 428 } else { 429 userDictionary = new SynchronouslyLoadedUserDictionary(this, localeStr, true); 430 } 431 mUserDictionaries.put(localeStr, userDictionary); 432 } 433 dictionaryCollection.addDictionary(userDictionary); 434 Dictionary whitelistDictionary = mWhitelistDictionaries.get(localeStr); 435 if (null == whitelistDictionary) { 436 whitelistDictionary = new WhitelistDictionary(this, locale); 437 mWhitelistDictionaries.put(localeStr, whitelistDictionary); 438 } 439 dictionaryCollection.addDictionary(whitelistDictionary); 440 synchronized (mUseContactsLock) { 441 if (mUseContactsDictionary) { 442 if (null == mContactsDictionary) { 443 // TODO: revert to the concrete type when USE_BINARY_CONTACTS_DICTIONARY is no 444 // longer needed 445 if (LatinIME.USE_BINARY_CONTACTS_DICTIONARY) { 446 // TODO: use the right locale. We can't do it right now because the 447 // spell checker is reusing the contacts dictionary across sessions 448 // without regard for their locale, so we need to fix that first. 449 mContactsDictionary = new SynchronouslyLoadedContactsBinaryDictionary(this, 450 Locale.getDefault()); 451 } else { 452 mContactsDictionary = new SynchronouslyLoadedContactsDictionary(this); 453 } 454 } 455 } 456 dictionaryCollection.addDictionary(mContactsDictionary); 457 mDictionaryCollectionsList.add( 458 new WeakReference<DictionaryCollection>(dictionaryCollection)); 459 } 460 return new DictAndProximity(dictionaryCollection, proximityInfo); 461 } 462 463 // This method assumes the text is not empty or null. getCapitalizationType(String text)464 private static int getCapitalizationType(String text) { 465 // If the first char is not uppercase, then the word is either all lower case, 466 // and in either case we return CAPITALIZE_NONE. 467 if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE; 468 final int len = text.length(); 469 int capsCount = 1; 470 for (int i = 1; i < len; i = text.offsetByCodePoints(i, 1)) { 471 if (1 != capsCount && i != capsCount) break; 472 if (Character.isUpperCase(text.codePointAt(i))) ++capsCount; 473 } 474 // We know the first char is upper case. So we want to test if either everything 475 // else is lower case, or if everything else is upper case. If the string is 476 // exactly one char long, then we will arrive here with capsCount 1, and this is 477 // correct, too. 478 if (1 == capsCount) return CAPITALIZE_FIRST; 479 return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE); 480 } 481 482 private static class AndroidSpellCheckerSession extends Session { 483 // Immutable, but need the locale which is not available in the constructor yet 484 private DictionaryPool mDictionaryPool; 485 // Likewise 486 private Locale mLocale; 487 // Cache this for performance 488 private int mScript; // One of SCRIPT_LATIN or SCRIPT_CYRILLIC for now. 489 490 private final AndroidSpellCheckerService mService; 491 492 private final SuggestionsCache mSuggestionsCache = new SuggestionsCache(); 493 494 private static class SuggestionsParams { 495 public final String[] mSuggestions; 496 public final int mFlags; SuggestionsParams(String[] suggestions, int flags)497 public SuggestionsParams(String[] suggestions, int flags) { 498 mSuggestions = suggestions; 499 mFlags = flags; 500 } 501 } 502 503 private static class SuggestionsCache { 504 private static final int MAX_CACHE_SIZE = 50; 505 // TODO: support bigram 506 private final LruCache<String, SuggestionsParams> mUnigramSuggestionsInfoCache = 507 new LruCache<String, SuggestionsParams>(MAX_CACHE_SIZE); 508 getSuggestionsFromCache(String query)509 public SuggestionsParams getSuggestionsFromCache(String query) { 510 return mUnigramSuggestionsInfoCache.get(query); 511 } 512 putSuggestionsToCache(String query, String[] suggestions, int flags)513 public void putSuggestionsToCache(String query, String[] suggestions, int flags) { 514 if (suggestions == null || TextUtils.isEmpty(query)) { 515 return; 516 } 517 mUnigramSuggestionsInfoCache.put(query, new SuggestionsParams(suggestions, flags)); 518 } 519 } 520 AndroidSpellCheckerSession(final AndroidSpellCheckerService service)521 AndroidSpellCheckerSession(final AndroidSpellCheckerService service) { 522 mService = service; 523 } 524 525 @Override onCreate()526 public void onCreate() { 527 final String localeString = getLocale(); 528 mDictionaryPool = mService.getDictionaryPool(localeString); 529 mLocale = LocaleUtils.constructLocaleFromString(localeString); 530 mScript = getScriptFromLocale(mLocale); 531 } 532 533 /* 534 * Returns whether the code point is a letter that makes sense for the specified 535 * locale for this spell checker. 536 * The dictionaries supported by Latin IME are described in res/xml/spellchecker.xml 537 * and is limited to EFIGS languages and Russian. 538 * Hence at the moment this explicitly tests for Cyrillic characters or Latin characters 539 * as appropriate, and explicitly excludes CJK, Arabic and Hebrew characters. 540 */ isLetterCheckableByLanguage(final int codePoint, final int script)541 private static boolean isLetterCheckableByLanguage(final int codePoint, 542 final int script) { 543 switch (script) { 544 case SCRIPT_LATIN: 545 // Our supported latin script dictionaries (EFIGS) at the moment only include 546 // characters in the C0, C1, Latin Extended A and B, IPA extensions unicode 547 // blocks. As it happens, those are back-to-back in the code range 0x40 to 0x2AF, 548 // so the below is a very efficient way to test for it. As for the 0-0x3F, it's 549 // excluded from isLetter anyway. 550 return codePoint <= 0x2AF && Character.isLetter(codePoint); 551 case SCRIPT_CYRILLIC: 552 // All Cyrillic characters are in the 400~52F block. There are some in the upper 553 // Unicode range, but they are archaic characters that are not used in modern 554 // russian and are not used by our dictionary. 555 return codePoint >= 0x400 && codePoint <= 0x52F && Character.isLetter(codePoint); 556 default: 557 // Should never come here 558 throw new RuntimeException("Impossible value of script: " + script); 559 } 560 } 561 562 /** 563 * Finds out whether a particular string should be filtered out of spell checking. 564 * 565 * This will loosely match URLs, numbers, symbols. To avoid always underlining words that 566 * we know we will never recognize, this accepts a script identifier that should be one 567 * of the SCRIPT_* constants defined above, to rule out quickly characters from very 568 * different languages. 569 * 570 * @param text the string to evaluate. 571 * @param script the identifier for the script this spell checker recognizes 572 * @return true if we should filter this text out, false otherwise 573 */ shouldFilterOut(final String text, final int script)574 private static boolean shouldFilterOut(final String text, final int script) { 575 if (TextUtils.isEmpty(text) || text.length() <= 1) return true; 576 577 // TODO: check if an equivalent processing can't be done more quickly with a 578 // compiled regexp. 579 // Filter by first letter 580 final int firstCodePoint = text.codePointAt(0); 581 // Filter out words that don't start with a letter or an apostrophe 582 if (!isLetterCheckableByLanguage(firstCodePoint, script) 583 && '\'' != firstCodePoint) return true; 584 585 // Filter contents 586 final int length = text.length(); 587 int letterCount = 0; 588 for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { 589 final int codePoint = text.codePointAt(i); 590 // Any word containing a '@' is probably an e-mail address 591 // Any word containing a '/' is probably either an ad-hoc combination of two 592 // words or a URI - in either case we don't want to spell check that 593 if ('@' == codePoint || '/' == codePoint) return true; 594 if (isLetterCheckableByLanguage(codePoint, script)) ++letterCount; 595 } 596 // Guestimate heuristic: perform spell checking if at least 3/4 of the characters 597 // in this word are letters 598 return (letterCount * 4 < length * 3); 599 } 600 fixWronglyInvalidatedWordWithSingleQuote( TextInfo ti, SentenceSuggestionsInfo ssi)601 private SentenceSuggestionsInfo fixWronglyInvalidatedWordWithSingleQuote( 602 TextInfo ti, SentenceSuggestionsInfo ssi) { 603 final String typedText = ti.getText(); 604 if (!typedText.contains(SINGLE_QUOTE)) { 605 return null; 606 } 607 final int N = ssi.getSuggestionsCount(); 608 final ArrayList<Integer> additionalOffsets = new ArrayList<Integer>(); 609 final ArrayList<Integer> additionalLengths = new ArrayList<Integer>(); 610 final ArrayList<SuggestionsInfo> additionalSuggestionsInfos = 611 new ArrayList<SuggestionsInfo>(); 612 for (int i = 0; i < N; ++i) { 613 final SuggestionsInfo si = ssi.getSuggestionsInfoAt(i); 614 final int flags = si.getSuggestionsAttributes(); 615 if ((flags & SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY) == 0) { 616 continue; 617 } 618 final int offset = ssi.getOffsetAt(i); 619 final int length = ssi.getLengthAt(i); 620 final String subText = typedText.substring(offset, offset + length); 621 if (!subText.contains(SINGLE_QUOTE)) { 622 continue; 623 } 624 final String[] splitTexts = subText.split(SINGLE_QUOTE, -1); 625 if (splitTexts == null || splitTexts.length <= 1) { 626 continue; 627 } 628 final int splitNum = splitTexts.length; 629 for (int j = 0; j < splitNum; ++j) { 630 final String splitText = splitTexts[j]; 631 if (TextUtils.isEmpty(splitText)) { 632 continue; 633 } 634 if (mSuggestionsCache.getSuggestionsFromCache(splitText) == null) { 635 continue; 636 } 637 final int newLength = splitText.length(); 638 // Neither RESULT_ATTR_IN_THE_DICTIONARY nor RESULT_ATTR_LOOKS_LIKE_TYPO 639 final int newFlags = 0; 640 final SuggestionsInfo newSi = new SuggestionsInfo(newFlags, EMPTY_STRING_ARRAY); 641 newSi.setCookieAndSequence(si.getCookie(), si.getSequence()); 642 if (DBG) { 643 Log.d(TAG, "Override and remove old span over: " 644 + splitText + ", " + offset + "," + newLength); 645 } 646 additionalOffsets.add(offset); 647 additionalLengths.add(newLength); 648 additionalSuggestionsInfos.add(newSi); 649 } 650 } 651 final int additionalSize = additionalOffsets.size(); 652 if (additionalSize <= 0) { 653 return null; 654 } 655 final int suggestionsSize = N + additionalSize; 656 final int[] newOffsets = new int[suggestionsSize]; 657 final int[] newLengths = new int[suggestionsSize]; 658 final SuggestionsInfo[] newSuggestionsInfos = new SuggestionsInfo[suggestionsSize]; 659 int i; 660 for (i = 0; i < N; ++i) { 661 newOffsets[i] = ssi.getOffsetAt(i); 662 newLengths[i] = ssi.getLengthAt(i); 663 newSuggestionsInfos[i] = ssi.getSuggestionsInfoAt(i); 664 } 665 for (; i < suggestionsSize; ++i) { 666 newOffsets[i] = additionalOffsets.get(i - N); 667 newLengths[i] = additionalLengths.get(i - N); 668 newSuggestionsInfos[i] = additionalSuggestionsInfos.get(i - N); 669 } 670 return new SentenceSuggestionsInfo(newSuggestionsInfos, newOffsets, newLengths); 671 } 672 673 @Override onGetSentenceSuggestionsMultiple( TextInfo[] textInfos, int suggestionsLimit)674 public SentenceSuggestionsInfo[] onGetSentenceSuggestionsMultiple( 675 TextInfo[] textInfos, int suggestionsLimit) { 676 final SentenceSuggestionsInfo[] retval = super.onGetSentenceSuggestionsMultiple( 677 textInfos, suggestionsLimit); 678 if (retval == null || retval.length != textInfos.length) { 679 return retval; 680 } 681 for (int i = 0; i < retval.length; ++i) { 682 final SentenceSuggestionsInfo tempSsi = 683 fixWronglyInvalidatedWordWithSingleQuote(textInfos[i], retval[i]); 684 if (tempSsi != null) { 685 retval[i] = tempSsi; 686 } 687 } 688 return retval; 689 } 690 691 @Override onGetSuggestionsMultiple(TextInfo[] textInfos, int suggestionsLimit, boolean sequentialWords)692 public SuggestionsInfo[] onGetSuggestionsMultiple(TextInfo[] textInfos, 693 int suggestionsLimit, boolean sequentialWords) { 694 final int length = textInfos.length; 695 final SuggestionsInfo[] retval = new SuggestionsInfo[length]; 696 for (int i = 0; i < length; ++i) { 697 final String prevWord; 698 if (sequentialWords && i > 0) { 699 final String prevWordCandidate = textInfos[i - 1].getText(); 700 // Note that an empty string would be used to indicate the initial word 701 // in the future. 702 prevWord = TextUtils.isEmpty(prevWordCandidate) ? null : prevWordCandidate; 703 } else { 704 prevWord = null; 705 } 706 retval[i] = onGetSuggestions(textInfos[i], prevWord, suggestionsLimit); 707 retval[i].setCookieAndSequence( 708 textInfos[i].getCookie(), textInfos[i].getSequence()); 709 } 710 return retval; 711 } 712 713 // Note : this must be reentrant 714 /** 715 * Gets a list of suggestions for a specific string. This returns a list of possible 716 * corrections for the text passed as an argument. It may split or group words, and 717 * even perform grammatical analysis. 718 */ 719 @Override onGetSuggestions(final TextInfo textInfo, final int suggestionsLimit)720 public SuggestionsInfo onGetSuggestions(final TextInfo textInfo, 721 final int suggestionsLimit) { 722 return onGetSuggestions(textInfo, null, suggestionsLimit); 723 } 724 onGetSuggestions( final TextInfo textInfo, final String prevWord, final int suggestionsLimit)725 private SuggestionsInfo onGetSuggestions( 726 final TextInfo textInfo, final String prevWord, final int suggestionsLimit) { 727 try { 728 final String inText = textInfo.getText(); 729 final SuggestionsParams cachedSuggestionsParams = 730 mSuggestionsCache.getSuggestionsFromCache(inText); 731 if (cachedSuggestionsParams != null) { 732 if (DBG) { 733 Log.d(TAG, "Cache hit: " + inText + ", " + cachedSuggestionsParams.mFlags); 734 } 735 return new SuggestionsInfo( 736 cachedSuggestionsParams.mFlags, cachedSuggestionsParams.mSuggestions); 737 } 738 739 if (shouldFilterOut(inText, mScript)) { 740 DictAndProximity dictInfo = null; 741 try { 742 dictInfo = mDictionaryPool.takeOrGetNull(); 743 if (null == dictInfo) return getNotInDictEmptySuggestions(); 744 return dictInfo.mDictionary.isValidWord(inText) ? 745 getInDictEmptySuggestions() : getNotInDictEmptySuggestions(); 746 } finally { 747 if (null != dictInfo) { 748 if (!mDictionaryPool.offer(dictInfo)) { 749 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 750 } 751 } 752 } 753 } 754 final String text = inText.replaceAll(APOSTROPHE, SINGLE_QUOTE); 755 756 // TODO: Don't gather suggestions if the limit is <= 0 unless necessary 757 final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text, 758 mService.mSuggestionThreshold, mService.mRecommendedThreshold, 759 suggestionsLimit); 760 final WordComposer composer = new WordComposer(); 761 final int length = text.length(); 762 for (int i = 0; i < length; i = text.offsetByCodePoints(i, 1)) { 763 final int codePoint = text.codePointAt(i); 764 // The getXYForCodePointAndScript method returns (Y << 16) + X 765 final int xy = SpellCheckerProximityInfo.getXYForCodePointAndScript( 766 codePoint, mScript); 767 if (SpellCheckerProximityInfo.NOT_A_COORDINATE_PAIR == xy) { 768 composer.add(codePoint, WordComposer.NOT_A_COORDINATE, 769 WordComposer.NOT_A_COORDINATE, null); 770 } else { 771 composer.add(codePoint, xy & 0xFFFF, xy >> 16, null); 772 } 773 } 774 775 final int capitalizeType = getCapitalizationType(text); 776 boolean isInDict = true; 777 DictAndProximity dictInfo = null; 778 try { 779 dictInfo = mDictionaryPool.takeOrGetNull(); 780 if (null == dictInfo) return getNotInDictEmptySuggestions(); 781 dictInfo.mDictionary.getWords(composer, prevWord, suggestionsGatherer, 782 dictInfo.mProximityInfo); 783 isInDict = dictInfo.mDictionary.isValidWord(text); 784 if (!isInDict && CAPITALIZE_NONE != capitalizeType) { 785 // We want to test the word again if it's all caps or first caps only. 786 // If it's fully down, we already tested it, if it's mixed case, we don't 787 // want to test a lowercase version of it. 788 isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale)); 789 } 790 } finally { 791 if (null != dictInfo) { 792 if (!mDictionaryPool.offer(dictInfo)) { 793 Log.e(TAG, "Can't re-insert a dictionary into its pool"); 794 } 795 } 796 } 797 798 final SuggestionsGatherer.Result result = suggestionsGatherer.getResults( 799 capitalizeType, mLocale); 800 801 if (DBG) { 802 Log.i(TAG, "Spell checking results for " + text + " with suggestion limit " 803 + suggestionsLimit); 804 Log.i(TAG, "IsInDict = " + isInDict); 805 Log.i(TAG, "LooksLikeTypo = " + (!isInDict)); 806 Log.i(TAG, "HasRecommendedSuggestions = " + result.mHasRecommendedSuggestions); 807 if (null != result.mSuggestions) { 808 for (String suggestion : result.mSuggestions) { 809 Log.i(TAG, suggestion); 810 } 811 } 812 } 813 814 final int flags = 815 (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY 816 : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO) 817 | (result.mHasRecommendedSuggestions 818 ? SuggestionsInfoCompatUtils 819 .getValueOf_RESULT_ATTR_HAS_RECOMMENDED_SUGGESTIONS() 820 : 0); 821 final SuggestionsInfo retval = new SuggestionsInfo(flags, result.mSuggestions); 822 mSuggestionsCache.putSuggestionsToCache(text, result.mSuggestions, flags); 823 return retval; 824 } catch (RuntimeException e) { 825 // Don't kill the keyboard if there is a bug in the spell checker 826 if (DBG) { 827 throw e; 828 } else { 829 Log.e(TAG, "Exception while spellcheking: " + e); 830 return getNotInDictEmptySuggestions(); 831 } 832 } 833 } 834 } 835 } 836