/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin.spellcheck;

import android.content.Intent;
import android.content.res.Resources;
import android.service.textservice.SpellCheckerService;
import android.text.TextUtils;
import android.util.Log;
import android.view.textservice.SuggestionsInfo;
import android.view.textservice.TextInfo;

import com.android.inputmethod.compat.ArraysCompatUtils;
import com.android.inputmethod.keyboard.ProximityInfo;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.Dictionary.DataType;
import com.android.inputmethod.latin.Dictionary.WordCallback;
import com.android.inputmethod.latin.DictionaryCollection;
import com.android.inputmethod.latin.DictionaryFactory;
import com.android.inputmethod.latin.Flag;
import com.android.inputmethod.latin.LocaleUtils;
import com.android.inputmethod.latin.R;
import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary;
import com.android.inputmethod.latin.Utils;
import com.android.inputmethod.latin.WhitelistDictionary;
import com.android.inputmethod.latin.WordComposer;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;

/**
 * Service for spell checking, using LatinIME's dictionaries and mechanisms.
 */
public class AndroidSpellCheckerService extends SpellCheckerService {
    private static final String TAG = AndroidSpellCheckerService.class.getSimpleName();
    private static final boolean DBG = false;
    private static final int POOL_SIZE = 2;

    private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case
    private static final int CAPITALIZE_FIRST = 1; // First only
    private static final int CAPITALIZE_ALL = 2; // All caps

    private final static String[] EMPTY_STRING_ARRAY = new String[0];
    private final static SuggestionsInfo NOT_IN_DICT_EMPTY_SUGGESTIONS =
            new SuggestionsInfo(0, EMPTY_STRING_ARRAY);
    private final static SuggestionsInfo IN_DICT_EMPTY_SUGGESTIONS =
            new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY,
                    EMPTY_STRING_ARRAY);
    private final static Flag[] USE_FULL_EDIT_DISTANCE_FLAG_ARRAY;
    static {
        // See BinaryDictionary.java for an explanation of these flags
        // Specifically, ALL_CONFIG_FLAGS means that we want to consider all flags with the
        // current dictionary configuration - for example, consider the UMLAUT flag
        // so that it will be turned on for German dictionaries and off for others.
        USE_FULL_EDIT_DISTANCE_FLAG_ARRAY = Arrays.copyOf(BinaryDictionary.ALL_CONFIG_FLAGS,
                BinaryDictionary.ALL_CONFIG_FLAGS.length + 1);
        USE_FULL_EDIT_DISTANCE_FLAG_ARRAY[BinaryDictionary.ALL_CONFIG_FLAGS.length] =
                BinaryDictionary.FLAG_USE_FULL_EDIT_DISTANCE;
    }

    // Per-locale caches, keyed by the locale string. They are replaced wholesale (and the old
    // contents closed) in onUnbind(), which is why the fields are not final.
    private Map<String, DictionaryPool> mDictionaryPools =
            Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
    private Map<String, Dictionary> mUserDictionaries =
            Collections.synchronizedMap(new TreeMap<String, Dictionary>());
    private Map<String, Dictionary> mWhitelistDictionaries =
            Collections.synchronizedMap(new TreeMap<String, Dictionary>());

    // The threshold for a candidate to be offered as a suggestion.
    private double mSuggestionThreshold;
    // The threshold for a suggestion to be considered "likely".
    private double mLikelyThreshold;

    /**
     * Reads the suggestion and likelihood thresholds from string resources.
     */
    @Override
    public void onCreate() {
        super.onCreate();
        mSuggestionThreshold =
                Double.parseDouble(getString(R.string.spellchecker_suggestion_threshold_value));
        mLikelyThreshold =
                Double.parseDouble(getString(R.string.spellchecker_likely_threshold_value));
    }

    @Override
    public Session createSession() {
        return new AndroidSpellCheckerSession(this);
    }

    /**
     * Collects the words reported by the dictionaries, keeping at most the requested number of
     * best-scoring ones.
     *
     * Kept words are stored sorted by ascending score: index 0 is the weakest kept suggestion
     * and index mLength - 1 is the best one. mScores[i] is always the score of
     * mSuggestions.get(i).
     */
    private static class SuggestionsGatherer implements WordCallback {
        /** Immutable holder for the outcome of a gathering pass. */
        public static class Result {
            // May be null when no suggestion at all was gathered.
            public final String[] mSuggestions;
            public final boolean mHasLikelySuggestions;
            public Result(final String[] gatheredSuggestions, final boolean hasLikelySuggestions) {
                mSuggestions = gatheredSuggestions;
                mHasLikelySuggestions = hasLikelySuggestions;
            }
        }

        private final ArrayList<CharSequence> mSuggestions;
        private final int[] mScores;
        private final String mOriginalText;
        private final double mSuggestionThreshold;
        private final double mLikelyThreshold;
        private final int mMaxLength;
        private int mLength = 0;

        // The two following attributes are only ever filled if the requested max length
        // is 0 (or less, which is treated the same).
        private String mBestSuggestion = null;
        private int mBestScore = Integer.MIN_VALUE; // As small as possible

        /**
         * @param originalText the text being checked, used to normalize scores
         * @param suggestionThreshold minimum normalized score for a word to be kept
         * @param likelyThreshold normalized score above which the best word is "likely"
         * @param maxLength maximum number of suggestions to keep; 0 or less keeps none
         */
        SuggestionsGatherer(final String originalText, final double suggestionThreshold,
                final double likelyThreshold, final int maxLength) {
            mOriginalText = originalText;
            mSuggestionThreshold = suggestionThreshold;
            mLikelyThreshold = likelyThreshold;
            // A negative limit is treated as 0; without the clamp the allocations below throw.
            mMaxLength = Math.max(0, maxLength);
            mSuggestions = new ArrayList<CharSequence>(mMaxLength + 1);
            mScores = new int[mMaxLength];
        }

        /**
         * Offers a candidate word to the gatherer.
         *
         * The word is kept if it makes the normalized score threshold and either there is
         * still room, or it scores higher than the weakest word currently kept (which is then
         * evicted).
         *
         * @return always true
         */
        @Override
        synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score,
                int dicTypeId, DataType dataType) {
            final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score);
            // binarySearch returns the index if the element exists, and -<insertion index> - 1
            // if it doesn't. See documentation for binarySearch.
            final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1;

            if (insertIndex == 0 && mLength >= mMaxLength) {
                // The buffer is full (or has zero capacity) and this word does not beat the
                // weakest kept suggestion, so it is dropped.
                // In the future, we may want to keep track of the best suggestion score even if
                // we are asked for 0 suggestions. In this case, we can use the following
                // (tested) code to keep it:
                // If the maxLength is 0 (should never be less, but if it is, it's treated as 0)
                // then we need to keep track of the best suggestion in mBestScore and
                // mBestSuggestion. This is so that we know whether the best suggestion makes
                // the score cutoff, since we need to know that to return a meaningful
                // looksLikeTypo.
                // if (0 >= mMaxLength) {
                //     if (score > mBestScore) {
                //         mBestScore = score;
                //         mBestSuggestion = new String(word, wordOffset, wordLength);
                //     }
                // }
                return true;
            }

            // Compute the normalized score and skip this word if its normalized score does not
            // make the threshold.
            final String wordString = new String(word, wordOffset, wordLength);
            final double normalizedScore =
                    Utils.calcNormalizedScore(mOriginalText, wordString, score);
            if (normalizedScore < mSuggestionThreshold) {
                if (DBG) Log.i(TAG, wordString + " does not make the score threshold");
                return true;
            }

            if (mLength < mMaxLength) {
                // Still room: open a gap at insertIndex by shifting the stronger entries up.
                final int copyLen = mLength - insertIndex;
                ++mLength;
                System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen);
                mSuggestions.add(insertIndex, wordString);
                mScores[insertIndex] = score;
            } else {
                // Full: evict the weakest entry (index 0). Every entry below the insertion
                // point moves down by one, so the new word lands at insertIndex - 1. Here
                // insertIndex is in [1, mMaxLength]; mMaxLength means the word beats them all.
                final int slot = insertIndex - 1;
                System.arraycopy(mScores, 1, mScores, 0, slot);
                mSuggestions.remove(0);
                mSuggestions.add(slot, wordString);
                mScores[slot] = score;
            }

            return true;
        }

        /**
         * Builds the final result from the gathered words.
         *
         * Suggestions are returned best first, de-duplicated, and re-capitalized to match the
         * capitalization of the checked word.
         *
         * @param capitalizeType one of CAPITALIZE_NONE, CAPITALIZE_FIRST or CAPITALIZE_ALL
         * @param locale the locale used for case conversion
         * @return the result; its suggestion array is null if nothing was gathered
         */
        public Result getResults(final int capitalizeType, final Locale locale) {
            final String[] gatheredSuggestions;
            final boolean hasLikelySuggestions;
            if (0 == mLength) {
                // Either we found no suggestions, or we found some BUT the max length was 0.
                // If we found some mBestSuggestion will not be null. If it is null, then
                // we found none, regardless of the max length.
                // Note that nothing currently assigns mBestSuggestion (the code doing so is
                // commented out in addWord), so only the first branch is taken today.
                if (null == mBestSuggestion) {
                    gatheredSuggestions = null;
                    hasLikelySuggestions = false;
                } else {
                    gatheredSuggestions = EMPTY_STRING_ARRAY;
                    final double normalizedScore =
                            Utils.calcNormalizedScore(mOriginalText, mBestSuggestion, mBestScore);
                    hasLikelySuggestions = (normalizedScore > mLikelyThreshold);
                }
            } else {
                if (DBG) {
                    if (mLength != mSuggestions.size()) {
                        Log.e(TAG, "Suggestion size is not the same as stored mLength");
                    }
                    for (int i = mLength - 1; i >= 0; --i) {
                        Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i));
                    }
                }
                // Stored weakest first; callers want the best first.
                Collections.reverse(mSuggestions);
                Utils.removeDupes(mSuggestions);
                if (CAPITALIZE_ALL == capitalizeType) {
                    for (int i = 0; i < mSuggestions.size(); ++i) {
                        // get(i) returns a CharSequence which is actually a String so .toString()
                        // should return the same object.
                        mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale));
                    }
                } else if (CAPITALIZE_FIRST == capitalizeType) {
                    for (int i = 0; i < mSuggestions.size(); ++i) {
                        // Likewise
                        mSuggestions.set(i, Utils.toTitleCase(mSuggestions.get(i).toString(),
                                locale));
                    }
                }
                // This returns a String[], while toArray() returns an Object[] which cannot be cast
                // into a String[].
                gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY);

                // mScores is untouched by the reversal above, so the best score is still last.
                final int bestScore = mScores[mLength - 1];
                final CharSequence bestSuggestion = mSuggestions.get(0);
                final double normalizedScore =
                        Utils.calcNormalizedScore(mOriginalText, bestSuggestion, bestScore);
                hasLikelySuggestions = (normalizedScore > mLikelyThreshold);
                if (DBG) {
                    Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
                    Log.i(TAG, "Normalized score = " + normalizedScore
                            + " (threshold " + mLikelyThreshold
                            + ") => hasLikelySuggestions = " + hasLikelySuggestions);
                }
            }
            return new Result(gatheredSuggestions, hasLikelySuggestions);
        }
    }

    /**
     * Swaps in fresh, empty caches and then closes every pool and dictionary that was held by
     * the old ones.
     */
    @Override
    public boolean onUnbind(final Intent intent) {
        final Map<String, DictionaryPool> oldPools = mDictionaryPools;
        mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
        final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries;
        mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
        final Map<String, Dictionary> oldWhitelistDictionaries = mWhitelistDictionaries;
        mWhitelistDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
        for (DictionaryPool pool : oldPools.values()) {
            pool.close();
        }
        for (Dictionary dict : oldUserDictionaries.values()) {
            dict.close();
        }
        for (Dictionary dict : oldWhitelistDictionaries.values()) {
            dict.close();
        }
        return false;
    }

    /**
     * Returns the dictionary pool for the given locale string, creating and caching it on
     * first use.
     */
    private DictionaryPool getDictionaryPool(final String locale) {
        // NOTE(review): get-then-put is not atomic even on a synchronized map, so two threads
        // could each create a pool for the same locale - confirm whether callers can race.
        DictionaryPool pool = mDictionaryPools.get(locale);
        if (null == pool) {
            final Locale localeObject = LocaleUtils.constructLocaleFromString(locale);
            pool = new DictionaryPool(POOL_SIZE, this, localeObject);
            mDictionaryPools.put(locale, pool);
        }
        return pool;
    }

    /**
     * Creates a dictionary collection for the given locale, made of a freshly created main
     * dictionary plus the per-locale user and whitelist dictionaries (which are cached and
     * shared), together with the spell checker proximity info.
     */
    public DictAndProximity createDictAndProximity(final Locale locale) {
        final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo();
        final Resources resources = getResources();
        final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources);
        final DictionaryCollection dictionaryCollection =
                DictionaryFactory.createDictionaryFromManager(this, locale, fallbackResourceId,
                        USE_FULL_EDIT_DISTANCE_FLAG_ARRAY);
        final String localeStr = locale.toString();
        // NOTE(review): same non-atomic get-then-put pattern as in getDictionaryPool.
        Dictionary userDictionary = mUserDictionaries.get(localeStr);
        if (null == userDictionary) {
            userDictionary = new SynchronouslyLoadedUserDictionary(this, localeStr, true);
            mUserDictionaries.put(localeStr, userDictionary);
        }
        dictionaryCollection.addDictionary(userDictionary);
        Dictionary whitelistDictionary = mWhitelistDictionaries.get(localeStr);
        if (null == whitelistDictionary) {
            whitelistDictionary = new WhitelistDictionary(this, locale);
            mWhitelistDictionaries.put(localeStr, whitelistDictionary);
        }
        dictionaryCollection.addDictionary(whitelistDictionary);
        return new DictAndProximity(dictionaryCollection, proximityInfo);
    }

    /**
     * Classifies the capitalization of a word.
     *
     * This method assumes the text is not empty or null.
     *
     * @param text the word to examine
     * @return CAPITALIZE_FIRST if only the first code point is upper case, CAPITALIZE_ALL if
     *         every code point is upper case, CAPITALIZE_NONE otherwise
     */
    private static int getCapitalizationType(String text) {
        // If the first char is not uppercase, then the word is either all lower case or
        // mixed case, and in either case we return CAPITALIZE_NONE.
        final int firstCodePoint = text.codePointAt(0);
        if (!Character.isUpperCase(firstCodePoint)) return CAPITALIZE_NONE;
        final int len = text.codePointCount(0, text.length());
        int capsCount = 1;
        // i counts code points while offset is the matching char index: the two differ as soon
        // as the text contains a supplementary character (a surrogate pair).
        int offset = Character.charCount(firstCodePoint);
        for (int i = 1; i < len; ++i) {
            // Stop as soon as the word is known to be mixed case.
            if (1 != capsCount && i != capsCount) break;
            final int codePoint = text.codePointAt(offset);
            if (Character.isUpperCase(codePoint)) ++capsCount;
            offset += Character.charCount(codePoint);
        }
        // We know the first char is upper case. So we want to test if either everything
        // else is lower case, or if everything else is upper case. If the string is
        // exactly one char long, then we will arrive here with capsCount 1, and this is
        // correct, too.
        if (1 == capsCount) return CAPITALIZE_FIRST;
        return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);
    }

    private static class AndroidSpellCheckerSession extends Session {
        // Immutable, but need the locale which is not available in the constructor yet
        private DictionaryPool mDictionaryPool;
        // Likewise
        private Locale mLocale;

        private final AndroidSpellCheckerService mService;

        AndroidSpellCheckerSession(final AndroidSpellCheckerService service) {
            mService = service;
        }

        @Override
        public void onCreate() {
            final String localeString = getLocale();
            mDictionaryPool = mService.getDictionaryPool(localeString);
            mLocale = LocaleUtils.constructLocaleFromString(localeString);
        }

        /**
         * Finds out whether a particular string should be filtered out of spell checking.
         *
         * This will loosely match URLs, numbers, symbols.
         *
         * @param text the string to evaluate.
         * @return true if we should filter this text out, false otherwise
         */
        private boolean shouldFilterOut(final String text) {
            if (TextUtils.isEmpty(text) || text.length() <= 1) return true;

            // TODO: check if an equivalent processing can't be done more quickly with a
            // compiled regexp.
            // Filter by first letter
            final int firstCodePoint = text.codePointAt(0);
            // Filter out words that don't start with a letter or an apostrophe
            if (!Character.isLetter(firstCodePoint)
                    && '\'' != firstCodePoint) return true;

            // Filter contents. Walk the text one code point at a time so that a surrogate
            // pair is counted once, as the single character it encodes.
            final int length = text.length();
            int codePointCount = 0;
            int letterCount = 0;
            int i = 0;
            while (i < length) {
                final int codePoint = text.codePointAt(i);
                // Any word containing a '@' is probably an e-mail address
                // Any word containing a '/' is probably either an ad-hoc combination of two
                // words or a URI - in either case we don't want to spell check that
                if ('@' == codePoint
                        || '/' == codePoint) return true;
                if (Character.isLetter(codePoint)) ++letterCount;
                ++codePointCount;
                i += Character.charCount(codePoint);
            }
            // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
            // in this word are letters
            return (letterCount * 4 < codePointCount * 3);
        }

        // Note : this must be reentrant
        /**
         * Gets a list of suggestions for a specific string. This returns a list of possible
         * corrections for the text passed as an argument. It may split or group words, and
         * even perform grammatical analysis.
         *
         * Any RuntimeException raised while checking is logged and turned into an empty
         * "not in dictionary" result, unless DBG is set in which case it is rethrown.
         */
        @Override
        public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
                final int suggestionsLimit) {
            try {
                final String text = textInfo.getText();

                if (shouldFilterOut(text)) {
                    // Filtered-out text gets no suggestions; only report whether it is a
                    // known word.
                    DictAndProximity dictInfo = null;
                    try {
                        dictInfo = mDictionaryPool.takeOrGetNull();
                        if (null == dictInfo) return NOT_IN_DICT_EMPTY_SUGGESTIONS;
                        return dictInfo.mDictionary.isValidWord(text) ? IN_DICT_EMPTY_SUGGESTIONS
                                : NOT_IN_DICT_EMPTY_SUGGESTIONS;
                    } finally {
                        if (null != dictInfo) {
                            if (!mDictionaryPool.offer(dictInfo)) {
                                Log.e(TAG, "Can't re-insert a dictionary into its pool");
                            }
                        }
                    }
                }

                // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
                final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text,
                        mService.mSuggestionThreshold, mService.mLikelyThreshold, suggestionsLimit);
                final WordComposer composer = new WordComposer();
                final int length = text.length();
                // Feed the composer one code point at a time; advancing by charCount keeps the
                // low half of a surrogate pair from being added as a character of its own.
                int i = 0;
                while (i < length) {
                    final int character = text.codePointAt(i);
                    final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character);
                    final int[] proximities;
                    if (-1 == proximityIndex) {
                        proximities = new int[] { character };
                    } else {
                        proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY,
                                proximityIndex,
                                proximityIndex + SpellCheckerProximityInfo.ROW_SIZE);
                    }
                    composer.add(character, proximities,
                            WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE);
                    i += Character.charCount(character);
                }

                final int capitalizeType = getCapitalizationType(text);
                boolean isInDict = true;
                DictAndProximity dictInfo = null;
                try {
                    dictInfo = mDictionaryPool.takeOrGetNull();
                    if (null == dictInfo) return NOT_IN_DICT_EMPTY_SUGGESTIONS;
                    dictInfo.mDictionary.getWords(composer, suggestionsGatherer,
                            dictInfo.mProximityInfo);
                    isInDict = dictInfo.mDictionary.isValidWord(text);
                    if (!isInDict && CAPITALIZE_NONE != capitalizeType) {
                        // We want to test the word again if it's all caps or first caps only.
                        // If it's fully down, we already tested it, if it's mixed case, we don't
                        // want to test a lowercase version of it.
                        isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale));
                    }
                } finally {
                    // Always hand the dictionary back to the pool, whatever happened above.
                    if (null != dictInfo) {
                        if (!mDictionaryPool.offer(dictInfo)) {
                            Log.e(TAG, "Can't re-insert a dictionary into its pool");
                        }
                    }
                }

                final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(
                        capitalizeType, mLocale);

                if (DBG) {
                    Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
                            + suggestionsLimit);
                    Log.i(TAG, "IsInDict = " + isInDict);
                    Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
                    Log.i(TAG, "HasLikelySuggestions = " + result.mHasLikelySuggestions);
                    if (null != result.mSuggestions) {
                        for (String suggestion : result.mSuggestions) {
                            Log.i(TAG, suggestion);
                        }
                    }
                }

                // TODO: actually use result.mHasLikelySuggestions
                final int flags =
                        (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
                                : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO);
                return new SuggestionsInfo(flags, result.mSuggestions);
            } catch (RuntimeException e) {
                // Don't kill the keyboard if there is a bug in the spell checker
                if (DBG) {
                    throw e;
                } else {
                    // Pass the exception along so that the stack trace ends up in the log.
                    Log.e(TAG, "Exception while spellcheking: " + e, e);
                    return NOT_IN_DICT_EMPTY_SUGGESTIONS;
                }
            }
        }
    }
}