• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
16 
17 package com.android.inputmethod.latin.spellcheck;
18 
19 import android.content.Intent;
20 import android.content.res.Resources;
21 import android.service.textservice.SpellCheckerService;
22 import android.text.TextUtils;
23 import android.util.Log;
24 import android.view.textservice.SuggestionsInfo;
25 import android.view.textservice.TextInfo;
26 
27 import com.android.inputmethod.compat.ArraysCompatUtils;
28 import com.android.inputmethod.keyboard.ProximityInfo;
29 import com.android.inputmethod.latin.BinaryDictionary;
30 import com.android.inputmethod.latin.Dictionary;
31 import com.android.inputmethod.latin.Dictionary.DataType;
32 import com.android.inputmethod.latin.Dictionary.WordCallback;
33 import com.android.inputmethod.latin.DictionaryCollection;
34 import com.android.inputmethod.latin.DictionaryFactory;
35 import com.android.inputmethod.latin.Flag;
36 import com.android.inputmethod.latin.LocaleUtils;
37 import com.android.inputmethod.latin.R;
38 import com.android.inputmethod.latin.SynchronouslyLoadedUserDictionary;
39 import com.android.inputmethod.latin.Utils;
40 import com.android.inputmethod.latin.WhitelistDictionary;
41 import com.android.inputmethod.latin.WordComposer;
42 
43 import java.util.ArrayList;
44 import java.util.Arrays;
45 import java.util.Collections;
46 import java.util.Locale;
47 import java.util.Map;
48 import java.util.TreeMap;
49 
50 /**
51  * Service for spell checking, using LatinIME's dictionaries and mechanisms.
52  */
53 public class AndroidSpellCheckerService extends SpellCheckerService {
54     private static final String TAG = AndroidSpellCheckerService.class.getSimpleName();
55     private static final boolean DBG = false;
56     private static final int POOL_SIZE = 2;
57 
58     private static final int CAPITALIZE_NONE = 0; // No caps, or mixed case
59     private static final int CAPITALIZE_FIRST = 1; // First only
60     private static final int CAPITALIZE_ALL = 2; // All caps
61 
62     private final static String[] EMPTY_STRING_ARRAY = new String[0];
63     private final static SuggestionsInfo NOT_IN_DICT_EMPTY_SUGGESTIONS =
64             new SuggestionsInfo(0, EMPTY_STRING_ARRAY);
65     private final static SuggestionsInfo IN_DICT_EMPTY_SUGGESTIONS =
66             new SuggestionsInfo(SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY,
67                     EMPTY_STRING_ARRAY);
68     private final static Flag[] USE_FULL_EDIT_DISTANCE_FLAG_ARRAY;
69     static {
70         // See BinaryDictionary.java for an explanation of these flags
71         // Specifially, ALL_CONFIG_FLAGS means that we want to consider all flags with the
72         // current dictionary configuration - for example, consider the UMLAUT flag
73         // so that it will be turned on for German dictionaries and off for others.
74         USE_FULL_EDIT_DISTANCE_FLAG_ARRAY = Arrays.copyOf(BinaryDictionary.ALL_CONFIG_FLAGS,
75                 BinaryDictionary.ALL_CONFIG_FLAGS.length + 1);
76         USE_FULL_EDIT_DISTANCE_FLAG_ARRAY[BinaryDictionary.ALL_CONFIG_FLAGS.length] =
77                 BinaryDictionary.FLAG_USE_FULL_EDIT_DISTANCE;
78     }
79     private Map<String, DictionaryPool> mDictionaryPools =
80             Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
81     private Map<String, Dictionary> mUserDictionaries =
82             Collections.synchronizedMap(new TreeMap<String, Dictionary>());
83     private Map<String, Dictionary> mWhitelistDictionaries =
84             Collections.synchronizedMap(new TreeMap<String, Dictionary>());
85 
86     // The threshold for a candidate to be offered as a suggestion.
87     private double mSuggestionThreshold;
88     // The threshold for a suggestion to be considered "likely".
89     private double mLikelyThreshold;
90 
onCreate()91     @Override public void onCreate() {
92         super.onCreate();
93         mSuggestionThreshold =
94                 Double.parseDouble(getString(R.string.spellchecker_suggestion_threshold_value));
95         mLikelyThreshold =
96                 Double.parseDouble(getString(R.string.spellchecker_likely_threshold_value));
97     }
98 
99     @Override
createSession()100     public Session createSession() {
101         return new AndroidSpellCheckerSession(this);
102     }
103 
104     private static class SuggestionsGatherer implements WordCallback {
105         public static class Result {
106             public final String[] mSuggestions;
107             public final boolean mHasLikelySuggestions;
Result(final String[] gatheredSuggestions, final boolean hasLikelySuggestions)108             public Result(final String[] gatheredSuggestions, final boolean hasLikelySuggestions) {
109                 mSuggestions = gatheredSuggestions;
110                 mHasLikelySuggestions = hasLikelySuggestions;
111             }
112         }
113 
114         private final ArrayList<CharSequence> mSuggestions;
115         private final int[] mScores;
116         private final String mOriginalText;
117         private final double mSuggestionThreshold;
118         private final double mLikelyThreshold;
119         private final int mMaxLength;
120         private int mLength = 0;
121 
122         // The two following attributes are only ever filled if the requested max length
123         // is 0 (or less, which is treated the same).
124         private String mBestSuggestion = null;
125         private int mBestScore = Integer.MIN_VALUE; // As small as possible
126 
SuggestionsGatherer(final String originalText, final double suggestionThreshold, final double likelyThreshold, final int maxLength)127         SuggestionsGatherer(final String originalText, final double suggestionThreshold,
128                 final double likelyThreshold, final int maxLength) {
129             mOriginalText = originalText;
130             mSuggestionThreshold = suggestionThreshold;
131             mLikelyThreshold = likelyThreshold;
132             mMaxLength = maxLength;
133             mSuggestions = new ArrayList<CharSequence>(maxLength + 1);
134             mScores = new int[mMaxLength];
135         }
136 
137         @Override
addWord(char[] word, int wordOffset, int wordLength, int score, int dicTypeId, DataType dataType)138         synchronized public boolean addWord(char[] word, int wordOffset, int wordLength, int score,
139                 int dicTypeId, DataType dataType) {
140             final int positionIndex = ArraysCompatUtils.binarySearch(mScores, 0, mLength, score);
141             // binarySearch returns the index if the element exists, and -<insertion index> - 1
142             // if it doesn't. See documentation for binarySearch.
143             final int insertIndex = positionIndex >= 0 ? positionIndex : -positionIndex - 1;
144 
145             if (insertIndex == 0 && mLength >= mMaxLength) {
146                 // In the future, we may want to keep track of the best suggestion score even if
147                 // we are asked for 0 suggestions. In this case, we can use the following
148                 // (tested) code to keep it:
149                 // If the maxLength is 0 (should never be less, but if it is, it's treated as 0)
150                 // then we need to keep track of the best suggestion in mBestScore and
151                 // mBestSuggestion. This is so that we know whether the best suggestion makes
152                 // the score cutoff, since we need to know that to return a meaningful
153                 // looksLikeTypo.
154                 // if (0 >= mMaxLength) {
155                 //     if (score > mBestScore) {
156                 //         mBestScore = score;
157                 //         mBestSuggestion = new String(word, wordOffset, wordLength);
158                 //     }
159                 // }
160                 return true;
161             }
162             if (insertIndex >= mMaxLength) {
163                 // We found a suggestion, but its score is too weak to be kept considering
164                 // the suggestion limit.
165                 return true;
166             }
167 
168             // Compute the normalized score and skip this word if it's normalized score does not
169             // make the threshold.
170             final String wordString = new String(word, wordOffset, wordLength);
171             final double normalizedScore =
172                     Utils.calcNormalizedScore(mOriginalText, wordString, score);
173             if (normalizedScore < mSuggestionThreshold) {
174                 if (DBG) Log.i(TAG, wordString + " does not make the score threshold");
175                 return true;
176             }
177 
178             if (mLength < mMaxLength) {
179                 final int copyLen = mLength - insertIndex;
180                 ++mLength;
181                 System.arraycopy(mScores, insertIndex, mScores, insertIndex + 1, copyLen);
182                 mSuggestions.add(insertIndex, wordString);
183             } else {
184                 System.arraycopy(mScores, 1, mScores, 0, insertIndex);
185                 mSuggestions.add(insertIndex, wordString);
186                 mSuggestions.remove(0);
187             }
188             mScores[insertIndex] = score;
189 
190             return true;
191         }
192 
getResults(final int capitalizeType, final Locale locale)193         public Result getResults(final int capitalizeType, final Locale locale) {
194             final String[] gatheredSuggestions;
195             final boolean hasLikelySuggestions;
196             if (0 == mLength) {
197                 // Either we found no suggestions, or we found some BUT the max length was 0.
198                 // If we found some mBestSuggestion will not be null. If it is null, then
199                 // we found none, regardless of the max length.
200                 if (null == mBestSuggestion) {
201                     gatheredSuggestions = null;
202                     hasLikelySuggestions = false;
203                 } else {
204                     gatheredSuggestions = EMPTY_STRING_ARRAY;
205                     final double normalizedScore =
206                             Utils.calcNormalizedScore(mOriginalText, mBestSuggestion, mBestScore);
207                     hasLikelySuggestions = (normalizedScore > mLikelyThreshold);
208                 }
209             } else {
210                 if (DBG) {
211                     if (mLength != mSuggestions.size()) {
212                         Log.e(TAG, "Suggestion size is not the same as stored mLength");
213                     }
214                     for (int i = mLength - 1; i >= 0; --i) {
215                         Log.i(TAG, "" + mScores[i] + " " + mSuggestions.get(i));
216                     }
217                 }
218                 Collections.reverse(mSuggestions);
219                 Utils.removeDupes(mSuggestions);
220                 if (CAPITALIZE_ALL == capitalizeType) {
221                     for (int i = 0; i < mSuggestions.size(); ++i) {
222                         // get(i) returns a CharSequence which is actually a String so .toString()
223                         // should return the same object.
224                         mSuggestions.set(i, mSuggestions.get(i).toString().toUpperCase(locale));
225                     }
226                 } else if (CAPITALIZE_FIRST == capitalizeType) {
227                     for (int i = 0; i < mSuggestions.size(); ++i) {
228                         // Likewise
229                         mSuggestions.set(i, Utils.toTitleCase(mSuggestions.get(i).toString(),
230                                 locale));
231                     }
232                 }
233                 // This returns a String[], while toArray() returns an Object[] which cannot be cast
234                 // into a String[].
235                 gatheredSuggestions = mSuggestions.toArray(EMPTY_STRING_ARRAY);
236 
237                 final int bestScore = mScores[mLength - 1];
238                 final CharSequence bestSuggestion = mSuggestions.get(0);
239                 final double normalizedScore =
240                         Utils.calcNormalizedScore(mOriginalText, bestSuggestion, bestScore);
241                 hasLikelySuggestions = (normalizedScore > mLikelyThreshold);
242                 if (DBG) {
243                     Log.i(TAG, "Best suggestion : " + bestSuggestion + ", score " + bestScore);
244                     Log.i(TAG, "Normalized score = " + normalizedScore
245                             + " (threshold " + mLikelyThreshold
246                             + ") => hasLikelySuggestions = " + hasLikelySuggestions);
247                 }
248             }
249             return new Result(gatheredSuggestions, hasLikelySuggestions);
250         }
251     }
252 
253     @Override
onUnbind(final Intent intent)254     public boolean onUnbind(final Intent intent) {
255         final Map<String, DictionaryPool> oldPools = mDictionaryPools;
256         mDictionaryPools = Collections.synchronizedMap(new TreeMap<String, DictionaryPool>());
257         final Map<String, Dictionary> oldUserDictionaries = mUserDictionaries;
258         mUserDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
259         final Map<String, Dictionary> oldWhitelistDictionaries = mWhitelistDictionaries;
260         mWhitelistDictionaries = Collections.synchronizedMap(new TreeMap<String, Dictionary>());
261         for (DictionaryPool pool : oldPools.values()) {
262             pool.close();
263         }
264         for (Dictionary dict : oldUserDictionaries.values()) {
265             dict.close();
266         }
267         for (Dictionary dict : oldWhitelistDictionaries.values()) {
268             dict.close();
269         }
270         return false;
271     }
272 
getDictionaryPool(final String locale)273     private DictionaryPool getDictionaryPool(final String locale) {
274         DictionaryPool pool = mDictionaryPools.get(locale);
275         if (null == pool) {
276             final Locale localeObject = LocaleUtils.constructLocaleFromString(locale);
277             pool = new DictionaryPool(POOL_SIZE, this, localeObject);
278             mDictionaryPools.put(locale, pool);
279         }
280         return pool;
281     }
282 
createDictAndProximity(final Locale locale)283     public DictAndProximity createDictAndProximity(final Locale locale) {
284         final ProximityInfo proximityInfo = ProximityInfo.createSpellCheckerProximityInfo();
285         final Resources resources = getResources();
286         final int fallbackResourceId = Utils.getMainDictionaryResourceId(resources);
287         final DictionaryCollection dictionaryCollection =
288                 DictionaryFactory.createDictionaryFromManager(this, locale, fallbackResourceId,
289                         USE_FULL_EDIT_DISTANCE_FLAG_ARRAY);
290         final String localeStr = locale.toString();
291         Dictionary userDictionary = mUserDictionaries.get(localeStr);
292         if (null == userDictionary) {
293             userDictionary = new SynchronouslyLoadedUserDictionary(this, localeStr, true);
294             mUserDictionaries.put(localeStr, userDictionary);
295         }
296         dictionaryCollection.addDictionary(userDictionary);
297         Dictionary whitelistDictionary = mWhitelistDictionaries.get(localeStr);
298         if (null == whitelistDictionary) {
299             whitelistDictionary = new WhitelistDictionary(this, locale);
300             mWhitelistDictionaries.put(localeStr, whitelistDictionary);
301         }
302         dictionaryCollection.addDictionary(whitelistDictionary);
303         return new DictAndProximity(dictionaryCollection, proximityInfo);
304     }
305 
306     // This method assumes the text is not empty or null.
getCapitalizationType(String text)307     private static int getCapitalizationType(String text) {
308         // If the first char is not uppercase, then the word is either all lower case,
309         // and in either case we return CAPITALIZE_NONE.
310         if (!Character.isUpperCase(text.codePointAt(0))) return CAPITALIZE_NONE;
311         final int len = text.codePointCount(0, text.length());
312         int capsCount = 1;
313         for (int i = 1; i < len; ++i) {
314             if (1 != capsCount && i != capsCount) break;
315             if (Character.isUpperCase(text.codePointAt(i))) ++capsCount;
316         }
317         // We know the first char is upper case. So we want to test if either everything
318         // else is lower case, or if everything else is upper case. If the string is
319         // exactly one char long, then we will arrive here with capsCount 1, and this is
320         // correct, too.
321         if (1 == capsCount) return CAPITALIZE_FIRST;
322         return (len == capsCount ? CAPITALIZE_ALL : CAPITALIZE_NONE);
323     }
324 
325     private static class AndroidSpellCheckerSession extends Session {
326         // Immutable, but need the locale which is not available in the constructor yet
327         private DictionaryPool mDictionaryPool;
328         // Likewise
329         private Locale mLocale;
330 
331         private final AndroidSpellCheckerService mService;
332 
AndroidSpellCheckerSession(final AndroidSpellCheckerService service)333         AndroidSpellCheckerSession(final AndroidSpellCheckerService service) {
334             mService = service;
335         }
336 
337         @Override
onCreate()338         public void onCreate() {
339             final String localeString = getLocale();
340             mDictionaryPool = mService.getDictionaryPool(localeString);
341             mLocale = LocaleUtils.constructLocaleFromString(localeString);
342         }
343 
344         /**
345          * Finds out whether a particular string should be filtered out of spell checking.
346          *
347          * This will loosely match URLs, numbers, symbols.
348          *
349          * @param text the string to evaluate.
350          * @return true if we should filter this text out, false otherwise
351          */
shouldFilterOut(final String text)352         private boolean shouldFilterOut(final String text) {
353             if (TextUtils.isEmpty(text) || text.length() <= 1) return true;
354 
355             // TODO: check if an equivalent processing can't be done more quickly with a
356             // compiled regexp.
357             // Filter by first letter
358             final int firstCodePoint = text.codePointAt(0);
359             // Filter out words that don't start with a letter or an apostrophe
360             if (!Character.isLetter(firstCodePoint)
361                     && '\'' != firstCodePoint) return true;
362 
363             // Filter contents
364             final int length = text.length();
365             int letterCount = 0;
366             for (int i = 0; i < length; ++i) {
367                 final int codePoint = text.codePointAt(i);
368                 // Any word containing a '@' is probably an e-mail address
369                 // Any word containing a '/' is probably either an ad-hoc combination of two
370                 // words or a URI - in either case we don't want to spell check that
371                 if ('@' == codePoint
372                         || '/' == codePoint) return true;
373                 if (Character.isLetter(codePoint)) ++letterCount;
374             }
375             // Guestimate heuristic: perform spell checking if at least 3/4 of the characters
376             // in this word are letters
377             return (letterCount * 4 < length * 3);
378         }
379 
380         // Note : this must be reentrant
381         /**
382          * Gets a list of suggestions for a specific string. This returns a list of possible
383          * corrections for the text passed as an argument. It may split or group words, and
384          * even perform grammatical analysis.
385          */
386         @Override
onGetSuggestions(final TextInfo textInfo, final int suggestionsLimit)387         public SuggestionsInfo onGetSuggestions(final TextInfo textInfo,
388                 final int suggestionsLimit) {
389             try {
390                 final String text = textInfo.getText();
391 
392                 if (shouldFilterOut(text)) {
393                     DictAndProximity dictInfo = null;
394                     try {
395                         dictInfo = mDictionaryPool.takeOrGetNull();
396                         if (null == dictInfo) return NOT_IN_DICT_EMPTY_SUGGESTIONS;
397                         return dictInfo.mDictionary.isValidWord(text) ? IN_DICT_EMPTY_SUGGESTIONS
398                                 : NOT_IN_DICT_EMPTY_SUGGESTIONS;
399                     } finally {
400                         if (null != dictInfo) {
401                             if (!mDictionaryPool.offer(dictInfo)) {
402                                 Log.e(TAG, "Can't re-insert a dictionary into its pool");
403                             }
404                         }
405                     }
406                 }
407 
408                 // TODO: Don't gather suggestions if the limit is <= 0 unless necessary
409                 final SuggestionsGatherer suggestionsGatherer = new SuggestionsGatherer(text,
410                         mService.mSuggestionThreshold, mService.mLikelyThreshold, suggestionsLimit);
411                 final WordComposer composer = new WordComposer();
412                 final int length = text.length();
413                 for (int i = 0; i < length; ++i) {
414                     final int character = text.codePointAt(i);
415                     final int proximityIndex = SpellCheckerProximityInfo.getIndexOf(character);
416                     final int[] proximities;
417                     if (-1 == proximityIndex) {
418                         proximities = new int[] { character };
419                     } else {
420                         proximities = Arrays.copyOfRange(SpellCheckerProximityInfo.PROXIMITY,
421                                 proximityIndex,
422                                 proximityIndex + SpellCheckerProximityInfo.ROW_SIZE);
423                     }
424                     composer.add(character, proximities,
425                             WordComposer.NOT_A_COORDINATE, WordComposer.NOT_A_COORDINATE);
426                 }
427 
428                 final int capitalizeType = getCapitalizationType(text);
429                 boolean isInDict = true;
430                 DictAndProximity dictInfo = null;
431                 try {
432                     dictInfo = mDictionaryPool.takeOrGetNull();
433                     if (null == dictInfo) return NOT_IN_DICT_EMPTY_SUGGESTIONS;
434                     dictInfo.mDictionary.getWords(composer, suggestionsGatherer,
435                             dictInfo.mProximityInfo);
436                     isInDict = dictInfo.mDictionary.isValidWord(text);
437                     if (!isInDict && CAPITALIZE_NONE != capitalizeType) {
438                         // We want to test the word again if it's all caps or first caps only.
439                         // If it's fully down, we already tested it, if it's mixed case, we don't
440                         // want to test a lowercase version of it.
441                         isInDict = dictInfo.mDictionary.isValidWord(text.toLowerCase(mLocale));
442                     }
443                 } finally {
444                     if (null != dictInfo) {
445                         if (!mDictionaryPool.offer(dictInfo)) {
446                             Log.e(TAG, "Can't re-insert a dictionary into its pool");
447                         }
448                     }
449                 }
450 
451                 final SuggestionsGatherer.Result result = suggestionsGatherer.getResults(
452                         capitalizeType, mLocale);
453 
454                 if (DBG) {
455                     Log.i(TAG, "Spell checking results for " + text + " with suggestion limit "
456                             + suggestionsLimit);
457                     Log.i(TAG, "IsInDict = " + isInDict);
458                     Log.i(TAG, "LooksLikeTypo = " + (!isInDict));
459                     Log.i(TAG, "HasLikelySuggestions = " + result.mHasLikelySuggestions);
460                     if (null != result.mSuggestions) {
461                         for (String suggestion : result.mSuggestions) {
462                             Log.i(TAG, suggestion);
463                         }
464                     }
465                 }
466 
467                 // TODO: actually use result.mHasLikelySuggestions
468                 final int flags =
469                         (isInDict ? SuggestionsInfo.RESULT_ATTR_IN_THE_DICTIONARY
470                                 : SuggestionsInfo.RESULT_ATTR_LOOKS_LIKE_TYPO);
471                 return new SuggestionsInfo(flags, result.mSuggestions);
472             } catch (RuntimeException e) {
473                 // Don't kill the keyboard if there is a bug in the spell checker
474                 if (DBG) {
475                     throw e;
476                 } else {
477                     Log.e(TAG, "Exception while spellcheking: " + e);
478                     return NOT_IN_DICT_EMPTY_SUGGESTIONS;
479                 }
480             }
481         }
482     }
483 }
484