• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2008 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5  * use this file except in compliance with the License. You may obtain a copy of
6  * the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13  * License for the specific language governing permissions and limitations under
14  * the License.
15  */
16 
17 package com.android.inputmethod.latin;
18 
19 import android.content.Context;
20 import android.text.TextUtils;
21 
22 import com.android.inputmethod.keyboard.Keyboard;
23 import com.android.inputmethod.keyboard.ProximityInfo;
24 import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
25 
26 import java.io.File;
27 import java.util.ArrayList;
28 import java.util.Comparator;
29 import java.util.HashSet;
30 import java.util.Locale;
31 import java.util.concurrent.ConcurrentHashMap;
32 
33 /**
34  * This class loads a dictionary and provides a list of suggestions for a given sequence of
35  * characters. This includes corrections and completions.
36  */
37 public final class Suggest {
38     public static final String TAG = Suggest.class.getSimpleName();
39 
40     // Session id for
41     // {@link #getSuggestedWords(WordComposer,CharSequence,ProximityInfo,boolean,int)}.
42     public static final int SESSION_TYPING = 0;
43     public static final int SESSION_GESTURE = 1;
44 
45     // TODO: rename this to CORRECTION_OFF
46     public static final int CORRECTION_NONE = 0;
47     // TODO: rename this to CORRECTION_ON
48     public static final int CORRECTION_FULL = 1;
49 
50     public interface SuggestInitializationListener {
onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable)51         public void onUpdateMainDictionaryAvailability(boolean isMainDictionaryAvailable);
52     }
53 
54     private static final boolean DBG = LatinImeLogger.sDBG;
55 
56     private Dictionary mMainDictionary;
57     private ContactsBinaryDictionary mContactsDict;
58     private final ConcurrentHashMap<String, Dictionary> mDictionaries =
59             CollectionUtils.newConcurrentHashMap();
60 
61     public static final int MAX_SUGGESTIONS = 18;
62 
63     private float mAutoCorrectionThreshold;
64 
65     // Locale used for upper- and title-casing words
66     private final Locale mLocale;
67 
Suggest(final Context context, final Locale locale, final SuggestInitializationListener listener)68     public Suggest(final Context context, final Locale locale,
69             final SuggestInitializationListener listener) {
70         initAsynchronously(context, locale, listener);
71         mLocale = locale;
72     }
73 
Suggest(final Context context, final File dictionary, final long startOffset, final long length, final Locale locale)74     /* package for test */ Suggest(final Context context, final File dictionary,
75             final long startOffset, final long length, final Locale locale) {
76         final Dictionary mainDict = DictionaryFactory.createDictionaryForTest(context, dictionary,
77                 startOffset, length /* useFullEditDistance */, false, locale);
78         mLocale = locale;
79         mMainDictionary = mainDict;
80         addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, mainDict);
81     }
82 
initAsynchronously(final Context context, final Locale locale, final SuggestInitializationListener listener)83     private void initAsynchronously(final Context context, final Locale locale,
84             final SuggestInitializationListener listener) {
85         resetMainDict(context, locale, listener);
86     }
87 
addOrReplaceDictionary( final ConcurrentHashMap<String, Dictionary> dictionaries, final String key, final Dictionary dict)88     private static void addOrReplaceDictionary(
89             final ConcurrentHashMap<String, Dictionary> dictionaries,
90             final String key, final Dictionary dict) {
91         final Dictionary oldDict = (dict == null)
92                 ? dictionaries.remove(key)
93                 : dictionaries.put(key, dict);
94         if (oldDict != null && dict != oldDict) {
95             oldDict.close();
96         }
97     }
98 
resetMainDict(final Context context, final Locale locale, final SuggestInitializationListener listener)99     public void resetMainDict(final Context context, final Locale locale,
100             final SuggestInitializationListener listener) {
101         mMainDictionary = null;
102         if (listener != null) {
103             listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
104         }
105         new Thread("InitializeBinaryDictionary") {
106             @Override
107             public void run() {
108                 final DictionaryCollection newMainDict =
109                         DictionaryFactory.createMainDictionaryFromManager(context, locale);
110                 addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_MAIN, newMainDict);
111                 mMainDictionary = newMainDict;
112                 if (listener != null) {
113                     listener.onUpdateMainDictionaryAvailability(hasMainDictionary());
114                 }
115             }
116         }.start();
117     }
118 
119     // The main dictionary could have been loaded asynchronously.  Don't cache the return value
120     // of this method.
hasMainDictionary()121     public boolean hasMainDictionary() {
122         return null != mMainDictionary && mMainDictionary.isInitialized();
123     }
124 
getMainDictionary()125     public Dictionary getMainDictionary() {
126         return mMainDictionary;
127     }
128 
getContactsDictionary()129     public ContactsBinaryDictionary getContactsDictionary() {
130         return mContactsDict;
131     }
132 
getUnigramDictionaries()133     public ConcurrentHashMap<String, Dictionary> getUnigramDictionaries() {
134         return mDictionaries;
135     }
136 
137     /**
138      * Sets an optional user dictionary resource to be loaded. The user dictionary is consulted
139      * before the main dictionary, if set. This refers to the system-managed user dictionary.
140      */
setUserDictionary(UserBinaryDictionary userDictionary)141     public void setUserDictionary(UserBinaryDictionary userDictionary) {
142         addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER, userDictionary);
143     }
144 
145     /**
146      * Sets an optional contacts dictionary resource to be loaded. It is also possible to remove
147      * the contacts dictionary by passing null to this method. In this case no contacts dictionary
148      * won't be used.
149      */
setContactsDictionary(ContactsBinaryDictionary contactsDictionary)150     public void setContactsDictionary(ContactsBinaryDictionary contactsDictionary) {
151         mContactsDict = contactsDictionary;
152         addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_CONTACTS, contactsDictionary);
153     }
154 
setUserHistoryDictionary(UserHistoryDictionary userHistoryDictionary)155     public void setUserHistoryDictionary(UserHistoryDictionary userHistoryDictionary) {
156         addOrReplaceDictionary(mDictionaries, Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
157     }
158 
setAutoCorrectionThreshold(float threshold)159     public void setAutoCorrectionThreshold(float threshold) {
160         mAutoCorrectionThreshold = threshold;
161     }
162 
getSuggestedWords( final WordComposer wordComposer, CharSequence prevWordForBigram, final ProximityInfo proximityInfo, final boolean isCorrectionEnabled, int sessionId)163     public SuggestedWords getSuggestedWords(
164             final WordComposer wordComposer, CharSequence prevWordForBigram,
165             final ProximityInfo proximityInfo, final boolean isCorrectionEnabled, int sessionId) {
166         LatinImeLogger.onStartSuggestion(prevWordForBigram);
167         if (wordComposer.isBatchMode()) {
168             return getSuggestedWordsForBatchInput(
169                     wordComposer, prevWordForBigram, proximityInfo, sessionId);
170         } else {
171             return getSuggestedWordsForTypingInput(wordComposer, prevWordForBigram, proximityInfo,
172                     isCorrectionEnabled);
173         }
174     }
175 
176     // Retrieves suggestions for the typing input.
getSuggestedWordsForTypingInput( final WordComposer wordComposer, CharSequence prevWordForBigram, final ProximityInfo proximityInfo, final boolean isCorrectionEnabled)177     private SuggestedWords getSuggestedWordsForTypingInput(
178             final WordComposer wordComposer, CharSequence prevWordForBigram,
179             final ProximityInfo proximityInfo, final boolean isCorrectionEnabled) {
180         final int trailingSingleQuotesCount = wordComposer.trailingSingleQuotesCount();
181         final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
182                 MAX_SUGGESTIONS);
183 
184         final String typedWord = wordComposer.getTypedWord();
185         final String consideredWord = trailingSingleQuotesCount > 0
186                 ? typedWord.substring(0, typedWord.length() - trailingSingleQuotesCount)
187                 : typedWord;
188         LatinImeLogger.onAddSuggestedWord(typedWord, Dictionary.TYPE_USER_TYPED);
189 
190         final WordComposer wordComposerForLookup;
191         if (trailingSingleQuotesCount > 0) {
192             wordComposerForLookup = new WordComposer(wordComposer);
193             for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
194                 wordComposerForLookup.deleteLast();
195             }
196         } else {
197             wordComposerForLookup = wordComposer;
198         }
199 
200         for (final String key : mDictionaries.keySet()) {
201             final Dictionary dictionary = mDictionaries.get(key);
202             suggestionsSet.addAll(dictionary.getSuggestions(
203                     wordComposerForLookup, prevWordForBigram, proximityInfo));
204         }
205 
206         final CharSequence whitelistedWord;
207         if (suggestionsSet.isEmpty()) {
208             whitelistedWord = null;
209         } else if (SuggestedWordInfo.KIND_WHITELIST != suggestionsSet.first().mKind) {
210             whitelistedWord = null;
211         } else {
212             whitelistedWord = suggestionsSet.first().mWord;
213         }
214 
215         // The word can be auto-corrected if it has a whitelist entry that is not itself,
216         // or if it's a 2+ characters non-word (i.e. it's not in the dictionary).
217         final boolean allowsToBeAutoCorrected = (null != whitelistedWord
218                 && !whitelistedWord.equals(consideredWord))
219                 || (consideredWord.length() > 1 && !AutoCorrection.isInTheDictionary(mDictionaries,
220                         consideredWord, wordComposer.isFirstCharCapitalized()));
221 
222         final boolean hasAutoCorrection;
223         // TODO: using isCorrectionEnabled here is not very good. It's probably useless, because
224         // any attempt to do auto-correction is already shielded with a test for this flag; at the
225         // same time, it feels wrong that the SuggestedWord object includes information about
226         // the current settings. It may also be useful to know, when the setting is off, whether
227         // the word *would* have been auto-corrected.
228         if (!isCorrectionEnabled || !allowsToBeAutoCorrected || !wordComposer.isComposingWord()
229                 || suggestionsSet.isEmpty() || wordComposer.hasDigits()
230                 || wordComposer.isMostlyCaps() || wordComposer.isResumed()
231                 || !hasMainDictionary()) {
232             // If we don't have a main dictionary, we never want to auto-correct. The reason for
233             // this is, the user may have a contact whose name happens to match a valid word in
234             // their language, and it will unexpectedly auto-correct. For example, if the user
235             // types in English with no dictionary and has a "Will" in their contact list, "will"
236             // would always auto-correct to "Will" which is unwanted. Hence, no main dict => no
237             // auto-correct.
238             hasAutoCorrection = false;
239         } else {
240             hasAutoCorrection = AutoCorrection.suggestionExceedsAutoCorrectionThreshold(
241                     suggestionsSet.first(), consideredWord, mAutoCorrectionThreshold);
242         }
243 
244         final ArrayList<SuggestedWordInfo> suggestionsContainer =
245                 CollectionUtils.newArrayList(suggestionsSet);
246         final int suggestionsCount = suggestionsContainer.size();
247         final boolean isFirstCharCapitalized = wordComposer.isFirstCharCapitalized();
248         final boolean isAllUpperCase = wordComposer.isAllUpperCase();
249         if (isFirstCharCapitalized || isAllUpperCase || 0 != trailingSingleQuotesCount) {
250             for (int i = 0; i < suggestionsCount; ++i) {
251                 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
252                 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
253                         wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
254                         trailingSingleQuotesCount);
255                 suggestionsContainer.set(i, transformedWordInfo);
256             }
257         }
258 
259         for (int i = 0; i < suggestionsCount; ++i) {
260             final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
261             LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict);
262         }
263 
264         if (!TextUtils.isEmpty(typedWord)) {
265             suggestionsContainer.add(0, new SuggestedWordInfo(typedWord,
266                     SuggestedWordInfo.MAX_SCORE, SuggestedWordInfo.KIND_TYPED,
267                     Dictionary.TYPE_USER_TYPED));
268         }
269         SuggestedWordInfo.removeDups(suggestionsContainer);
270 
271         final ArrayList<SuggestedWordInfo> suggestionsList;
272         if (DBG && !suggestionsContainer.isEmpty()) {
273             suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWord, suggestionsContainer);
274         } else {
275             suggestionsList = suggestionsContainer;
276         }
277 
278         return new SuggestedWords(suggestionsList,
279                 // TODO: this first argument is lying. If this is a whitelisted word which is an
280                 // actual word, it says typedWordValid = false, which looks wrong. We should either
281                 // rename the attribute or change the value.
282                 !allowsToBeAutoCorrected /* typedWordValid */,
283                 hasAutoCorrection, /* willAutoCorrect */
284                 false /* isPunctuationSuggestions */,
285                 false /* isObsoleteSuggestions */,
286                 !wordComposer.isComposingWord() /* isPrediction */);
287     }
288 
289     // Retrieves suggestions for the batch input.
getSuggestedWordsForBatchInput( final WordComposer wordComposer, CharSequence prevWordForBigram, final ProximityInfo proximityInfo, int sessionId)290     private SuggestedWords getSuggestedWordsForBatchInput(
291             final WordComposer wordComposer, CharSequence prevWordForBigram,
292             final ProximityInfo proximityInfo, int sessionId) {
293         final BoundedTreeSet suggestionsSet = new BoundedTreeSet(sSuggestedWordInfoComparator,
294                 MAX_SUGGESTIONS);
295 
296         // At second character typed, search the unigrams (scores being affected by bigrams)
297         for (final String key : mDictionaries.keySet()) {
298             // Skip User history dictionary for lookup
299             // TODO: The user history dictionary should just override getSuggestionsWithSessionId
300             // to make sure it doesn't return anything and we should remove this test
301             if (key.equals(Dictionary.TYPE_USER_HISTORY)) {
302                 continue;
303             }
304             final Dictionary dictionary = mDictionaries.get(key);
305             suggestionsSet.addAll(dictionary.getSuggestionsWithSessionId(
306                     wordComposer, prevWordForBigram, proximityInfo, sessionId));
307         }
308 
309         for (SuggestedWordInfo wordInfo : suggestionsSet) {
310             LatinImeLogger.onAddSuggestedWord(wordInfo.mWord.toString(), wordInfo.mSourceDict);
311         }
312 
313         final ArrayList<SuggestedWordInfo> suggestionsContainer =
314                 CollectionUtils.newArrayList(suggestionsSet);
315         final int suggestionsCount = suggestionsContainer.size();
316         final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock();
317         final boolean isAllUpperCase = wordComposer.isAllUpperCase();
318         if (isFirstCharCapitalized || isAllUpperCase) {
319             for (int i = 0; i < suggestionsCount; ++i) {
320                 final SuggestedWordInfo wordInfo = suggestionsContainer.get(i);
321                 final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo(
322                         wordInfo, mLocale, isAllUpperCase, isFirstCharCapitalized,
323                         0 /* trailingSingleQuotesCount */);
324                 suggestionsContainer.set(i, transformedWordInfo);
325             }
326         }
327 
328         SuggestedWordInfo.removeDups(suggestionsContainer);
329         // In the batch input mode, the most relevant suggested word should act as a "typed word"
330         // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
331         return new SuggestedWords(suggestionsContainer,
332                 true /* typedWordValid */,
333                 false /* willAutoCorrect */,
334                 false /* isPunctuationSuggestions */,
335                 false /* isObsoleteSuggestions */,
336                 false /* isPrediction */);
337     }
338 
getSuggestionsInfoListWithDebugInfo( final String typedWord, final ArrayList<SuggestedWordInfo> suggestions)339     private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
340             final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
341         final SuggestedWordInfo typedWordInfo = suggestions.get(0);
342         typedWordInfo.setDebugString("+");
343         final int suggestionsSize = suggestions.size();
344         final ArrayList<SuggestedWordInfo> suggestionsList =
345                 CollectionUtils.newArrayList(suggestionsSize);
346         suggestionsList.add(typedWordInfo);
347         // Note: i here is the index in mScores[], but the index in mSuggestions is one more
348         // than i because we added the typed word to mSuggestions without touching mScores.
349         for (int i = 0; i < suggestionsSize - 1; ++i) {
350             final SuggestedWordInfo cur = suggestions.get(i + 1);
351             final float normalizedScore = BinaryDictionary.calcNormalizedScore(
352                     typedWord, cur.toString(), cur.mScore);
353             final String scoreInfoString;
354             if (normalizedScore > 0) {
355                 scoreInfoString = String.format("%d (%4.2f)", cur.mScore, normalizedScore);
356             } else {
357                 scoreInfoString = Integer.toString(cur.mScore);
358             }
359             cur.setDebugString(scoreInfoString);
360             suggestionsList.add(cur);
361         }
362         return suggestionsList;
363     }
364 
365     private static final class SuggestedWordInfoComparator
366             implements Comparator<SuggestedWordInfo> {
367         // This comparator ranks the word info with the higher frequency first. That's because
368         // that's the order we want our elements in.
369         @Override
compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2)370         public int compare(final SuggestedWordInfo o1, final SuggestedWordInfo o2) {
371             if (o1.mScore > o2.mScore) return -1;
372             if (o1.mScore < o2.mScore) return 1;
373             if (o1.mCodePointCount < o2.mCodePointCount) return -1;
374             if (o1.mCodePointCount > o2.mCodePointCount) return 1;
375             return o1.mWord.toString().compareTo(o2.mWord.toString());
376         }
377     }
378     private static final SuggestedWordInfoComparator sSuggestedWordInfoComparator =
379             new SuggestedWordInfoComparator();
380 
getTransformedSuggestedWordInfo( final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase, final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount)381     private static SuggestedWordInfo getTransformedSuggestedWordInfo(
382             final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
383             final boolean isFirstCharCapitalized, final int trailingSingleQuotesCount) {
384         final StringBuilder sb = new StringBuilder(wordInfo.mWord.length());
385         if (isAllUpperCase) {
386             sb.append(wordInfo.mWord.toString().toUpperCase(locale));
387         } else if (isFirstCharCapitalized) {
388             sb.append(StringUtils.toTitleCase(wordInfo.mWord.toString(), locale));
389         } else {
390             sb.append(wordInfo.mWord);
391         }
392         for (int i = trailingSingleQuotesCount - 1; i >= 0; --i) {
393             sb.appendCodePoint(Keyboard.CODE_SINGLE_QUOTE);
394         }
395         return new SuggestedWordInfo(sb, wordInfo.mScore, wordInfo.mKind, wordInfo.mSourceDict);
396     }
397 
close()398     public void close() {
399         final HashSet<Dictionary> dictionaries = CollectionUtils.newHashSet();
400         dictionaries.addAll(mDictionaries.values());
401         for (final Dictionary dictionary : dictionaries) {
402             dictionary.close();
403         }
404         mMainDictionary = null;
405     }
406 }
407