• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5  * use this file except in compliance with the License. You may obtain a copy of
6  * the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13  * License for the specific language governing permissions and limitations under
14  * the License.
15  */
16 
17 package com.android.inputmethod.latin;
18 
19 import android.content.Context;
20 import android.content.SharedPreferences;
21 import android.content.pm.PackageManager.NameNotFoundException;
22 import android.content.res.AssetFileDescriptor;
23 import android.content.res.Resources;
24 import android.util.Log;
25 
26 import java.io.File;
27 import java.util.ArrayList;
28 import java.util.List;
29 import java.util.Locale;
30 
31 /**
32  * Helper class to get the address of a mmap'able dictionary file.
33  */
34 class BinaryDictionaryGetter {
35 
    /**
     * Tag used for the Log calls made from this class.
     */
    private static final String TAG = BinaryDictionaryGetter.class.getSimpleName();

    /**
     * Shared zero-length array, returned when there are no cached files to report.
     */
    private static final File[] EMPTY_FILE_ARRAY = new File[0];

    /**
     * Name of the shared preferences file that records which word lists are on and which
     * are off.
     */
    private static final String COMMON_PREFERENCES_NAME = "LatinImeDictPrefs";

    // Prevents this from being instantiated: the class only offers static helpers.
    private BinaryDictionaryGetter() {}
53 
54     /**
55      * Returns whether we may want to use this character as part of a file name.
56      *
57      * This basically only accepts ascii letters and numbers, and rejects everything else.
58      */
isFileNameCharacter(int codePoint)59     private static boolean isFileNameCharacter(int codePoint) {
60         if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit
61         if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase
62         if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase
63         return codePoint == '_'; // Underscore
64     }
65 
66     /**
67      * Escapes a string for any characters that may be suspicious for a file or directory name.
68      *
69      * Concretely this does a sort of URL-encoding except it will encode everything that's not
70      * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which
71      * we cannot allow here)
72      */
73     // TODO: create a unit test for this method
replaceFileNameDangerousCharacters(final String name)74     private static String replaceFileNameDangerousCharacters(final String name) {
75         // This assumes '%' is fully available as a non-separator, normal
76         // character in a file name. This is probably true for all file systems.
77         final StringBuilder sb = new StringBuilder();
78         for (int i = 0; i < name.length(); ++i) {
79             final int codePoint = name.codePointAt(i);
80             if (isFileNameCharacter(codePoint)) {
81                 sb.appendCodePoint(codePoint);
82             } else {
83                 // 6 digits - unicode is limited to 21 bits
84                 sb.append(String.format((Locale)null, "%%%1$06x", codePoint));
85             }
86         }
87         return sb.toString();
88     }
89 
90     /**
91      * Reverse escaping done by replaceFileNameDangerousCharacters.
92      */
getWordListIdFromFileName(final String fname)93     private static String getWordListIdFromFileName(final String fname) {
94         final StringBuilder sb = new StringBuilder();
95         for (int i = 0; i < fname.length(); ++i) {
96             final int codePoint = fname.codePointAt(i);
97             if ('%' != codePoint) {
98                 sb.appendCodePoint(codePoint);
99             } else {
100                 final int encodedCodePoint = Integer.parseInt(fname.substring(i + 1, i + 7), 16);
101                 i += 6;
102                 sb.appendCodePoint(encodedCodePoint);
103             }
104         }
105         return sb.toString();
106     }
107 
108     /**
109      * Helper method to get the top level cache directory.
110      */
getWordListCacheDirectory(final Context context)111     private static String getWordListCacheDirectory(final Context context) {
112         return context.getFilesDir() + File.separator + "dicts";
113     }
114 
115     /**
116      * Find out the cache directory associated with a specific locale.
117      */
getCacheDirectoryForLocale(final String locale, final Context context)118     private static String getCacheDirectoryForLocale(final String locale, final Context context) {
119         final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale);
120         final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator
121                 + relativeDirectoryName;
122         final File directory = new File(absoluteDirectoryName);
123         if (!directory.exists()) {
124             if (!directory.mkdirs()) {
125                 Log.e(TAG, "Could not create the directory for locale" + locale);
126             }
127         }
128         return absoluteDirectoryName;
129     }
130 
131     /**
132      * Generates a file name for the id and locale passed as an argument.
133      *
134      * In the current implementation the file name returned will always be unique for
135      * any id/locale pair, but please do not expect that the id can be the same for
136      * different dictionaries with different locales. An id should be unique for any
137      * dictionary.
138      * The file name is pretty much an URL-encoded version of the id inside a directory
139      * named like the locale, except it will also escape characters that look dangerous
140      * to some file systems.
141      * @param id the id of the dictionary for which to get a file name
142      * @param locale the locale for which to get the file name as a string
143      * @param context the context to use for getting the directory
144      * @return the name of the file to be created
145      */
getCacheFileName(String id, String locale, Context context)146     public static String getCacheFileName(String id, String locale, Context context) {
147         final String fileName = replaceFileNameDangerousCharacters(id);
148         return getCacheDirectoryForLocale(locale, context) + File.separator + fileName;
149     }
150 
151     /**
152      * Returns a file address from a resource, or null if it cannot be opened.
153      */
loadFallbackResource(final Context context, final int fallbackResId, final Locale locale)154     private static AssetFileAddress loadFallbackResource(final Context context,
155             final int fallbackResId, final Locale locale) {
156         final Resources res = context.getResources();
157         final Locale savedLocale = LocaleUtils.setSystemLocale(res, locale);
158         final AssetFileDescriptor afd = res.openRawResourceFd(fallbackResId);
159         LocaleUtils.setSystemLocale(res, savedLocale);
160 
161         if (afd == null) {
162             Log.e(TAG, "Found the resource but cannot read it. Is it compressed? resId="
163                     + fallbackResId);
164             return null;
165         }
166         return AssetFileAddress.makeFromFileNameAndOffset(
167                 context.getApplicationInfo().sourceDir, afd.getStartOffset(), afd.getLength());
168     }
169 
170     static private class DictPackSettings {
171         final SharedPreferences mDictPreferences;
DictPackSettings(final Context context)172         public DictPackSettings(final Context context) {
173             Context dictPackContext = null;
174             try {
175                 final String dictPackName =
176                         context.getString(R.string.dictionary_pack_package_name);
177                 dictPackContext = context.createPackageContext(dictPackName, 0);
178             } catch (NameNotFoundException e) {
179                 // The dictionary pack is not installed...
180                 // TODO: fallback on the built-in dict, see the TODO above
181                 Log.e(TAG, "Could not find a dictionary pack");
182             }
183             mDictPreferences = null == dictPackContext ? null
184                     : dictPackContext.getSharedPreferences(COMMON_PREFERENCES_NAME,
185                             Context.MODE_WORLD_READABLE | Context.MODE_MULTI_PROCESS);
186         }
isWordListActive(final String dictId)187         public boolean isWordListActive(final String dictId) {
188             if (null == mDictPreferences) {
189                 // If we don't have preferences it basically means we can't find the dictionary
190                 // pack - either it's not installed, or it's disabled, or there is some strange
191                 // bug. Either way, a word list with no settings should be on by default: default
192                 // dictionaries in LatinIME are on if there is no settings at all, and if for some
193                 // reason some dictionaries have been installed BUT the dictionary pack can't be
194                 // found anymore it's safer to actually supply installed dictionaries.
195                 return true;
196             } else {
197                 // The default is true here for the same reasons as above. We got the dictionary
198                 // pack but if we don't have any settings for it it means the user has never been
199                 // to the settings yet. So by default, the main dictionaries should be on.
200                 return mDictPreferences.getBoolean(dictId, true);
201             }
202         }
203     }
204 
205     /**
206      * Helper method to the list of cache directories, one for each distinct locale.
207      */
getCachedDirectoryList(final Context context)208     private static File[] getCachedDirectoryList(final Context context) {
209         return new File(getWordListCacheDirectory(context)).listFiles();
210     }
211 
212     /**
213      * Returns the list of cached files for a specific locale.
214      *
215      * @param locale the locale to find the dictionary files for, as a string.
216      * @param context the context on which to open the files upon.
217      * @return an array of binary dictionary files, which may be empty but may not be null.
218      */
getCachedWordLists(final String locale, final Context context)219     private static File[] getCachedWordLists(final String locale,
220             final Context context) {
221         final File[] directoryList = getCachedDirectoryList(context);
222         if (null == directoryList) return EMPTY_FILE_ARRAY;
223         final ArrayList<File> cacheFiles = new ArrayList<File>();
224         for (File directory : directoryList) {
225             if (!directory.isDirectory()) continue;
226             final String dirLocale = getWordListIdFromFileName(directory.getName());
227             if (LocaleUtils.isMatch(LocaleUtils.getMatchLevel(dirLocale, locale))) {
228                 final File[] wordLists = directory.listFiles();
229                 if (null != wordLists) {
230                     for (File wordList : wordLists) {
231                         cacheFiles.add(wordList);
232                     }
233                 }
234             }
235         }
236         if (cacheFiles.isEmpty()) return EMPTY_FILE_ARRAY;
237         return cacheFiles.toArray(EMPTY_FILE_ARRAY);
238     }
239 
240     /**
241      * Returns the id associated with the main word list for a specified locale.
242      *
243      * Word lists stored in Android Keyboard's resources are referred to as the "main"
244      * word lists. Since they can be updated like any other list, we need to assign a
245      * unique ID to them. This ID is just the name of the language (locale-wise) they
246      * are for, and this method returns this ID.
247      */
getMainDictId(final Locale locale)248     private static String getMainDictId(final Locale locale) {
249         // This works because we don't include by default different dictionaries for
250         // different countries. This actually needs to return the id that we would
251         // like to use for word lists included in resources, and the following is okay.
252         return locale.getLanguage().toString();
253     }
254 
255     /**
256      * Returns a list of file addresses for a given locale, trying relevant methods in order.
257      *
258      * Tries to get binary dictionaries from various sources, in order:
259      * - Uses a content provider to get a public dictionary set, as per the protocol described
260      *   in BinaryDictionaryFileDumper.
261      * If that fails:
262      * - Gets a file name from the fallback resource passed as an argument.
263      * If that fails:
264      * - Returns null.
265      * @return The address of a valid file, or null.
266      */
getDictionaryFiles(final Locale locale, final Context context, final int fallbackResId)267     public static List<AssetFileAddress> getDictionaryFiles(final Locale locale,
268             final Context context, final int fallbackResId) {
269 
270         // cacheWordListsFromContentProvider returns the list of files it copied to local
271         // storage, but we don't really care about what was copied NOW: what we want is the
272         // list of everything we ever cached, so we ignore the return value.
273         BinaryDictionaryFileDumper.cacheWordListsFromContentProvider(locale, context);
274         final File[] cachedWordLists = getCachedWordLists(locale.toString(), context);
275 
276         final String mainDictId = getMainDictId(locale);
277 
278         final DictPackSettings dictPackSettings = new DictPackSettings(context);
279 
280         boolean foundMainDict = false;
281         final ArrayList<AssetFileAddress> fileList = new ArrayList<AssetFileAddress>();
282         // cachedWordLists may not be null, see doc for getCachedDictionaryList
283         for (final File f : cachedWordLists) {
284             final String wordListId = getWordListIdFromFileName(f.getName());
285             if (wordListId.equals(mainDictId)) {
286                 foundMainDict = true;
287             }
288             if (!dictPackSettings.isWordListActive(wordListId)) continue;
289             if (f.canRead()) {
290                 fileList.add(AssetFileAddress.makeFromFileName(f.getPath()));
291             } else {
292                 Log.e(TAG, "Found a cached dictionary file but cannot read it");
293             }
294         }
295 
296         if (!foundMainDict && dictPackSettings.isWordListActive(mainDictId)) {
297             final AssetFileAddress fallbackAsset = loadFallbackResource(context, fallbackResId,
298                     locale);
299             if (null != fallbackAsset) {
300                 fileList.add(fallbackAsset);
301             }
302         }
303 
304         return fileList;
305     }
306 }
307