• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5  * use this file except in compliance with the License. You may obtain a copy of
6  * the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13  * License for the specific language governing permissions and limitations under
14  * the License.
15  */
16 
17 package com.android.inputmethod.latin;
18 
19 import android.content.Context;
20 import android.content.SharedPreferences;
21 import android.content.pm.PackageManager.NameNotFoundException;
22 import android.content.res.AssetFileDescriptor;
23 import android.util.Log;
24 
25 import java.io.File;
26 import java.util.ArrayList;
27 import java.util.HashMap;
28 import java.util.Locale;
29 
30 /**
31  * Helper class to get the address of a mmap'able dictionary file.
32  */
33 class BinaryDictionaryGetter {
34 
35     /**
36      * Used for Log actions from this class
37      */
38     private static final String TAG = BinaryDictionaryGetter.class.getSimpleName();
39 
40     /**
41      * Used to return empty lists
42      */
43     private static final File[] EMPTY_FILE_ARRAY = new File[0];
44 
45     /**
46      * Name of the common preferences name to know which word list are on and which are off.
47      */
48     private static final String COMMON_PREFERENCES_NAME = "LatinImeDictPrefs";
49 
50     // Name of the category for the main dictionary
51     private static final String MAIN_DICTIONARY_CATEGORY = "main";
52     public static final String ID_CATEGORY_SEPARATOR = ":";
53 
54     // Prevents this from being instantiated
BinaryDictionaryGetter()55     private BinaryDictionaryGetter() {}
56 
57     /**
58      * Returns whether we may want to use this character as part of a file name.
59      *
60      * This basically only accepts ascii letters and numbers, and rejects everything else.
61      */
isFileNameCharacter(int codePoint)62     private static boolean isFileNameCharacter(int codePoint) {
63         if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit
64         if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase
65         if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase
66         return codePoint == '_'; // Underscore
67     }
68 
69     /**
70      * Escapes a string for any characters that may be suspicious for a file or directory name.
71      *
72      * Concretely this does a sort of URL-encoding except it will encode everything that's not
73      * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which
74      * we cannot allow here)
75      */
76     // TODO: create a unit test for this method
replaceFileNameDangerousCharacters(final String name)77     private static String replaceFileNameDangerousCharacters(final String name) {
78         // This assumes '%' is fully available as a non-separator, normal
79         // character in a file name. This is probably true for all file systems.
80         final StringBuilder sb = new StringBuilder();
81         final int nameLength = name.length();
82         for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) {
83             final int codePoint = name.codePointAt(i);
84             if (isFileNameCharacter(codePoint)) {
85                 sb.appendCodePoint(codePoint);
86             } else {
87                 // 6 digits - unicode is limited to 21 bits
88                 sb.append(String.format((Locale)null, "%%%1$06x", codePoint));
89             }
90         }
91         return sb.toString();
92     }
93 
94     /**
95      * Reverse escaping done by replaceFileNameDangerousCharacters.
96      */
getWordListIdFromFileName(final String fname)97     private static String getWordListIdFromFileName(final String fname) {
98         final StringBuilder sb = new StringBuilder();
99         final int fnameLength = fname.length();
100         for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) {
101             final int codePoint = fname.codePointAt(i);
102             if ('%' != codePoint) {
103                 sb.appendCodePoint(codePoint);
104             } else {
105                 final int encodedCodePoint = Integer.parseInt(fname.substring(i + 1, i + 7), 16);
106                 i += 6;
107                 sb.appendCodePoint(encodedCodePoint);
108             }
109         }
110         return sb.toString();
111     }
112 
113     /**
114      * Helper method to get the top level cache directory.
115      */
getWordListCacheDirectory(final Context context)116     private static String getWordListCacheDirectory(final Context context) {
117         return context.getFilesDir() + File.separator + "dicts";
118     }
119 
120     /**
121      * Find out the cache directory associated with a specific locale.
122      */
getCacheDirectoryForLocale(final String locale, final Context context)123     private static String getCacheDirectoryForLocale(final String locale, final Context context) {
124         final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale);
125         final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator
126                 + relativeDirectoryName;
127         final File directory = new File(absoluteDirectoryName);
128         if (!directory.exists()) {
129             if (!directory.mkdirs()) {
130                 Log.e(TAG, "Could not create the directory for locale" + locale);
131             }
132         }
133         return absoluteDirectoryName;
134     }
135 
136     /**
137      * Generates a file name for the id and locale passed as an argument.
138      *
139      * In the current implementation the file name returned will always be unique for
140      * any id/locale pair, but please do not expect that the id can be the same for
141      * different dictionaries with different locales. An id should be unique for any
142      * dictionary.
143      * The file name is pretty much an URL-encoded version of the id inside a directory
144      * named like the locale, except it will also escape characters that look dangerous
145      * to some file systems.
146      * @param id the id of the dictionary for which to get a file name
147      * @param locale the locale for which to get the file name as a string
148      * @param context the context to use for getting the directory
149      * @return the name of the file to be created
150      */
getCacheFileName(String id, String locale, Context context)151     public static String getCacheFileName(String id, String locale, Context context) {
152         final String fileName = replaceFileNameDangerousCharacters(id);
153         return getCacheDirectoryForLocale(locale, context) + File.separator + fileName;
154     }
155 
156     /**
157      * Returns a file address from a resource, or null if it cannot be opened.
158      */
loadFallbackResource(final Context context, final int fallbackResId)159     private static AssetFileAddress loadFallbackResource(final Context context,
160             final int fallbackResId) {
161         final AssetFileDescriptor afd = context.getResources().openRawResourceFd(fallbackResId);
162         if (afd == null) {
163             Log.e(TAG, "Found the resource but cannot read it. Is it compressed? resId="
164                     + fallbackResId);
165             return null;
166         }
167         return AssetFileAddress.makeFromFileNameAndOffset(
168                 context.getApplicationInfo().sourceDir, afd.getStartOffset(), afd.getLength());
169     }
170 
171     static private class DictPackSettings {
172         final SharedPreferences mDictPreferences;
DictPackSettings(final Context context)173         public DictPackSettings(final Context context) {
174             Context dictPackContext = null;
175             try {
176                 final String dictPackName =
177                         context.getString(R.string.dictionary_pack_package_name);
178                 dictPackContext = context.createPackageContext(dictPackName, 0);
179             } catch (NameNotFoundException e) {
180                 // The dictionary pack is not installed...
181                 // TODO: fallback on the built-in dict, see the TODO above
182                 Log.e(TAG, "Could not find a dictionary pack");
183             }
184             mDictPreferences = null == dictPackContext ? null
185                     : dictPackContext.getSharedPreferences(COMMON_PREFERENCES_NAME,
186                             Context.MODE_WORLD_READABLE | Context.MODE_MULTI_PROCESS);
187         }
isWordListActive(final String dictId)188         public boolean isWordListActive(final String dictId) {
189             if (null == mDictPreferences) {
190                 // If we don't have preferences it basically means we can't find the dictionary
191                 // pack - either it's not installed, or it's disabled, or there is some strange
192                 // bug. Either way, a word list with no settings should be on by default: default
193                 // dictionaries in LatinIME are on if there is no settings at all, and if for some
194                 // reason some dictionaries have been installed BUT the dictionary pack can't be
195                 // found anymore it's safer to actually supply installed dictionaries.
196                 return true;
197             } else {
198                 // The default is true here for the same reasons as above. We got the dictionary
199                 // pack but if we don't have any settings for it it means the user has never been
200                 // to the settings yet. So by default, the main dictionaries should be on.
201                 return mDictPreferences.getBoolean(dictId, true);
202             }
203         }
204     }
205 
206     /**
207      * Helper method to the list of cache directories, one for each distinct locale.
208      */
getCachedDirectoryList(final Context context)209     private static File[] getCachedDirectoryList(final Context context) {
210         return new File(getWordListCacheDirectory(context)).listFiles();
211     }
212 
213     /**
214      * Returns the category for a given file name.
215      *
216      * This parses the file name, extracts the category, and returns it. See
217      * {@link #getMainDictId(Locale)} and {@link #isMainWordListId(String)}.
218      * @return The category as a string or null if it can't be found in the file name.
219      */
getCategoryFromFileName(final String fileName)220     private static String getCategoryFromFileName(final String fileName) {
221         final String id = getWordListIdFromFileName(fileName);
222         final String[] idArray = id.split(ID_CATEGORY_SEPARATOR);
223         if (2 != idArray.length) return null;
224         return idArray[0];
225     }
226 
227     /**
228      * Utility class for the {@link #getCachedWordLists} method
229      */
230     private static class FileAndMatchLevel {
231         final File mFile;
232         final int mMatchLevel;
FileAndMatchLevel(final File file, final int matchLevel)233         public FileAndMatchLevel(final File file, final int matchLevel) {
234             mFile = file;
235             mMatchLevel = matchLevel;
236         }
237     }
238 
239     /**
240      * Returns the list of cached files for a specific locale, one for each category.
241      *
242      * This will return exactly one file for each word list category that matches
243      * the passed locale. If several files match the locale for any given category,
244      * this returns the file with the closest match to the locale. For example, if
245      * the passed word list is en_US, and for a category we have an en and an en_US
246      * word list available, we'll return only the en_US one.
247      * Thus, the list will contain as many files as there are categories.
248      *
249      * @param locale the locale to find the dictionary files for, as a string.
250      * @param context the context on which to open the files upon.
251      * @return an array of binary dictionary files, which may be empty but may not be null.
252      */
getCachedWordLists(final String locale, final Context context)253     private static File[] getCachedWordLists(final String locale,
254             final Context context) {
255         final File[] directoryList = getCachedDirectoryList(context);
256         if (null == directoryList) return EMPTY_FILE_ARRAY;
257         final HashMap<String, FileAndMatchLevel> cacheFiles =
258                 new HashMap<String, FileAndMatchLevel>();
259         for (File directory : directoryList) {
260             if (!directory.isDirectory()) continue;
261             final String dirLocale = getWordListIdFromFileName(directory.getName());
262             final int matchLevel = LocaleUtils.getMatchLevel(dirLocale, locale);
263             if (LocaleUtils.isMatch(matchLevel)) {
264                 final File[] wordLists = directory.listFiles();
265                 if (null != wordLists) {
266                     for (File wordList : wordLists) {
267                         final String category = getCategoryFromFileName(wordList.getName());
268                         final FileAndMatchLevel currentBestMatch = cacheFiles.get(category);
269                         if (null == currentBestMatch || currentBestMatch.mMatchLevel < matchLevel) {
270                             cacheFiles.put(category, new FileAndMatchLevel(wordList, matchLevel));
271                         }
272                     }
273                 }
274             }
275         }
276         if (cacheFiles.isEmpty()) return EMPTY_FILE_ARRAY;
277         final File[] result = new File[cacheFiles.size()];
278         int index = 0;
279         for (final FileAndMatchLevel entry : cacheFiles.values()) {
280             result[index++] = entry.mFile;
281         }
282         return result;
283     }
284 
285     /**
286      * Remove all files with the passed id, except the passed file.
287      *
288      * If a dictionary with a given ID has a metadata change that causes it to change
289      * path, we need to remove the old version. The only way to do this is to check all
290      * installed files for a matching ID in a different directory.
291      */
removeFilesWithIdExcept(final Context context, final String id, final File fileToKeep)292     public static void removeFilesWithIdExcept(final Context context, final String id,
293             final File fileToKeep) {
294         try {
295             final File canonicalFileToKeep = fileToKeep.getCanonicalFile();
296             final File[] directoryList = getCachedDirectoryList(context);
297             if (null == directoryList) return;
298             for (File directory : directoryList) {
299                 // There is one directory per locale. See #getCachedDirectoryList
300                 if (!directory.isDirectory()) continue;
301                 final File[] wordLists = directory.listFiles();
302                 if (null == wordLists) continue;
303                 for (File wordList : wordLists) {
304                     final String fileId = getWordListIdFromFileName(wordList.getName());
305                     if (fileId.equals(id)) {
306                         if (!canonicalFileToKeep.equals(wordList.getCanonicalFile())) {
307                             wordList.delete();
308                         }
309                     }
310                 }
311             }
312         } catch (java.io.IOException e) {
313             Log.e(TAG, "IOException trying to cleanup files : " + e);
314         }
315     }
316 
317 
318     /**
319      * Returns the id associated with the main word list for a specified locale.
320      *
321      * Word lists stored in Android Keyboard's resources are referred to as the "main"
322      * word lists. Since they can be updated like any other list, we need to assign a
323      * unique ID to them. This ID is just the name of the language (locale-wise) they
324      * are for, and this method returns this ID.
325      */
getMainDictId(final Locale locale)326     private static String getMainDictId(final Locale locale) {
327         // This works because we don't include by default different dictionaries for
328         // different countries. This actually needs to return the id that we would
329         // like to use for word lists included in resources, and the following is okay.
330         return MAIN_DICTIONARY_CATEGORY + ID_CATEGORY_SEPARATOR + locale.getLanguage().toString();
331     }
332 
isMainWordListId(final String id)333     private static boolean isMainWordListId(final String id) {
334         final String[] idArray = id.split(ID_CATEGORY_SEPARATOR);
335         if (2 != idArray.length) return false;
336         return MAIN_DICTIONARY_CATEGORY.equals(idArray[0]);
337     }
338 
339     /**
340      * Returns a list of file addresses for a given locale, trying relevant methods in order.
341      *
342      * Tries to get binary dictionaries from various sources, in order:
343      * - Uses a content provider to get a public dictionary set, as per the protocol described
344      *   in BinaryDictionaryFileDumper.
345      * If that fails:
346      * - Gets a file name from the built-in dictionary for this locale, if any.
347      * If that fails:
348      * - Returns null.
349      * @return The list of addresses of valid dictionary files, or null.
350      */
getDictionaryFiles(final Locale locale, final Context context)351     public static ArrayList<AssetFileAddress> getDictionaryFiles(final Locale locale,
352             final Context context) {
353 
354         final boolean hasDefaultWordList = DictionaryFactory.isDictionaryAvailable(context, locale);
355         // cacheWordListsFromContentProvider returns the list of files it copied to local
356         // storage, but we don't really care about what was copied NOW: what we want is the
357         // list of everything we ever cached, so we ignore the return value.
358         BinaryDictionaryFileDumper.cacheWordListsFromContentProvider(locale, context,
359                 hasDefaultWordList);
360         final File[] cachedWordLists = getCachedWordLists(locale.toString(), context);
361         final String mainDictId = getMainDictId(locale);
362         final DictPackSettings dictPackSettings = new DictPackSettings(context);
363 
364         boolean foundMainDict = false;
365         final ArrayList<AssetFileAddress> fileList = new ArrayList<AssetFileAddress>();
366         // cachedWordLists may not be null, see doc for getCachedDictionaryList
367         for (final File f : cachedWordLists) {
368             final String wordListId = getWordListIdFromFileName(f.getName());
369             if (isMainWordListId(wordListId)) {
370                 foundMainDict = true;
371             }
372             if (!dictPackSettings.isWordListActive(wordListId)) continue;
373             if (f.canRead()) {
374                 fileList.add(AssetFileAddress.makeFromFileName(f.getPath()));
375             } else {
376                 Log.e(TAG, "Found a cached dictionary file but cannot read it");
377             }
378         }
379 
380         if (!foundMainDict && dictPackSettings.isWordListActive(mainDictId)) {
381             final int fallbackResId =
382                     DictionaryFactory.getMainDictionaryResourceId(context.getResources(), locale);
383             final AssetFileAddress fallbackAsset = loadFallbackResource(context, fallbackResId);
384             if (null != fallbackAsset) {
385                 fileList.add(fallbackAsset);
386             }
387         }
388 
389         return fileList;
390     }
391 }
392