/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.android.inputmethod.latin;

import android.content.Context;
import android.content.SharedPreferences;
import android.content.pm.PackageManager.NameNotFoundException;
import android.content.res.AssetFileDescriptor;
import android.util.Log;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;

/**
 * Helper class to get the address of a mmap'able dictionary file.
 */
class BinaryDictionaryGetter {

    /**
     * Used for Log actions from this class
     */
    private static final String TAG = BinaryDictionaryGetter.class.getSimpleName();

    /**
     * Used to return empty lists
     */
    private static final File[] EMPTY_FILE_ARRAY = new File[0];

    /**
     * Name of the common preferences file used to know which word lists are on and which
     * are off.
     */
    private static final String COMMON_PREFERENCES_NAME = "LatinImeDictPrefs";

    // Name of the category for the main dictionary
    private static final String MAIN_DICTIONARY_CATEGORY = "main";
    public static final String ID_CATEGORY_SEPARATOR = ":";

    /**
     * Number of hexadecimal digits following '%' in an escaped file name. This must stay in
     * sync with the format string used in {@link #replaceFileNameDangerousCharacters(String)}.
     */
    private static final int ESCAPED_CODE_POINT_DIGITS = 6;

    // Prevents this from being instantiated
    private BinaryDictionaryGetter() {}

    /**
     * Returns whether we may want to use this character as part of a file name.
     *
     * This basically only accepts ascii letters, digits and underscore, and rejects
     * everything else.
     */
    private static boolean isFileNameCharacter(int codePoint) {
        if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit
        if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase
        if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase
        return codePoint == '_'; // Underscore
    }

    /**
     * Escapes a string for any characters that may be suspicious for a file or directory name.
     *
     * Concretely this does a sort of URL-encoding except it will encode everything that's not
     * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which
     * we cannot allow here)
     *
     * @param name the string to escape
     * @return the escaped string, in which every rejected code point is replaced by '%'
     *         followed by exactly six lowercase hexadecimal digits
     */
    // TODO: create a unit test for this method
    private static String replaceFileNameDangerousCharacters(final String name) {
        // This assumes '%' is fully available as a non-separator, normal
        // character in a file name. This is probably true for all file systems.
        final StringBuilder sb = new StringBuilder();
        final int nameLength = name.length();
        for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) {
            final int codePoint = name.codePointAt(i);
            if (isFileNameCharacter(codePoint)) {
                sb.appendCodePoint(codePoint);
            } else {
                // 6 digits - unicode is limited to 21 bits
                sb.append(String.format((Locale) null, "%%%1$06x", codePoint));
            }
        }
        return sb.toString();
    }

    /**
     * Reverse escaping done by replaceFileNameDangerousCharacters.
     *
     * A '%' that is not followed by six hexadecimal digits encoding a valid code point is
     * not something the escaping method produces. Such a '%' is copied to the output as-is
     * instead of throwing, so that a stray file in the cache directory cannot make the
     * callers fail with an unchecked exception.
     *
     * @param fname the (escaped) file name
     * @return the word list id this file name stands for
     */
    private static String getWordListIdFromFileName(final String fname) {
        final StringBuilder sb = new StringBuilder();
        final int fnameLength = fname.length();
        int i = 0;
        while (i < fnameLength) {
            final int codePoint = fname.codePointAt(i);
            if ('%' == codePoint) {
                // + 1 to pass the % sign
                final int decoded = decodeEscapedCodePoint(fname, i + 1);
                if (decoded >= 0) {
                    sb.appendCodePoint(decoded);
                    i += 1 + ESCAPED_CODE_POINT_DIGITS;
                    continue;
                }
                // Malformed escape sequence: fall through and keep the '%' literally.
            }
            sb.appendCodePoint(codePoint);
            i += Character.charCount(codePoint);
        }
        return sb.toString();
    }

    /**
     * Decodes the fixed-width hexadecimal code point starting at the given index.
     *
     * @param s the string to read from
     * @param start the index of the first hexadecimal digit
     * @return the decoded code point, or -1 if there are not enough characters, if one of
     *         them is not a hexadecimal digit, or if the value is not a valid code point
     */
    private static int decodeEscapedCodePoint(final String s, final int start) {
        final int end = start + ESCAPED_CODE_POINT_DIGITS;
        if (end > s.length()) return -1;
        int value = 0;
        for (int i = start; i < end; ++i) {
            final int digit = Character.digit(s.charAt(i), 16);
            if (digit < 0) return -1;
            value = (value << 4) | digit;
        }
        // Six hex digits can express values above the Unicode range, which
        // StringBuilder#appendCodePoint would reject with an exception.
        return Character.isValidCodePoint(value) ? value : -1;
    }

    /**
     * Helper method to get the top level cache directory.
     */
    private static String getWordListCacheDirectory(final Context context) {
        return context.getFilesDir() + File.separator + "dicts";
    }

    /**
     * Find out the cache directory associated with a specific locale.
     *
     * The directory is created if it does not exist yet. A failure to create it is logged
     * but the path is returned regardless.
     *
     * @param locale the locale, as a string
     * @param context the context to use for getting the top level cache directory
     * @return the absolute path of the cache directory for this locale
     */
    private static String getCacheDirectoryForLocale(final String locale, final Context context) {
        final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale);
        final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator
                + relativeDirectoryName;
        final File directory = new File(absoluteDirectoryName);
        if (!directory.exists()) {
            if (!directory.mkdirs()) {
                Log.e(TAG, "Could not create the directory for locale " + locale);
            }
        }
        return absoluteDirectoryName;
    }

    /**
     * Generates a file name for the id and locale passed as an argument.
     *
     * In the current implementation the file name returned will always be unique for
     * any id/locale pair, but please do not expect that the id can be the same for
     * different dictionaries with different locales. An id should be unique for any
     * dictionary.
     * The file name is pretty much an URL-encoded version of the id inside a directory
     * named like the locale, except it will also escape characters that look dangerous
     * to some file systems.
     * @param id the id of the dictionary for which to get a file name
     * @param locale the locale for which to get the file name as a string
     * @param context the context to use for getting the directory
     * @return the name of the file to be created
     */
    public static String getCacheFileName(String id, String locale, Context context) {
        final String fileName = replaceFileNameDangerousCharacters(id);
        return getCacheDirectoryForLocale(locale, context) + File.separator + fileName;
    }

    /**
     * Returns a file address from a resource, or null if it cannot be opened.
     *
     * @param context the context to read the resource and the application source dir from
     * @param fallbackResId the id of the raw resource holding the dictionary
     * @return the address of the resource inside the application package, or null
     */
    private static AssetFileAddress loadFallbackResource(final Context context,
            final int fallbackResId) {
        final AssetFileDescriptor afd;
        try {
            afd = context.getResources().openRawResourceFd(fallbackResId);
        } catch (android.content.res.Resources.NotFoundException e) {
            // Honor the "or null" contract instead of letting the exception escape.
            Log.e(TAG, "Could not find the fallback resource. resId=" + fallbackResId, e);
            return null;
        }
        if (afd == null) {
            Log.e(TAG, "Found the resource but cannot read it. Is it compressed? resId="
                    + fallbackResId);
            return null;
        }
        try {
            // Only the offset and length are read from the descriptor; the address itself is
            // built from the package path, so the descriptor can be closed right after.
            return AssetFileAddress.makeFromFileNameAndOffset(
                    context.getApplicationInfo().sourceDir, afd.getStartOffset(), afd.getLength());
        } finally {
            try {
                afd.close();
            } catch (java.io.IOException e) {
                Log.e(TAG, "Could not close the fallback resource descriptor", e);
            }
        }
    }

    /**
     * Gives access to the on/off settings of word lists, as stored by the dictionary pack.
     */
    private static class DictPackSettings {
        // Null when the dictionary pack could not be found.
        final SharedPreferences mDictPreferences;

        public DictPackSettings(final Context context) {
            Context dictPackContext = null;
            try {
                final String dictPackName =
                        context.getString(R.string.dictionary_pack_package_name);
                dictPackContext = context.createPackageContext(dictPackName, 0);
            } catch (NameNotFoundException e) {
                // The dictionary pack is not installed...
                // TODO: fallback on the built-in dict
                Log.e(TAG, "Could not find a dictionary pack");
            }
            // NOTE(review): both mode flags are deprecated on recent Android versions -
            // confirm they are still appropriate for the supported API levels.
            mDictPreferences = null == dictPackContext ? null
                    : dictPackContext.getSharedPreferences(COMMON_PREFERENCES_NAME,
                            Context.MODE_WORLD_READABLE | Context.MODE_MULTI_PROCESS);
        }

        /**
         * Returns whether the word list with the passed id is enabled.
         *
         * @param dictId the id of the word list
         * @return the stored setting, or true if there is no setting for this id
         */
        public boolean isWordListActive(final String dictId) {
            if (null == mDictPreferences) {
                // If we don't have preferences it basically means we can't find the dictionary
                // pack - either it's not installed, or it's disabled, or there is some strange
                // bug. Either way, a word list with no settings should be on by default: default
                // dictionaries in LatinIME are on if there is no settings at all, and if for some
                // reason some dictionaries have been installed BUT the dictionary pack can't be
                // found anymore it's safer to actually supply installed dictionaries.
                return true;
            } else {
                // The default is true here for the same reasons as above. We got the dictionary
                // pack but if we don't have any settings for it it means the user has never been
                // to the settings yet. So by default, the main dictionaries should be on.
                return mDictPreferences.getBoolean(dictId, true);
            }
        }
    }

    /**
     * Helper method to get the list of cache directories, one for each distinct locale.
     *
     * @return the content of the top level cache directory, or null if it cannot be listed
     */
    private static File[] getCachedDirectoryList(final Context context) {
        return new File(getWordListCacheDirectory(context)).listFiles();
    }

    /**
     * Returns the category for a given file name.
     *
     * This parses the file name, extracts the category, and returns it. See
     * {@link #getMainDictId(Locale)} and {@link #isMainWordListId(String)}.
     * @return The category as a string or null if it can't be found in the file name.
     */
    private static String getCategoryFromFileName(final String fileName) {
        final String id = getWordListIdFromFileName(fileName);
        final String[] idArray = id.split(ID_CATEGORY_SEPARATOR);
        if (2 != idArray.length) return null;
        return idArray[0];
    }

    /**
     * Utility class for the {@link #getCachedWordLists} method
     */
    private static class FileAndMatchLevel {
        final File mFile;
        final int mMatchLevel;

        public FileAndMatchLevel(final File file, final int matchLevel) {
            mFile = file;
            mMatchLevel = matchLevel;
        }
    }

    /**
     * Returns the list of cached files for a specific locale, one for each category.
     *
     * This will return exactly one file for each word list category that matches
     * the passed locale. If several files match the locale for any given category,
     * this returns the file with the closest match to the locale. For example, if
     * the passed word list is en_US, and for a category we have an en and an en_US
     * word list available, we'll return only the en_US one.
     * Thus, the list will contain as many files as there are categories.
     *
     * @param locale the locale to find the dictionary files for, as a string.
     * @param context the context on which to open the files upon.
     * @return an array of binary dictionary files, which may be empty but may not be null.
     */
    private static File[] getCachedWordLists(final String locale,
            final Context context) {
        final File[] directoryList = getCachedDirectoryList(context);
        if (null == directoryList) return EMPTY_FILE_ARRAY;
        final HashMap<String, FileAndMatchLevel> cacheFiles =
                new HashMap<String, FileAndMatchLevel>();
        for (File directory : directoryList) {
            if (!directory.isDirectory()) continue;
            final String dirLocale = getWordListIdFromFileName(directory.getName());
            final int matchLevel = LocaleUtils.getMatchLevel(dirLocale, locale);
            if (LocaleUtils.isMatch(matchLevel)) {
                final File[] wordLists = directory.listFiles();
                if (null != wordLists) {
                    for (File wordList : wordLists) {
                        // The category is null when the id does not have the expected
                        // "category:name" form. HashMap accepts a null key, so all such
                        // files compete for one and the same entry.
                        final String category = getCategoryFromFileName(wordList.getName());
                        final FileAndMatchLevel currentBestMatch = cacheFiles.get(category);
                        if (null == currentBestMatch || currentBestMatch.mMatchLevel < matchLevel) {
                            cacheFiles.put(category, new FileAndMatchLevel(wordList, matchLevel));
                        }
                    }
                }
            }
        }
        if (cacheFiles.isEmpty()) return EMPTY_FILE_ARRAY;
        final File[] result = new File[cacheFiles.size()];
        int index = 0;
        for (final FileAndMatchLevel entry : cacheFiles.values()) {
            result[index++] = entry.mFile;
        }
        return result;
    }

    /**
     * Remove all files with the passed id, except the passed file.
     *
     * If a dictionary with a given ID has a metadata change that causes it to change
     * path, we need to remove the old version. The only way to do this is to check all
     * installed files for a matching ID in a different directory.
     *
     * @param context the context to use for getting the cache directory
     * @param id the id of the word list whose outdated files should be removed
     * @param fileToKeep the file that must not be removed
     */
    public static void removeFilesWithIdExcept(final Context context, final String id,
            final File fileToKeep) {
        try {
            final File canonicalFileToKeep = fileToKeep.getCanonicalFile();
            final File[] directoryList = getCachedDirectoryList(context);
            if (null == directoryList) return;
            for (File directory : directoryList) {
                // There is one directory per locale. See #getCachedDirectoryList
                if (!directory.isDirectory()) continue;
                final File[] wordLists = directory.listFiles();
                if (null == wordLists) continue;
                for (File wordList : wordLists) {
                    final String fileId = getWordListIdFromFileName(wordList.getName());
                    if (fileId.equals(id)) {
                        if (!canonicalFileToKeep.equals(wordList.getCanonicalFile())) {
                            if (!wordList.delete()) {
                                Log.e(TAG, "Could not delete outdated word list file : "
                                        + wordList);
                            }
                        }
                    }
                }
            }
        } catch (java.io.IOException e) {
            Log.e(TAG, "IOException trying to cleanup files : " + e);
        }
    }

    /**
     * Returns the id associated with the main word list for a specified locale.
     *
     * Word lists stored in Android Keyboard's resources are referred to as the "main"
     * word lists. Since they can be updated like any other list, we need to assign a
     * unique ID to them. This ID is just the name of the language (locale-wise) they
     * are for, and this method returns this ID.
     */
    private static String getMainDictId(final Locale locale) {
        // This works because we don't include by default different dictionaries for
        // different countries. This actually needs to return the id that we would
        // like to use for word lists included in resources, and the following is okay.
        return MAIN_DICTIONARY_CATEGORY + ID_CATEGORY_SEPARATOR + locale.getLanguage();
    }

    /**
     * Returns whether the passed id has the form "category:name" with the main dictionary
     * category.
     */
    private static boolean isMainWordListId(final String id) {
        final String[] idArray = id.split(ID_CATEGORY_SEPARATOR);
        if (2 != idArray.length) return false;
        return MAIN_DICTIONARY_CATEGORY.equals(idArray[0]);
    }

    /**
     * Returns a list of file addresses for a given locale, trying relevant methods in order.
     *
     * Tries to get binary dictionaries from various sources, in order:
     * - Uses a content provider to get a public dictionary set, as per the protocol described
     *   in BinaryDictionaryFileDumper.
     * If no usable main dictionary was obtained that way:
     * - Adds the built-in dictionary for this locale, if any.
     *
     * @param locale the locale to get dictionary files for
     * @param context the context to use for accessing files, resources and settings
     * @return The list of addresses of valid dictionary files. It may be empty if nothing
     *         could be found, but it is never null.
     */
    public static ArrayList<AssetFileAddress> getDictionaryFiles(final Locale locale,
            final Context context) {

        final boolean hasDefaultWordList = DictionaryFactory.isDictionaryAvailable(context, locale);
        // cacheWordListsFromContentProvider returns the list of files it copied to local
        // storage, but we don't really care about what was copied NOW: what we want is the
        // list of everything we ever cached, so we ignore the return value.
        BinaryDictionaryFileDumper.cacheWordListsFromContentProvider(locale, context,
                hasDefaultWordList);
        final File[] cachedWordLists = getCachedWordLists(locale.toString(), context);
        final String mainDictId = getMainDictId(locale);
        final DictPackSettings dictPackSettings = new DictPackSettings(context);

        boolean foundMainDict = false;
        final ArrayList<AssetFileAddress> fileList = new ArrayList<AssetFileAddress>();
        // cachedWordLists may not be null, see doc for getCachedWordLists
        for (final File f : cachedWordLists) {
            final String wordListId = getWordListIdFromFileName(f.getName());
            final boolean canRead = f.canRead();
            // An unreadable main dictionary must not prevent falling back on the built-in
            // one, or we would end up without any main dictionary at all.
            if (canRead && isMainWordListId(wordListId)) {
                foundMainDict = true;
            }
            if (!dictPackSettings.isWordListActive(wordListId)) continue;
            if (canRead) {
                fileList.add(AssetFileAddress.makeFromFileName(f.getPath()));
            } else {
                Log.e(TAG, "Found a cached dictionary file but cannot read it");
            }
        }

        if (!foundMainDict && dictPackSettings.isWordListActive(mainDictId)) {
            final int fallbackResId =
                    DictionaryFactory.getMainDictionaryResourceId(context.getResources(), locale);
            final AssetFileAddress fallbackAsset = loadFallbackResource(context, fallbackResId);
            if (null != fallbackAsset) {
                fileList.add(fallbackAsset);
            }
        }

        return fileList;
    }
}