1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin.utils; 18 19 import android.content.ContentValues; 20 import android.content.Context; 21 import android.content.res.AssetManager; 22 import android.content.res.Resources; 23 import android.util.Log; 24 25 import com.android.inputmethod.latin.AssetFileAddress; 26 import com.android.inputmethod.latin.BinaryDictionaryGetter; 27 import com.android.inputmethod.latin.R; 28 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; 29 import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; 30 31 import java.io.File; 32 import java.util.ArrayList; 33 import java.util.Iterator; 34 import java.util.Locale; 35 import java.util.concurrent.TimeUnit; 36 37 /** 38 * This class encapsulates the logic for the Latin-IME side of dictionary information management. 39 */ 40 public class DictionaryInfoUtils { 41 private static final String TAG = DictionaryInfoUtils.class.getSimpleName(); 42 private static final String RESOURCE_PACKAGE_NAME = R.class.getPackage().getName(); 43 private static final String DEFAULT_MAIN_DICT = "main"; 44 private static final String MAIN_DICT_PREFIX = "main_"; 45 // 6 digits - unicode is limited to 21 bits 46 private static final int MAX_HEX_DIGITS_FOR_CODEPOINT = 6; 47 48 public static class DictionaryInfo { 49 private static final String LOCALE_COLUMN = "locale"; 50 private static final String WORDLISTID_COLUMN = "id"; 51 private static final String LOCAL_FILENAME_COLUMN = "filename"; 52 private static final String DESCRIPTION_COLUMN = "description"; 53 private static final String DATE_COLUMN = "date"; 54 private static final String FILESIZE_COLUMN = "filesize"; 55 private static final String VERSION_COLUMN = "version"; 56 public final String mId; 57 public final Locale mLocale; 58 public final String mDescription; 59 public final AssetFileAddress mFileAddress; 60 public final int mVersion; DictionaryInfo(final String id, final Locale locale, final String description, final AssetFileAddress fileAddress, final int version)61 public DictionaryInfo(final String id, final Locale locale, final String description, 62 final AssetFileAddress fileAddress, final int version) { 63 mId = id; 64 mLocale = locale; 65 mDescription = description; 66 mFileAddress = fileAddress; 67 mVersion = version; 68 } toContentValues()69 public ContentValues toContentValues() { 70 final ContentValues values = new ContentValues(); 71 values.put(WORDLISTID_COLUMN, mId); 72 values.put(LOCALE_COLUMN, mLocale.toString()); 73 values.put(DESCRIPTION_COLUMN, mDescription); 74 values.put(LOCAL_FILENAME_COLUMN, mFileAddress.mFilename); 75 values.put(DATE_COLUMN, TimeUnit.MILLISECONDS.toSeconds( 76 new File(mFileAddress.mFilename).lastModified())); 77 values.put(FILESIZE_COLUMN, mFileAddress.mLength); 78 values.put(VERSION_COLUMN, mVersion); 79 return values; 80 } 81 } 82 DictionaryInfoUtils()83 private DictionaryInfoUtils() { 84 // Private constructor to forbid instantation of this helper class. 85 } 86 87 /** 88 * Returns whether we may want to use this character as part of a file name. 89 * 90 * This basically only accepts ascii letters and numbers, and rejects everything else. 91 */ isFileNameCharacter(int codePoint)92 private static boolean isFileNameCharacter(int codePoint) { 93 if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit 94 if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase 95 if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase 96 return codePoint == '_'; // Underscore 97 } 98 99 /** 100 * Escapes a string for any characters that may be suspicious for a file or directory name. 101 * 102 * Concretely this does a sort of URL-encoding except it will encode everything that's not 103 * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which 104 * we cannot allow here) 105 */ 106 // TODO: create a unit test for this method replaceFileNameDangerousCharacters(final String name)107 public static String replaceFileNameDangerousCharacters(final String name) { 108 // This assumes '%' is fully available as a non-separator, normal 109 // character in a file name. This is probably true for all file systems. 110 final StringBuilder sb = new StringBuilder(); 111 final int nameLength = name.length(); 112 for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) { 113 final int codePoint = name.codePointAt(i); 114 if (DictionaryInfoUtils.isFileNameCharacter(codePoint)) { 115 sb.appendCodePoint(codePoint); 116 } else { 117 sb.append(String.format((Locale)null, "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x", 118 codePoint)); 119 } 120 } 121 return sb.toString(); 122 } 123 124 /** 125 * Helper method to get the top level cache directory. 126 */ getWordListCacheDirectory(final Context context)127 private static String getWordListCacheDirectory(final Context context) { 128 return context.getFilesDir() + File.separator + "dicts"; 129 } 130 131 /** 132 * Helper method to get the top level temp directory. 133 */ getWordListTempDirectory(final Context context)134 public static String getWordListTempDirectory(final Context context) { 135 return context.getFilesDir() + File.separator + "tmp"; 136 } 137 138 /** 139 * Reverse escaping done by replaceFileNameDangerousCharacters. 140 */ getWordListIdFromFileName(final String fname)141 public static String getWordListIdFromFileName(final String fname) { 142 final StringBuilder sb = new StringBuilder(); 143 final int fnameLength = fname.length(); 144 for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) { 145 final int codePoint = fname.codePointAt(i); 146 if ('%' != codePoint) { 147 sb.appendCodePoint(codePoint); 148 } else { 149 // + 1 to pass the % sign 150 final int encodedCodePoint = Integer.parseInt( 151 fname.substring(i + 1, i + 1 + MAX_HEX_DIGITS_FOR_CODEPOINT), 16); 152 i += MAX_HEX_DIGITS_FOR_CODEPOINT; 153 sb.appendCodePoint(encodedCodePoint); 154 } 155 } 156 return sb.toString(); 157 } 158 159 /** 160 * Helper method to the list of cache directories, one for each distinct locale. 161 */ getCachedDirectoryList(final Context context)162 public static File[] getCachedDirectoryList(final Context context) { 163 return new File(DictionaryInfoUtils.getWordListCacheDirectory(context)).listFiles(); 164 } 165 166 /** 167 * Returns the category for a given file name. 168 * 169 * This parses the file name, extracts the category, and returns it. See 170 * {@link #getMainDictId(Locale)} and {@link #isMainWordListId(String)}. 171 * @return The category as a string or null if it can't be found in the file name. 172 */ getCategoryFromFileName(final String fileName)173 public static String getCategoryFromFileName(final String fileName) { 174 final String id = getWordListIdFromFileName(fileName); 175 final String[] idArray = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR); 176 // An id is supposed to be in format category:locale, so splitting on the separator 177 // should yield a 2-elements array 178 if (2 != idArray.length) return null; 179 return idArray[0]; 180 } 181 182 /** 183 * Find out the cache directory associated with a specific locale. 184 */ getCacheDirectoryForLocale(final String locale, final Context context)185 private static String getCacheDirectoryForLocale(final String locale, final Context context) { 186 final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale); 187 final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator 188 + relativeDirectoryName; 189 final File directory = new File(absoluteDirectoryName); 190 if (!directory.exists()) { 191 if (!directory.mkdirs()) { 192 Log.e(TAG, "Could not create the directory for locale" + locale); 193 } 194 } 195 return absoluteDirectoryName; 196 } 197 198 /** 199 * Generates a file name for the id and locale passed as an argument. 200 * 201 * In the current implementation the file name returned will always be unique for 202 * any id/locale pair, but please do not expect that the id can be the same for 203 * different dictionaries with different locales. An id should be unique for any 204 * dictionary. 205 * The file name is pretty much an URL-encoded version of the id inside a directory 206 * named like the locale, except it will also escape characters that look dangerous 207 * to some file systems. 208 * @param id the id of the dictionary for which to get a file name 209 * @param locale the locale for which to get the file name as a string 210 * @param context the context to use for getting the directory 211 * @return the name of the file to be created 212 */ getCacheFileName(String id, String locale, Context context)213 public static String getCacheFileName(String id, String locale, Context context) { 214 final String fileName = replaceFileNameDangerousCharacters(id); 215 return getCacheDirectoryForLocale(locale, context) + File.separator + fileName; 216 } 217 isMainWordListId(final String id)218 public static boolean isMainWordListId(final String id) { 219 final String[] idArray = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR); 220 // An id is supposed to be in format category:locale, so splitting on the separator 221 // should yield a 2-elements array 222 if (2 != idArray.length) return false; 223 return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY.equals(idArray[0]); 224 } 225 226 /** 227 * Helper method to return a dictionary res id for a locale, or 0 if none. 228 * @param locale dictionary locale 229 * @return main dictionary resource id 230 */ getMainDictionaryResourceIdIfAvailableForLocale(final Resources res, final Locale locale)231 public static int getMainDictionaryResourceIdIfAvailableForLocale(final Resources res, 232 final Locale locale) { 233 int resId; 234 // Try to find main_language_country dictionary. 235 if (!locale.getCountry().isEmpty()) { 236 final String dictLanguageCountry = 237 MAIN_DICT_PREFIX + locale.toString().toLowerCase(Locale.ROOT); 238 if ((resId = res.getIdentifier( 239 dictLanguageCountry, "raw", RESOURCE_PACKAGE_NAME)) != 0) { 240 return resId; 241 } 242 } 243 244 // Try to find main_language dictionary. 245 final String dictLanguage = MAIN_DICT_PREFIX + locale.getLanguage(); 246 if ((resId = res.getIdentifier(dictLanguage, "raw", RESOURCE_PACKAGE_NAME)) != 0) { 247 return resId; 248 } 249 250 // Not found, return 0 251 return 0; 252 } 253 254 /** 255 * Returns a main dictionary resource id 256 * @param locale dictionary locale 257 * @return main dictionary resource id 258 */ getMainDictionaryResourceId(final Resources res, final Locale locale)259 public static int getMainDictionaryResourceId(final Resources res, final Locale locale) { 260 int resourceId = getMainDictionaryResourceIdIfAvailableForLocale(res, locale); 261 if (0 != resourceId) return resourceId; 262 return res.getIdentifier(DEFAULT_MAIN_DICT, "raw", RESOURCE_PACKAGE_NAME); 263 } 264 265 /** 266 * Returns the id associated with the main word list for a specified locale. 267 * 268 * Word lists stored in Android Keyboard's resources are referred to as the "main" 269 * word lists. Since they can be updated like any other list, we need to assign a 270 * unique ID to them. This ID is just the name of the language (locale-wise) they 271 * are for, and this method returns this ID. 272 */ getMainDictId(final Locale locale)273 public static String getMainDictId(final Locale locale) { 274 // This works because we don't include by default different dictionaries for 275 // different countries. This actually needs to return the id that we would 276 // like to use for word lists included in resources, and the following is okay. 277 return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY + 278 BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR + locale.getLanguage().toString(); 279 } 280 getDictionaryFileHeaderOrNull(final File file)281 public static FileHeader getDictionaryFileHeaderOrNull(final File file) { 282 return BinaryDictIOUtils.getDictionaryFileHeaderOrNull(file, 0, file.length()); 283 } 284 createDictionaryInfoFromFileAddress( final AssetFileAddress fileAddress)285 private static DictionaryInfo createDictionaryInfoFromFileAddress( 286 final AssetFileAddress fileAddress) { 287 final FileHeader header = BinaryDictIOUtils.getDictionaryFileHeaderOrNull( 288 new File(fileAddress.mFilename), fileAddress.mOffset, fileAddress.mLength); 289 final String id = header.getId(); 290 final Locale locale = LocaleUtils.constructLocaleFromString(header.getLocaleString()); 291 final String description = header.getDescription(); 292 final String version = header.getVersion(); 293 return new DictionaryInfo(id, locale, description, fileAddress, Integer.parseInt(version)); 294 } 295 addOrUpdateDictInfo(final ArrayList<DictionaryInfo> dictList, final DictionaryInfo newElement)296 private static void addOrUpdateDictInfo(final ArrayList<DictionaryInfo> dictList, 297 final DictionaryInfo newElement) { 298 final Iterator<DictionaryInfo> iter = dictList.iterator(); 299 while (iter.hasNext()) { 300 final DictionaryInfo thisDictInfo = iter.next(); 301 if (thisDictInfo.mLocale.equals(newElement.mLocale)) { 302 if (newElement.mVersion <= thisDictInfo.mVersion) { 303 return; 304 } 305 iter.remove(); 306 } 307 } 308 dictList.add(newElement); 309 } 310 getCurrentDictionaryFileNameAndVersionInfo( final Context context)311 public static ArrayList<DictionaryInfo> getCurrentDictionaryFileNameAndVersionInfo( 312 final Context context) { 313 final ArrayList<DictionaryInfo> dictList = CollectionUtils.newArrayList(); 314 315 // Retrieve downloaded dictionaries 316 final File[] directoryList = getCachedDirectoryList(context); 317 if (null != directoryList) { 318 for (final File directory : directoryList) { 319 final String localeString = getWordListIdFromFileName(directory.getName()); 320 File[] dicts = BinaryDictionaryGetter.getCachedWordLists(localeString, context); 321 for (final File dict : dicts) { 322 final String wordListId = getWordListIdFromFileName(dict.getName()); 323 if (!DictionaryInfoUtils.isMainWordListId(wordListId)) continue; 324 final Locale locale = LocaleUtils.constructLocaleFromString(localeString); 325 final AssetFileAddress fileAddress = AssetFileAddress.makeFromFile(dict); 326 final DictionaryInfo dictionaryInfo = 327 createDictionaryInfoFromFileAddress(fileAddress); 328 // Protect against cases of a less-specific dictionary being found, like an 329 // en dictionary being used for an en_US locale. In this case, the en dictionary 330 // should be used for en_US but discounted for listing purposes. 331 if (!dictionaryInfo.mLocale.equals(locale)) continue; 332 addOrUpdateDictInfo(dictList, dictionaryInfo); 333 } 334 } 335 } 336 337 // Retrieve files from assets 338 final Resources resources = context.getResources(); 339 final AssetManager assets = resources.getAssets(); 340 for (final String localeString : assets.getLocales()) { 341 final Locale locale = LocaleUtils.constructLocaleFromString(localeString); 342 final int resourceId = 343 DictionaryInfoUtils.getMainDictionaryResourceIdIfAvailableForLocale( 344 context.getResources(), locale); 345 if (0 == resourceId) continue; 346 final AssetFileAddress fileAddress = 347 BinaryDictionaryGetter.loadFallbackResource(context, resourceId); 348 final DictionaryInfo dictionaryInfo = createDictionaryInfoFromFileAddress(fileAddress); 349 // Protect against cases of a less-specific dictionary being found, like an 350 // en dictionary being used for an en_US locale. In this case, the en dictionary 351 // should be used for en_US but discounted for listing purposes. 352 if (!dictionaryInfo.mLocale.equals(locale)) continue; 353 addOrUpdateDictInfo(dictList, dictionaryInfo); 354 } 355 356 return dictList; 357 } 358 } 359