• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin.utils;
18 
19 import android.content.ContentValues;
20 import android.content.Context;
21 import android.content.res.AssetManager;
22 import android.content.res.Resources;
23 import android.util.Log;
24 
25 import com.android.inputmethod.latin.AssetFileAddress;
26 import com.android.inputmethod.latin.BinaryDictionaryGetter;
27 import com.android.inputmethod.latin.R;
28 import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
29 import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
30 
31 import java.io.File;
32 import java.util.ArrayList;
33 import java.util.Iterator;
34 import java.util.Locale;
35 import java.util.concurrent.TimeUnit;
36 
37 /**
38  * This class encapsulates the logic for the Latin-IME side of dictionary information management.
39  */
40 public class DictionaryInfoUtils {
    // Tag under which this class writes to the Android log.
41     private static final String TAG = DictionaryInfoUtils.class.getSimpleName();
    // Package of the generated R class, passed to Resources#getIdentifier when looking up
    // dictionary resources by name.
42     private static final String RESOURCE_PACKAGE_NAME = R.class.getPackage().getName();
    // Name of the raw resource used as the main dictionary when no locale-specific one exists.
43     private static final String DEFAULT_MAIN_DICT = "main";
    // Prefix of locale-specific main dictionary raw resources; the lower-cased locale string
    // or the language code is appended to it.
44     private static final String MAIN_DICT_PREFIX = "main_";
    // Width of an escaped code point in a file name: Unicode is limited to 21 bits, which
    // always fits in 6 hexadecimal digits.
45     // 6 digits - unicode is limited to 21 bits
46     private static final int MAX_HEX_DIGITS_FOR_CODEPOINT = 6;
47 
48     public static class DictionaryInfo {
49         private static final String LOCALE_COLUMN = "locale";
50         private static final String WORDLISTID_COLUMN = "id";
51         private static final String LOCAL_FILENAME_COLUMN = "filename";
52         private static final String DESCRIPTION_COLUMN = "description";
53         private static final String DATE_COLUMN = "date";
54         private static final String FILESIZE_COLUMN = "filesize";
55         private static final String VERSION_COLUMN = "version";
56         public final String mId;
57         public final Locale mLocale;
58         public final String mDescription;
59         public final AssetFileAddress mFileAddress;
60         public final int mVersion;
DictionaryInfo(final String id, final Locale locale, final String description, final AssetFileAddress fileAddress, final int version)61         public DictionaryInfo(final String id, final Locale locale, final String description,
62                 final AssetFileAddress fileAddress, final int version) {
63             mId = id;
64             mLocale = locale;
65             mDescription = description;
66             mFileAddress = fileAddress;
67             mVersion = version;
68         }
toContentValues()69         public ContentValues toContentValues() {
70             final ContentValues values = new ContentValues();
71             values.put(WORDLISTID_COLUMN, mId);
72             values.put(LOCALE_COLUMN, mLocale.toString());
73             values.put(DESCRIPTION_COLUMN, mDescription);
74             values.put(LOCAL_FILENAME_COLUMN, mFileAddress.mFilename);
75             values.put(DATE_COLUMN, TimeUnit.MILLISECONDS.toSeconds(
76                     new File(mFileAddress.mFilename).lastModified()));
77             values.put(FILESIZE_COLUMN, mFileAddress.mLength);
78             values.put(VERSION_COLUMN, mVersion);
79             return values;
80         }
81     }
82 
DictionaryInfoUtils()83     private DictionaryInfoUtils() {
84         // Private constructor to forbid instantiation of this helper class.
85     }
86 
87     /**
88      * Returns whether we may want to use this character as part of a file name.
89      *
90      * This basically only accepts ascii letters and numbers, and rejects everything else.
91      */
isFileNameCharacter(int codePoint)92     private static boolean isFileNameCharacter(int codePoint) {
93         if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit
94         if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase
95         if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase
96         return codePoint == '_'; // Underscore
97     }
98 
99     /**
100      * Escapes a string for any characters that may be suspicious for a file or directory name.
101      *
102      * Concretely this does a sort of URL-encoding except it will encode everything that's not
103      * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which
104      * we cannot allow here)
105      */
106     // TODO: create a unit test for this method
replaceFileNameDangerousCharacters(final String name)107     public static String replaceFileNameDangerousCharacters(final String name) {
108         // This assumes '%' is fully available as a non-separator, normal
109         // character in a file name. This is probably true for all file systems.
110         final StringBuilder sb = new StringBuilder();
111         final int nameLength = name.length();
112         for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) {
113             final int codePoint = name.codePointAt(i);
114             if (DictionaryInfoUtils.isFileNameCharacter(codePoint)) {
115                 sb.appendCodePoint(codePoint);
116             } else {
117                 sb.append(String.format((Locale)null, "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x",
118                         codePoint));
119             }
120         }
121         return sb.toString();
122     }
123 
124     /**
125      * Helper method to get the top level cache directory.
126      */
getWordListCacheDirectory(final Context context)127     private static String getWordListCacheDirectory(final Context context) {
128         return context.getFilesDir() + File.separator + "dicts";
129     }
130 
131     /**
132      * Helper method to get the top level temp directory.
133      */
getWordListTempDirectory(final Context context)134     public static String getWordListTempDirectory(final Context context) {
135         return context.getFilesDir() + File.separator + "tmp";
136     }
137 
138     /**
139      * Reverse escaping done by replaceFileNameDangerousCharacters.
140      */
getWordListIdFromFileName(final String fname)141     public static String getWordListIdFromFileName(final String fname) {
142         final StringBuilder sb = new StringBuilder();
143         final int fnameLength = fname.length();
144         for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) {
145             final int codePoint = fname.codePointAt(i);
146             if ('%' != codePoint) {
147                 sb.appendCodePoint(codePoint);
148             } else {
149                 // + 1 to pass the % sign
150                 final int encodedCodePoint = Integer.parseInt(
151                         fname.substring(i + 1, i + 1 + MAX_HEX_DIGITS_FOR_CODEPOINT), 16);
152                 i += MAX_HEX_DIGITS_FOR_CODEPOINT;
153                 sb.appendCodePoint(encodedCodePoint);
154             }
155         }
156         return sb.toString();
157     }
158 
159     /**
160      * Helper method to the list of cache directories, one for each distinct locale.
161      */
getCachedDirectoryList(final Context context)162     public static File[] getCachedDirectoryList(final Context context) {
163         return new File(DictionaryInfoUtils.getWordListCacheDirectory(context)).listFiles();
164     }
165 
166     /**
167      * Returns the category for a given file name.
168      *
169      * This parses the file name, extracts the category, and returns it. See
170      * {@link #getMainDictId(Locale)} and {@link #isMainWordListId(String)}.
171      * @return The category as a string or null if it can't be found in the file name.
172      */
getCategoryFromFileName(final String fileName)173     public static String getCategoryFromFileName(final String fileName) {
174         final String id = getWordListIdFromFileName(fileName);
175         final String[] idArray = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR);
176         // An id is supposed to be in format category:locale, so splitting on the separator
177         // should yield a 2-elements array
178         if (2 != idArray.length) return null;
179         return idArray[0];
180     }
181 
182     /**
183      * Find out the cache directory associated with a specific locale.
184      */
getCacheDirectoryForLocale(final String locale, final Context context)185     private static String getCacheDirectoryForLocale(final String locale, final Context context) {
186         final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale);
187         final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator
188                 + relativeDirectoryName;
189         final File directory = new File(absoluteDirectoryName);
190         if (!directory.exists()) {
191             if (!directory.mkdirs()) {
192                 Log.e(TAG, "Could not create the directory for locale" + locale);
193             }
194         }
195         return absoluteDirectoryName;
196     }
197 
198     /**
199      * Generates a file name for the id and locale passed as an argument.
200      *
201      * In the current implementation the file name returned will always be unique for
202      * any id/locale pair, but please do not expect that the id can be the same for
203      * different dictionaries with different locales. An id should be unique for any
204      * dictionary.
205      * The file name is pretty much an URL-encoded version of the id inside a directory
206      * named like the locale, except it will also escape characters that look dangerous
207      * to some file systems.
208      * @param id the id of the dictionary for which to get a file name
209      * @param locale the locale for which to get the file name as a string
210      * @param context the context to use for getting the directory
211      * @return the name of the file to be created
212      */
getCacheFileName(String id, String locale, Context context)213     public static String getCacheFileName(String id, String locale, Context context) {
214         final String fileName = replaceFileNameDangerousCharacters(id);
215         return getCacheDirectoryForLocale(locale, context) + File.separator + fileName;
216     }
217 
isMainWordListId(final String id)218     public static boolean isMainWordListId(final String id) {
219         final String[] idArray = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR);
220         // An id is supposed to be in format category:locale, so splitting on the separator
221         // should yield a 2-elements array
222         if (2 != idArray.length) return false;
223         return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY.equals(idArray[0]);
224     }
225 
226     /**
227      * Helper method to return a dictionary res id for a locale, or 0 if none.
228      * @param locale dictionary locale
229      * @return main dictionary resource id
230      */
getMainDictionaryResourceIdIfAvailableForLocale(final Resources res, final Locale locale)231     public static int getMainDictionaryResourceIdIfAvailableForLocale(final Resources res,
232             final Locale locale) {
233         int resId;
234         // Try to find main_language_country dictionary.
235         if (!locale.getCountry().isEmpty()) {
236             final String dictLanguageCountry =
237                     MAIN_DICT_PREFIX + locale.toString().toLowerCase(Locale.ROOT);
238             if ((resId = res.getIdentifier(
239                     dictLanguageCountry, "raw", RESOURCE_PACKAGE_NAME)) != 0) {
240                 return resId;
241             }
242         }
243 
244         // Try to find main_language dictionary.
245         final String dictLanguage = MAIN_DICT_PREFIX + locale.getLanguage();
246         if ((resId = res.getIdentifier(dictLanguage, "raw", RESOURCE_PACKAGE_NAME)) != 0) {
247             return resId;
248         }
249 
250         // Not found, return 0
251         return 0;
252     }
253 
254     /**
255      * Returns a main dictionary resource id
256      * @param locale dictionary locale
257      * @return main dictionary resource id
258      */
getMainDictionaryResourceId(final Resources res, final Locale locale)259     public static int getMainDictionaryResourceId(final Resources res, final Locale locale) {
260         int resourceId = getMainDictionaryResourceIdIfAvailableForLocale(res, locale);
261         if (0 != resourceId) return resourceId;
262         return res.getIdentifier(DEFAULT_MAIN_DICT, "raw", RESOURCE_PACKAGE_NAME);
263     }
264 
265     /**
266      * Returns the id associated with the main word list for a specified locale.
267      *
268      * Word lists stored in Android Keyboard's resources are referred to as the "main"
269      * word lists. Since they can be updated like any other list, we need to assign a
270      * unique ID to them. This ID is just the name of the language (locale-wise) they
271      * are for, and this method returns this ID.
272      */
getMainDictId(final Locale locale)273     public static String getMainDictId(final Locale locale) {
274         // This works because we don't include by default different dictionaries for
275         // different countries. This actually needs to return the id that we would
276         // like to use for word lists included in resources, and the following is okay.
277         return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY +
278                 BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR + locale.getLanguage().toString();
279     }
280 
getDictionaryFileHeaderOrNull(final File file)281     public static FileHeader getDictionaryFileHeaderOrNull(final File file) {
282         return BinaryDictIOUtils.getDictionaryFileHeaderOrNull(file, 0, file.length());
283     }
284 
createDictionaryInfoFromFileAddress( final AssetFileAddress fileAddress)285     private static DictionaryInfo createDictionaryInfoFromFileAddress(
286             final AssetFileAddress fileAddress) {
287         final FileHeader header = BinaryDictIOUtils.getDictionaryFileHeaderOrNull(
288                 new File(fileAddress.mFilename), fileAddress.mOffset, fileAddress.mLength);
289         final String id = header.getId();
290         final Locale locale = LocaleUtils.constructLocaleFromString(header.getLocaleString());
291         final String description = header.getDescription();
292         final String version = header.getVersion();
293         return new DictionaryInfo(id, locale, description, fileAddress, Integer.parseInt(version));
294     }
295 
addOrUpdateDictInfo(final ArrayList<DictionaryInfo> dictList, final DictionaryInfo newElement)296     private static void addOrUpdateDictInfo(final ArrayList<DictionaryInfo> dictList,
297             final DictionaryInfo newElement) {
298         final Iterator<DictionaryInfo> iter = dictList.iterator();
299         while (iter.hasNext()) {
300             final DictionaryInfo thisDictInfo = iter.next();
301             if (thisDictInfo.mLocale.equals(newElement.mLocale)) {
302                 if (newElement.mVersion <= thisDictInfo.mVersion) {
303                     return;
304                 }
305                 iter.remove();
306             }
307         }
308         dictList.add(newElement);
309     }
310 
getCurrentDictionaryFileNameAndVersionInfo( final Context context)311     public static ArrayList<DictionaryInfo> getCurrentDictionaryFileNameAndVersionInfo(
312             final Context context) {
313         final ArrayList<DictionaryInfo> dictList = CollectionUtils.newArrayList();
314 
315         // Retrieve downloaded dictionaries
316         final File[] directoryList = getCachedDirectoryList(context);
317         if (null != directoryList) {
318             for (final File directory : directoryList) {
319                 final String localeString = getWordListIdFromFileName(directory.getName());
320                 File[] dicts = BinaryDictionaryGetter.getCachedWordLists(localeString, context);
321                 for (final File dict : dicts) {
322                     final String wordListId = getWordListIdFromFileName(dict.getName());
323                     if (!DictionaryInfoUtils.isMainWordListId(wordListId)) continue;
324                     final Locale locale = LocaleUtils.constructLocaleFromString(localeString);
325                     final AssetFileAddress fileAddress = AssetFileAddress.makeFromFile(dict);
326                     final DictionaryInfo dictionaryInfo =
327                             createDictionaryInfoFromFileAddress(fileAddress);
328                     // Protect against cases of a less-specific dictionary being found, like an
329                     // en dictionary being used for an en_US locale. In this case, the en dictionary
330                     // should be used for en_US but discounted for listing purposes.
331                     if (!dictionaryInfo.mLocale.equals(locale)) continue;
332                     addOrUpdateDictInfo(dictList, dictionaryInfo);
333                 }
334             }
335         }
336 
337         // Retrieve files from assets
338         final Resources resources = context.getResources();
339         final AssetManager assets = resources.getAssets();
340         for (final String localeString : assets.getLocales()) {
341             final Locale locale = LocaleUtils.constructLocaleFromString(localeString);
342             final int resourceId =
343                     DictionaryInfoUtils.getMainDictionaryResourceIdIfAvailableForLocale(
344                             context.getResources(), locale);
345             if (0 == resourceId) continue;
346             final AssetFileAddress fileAddress =
347                     BinaryDictionaryGetter.loadFallbackResource(context, resourceId);
348             final DictionaryInfo dictionaryInfo = createDictionaryInfoFromFileAddress(fileAddress);
349             // Protect against cases of a less-specific dictionary being found, like an
350             // en dictionary being used for an en_US locale. In this case, the en dictionary
351             // should be used for en_US but discounted for listing purposes.
352             if (!dictionaryInfo.mLocale.equals(locale)) continue;
353             addOrUpdateDictInfo(dictList, dictionaryInfo);
354         }
355 
356         return dictList;
357     }
358 }
359