• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2013 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin.utils;
18 
19 import android.content.ContentValues;
20 import android.content.Context;
21 import android.content.res.AssetManager;
22 import android.content.res.Resources;
23 import android.text.TextUtils;
24 import android.util.Log;
25 
26 import com.android.inputmethod.latin.AssetFileAddress;
27 import com.android.inputmethod.latin.BinaryDictionaryGetter;
28 import com.android.inputmethod.latin.Constants;
29 import com.android.inputmethod.latin.R;
30 import com.android.inputmethod.latin.makedict.DictionaryHeader;
31 import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
32 import com.android.inputmethod.latin.settings.SpacingAndPunctuations;
33 
34 import java.io.File;
35 import java.io.IOException;
36 import java.util.ArrayList;
37 import java.util.Iterator;
38 import java.util.Locale;
39 import java.util.concurrent.TimeUnit;
40 
41 /**
42  * This class encapsulates the logic for the Latin-IME side of dictionary information management.
43  */
44 public class DictionaryInfoUtils {
45     private static final String TAG = DictionaryInfoUtils.class.getSimpleName();
46     private static final String RESOURCE_PACKAGE_NAME = R.class.getPackage().getName();
47     private static final String DEFAULT_MAIN_DICT = "main";
48     private static final String MAIN_DICT_PREFIX = "main_";
49     // 6 digits - unicode is limited to 21 bits
50     private static final int MAX_HEX_DIGITS_FOR_CODEPOINT = 6;
51 
52     public static class DictionaryInfo {
53         private static final String LOCALE_COLUMN = "locale";
54         private static final String WORDLISTID_COLUMN = "id";
55         private static final String LOCAL_FILENAME_COLUMN = "filename";
56         private static final String DESCRIPTION_COLUMN = "description";
57         private static final String DATE_COLUMN = "date";
58         private static final String FILESIZE_COLUMN = "filesize";
59         private static final String VERSION_COLUMN = "version";
60         public final String mId;
61         public final Locale mLocale;
62         public final String mDescription;
63         public final AssetFileAddress mFileAddress;
64         public final int mVersion;
DictionaryInfo(final String id, final Locale locale, final String description, final AssetFileAddress fileAddress, final int version)65         public DictionaryInfo(final String id, final Locale locale, final String description,
66                 final AssetFileAddress fileAddress, final int version) {
67             mId = id;
68             mLocale = locale;
69             mDescription = description;
70             mFileAddress = fileAddress;
71             mVersion = version;
72         }
toContentValues()73         public ContentValues toContentValues() {
74             final ContentValues values = new ContentValues();
75             values.put(WORDLISTID_COLUMN, mId);
76             values.put(LOCALE_COLUMN, mLocale.toString());
77             values.put(DESCRIPTION_COLUMN, mDescription);
78             values.put(LOCAL_FILENAME_COLUMN, mFileAddress.mFilename);
79             values.put(DATE_COLUMN, TimeUnit.MILLISECONDS.toSeconds(
80                     new File(mFileAddress.mFilename).lastModified()));
81             values.put(FILESIZE_COLUMN, mFileAddress.mLength);
82             values.put(VERSION_COLUMN, mVersion);
83             return values;
84         }
85     }
86 
DictionaryInfoUtils()87     private DictionaryInfoUtils() {
88         // Private constructor to forbid instantation of this helper class.
89     }
90 
91     /**
92      * Returns whether we may want to use this character as part of a file name.
93      *
94      * This basically only accepts ascii letters and numbers, and rejects everything else.
95      */
isFileNameCharacter(int codePoint)96     private static boolean isFileNameCharacter(int codePoint) {
97         if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit
98         if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase
99         if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase
100         return codePoint == '_'; // Underscore
101     }
102 
103     /**
104      * Escapes a string for any characters that may be suspicious for a file or directory name.
105      *
106      * Concretely this does a sort of URL-encoding except it will encode everything that's not
107      * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which
108      * we cannot allow here)
109      */
110     // TODO: create a unit test for this method
replaceFileNameDangerousCharacters(final String name)111     public static String replaceFileNameDangerousCharacters(final String name) {
112         // This assumes '%' is fully available as a non-separator, normal
113         // character in a file name. This is probably true for all file systems.
114         final StringBuilder sb = new StringBuilder();
115         final int nameLength = name.length();
116         for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) {
117             final int codePoint = name.codePointAt(i);
118             if (DictionaryInfoUtils.isFileNameCharacter(codePoint)) {
119                 sb.appendCodePoint(codePoint);
120             } else {
121                 sb.append(String.format((Locale)null, "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x",
122                         codePoint));
123             }
124         }
125         return sb.toString();
126     }
127 
128     /**
129      * Helper method to get the top level cache directory.
130      */
getWordListCacheDirectory(final Context context)131     private static String getWordListCacheDirectory(final Context context) {
132         return context.getFilesDir() + File.separator + "dicts";
133     }
134 
135     /**
136      * Helper method to get the top level temp directory.
137      */
getWordListTempDirectory(final Context context)138     public static String getWordListTempDirectory(final Context context) {
139         return context.getFilesDir() + File.separator + "tmp";
140     }
141 
142     /**
143      * Reverse escaping done by replaceFileNameDangerousCharacters.
144      */
getWordListIdFromFileName(final String fname)145     public static String getWordListIdFromFileName(final String fname) {
146         final StringBuilder sb = new StringBuilder();
147         final int fnameLength = fname.length();
148         for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) {
149             final int codePoint = fname.codePointAt(i);
150             if ('%' != codePoint) {
151                 sb.appendCodePoint(codePoint);
152             } else {
153                 // + 1 to pass the % sign
154                 final int encodedCodePoint = Integer.parseInt(
155                         fname.substring(i + 1, i + 1 + MAX_HEX_DIGITS_FOR_CODEPOINT), 16);
156                 i += MAX_HEX_DIGITS_FOR_CODEPOINT;
157                 sb.appendCodePoint(encodedCodePoint);
158             }
159         }
160         return sb.toString();
161     }
162 
163     /**
164      * Helper method to the list of cache directories, one for each distinct locale.
165      */
getCachedDirectoryList(final Context context)166     public static File[] getCachedDirectoryList(final Context context) {
167         return new File(DictionaryInfoUtils.getWordListCacheDirectory(context)).listFiles();
168     }
169 
170     /**
171      * Returns the category for a given file name.
172      *
173      * This parses the file name, extracts the category, and returns it. See
174      * {@link #getMainDictId(Locale)} and {@link #isMainWordListId(String)}.
175      * @return The category as a string or null if it can't be found in the file name.
176      */
getCategoryFromFileName(final String fileName)177     public static String getCategoryFromFileName(final String fileName) {
178         final String id = getWordListIdFromFileName(fileName);
179         final String[] idArray = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR);
180         // An id is supposed to be in format category:locale, so splitting on the separator
181         // should yield a 2-elements array
182         if (2 != idArray.length) return null;
183         return idArray[0];
184     }
185 
186     /**
187      * Find out the cache directory associated with a specific locale.
188      */
getCacheDirectoryForLocale(final String locale, final Context context)189     private static String getCacheDirectoryForLocale(final String locale, final Context context) {
190         final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale);
191         final String absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator
192                 + relativeDirectoryName;
193         final File directory = new File(absoluteDirectoryName);
194         if (!directory.exists()) {
195             if (!directory.mkdirs()) {
196                 Log.e(TAG, "Could not create the directory for locale" + locale);
197             }
198         }
199         return absoluteDirectoryName;
200     }
201 
202     /**
203      * Generates a file name for the id and locale passed as an argument.
204      *
205      * In the current implementation the file name returned will always be unique for
206      * any id/locale pair, but please do not expect that the id can be the same for
207      * different dictionaries with different locales. An id should be unique for any
208      * dictionary.
209      * The file name is pretty much an URL-encoded version of the id inside a directory
210      * named like the locale, except it will also escape characters that look dangerous
211      * to some file systems.
212      * @param id the id of the dictionary for which to get a file name
213      * @param locale the locale for which to get the file name as a string
214      * @param context the context to use for getting the directory
215      * @return the name of the file to be created
216      */
getCacheFileName(String id, String locale, Context context)217     public static String getCacheFileName(String id, String locale, Context context) {
218         final String fileName = replaceFileNameDangerousCharacters(id);
219         return getCacheDirectoryForLocale(locale, context) + File.separator + fileName;
220     }
221 
isMainWordListId(final String id)222     public static boolean isMainWordListId(final String id) {
223         final String[] idArray = id.split(BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR);
224         // An id is supposed to be in format category:locale, so splitting on the separator
225         // should yield a 2-elements array
226         if (2 != idArray.length) return false;
227         return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY.equals(idArray[0]);
228     }
229 
230     /**
231      * Helper method to return a dictionary res id for a locale, or 0 if none.
232      * @param locale dictionary locale
233      * @return main dictionary resource id
234      */
getMainDictionaryResourceIdIfAvailableForLocale(final Resources res, final Locale locale)235     public static int getMainDictionaryResourceIdIfAvailableForLocale(final Resources res,
236             final Locale locale) {
237         int resId;
238         // Try to find main_language_country dictionary.
239         if (!locale.getCountry().isEmpty()) {
240             final String dictLanguageCountry =
241                     MAIN_DICT_PREFIX + locale.toString().toLowerCase(Locale.ROOT);
242             if ((resId = res.getIdentifier(
243                     dictLanguageCountry, "raw", RESOURCE_PACKAGE_NAME)) != 0) {
244                 return resId;
245             }
246         }
247 
248         // Try to find main_language dictionary.
249         final String dictLanguage = MAIN_DICT_PREFIX + locale.getLanguage();
250         if ((resId = res.getIdentifier(dictLanguage, "raw", RESOURCE_PACKAGE_NAME)) != 0) {
251             return resId;
252         }
253 
254         // Not found, return 0
255         return 0;
256     }
257 
258     /**
259      * Returns a main dictionary resource id
260      * @param locale dictionary locale
261      * @return main dictionary resource id
262      */
getMainDictionaryResourceId(final Resources res, final Locale locale)263     public static int getMainDictionaryResourceId(final Resources res, final Locale locale) {
264         int resourceId = getMainDictionaryResourceIdIfAvailableForLocale(res, locale);
265         if (0 != resourceId) return resourceId;
266         return res.getIdentifier(DEFAULT_MAIN_DICT, "raw", RESOURCE_PACKAGE_NAME);
267     }
268 
269     /**
270      * Returns the id associated with the main word list for a specified locale.
271      *
272      * Word lists stored in Android Keyboard's resources are referred to as the "main"
273      * word lists. Since they can be updated like any other list, we need to assign a
274      * unique ID to them. This ID is just the name of the language (locale-wise) they
275      * are for, and this method returns this ID.
276      */
getMainDictId(final Locale locale)277     public static String getMainDictId(final Locale locale) {
278         // This works because we don't include by default different dictionaries for
279         // different countries. This actually needs to return the id that we would
280         // like to use for word lists included in resources, and the following is okay.
281         return BinaryDictionaryGetter.MAIN_DICTIONARY_CATEGORY +
282                 BinaryDictionaryGetter.ID_CATEGORY_SEPARATOR + locale.getLanguage().toString();
283     }
284 
getDictionaryFileHeaderOrNull(final File file)285     public static DictionaryHeader getDictionaryFileHeaderOrNull(final File file) {
286         return getDictionaryFileHeaderOrNull(file, 0, file.length());
287     }
288 
getDictionaryFileHeaderOrNull(final File file, final long offset, final long length)289     private static DictionaryHeader getDictionaryFileHeaderOrNull(final File file,
290             final long offset, final long length) {
291         try {
292             final DictionaryHeader header =
293                     BinaryDictionaryUtils.getHeaderWithOffsetAndLength(file, offset, length);
294             return header;
295         } catch (UnsupportedFormatException e) {
296             return null;
297         } catch (IOException e) {
298             return null;
299         }
300     }
301 
302     /**
303      * Returns information of the dictionary.
304      *
305      * @param fileAddress the asset dictionary file address.
306      * @return information of the specified dictionary.
307      */
createDictionaryInfoFromFileAddress( final AssetFileAddress fileAddress)308     private static DictionaryInfo createDictionaryInfoFromFileAddress(
309             final AssetFileAddress fileAddress) {
310         final DictionaryHeader header = getDictionaryFileHeaderOrNull(
311                 new File(fileAddress.mFilename), fileAddress.mOffset, fileAddress.mLength);
312         if (header == null) {
313             return null;
314         }
315         final String id = header.getId();
316         final Locale locale = LocaleUtils.constructLocaleFromString(header.getLocaleString());
317         final String description = header.getDescription();
318         final String version = header.getVersion();
319         return new DictionaryInfo(id, locale, description, fileAddress, Integer.parseInt(version));
320     }
321 
addOrUpdateDictInfo(final ArrayList<DictionaryInfo> dictList, final DictionaryInfo newElement)322     private static void addOrUpdateDictInfo(final ArrayList<DictionaryInfo> dictList,
323             final DictionaryInfo newElement) {
324         final Iterator<DictionaryInfo> iter = dictList.iterator();
325         while (iter.hasNext()) {
326             final DictionaryInfo thisDictInfo = iter.next();
327             if (thisDictInfo.mLocale.equals(newElement.mLocale)) {
328                 if (newElement.mVersion <= thisDictInfo.mVersion) {
329                     return;
330                 }
331                 iter.remove();
332             }
333         }
334         dictList.add(newElement);
335     }
336 
getCurrentDictionaryFileNameAndVersionInfo( final Context context)337     public static ArrayList<DictionaryInfo> getCurrentDictionaryFileNameAndVersionInfo(
338             final Context context) {
339         final ArrayList<DictionaryInfo> dictList = new ArrayList<>();
340 
341         // Retrieve downloaded dictionaries
342         final File[] directoryList = getCachedDirectoryList(context);
343         if (null != directoryList) {
344             for (final File directory : directoryList) {
345                 final String localeString = getWordListIdFromFileName(directory.getName());
346                 File[] dicts = BinaryDictionaryGetter.getCachedWordLists(localeString, context);
347                 for (final File dict : dicts) {
348                     final String wordListId = getWordListIdFromFileName(dict.getName());
349                     if (!DictionaryInfoUtils.isMainWordListId(wordListId)) continue;
350                     final Locale locale = LocaleUtils.constructLocaleFromString(localeString);
351                     final AssetFileAddress fileAddress = AssetFileAddress.makeFromFile(dict);
352                     final DictionaryInfo dictionaryInfo =
353                             createDictionaryInfoFromFileAddress(fileAddress);
354                     // Protect against cases of a less-specific dictionary being found, like an
355                     // en dictionary being used for an en_US locale. In this case, the en dictionary
356                     // should be used for en_US but discounted for listing purposes.
357                     if (dictionaryInfo == null || !dictionaryInfo.mLocale.equals(locale)) continue;
358                     addOrUpdateDictInfo(dictList, dictionaryInfo);
359                 }
360             }
361         }
362 
363         // Retrieve files from assets
364         final Resources resources = context.getResources();
365         final AssetManager assets = resources.getAssets();
366         for (final String localeString : assets.getLocales()) {
367             final Locale locale = LocaleUtils.constructLocaleFromString(localeString);
368             final int resourceId =
369                     DictionaryInfoUtils.getMainDictionaryResourceIdIfAvailableForLocale(
370                             context.getResources(), locale);
371             if (0 == resourceId) continue;
372             final AssetFileAddress fileAddress =
373                     BinaryDictionaryGetter.loadFallbackResource(context, resourceId);
374             final DictionaryInfo dictionaryInfo = createDictionaryInfoFromFileAddress(fileAddress);
375             // Protect against cases of a less-specific dictionary being found, like an
376             // en dictionary being used for an en_US locale. In this case, the en dictionary
377             // should be used for en_US but discounted for listing purposes.
378             if (!dictionaryInfo.mLocale.equals(locale)) continue;
379             addOrUpdateDictInfo(dictList, dictionaryInfo);
380         }
381 
382         return dictList;
383     }
384 
looksValidForDictionaryInsertion(final CharSequence text, final SpacingAndPunctuations spacingAndPunctuations)385     public static boolean looksValidForDictionaryInsertion(final CharSequence text,
386             final SpacingAndPunctuations spacingAndPunctuations) {
387         if (TextUtils.isEmpty(text)) return false;
388         final int length = text.length();
389         if (length > Constants.DICTIONARY_MAX_WORD_LENGTH) {
390             return false;
391         }
392         int i = 0;
393         int digitCount = 0;
394         while (i < length) {
395             final int codePoint = Character.codePointAt(text, i);
396             final int charCount = Character.charCount(codePoint);
397             i += charCount;
398             if (Character.isDigit(codePoint)) {
399                 // Count digits: see below
400                 digitCount += charCount;
401                 continue;
402             }
403             if (!spacingAndPunctuations.isWordCodePoint(codePoint)) return false;
404         }
405         // We reject strings entirely comprised of digits to avoid using PIN codes or credit
406         // card numbers. It would come in handy for word prediction though; a good example is
407         // when writing one's address where the street number is usually quite discriminative,
408         // as well as the postal code.
409         return digitCount < length;
410     }
411 }
412