1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import android.content.Context; 20 import android.text.TextUtils; 21 22 import com.android.inputmethod.keyboard.ProximityInfo; 23 24 import java.util.Arrays; 25 import java.util.Locale; 26 27 /** 28 * Implements a static, compacted, binary dictionary of standard words. 29 */ 30 public class BinaryDictionary extends Dictionary { 31 32 public static final String DICTIONARY_PACK_AUTHORITY = 33 "com.android.inputmethod.latin.dictionarypack"; 34 35 /** 36 * There is a difference between what java and native code can handle. 37 * This value should only be used in BinaryDictionary.java 38 * It is necessary to keep it at this value because some languages e.g. German have 39 * really long words. 40 */ 41 public static final int MAX_WORD_LENGTH = 48; 42 public static final int MAX_WORDS = 18; 43 44 private static final String TAG = "BinaryDictionary"; 45 private static final int MAX_BIGRAMS = 60; 46 47 private static final int TYPED_LETTER_MULTIPLIER = 2; 48 49 private int mDicTypeId; 50 private long mNativeDict; 51 private final int[] mInputCodes = new int[MAX_WORD_LENGTH]; 52 private final char[] mOutputChars = new char[MAX_WORD_LENGTH * MAX_WORDS]; 53 private final char[] mOutputChars_bigrams = new char[MAX_WORD_LENGTH * MAX_BIGRAMS]; 54 private final int[] mScores = new int[MAX_WORDS]; 55 private final int[] mBigramScores = new int[MAX_BIGRAMS]; 56 57 private final boolean mUseFullEditDistance; 58 59 /** 60 * Constructor for the binary dictionary. This is supposed to be called from the 61 * dictionary factory. 62 * All implementations should pass null into flagArray, except for testing purposes. 63 * @param context the context to access the environment from. 64 * @param filename the name of the file to read through native code. 65 * @param offset the offset of the dictionary data within the file. 66 * @param length the length of the binary data. 67 * @param useFullEditDistance whether to use the full edit distance in suggestions 68 */ BinaryDictionary(final Context context, final String filename, final long offset, final long length, final boolean useFullEditDistance, final Locale locale)69 public BinaryDictionary(final Context context, 70 final String filename, final long offset, final long length, 71 final boolean useFullEditDistance, final Locale locale) { 72 // Note: at the moment a binary dictionary is always of the "main" type. 73 // Initializing this here will help transitioning out of the scheme where 74 // the Suggest class knows everything about every single dictionary. 75 mDicTypeId = Suggest.DIC_MAIN; 76 mUseFullEditDistance = useFullEditDistance; 77 loadDictionary(filename, offset, length); 78 } 79 80 static { JniUtils.loadNativeLibrary()81 JniUtils.loadNativeLibrary(); 82 } 83 openNative(String sourceDir, long dictOffset, long dictSize, int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords)84 private native long openNative(String sourceDir, long dictOffset, long dictSize, 85 int typedLetterMultiplier, int fullWordMultiplier, int maxWordLength, int maxWords); closeNative(long dict)86 private native void closeNative(long dict); getFrequencyNative(long dict, int[] word, int wordLength)87 private native int getFrequencyNative(long dict, int[] word, int wordLength); isValidBigramNative(long dict, int[] word1, int[] word2)88 private native boolean isValidBigramNative(long dict, int[] word1, int[] word2); getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates, int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams, boolean useFullEditDistance, char[] outputChars, int[] scores)89 private native int getSuggestionsNative(long dict, long proximityInfo, int[] xCoordinates, 90 int[] yCoordinates, int[] inputCodes, int codesSize, int[] prevWordForBigrams, 91 boolean useFullEditDistance, char[] outputChars, int[] scores); getBigramsNative(long dict, int[] prevWord, int prevWordLength, int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores, int maxWordLength, int maxBigrams)92 private native int getBigramsNative(long dict, int[] prevWord, int prevWordLength, 93 int[] inputCodes, int inputCodesLength, char[] outputChars, int[] scores, 94 int maxWordLength, int maxBigrams); calcNormalizedScoreNative( char[] before, int beforeLength, char[] after, int afterLength, int score)95 private static native float calcNormalizedScoreNative( 96 char[] before, int beforeLength, char[] after, int afterLength, int score); editDistanceNative( char[] before, int beforeLength, char[] after, int afterLength)97 private static native int editDistanceNative( 98 char[] before, int beforeLength, char[] after, int afterLength); 99 loadDictionary(String path, long startOffset, long length)100 private final void loadDictionary(String path, long startOffset, long length) { 101 mNativeDict = openNative(path, startOffset, length, 102 TYPED_LETTER_MULTIPLIER, FULL_WORD_SCORE_MULTIPLIER, MAX_WORD_LENGTH, MAX_WORDS); 103 } 104 105 @Override getBigrams(final WordComposer codes, final CharSequence previousWord, final WordCallback callback)106 public void getBigrams(final WordComposer codes, final CharSequence previousWord, 107 final WordCallback callback) { 108 if (mNativeDict == 0) return; 109 110 int[] codePoints = StringUtils.toCodePointArray(previousWord.toString()); 111 Arrays.fill(mOutputChars_bigrams, (char) 0); 112 Arrays.fill(mBigramScores, 0); 113 114 int codesSize = codes.size(); 115 Arrays.fill(mInputCodes, -1); 116 if (codesSize > 0) { 117 mInputCodes[0] = codes.getCodeAt(0); 118 } 119 120 int count = getBigramsNative(mNativeDict, codePoints, codePoints.length, mInputCodes, 121 codesSize, mOutputChars_bigrams, mBigramScores, MAX_WORD_LENGTH, MAX_BIGRAMS); 122 if (count > MAX_BIGRAMS) { 123 count = MAX_BIGRAMS; 124 } 125 126 for (int j = 0; j < count; ++j) { 127 if (codesSize > 0 && mBigramScores[j] < 1) break; 128 final int start = j * MAX_WORD_LENGTH; 129 int len = 0; 130 while (len < MAX_WORD_LENGTH && mOutputChars_bigrams[start + len] != 0) { 131 ++len; 132 } 133 if (len > 0) { 134 callback.addWord(mOutputChars_bigrams, start, len, mBigramScores[j], 135 mDicTypeId, Dictionary.BIGRAM); 136 } 137 } 138 } 139 140 // proximityInfo and/or prevWordForBigrams may not be null. 141 @Override getWords(final WordComposer codes, final CharSequence prevWordForBigrams, final WordCallback callback, final ProximityInfo proximityInfo)142 public void getWords(final WordComposer codes, final CharSequence prevWordForBigrams, 143 final WordCallback callback, final ProximityInfo proximityInfo) { 144 final int count = getSuggestions(codes, prevWordForBigrams, proximityInfo, mOutputChars, 145 mScores); 146 147 for (int j = 0; j < count; ++j) { 148 if (mScores[j] < 1) break; 149 final int start = j * MAX_WORD_LENGTH; 150 int len = 0; 151 while (len < MAX_WORD_LENGTH && mOutputChars[start + len] != 0) { 152 ++len; 153 } 154 if (len > 0) { 155 callback.addWord(mOutputChars, start, len, mScores[j], mDicTypeId, 156 Dictionary.UNIGRAM); 157 } 158 } 159 } 160 isValidDictionary()161 /* package for test */ boolean isValidDictionary() { 162 return mNativeDict != 0; 163 } 164 165 // proximityInfo may not be null. getSuggestions(final WordComposer codes, final CharSequence prevWordForBigrams, final ProximityInfo proximityInfo, char[] outputChars, int[] scores)166 /* package for test */ int getSuggestions(final WordComposer codes, 167 final CharSequence prevWordForBigrams, final ProximityInfo proximityInfo, 168 char[] outputChars, int[] scores) { 169 if (!isValidDictionary()) return -1; 170 171 final int codesSize = codes.size(); 172 // Won't deal with really long words. 173 if (codesSize > MAX_WORD_LENGTH - 1) return -1; 174 175 Arrays.fill(mInputCodes, WordComposer.NOT_A_CODE); 176 for (int i = 0; i < codesSize; i++) { 177 mInputCodes[i] = codes.getCodeAt(i); 178 } 179 Arrays.fill(outputChars, (char) 0); 180 Arrays.fill(scores, 0); 181 182 final int[] prevWordCodePointArray = null == prevWordForBigrams 183 ? null : StringUtils.toCodePointArray(prevWordForBigrams.toString()); 184 185 // TODO: pass the previous word to native code 186 return getSuggestionsNative( 187 mNativeDict, proximityInfo.getNativeProximityInfo(), 188 codes.getXCoordinates(), codes.getYCoordinates(), mInputCodes, codesSize, 189 prevWordCodePointArray, mUseFullEditDistance, outputChars, scores); 190 } 191 calcNormalizedScore(String before, String after, int score)192 public static float calcNormalizedScore(String before, String after, int score) { 193 return calcNormalizedScoreNative(before.toCharArray(), before.length(), 194 after.toCharArray(), after.length(), score); 195 } 196 editDistance(String before, String after)197 public static int editDistance(String before, String after) { 198 return editDistanceNative( 199 before.toCharArray(), before.length(), after.toCharArray(), after.length()); 200 } 201 202 @Override isValidWord(CharSequence word)203 public boolean isValidWord(CharSequence word) { 204 return getFrequency(word) >= 0; 205 } 206 207 @Override getFrequency(CharSequence word)208 public int getFrequency(CharSequence word) { 209 if (word == null) return -1; 210 int[] chars = StringUtils.toCodePointArray(word.toString()); 211 return getFrequencyNative(mNativeDict, chars, chars.length); 212 } 213 214 // TODO: Add a batch process version (isValidBigramMultiple?) to avoid excessive numbers of jni 215 // calls when checking for changes in an entire dictionary. isValidBigram(CharSequence word1, CharSequence word2)216 public boolean isValidBigram(CharSequence word1, CharSequence word2) { 217 if (TextUtils.isEmpty(word1) || TextUtils.isEmpty(word2)) return false; 218 int[] chars1 = StringUtils.toCodePointArray(word1.toString()); 219 int[] chars2 = StringUtils.toCodePointArray(word2.toString()); 220 return isValidBigramNative(mNativeDict, chars1, chars2); 221 } 222 223 @Override close()224 public synchronized void close() { 225 closeInternal(); 226 } 227 closeInternal()228 private void closeInternal() { 229 if (mNativeDict != 0) { 230 closeNative(mNativeDict); 231 mNativeDict = 0; 232 } 233 } 234 235 @Override finalize()236 protected void finalize() throws Throwable { 237 try { 238 closeInternal(); 239 } finally { 240 super.finalize(); 241 } 242 } 243 } 244