• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin.makedict;
18 
19 import android.test.AndroidTestCase;
20 import android.test.MoreAsserts;
21 import android.test.suitebuilder.annotation.LargeTest;
22 import android.util.Log;
23 
24 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
25 import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
26 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
27 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
28 import com.android.inputmethod.latin.utils.CollectionUtils;
29 
30 import java.io.File;
31 import java.io.IOException;
32 import java.util.ArrayList;
33 import java.util.HashMap;
34 import java.util.Random;
35 
36 @LargeTest
37 public class BinaryDictIOUtilsTests extends AndroidTestCase {
// Tag attached to every log message emitted by this test class.
private static final String TAG = BinaryDictIOUtilsTests.class.getSimpleName();
// Options handed to the encoder whenever a test writes its initial dictionary.
// NOTE(review): the arguments are presumably (version, supportsDynamicUpdate) - confirm
// against FormatSpec.FormatOptions.
private static final FormatSpec.FormatOptions FORMAT_OPTIONS =
        new FormatSpec.FormatOptions(3, true);

// Random words used by testRandomWords. The list is static but is cleared and refilled by
// the seeded constructor, so every new instance replaces its contents.
private static final ArrayList<String> sWords = CollectionUtils.newArrayList();
public static final int DEFAULT_MAX_UNIGRAMS = 1500;
// Number of random words requested when this instance was constructed.
private final int mMaxUnigrams;

// Suffix of the temporary dictionary files created in the cache directory.
private static final String TEST_DICT_FILE_EXTENSION = ".testDict";

// Format versions accepted by the helpers below; only VERSION3 is referenced in this file.
private static final int VERSION3 = 3;
private static final int VERSION4 = 4;

// Alphabet used by generateWord(): each entry serves as one digit of a generated word.
private static final String[] CHARACTERS = {
    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
    "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
    "\u00FC" /* ü */, "\u00E2" /* â */, "\u00F1" /* ñ */, // accented characters
    "\u4E9C" /* 亜 */, "\u4F0A" /* 伊 */, "\u5B87" /* 宇 */, // kanji
    "\uD841\uDE28" /* 𠘨 */, "\uD840\uDC0B" /* 𠀋 */, "\uD861\uDED7" /* 𨛗 */ // surrogate pair
};
58 
/**
 * Creates the test case with the current time as seed and DEFAULT_MAX_UNIGRAMS random words.
 */
public BinaryDictIOUtilsTests() {
    this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
}

/**
 * Creates the test case and refills the shared word list from a seeded random generator.
 *
 * @param seed seed of the random generator; it is written to the debug log
 * @param maxUnigrams number of random words to generate
 */
public BinaryDictIOUtilsTests(final long seed, final int maxUnigrams) {
    super();
    Log.d(TAG, "Seed for test is " + seed + ", maxUnigrams is " + maxUnigrams);
    mMaxUnigrams = maxUnigrams;
    // The list is static: drop whatever a previous instance left in it.
    sWords.clear();
    final Random wordSource = new Random(seed);
    int remaining = maxUnigrams;
    while (remaining-- > 0) {
        sWords.add(generateWord(wordSource.nextInt()));
    }
}
74 
75     // Utilities for test
generateWord(final int value)76     private String generateWord(final int value) {
77         final int lengthOfChars = CHARACTERS.length;
78         StringBuilder builder = new StringBuilder("");
79         long lvalue = Math.abs((long)value);
80         while (lvalue > 0) {
81             builder.append(CHARACTERS[(int)(lvalue % lengthOfChars)]);
82             lvalue /= lengthOfChars;
83         }
84         if (builder.toString().equals("")) return "a";
85         return builder.toString();
86     }
87 
printPtNode(final PtNodeInfo info)88     private static void printPtNode(final PtNodeInfo info) {
89         Log.d(TAG, "    PtNode at " + info.mOriginalAddress);
90         Log.d(TAG, "        flags = " + info.mFlags);
91         Log.d(TAG, "        parentAddress = " + info.mParentAddress);
92         Log.d(TAG, "        characters = " + new String(info.mCharacters, 0,
93                 info.mCharacters.length));
94         if (info.mFrequency != -1) Log.d(TAG, "        frequency = " + info.mFrequency);
95         if (info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS) {
96             Log.d(TAG, "        children address = no children address");
97         } else {
98             Log.d(TAG, "        children address = " + info.mChildrenAddress);
99         }
100         if (info.mShortcutTargets != null) {
101             for (final WeightedString ws : info.mShortcutTargets) {
102                 Log.d(TAG, "        shortcuts = " + ws.mWord);
103             }
104         }
105         if (info.mBigrams != null) {
106             for (final PendingAttribute attr : info.mBigrams) {
107                 Log.d(TAG, "        bigram = " + attr.mAddress);
108             }
109         }
110         Log.d(TAG, "    end address = " + info.mEndAddress);
111     }
112 
printNode(final Ver3DictDecoder dictDecoder, final FormatSpec.FormatOptions formatOptions)113     private static void printNode(final Ver3DictDecoder dictDecoder,
114             final FormatSpec.FormatOptions formatOptions) {
115         final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
116         Log.d(TAG, "Node at " + dictBuffer.position());
117         final int count = BinaryDictDecoderUtils.readPtNodeCount(dictBuffer);
118         Log.d(TAG, "    ptNodeCount = " + count);
119         for (int i = 0; i < count; ++i) {
120             final PtNodeInfo currentInfo = dictDecoder.readPtNode(dictBuffer.position(),
121                     formatOptions);
122             printPtNode(currentInfo);
123         }
124         if (formatOptions.mSupportsDynamicUpdate) {
125             final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
126             Log.d(TAG, "    forwardLinkAddress = " + forwardLinkAddress);
127         }
128     }
129 
130     @SuppressWarnings("unused")
printBinaryFile(final Ver3DictDecoder dictDecoder)131     private static void printBinaryFile(final Ver3DictDecoder dictDecoder)
132             throws IOException, UnsupportedFormatException {
133         final FileHeader fileHeader = dictDecoder.readHeader();
134         final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
135         while (dictBuffer.position() < dictBuffer.limit()) {
136             printNode(dictDecoder, fileHeader.mFormatOptions);
137         }
138     }
139 
getWordPosition(final File file, final String word)140     private int getWordPosition(final File file, final String word) {
141         int position = FormatSpec.NOT_VALID_WORD;
142 
143         try {
144             final Ver3DictDecoder dictDecoder = new Ver3DictDecoder(file,
145                     DictDecoder.USE_READONLY_BYTEBUFFER);
146             position = dictDecoder.getTerminalPosition(word);
147         } catch (IOException e) {
148         } catch (UnsupportedFormatException e) {
149         }
150         return position;
151     }
152 
153     /**
154      * Find a word using the DictDecoder.
155      *
156      * @param dictDecoder the dict decoder
157      * @param word the word searched
158      * @return the found ptNodeInfo
159      * @throws IOException
160      * @throws UnsupportedFormatException
161      */
findWordByBinaryDictReader(final DictDecoder dictDecoder, final String word)162     private static PtNodeInfo findWordByBinaryDictReader(final DictDecoder dictDecoder,
163             final String word) throws IOException, UnsupportedFormatException {
164         int position = dictDecoder.getTerminalPosition(word);
165         if (position != FormatSpec.NOT_VALID_WORD) {
166             dictDecoder.setPosition(0);
167             final FileHeader header = dictDecoder.readHeader();
168             dictDecoder.setPosition(position);
169             return dictDecoder.readPtNode(position, header.mFormatOptions);
170         }
171         return null;
172     }
173 
findWordFromFile(final File file, final String word)174     private PtNodeInfo findWordFromFile(final File file, final String word) {
175         final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
176         PtNodeInfo info = null;
177         try {
178             dictDecoder.openDictBuffer();
179             info = findWordByBinaryDictReader(dictDecoder, word);
180         } catch (IOException e) {
181         } catch (UnsupportedFormatException e) {
182         }
183         return info;
184     }
185 
186     // return amount of time to insert a word
insertAndCheckWord(final File file, final String word, final int frequency, final boolean exist, final ArrayList<WeightedString> bigrams, final ArrayList<WeightedString> shortcuts, final int formatVersion)187     private long insertAndCheckWord(final File file, final String word, final int frequency,
188             final boolean exist, final ArrayList<WeightedString> bigrams,
189             final ArrayList<WeightedString> shortcuts, final int formatVersion) {
190         long amountOfTime = -1;
191         try {
192             final DictUpdater dictUpdater;
193             if (formatVersion == VERSION3) {
194                 dictUpdater = new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
195             } else {
196                 throw new RuntimeException("DictUpdater for version " + formatVersion + " doesn't"
197                         + " exist.");
198             }
199 
200             if (!exist) {
201                 assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
202             }
203             final long now = System.nanoTime();
204             dictUpdater.insertWord(word, frequency, bigrams, shortcuts, false, false);
205             amountOfTime = System.nanoTime() - now;
206             MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
207         } catch (IOException e) {
208             Log.e(TAG, "Raised an IOException while inserting a word", e);
209         } catch (UnsupportedFormatException e) {
210             Log.e(TAG, "Raised an UnsupportedFormatException error while inserting a word", e);
211         }
212         return amountOfTime;
213     }
214 
deleteWord(final File file, final String word, final int formatVersion)215     private void deleteWord(final File file, final String word, final int formatVersion) {
216         try {
217             final DictUpdater dictUpdater;
218             if (formatVersion == VERSION3) {
219                 dictUpdater = new Ver3DictUpdater(file, DictDecoder.USE_WRITABLE_BYTEBUFFER);
220             } else {
221                 throw new RuntimeException("DictUpdater for version " + formatVersion + " doesn't"
222                         + " exist.");
223             }
224             dictUpdater.deleteWord(word);
225         } catch (IOException e) {
226         } catch (UnsupportedFormatException e) {
227         }
228     }
229 
checkReverseLookup(final File file, final String word, final int position)230     private void checkReverseLookup(final File file, final String word, final int position) {
231 
232         try {
233             final DictDecoder dictDecoder = FormatSpec.getDictDecoder(file);
234             final FileHeader fileHeader = dictDecoder.readHeader();
235             assertEquals(word,
236                     BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mHeaderSize,
237                             position, fileHeader.mFormatOptions).mWord);
238         } catch (IOException e) {
239             Log.e(TAG, "Raised an IOException while looking up a word", e);
240         } catch (UnsupportedFormatException e) {
241             Log.e(TAG, "Raised an UnsupportedFormatException error while looking up a word", e);
242         }
243     }
244 
runTestInsertWord(final int formatVersion)245     private void runTestInsertWord(final int formatVersion) {
246         File file = null;
247         try {
248             file = File.createTempFile("testInsertWord", TEST_DICT_FILE_EXTENSION,
249                     getContext().getCacheDir());
250         } catch (IOException e) {
251             fail("IOException while creating temporary file: " + e);
252         }
253 
254         // set an initial dictionary.
255         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
256                 new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
257         dict.add("abcd", 10, null, false);
258 
259         try {
260             final DictEncoder dictEncoder = new Ver3DictEncoder(file);
261             dictEncoder.writeDictionary(dict, FORMAT_OPTIONS);
262         } catch (IOException e) {
263             fail("IOException while writing an initial dictionary : " + e);
264         } catch (UnsupportedFormatException e) {
265             fail("UnsupportedFormatException while writing an initial dictionary : " + e);
266         }
267 
268         MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
269         insertAndCheckWord(file, "abcde", 10, false, null, null, formatVersion);
270 
271         insertAndCheckWord(file, "abcdefghijklmn", 10, false, null, null, formatVersion);
272         checkReverseLookup(file, "abcdefghijklmn", getWordPosition(file, "abcdefghijklmn"));
273 
274         insertAndCheckWord(file, "abcdabcd", 10, false, null, null, formatVersion);
275         checkReverseLookup(file, "abcdabcd", getWordPosition(file, "abcdabcd"));
276 
277         // update the existing word.
278         insertAndCheckWord(file, "abcdabcd", 15, true, null, null, formatVersion);
279 
280         // split 1
281         insertAndCheckWord(file, "ab", 20, false, null, null, formatVersion);
282 
283         // split 2
284         insertAndCheckWord(file, "ami", 30, false, null, null, formatVersion);
285 
286         deleteWord(file, "ami", formatVersion);
287         assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "ami"));
288 
289         insertAndCheckWord(file, "abcdabfg", 30, false, null, null, formatVersion);
290 
291         deleteWord(file, "abcd", formatVersion);
292         assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, "abcd"));
293     }
294 
testInsertWord()295     public void testInsertWord() {
296         runTestInsertWord(VERSION3);
297     }
298 
runTestInsertWordWithBigrams(final int formatVersion)299     private void runTestInsertWordWithBigrams(final int formatVersion) {
300         File file = null;
301         try {
302             file = File.createTempFile("testInsertWordWithBigrams", TEST_DICT_FILE_EXTENSION,
303                     getContext().getCacheDir());
304         } catch (IOException e) {
305             fail("IOException while creating temporary file: " + e);
306         }
307 
308         // set an initial dictionary.
309         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
310                 new FusionDictionary.DictionaryOptions(new HashMap<String,String>(), false, false));
311         dict.add("abcd", 10, null, false);
312         dict.add("efgh", 15, null, false);
313 
314         try {
315             final DictEncoder dictEncoder = new Ver3DictEncoder(file);
316             dictEncoder.writeDictionary(dict, FORMAT_OPTIONS);
317         } catch (IOException e) {
318             fail("IOException while writing an initial dictionary : " + e);
319         } catch (UnsupportedFormatException e) {
320             fail("UnsupportedFormatException while writing an initial dictionary : " + e);
321         }
322 
323         final ArrayList<WeightedString> banana = new ArrayList<WeightedString>();
324         banana.add(new WeightedString("banana", 10));
325 
326         insertAndCheckWord(file, "banana", 0, false, null, null, formatVersion);
327         insertAndCheckWord(file, "recursive", 60, true, banana, null, formatVersion);
328 
329         final PtNodeInfo info = findWordFromFile(file, "recursive");
330         int bananaPos = getWordPosition(file, "banana");
331         assertNotNull(info.mBigrams);
332         assertEquals(info.mBigrams.size(), 1);
333         assertEquals(info.mBigrams.get(0).mAddress, bananaPos);
334     }
335 
testInsertWordWithBigrams()336     public void testInsertWordWithBigrams() {
337         runTestInsertWordWithBigrams(VERSION3);
338     }
339 
runTestRandomWords(final int formatVersion)340     private void runTestRandomWords(final int formatVersion) {
341         File file = null;
342         try {
343             file = File.createTempFile("testRandomWord", TEST_DICT_FILE_EXTENSION,
344                     getContext().getCacheDir());
345         } catch (IOException e) {
346         }
347         assertNotNull(file);
348 
349         // set an initial dictionary.
350         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
351                 new FusionDictionary.DictionaryOptions(new HashMap<String, String>(), false,
352                         false));
353         dict.add("initial", 10, null, false);
354 
355         try {
356             final DictEncoder dictEncoder = new Ver3DictEncoder(file);
357             dictEncoder.writeDictionary(dict, FORMAT_OPTIONS);
358         } catch (IOException e) {
359             assertTrue(false);
360         } catch (UnsupportedFormatException e) {
361             assertTrue(false);
362         }
363 
364         long maxTimeToInsert = 0, sum = 0;
365         long minTimeToInsert = 100000000; // 1000000000 is an upper bound for minTimeToInsert.
366         int cnt = 0;
367         for (final String word : sWords) {
368             final long diff = insertAndCheckWord(file, word,
369                     cnt % FormatSpec.MAX_TERMINAL_FREQUENCY, false, null, null, formatVersion);
370             maxTimeToInsert = Math.max(maxTimeToInsert, diff);
371             minTimeToInsert = Math.min(minTimeToInsert, diff);
372             sum += diff;
373             cnt++;
374         }
375         cnt = 0;
376         for (final String word : sWords) {
377             MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
378         }
379 
380         Log.d(TAG, "Test version " + formatVersion);
381         Log.d(TAG, "max = " + ((double)maxTimeToInsert/1000000) + " ms.");
382         Log.d(TAG, "min = " + ((double)minTimeToInsert/1000000) + " ms.");
383         Log.d(TAG, "avg = " + ((double)sum/mMaxUnigrams/1000000) + " ms.");
384     }
385 
testRandomWords()386     public void testRandomWords() {
387         runTestRandomWords(VERSION3);
388     }
389 }
390