• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2012 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.inputmethod.latin.makedict;
18 
19 import android.test.AndroidTestCase;
20 import android.test.suitebuilder.annotation.LargeTest;
21 import android.util.Log;
22 import android.util.Pair;
23 import android.util.SparseArray;
24 
25 import com.android.inputmethod.latin.BinaryDictionary;
26 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
27 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
28 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
29 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
30 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
31 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
32 import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
33 
34 import java.io.File;
35 import java.io.IOException;
36 import java.util.ArrayList;
37 import java.util.Arrays;
38 import java.util.HashMap;
39 import java.util.HashSet;
40 import java.util.List;
41 import java.util.Locale;
42 import java.util.Map.Entry;
43 import java.util.Random;
44 import java.util.Set;
45 import java.util.TreeMap;
46 
47 /**
48  * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils.
49  */
50 @LargeTest
51 public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
    /** Tag used for all log output of this test class. */
    private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName();
    /** Number of words requested from generateWords by the no-argument constructor. */
    private static final int DEFAULT_MAX_UNIGRAMS = 300;
    /** Size of the code point set used to generate the words in sWords. */
    private static final int DEFAULT_CODE_POINT_SET_SIZE = 50;
    /** Size of the code point set used to generate sWordsWithVariousCodePoints. */
    private static final int LARGE_CODE_POINT_SET_SIZE = 300;
    /** Probability given to every unigram and every shortcut target added by addUnigrams. */
    private static final int UNIGRAM_FREQ = 10;
    /** Probability given to every bigram added by addBigrams. */
    private static final int BIGRAM_FREQ = 50;
    /** Exclusive bound on |reconstructed bigram frequency - BIGRAM_FREQ| in checkWordMap. */
    private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
    /** Number of random draws of a source word that receives shortcuts. */
    private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50;
    /** Number of shortcut targets drawn for each source word. */
    private static final int NUM_OF_SHORTCUTS = 5;

    // Shared fixtures. They are static and are (re)filled by the constructor.
    /** Words generated from the default-size code point set. */
    private static final ArrayList<String> sWords = new ArrayList<>();
    /** Words generated from the large code point set. */
    private static final ArrayList<String> sWordsWithVariousCodePoints = new ArrayList<>();
    /** Bigram map that is never filled in the visible code; used for unigram-only cases. */
    private static final SparseArray<List<Integer>> sEmptyBigrams = new SparseArray<>();
    /** Bigrams from word index 0 to the following word indices. */
    private static final SparseArray<List<Integer>> sStarBigrams = new SparseArray<>();
    /** Bigrams from each word index i - 1 to word index i. */
    private static final SparseArray<List<Integer>> sChainBigrams = new SparseArray<>();
    /** Maps a source word to the list of its shortcut target words. */
    private static final HashMap<String, List<String>> sShortcuts = new HashMap<>();
68 
    /**
     * Creates the test case using the current wall-clock time as the random seed and
     * DEFAULT_MAX_UNIGRAMS as the number of words to generate.
     */
    public BinaryDictDecoderEncoderTests() {
        this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
    }
72 
BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams)73     public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) {
74         super();
75         BinaryDictionaryUtils.setCurrentTimeForTest(0);
76         Log.e(TAG, "Testing dictionary: seed is " + seed);
77         final Random random = new Random(seed);
78         sWords.clear();
79         sWordsWithVariousCodePoints.clear();
80         generateWords(maxUnigrams, random);
81 
82         for (int i = 0; i < sWords.size(); ++i) {
83             sChainBigrams.put(i, new ArrayList<Integer>());
84             if (i > 0) {
85                 sChainBigrams.get(i - 1).add(i);
86             }
87         }
88 
89         sStarBigrams.put(0, new ArrayList<Integer>());
90         // MAX - 1 because we added one above already
91         final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1);
92         for (int i = 1; i < maxBigrams; ++i) {
93             sStarBigrams.get(0).add(i);
94         }
95 
96         sShortcuts.clear();
97         for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) {
98             final int from = Math.abs(random.nextInt()) % sWords.size();
99             sShortcuts.put(sWords.get(from), new ArrayList<String>());
100             for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) {
101                 final int to = Math.abs(random.nextInt()) % sWords.size();
102                 sShortcuts.get(sWords.get(from)).add(sWords.get(to));
103             }
104         }
105     }
106 
    /**
     * Runs the superclass set-up, then sets the current time for tests to 0 through
     * BinaryDictionaryUtils before each test.
     */
    @Override
    protected void setUp() throws Exception {
        super.setUp();
        BinaryDictionaryUtils.setCurrentTimeForTest(0);
    }
112 
    /**
     * Resets the test time to -1 (which quits test mode) before running the superclass
     * tear-down.
     */
    @Override
    protected void tearDown() throws Exception {
        // Quit test mode.
        BinaryDictionaryUtils.setCurrentTimeForTest(-1);
        super.tearDown();
    }
119 
generateWords(final int number, final Random random)120     private void generateWords(final int number, final Random random) {
121         final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
122                 random);
123         final Set<String> wordSet = new HashSet<>();
124         while (wordSet.size() < number) {
125             wordSet.add(CodePointUtils.generateWord(random, codePointSet));
126         }
127         sWords.addAll(wordSet);
128 
129         final int[] largeCodePointSet = CodePointUtils.generateCodePointSet(
130                 LARGE_CODE_POINT_SET_SIZE, random);
131         wordSet.clear();
132         while (wordSet.size() < number) {
133             wordSet.add(CodePointUtils.generateWord(random, largeCodePointSet));
134         }
135         sWordsWithVariousCodePoints.addAll(wordSet);
136     }
137 
138     /**
139      * Adds unigrams to the dictionary.
140      */
addUnigrams(final int number, final FusionDictionary dict, final List<String> words, final HashMap<String, List<String>> shortcutMap)141     private void addUnigrams(final int number, final FusionDictionary dict,
142             final List<String> words, final HashMap<String, List<String>> shortcutMap) {
143         for (int i = 0; i < number; ++i) {
144             final String word = words.get(i);
145             final ArrayList<WeightedString> shortcuts = new ArrayList<>();
146             if (shortcutMap != null && shortcutMap.containsKey(word)) {
147                 for (final String shortcut : shortcutMap.get(word)) {
148                     shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ));
149                 }
150             }
151             dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ),
152                     (shortcutMap == null) ? null : shortcuts, false /* isNotAWord */);
153         }
154     }
155 
addBigrams(final FusionDictionary dict, final List<String> words, final SparseArray<List<Integer>> bigrams)156     private void addBigrams(final FusionDictionary dict,
157             final List<String> words,
158             final SparseArray<List<Integer>> bigrams) {
159         for (int i = 0; i < bigrams.size(); ++i) {
160             final int w1 = bigrams.keyAt(i);
161             for (int w2 : bigrams.valueAt(i)) {
162                 dict.setBigram(words.get(w1), words.get(w2), new ProbabilityInfo(BIGRAM_FREQ));
163             }
164         }
165     }
166 
167 //    The following is useful to dump the dictionary into a textual file, but it can't compile
168 //    on-device, so it's commented out.
169 //    private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename)
170 //            throws IOException {
171 //        com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined(
172 //                new java.io.FileWriter(new File(filename)), dict);
173 //    }
174 
timeWritingDictToFile(final File file, final FusionDictionary dict, final FormatSpec.FormatOptions formatOptions)175     private long timeWritingDictToFile(final File file, final FusionDictionary dict,
176             final FormatSpec.FormatOptions formatOptions) {
177 
178         long now = -1, diff = -1;
179 
180         try {
181             final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);
182 
183             now = System.currentTimeMillis();
184             // If you need to dump the dict to a textual file, uncomment the line below and the
185             // function above
186             // dumpToCombinedFileForDebug(file, "/tmp/foo");
187             dictEncoder.writeDictionary(dict, formatOptions);
188             diff = System.currentTimeMillis() - now;
189         } catch (IOException e) {
190             Log.e(TAG, "IO exception while writing file", e);
191         } catch (UnsupportedFormatException e) {
192             Log.e(TAG, "UnsupportedFormatException", e);
193         }
194 
195         return diff;
196     }
197 
checkDictionary(final FusionDictionary dict, final List<String> words, final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcutMap)198     private void checkDictionary(final FusionDictionary dict, final List<String> words,
199             final SparseArray<List<Integer>> bigrams,
200             final HashMap<String, List<String>> shortcutMap) {
201         assertNotNull(dict);
202 
203         // check unigram
204         for (final String word : words) {
205             final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
206             assertNotNull(ptNode);
207         }
208 
209         // check bigram
210         for (int i = 0; i < bigrams.size(); ++i) {
211             final int w1 = bigrams.keyAt(i);
212             for (final int w2 : bigrams.valueAt(i)) {
213                 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
214                         words.get(w1));
215                 assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2)));
216             }
217         }
218 
219         // check shortcut
220         if (shortcutMap != null) {
221             for (final Entry<String, List<String>> entry : shortcutMap.entrySet()) {
222                 assertTrue(words.contains(entry.getKey()));
223                 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray,
224                         entry.getKey());
225                 for (final String word : entry.getValue()) {
226                     assertNotNull("shortcut not found: " + entry.getKey() + ", " + word,
227                             ptNode.getShortcut(word));
228                 }
229             }
230         }
231     }
232 
outputOptions(final int bufferType, final FormatSpec.FormatOptions formatOptions)233     private String outputOptions(final int bufferType,
234             final FormatSpec.FormatOptions formatOptions) {
235         String result = " : buffer type = "
236                 + ((bufferType == BinaryDictUtils.USE_BYTE_BUFFER) ? "byte buffer" : "byte array");
237         return result + " : version = " + formatOptions.mVersion;
238     }
239 
240     // Tests for readDictionaryBinary and writeDictionaryBinary
241 
timeReadingAndCheckDict(final File file, final List<String> words, final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcutMap, final int bufferType)242     private long timeReadingAndCheckDict(final File file, final List<String> words,
243             final SparseArray<List<Integer>> bigrams,
244             final HashMap<String, List<String>> shortcutMap, final int bufferType) {
245         long now, diff = -1;
246 
247         FusionDictionary dict = null;
248         try {
249             final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
250                     bufferType);
251             now = System.currentTimeMillis();
252             dict = dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
253             diff  = System.currentTimeMillis() - now;
254         } catch (IOException e) {
255             Log.e(TAG, "IOException while reading dictionary", e);
256         } catch (UnsupportedFormatException e) {
257             Log.e(TAG, "Unsupported format", e);
258         }
259 
260         checkDictionary(dict, words, bigrams, shortcutMap);
261         return diff;
262     }
263 
264     // Tests for readDictionaryBinary and writeDictionaryBinary
runReadAndWrite(final List<String> words, final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts, final int bufferType, final FormatSpec.FormatOptions formatOptions, final String message)265     private String runReadAndWrite(final List<String> words,
266             final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts,
267             final int bufferType, final FormatSpec.FormatOptions formatOptions,
268             final String message) {
269 
270         final String dictName = "runReadAndWrite";
271         final String dictVersion = Long.toString(System.currentTimeMillis());
272         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
273                 getContext().getCacheDir());
274 
275         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
276                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
277         addUnigrams(words.size(), dict, words, shortcuts);
278         addBigrams(dict, words, bigrams);
279         checkDictionary(dict, words, bigrams, shortcuts);
280 
281         final long write = timeWritingDictToFile(file, dict, formatOptions);
282         final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType);
283 
284         return "PROF: read=" + read + "ms, write=" + write + "ms :" + message
285                 + " : " + outputOptions(bufferType, formatOptions);
286     }
287 
runReadAndWriteTests(final List<String> results, final int bufferType, final FormatSpec.FormatOptions formatOptions)288     private void runReadAndWriteTests(final List<String> results, final int bufferType,
289             final FormatSpec.FormatOptions formatOptions) {
290         results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType,
291                 formatOptions, "unigram"));
292         results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType,
293                 formatOptions, "chain"));
294         results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType,
295                 formatOptions, "star"));
296         results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions,
297                 "unigram with shortcuts"));
298         results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions,
299                 "chain with shortcuts"));
300         results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions,
301                 "star with shortcuts"));
302         results.add(runReadAndWrite(sWordsWithVariousCodePoints, sEmptyBigrams,
303                 null /* shortcuts */, bufferType, formatOptions,
304                 "unigram with various code points"));
305     }
306 
307     // Unit test for CharEncoding.readString and CharEncoding.writeString.
testCharEncoding()308     public void testCharEncoding() {
309         // the max length of a word in sWords is less than 50.
310         // See generateWords.
311         final byte[] buffer = new byte[50 * 3];
312         final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer);
313         for (final String word : sWords) {
314             Arrays.fill(buffer, (byte) 0);
315             CharEncoding.writeString(buffer, 0, word);
316             dictBuffer.position(0);
317             final String str = CharEncoding.readString(dictBuffer);
318             assertEquals(word, str);
319         }
320     }
321 
testReadAndWriteWithByteBuffer()322     public void testReadAndWriteWithByteBuffer() {
323         final List<String> results = new ArrayList<>();
324 
325         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
326                 BinaryDictUtils.VERSION2_OPTIONS);
327         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
328                 BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
329         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
330                 BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
331         for (final String result : results) {
332             Log.d(TAG, result);
333         }
334     }
335 
testReadAndWriteWithByteArray()336     public void testReadAndWriteWithByteArray() {
337         final List<String> results = new ArrayList<>();
338 
339         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
340                 BinaryDictUtils.VERSION2_OPTIONS);
341         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
342                 BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP);
343         runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
344                 BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP);
345 
346         for (final String result : results) {
347             Log.d(TAG, result);
348         }
349     }
350 
351     // Tests for readUnigramsAndBigramsBinary
352 
checkWordMap(final List<String> expectedWords, final SparseArray<List<Integer>> expectedBigrams, final TreeMap<Integer, String> resultWords, final TreeMap<Integer, Integer> resultFrequencies, final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams, final boolean checkProbability)353     private void checkWordMap(final List<String> expectedWords,
354             final SparseArray<List<Integer>> expectedBigrams,
355             final TreeMap<Integer, String> resultWords,
356             final TreeMap<Integer, Integer> resultFrequencies,
357             final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams,
358             final boolean checkProbability) {
359         // check unigrams
360         final Set<String> actualWordsSet = new HashSet<>(resultWords.values());
361         final Set<String> expectedWordsSet = new HashSet<>(expectedWords);
362         assertEquals(actualWordsSet, expectedWordsSet);
363         if (checkProbability) {
364             for (int freq : resultFrequencies.values()) {
365                 assertEquals(freq, UNIGRAM_FREQ);
366             }
367         }
368 
369         // check bigrams
370         final HashMap<String, Set<String>> expBigrams = new HashMap<>();
371         for (int i = 0; i < expectedBigrams.size(); ++i) {
372             final String word1 = expectedWords.get(expectedBigrams.keyAt(i));
373             for (int w2 : expectedBigrams.valueAt(i)) {
374                 if (expBigrams.get(word1) == null) {
375                     expBigrams.put(word1, new HashSet<String>());
376                 }
377                 expBigrams.get(word1).add(expectedWords.get(w2));
378             }
379         }
380 
381         final HashMap<String, Set<String>> actBigrams = new HashMap<>();
382         for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) {
383             final String word1 = resultWords.get(entry.getKey());
384             final int unigramFreq = resultFrequencies.get(entry.getKey());
385             for (PendingAttribute attr : entry.getValue()) {
386                 final String word2 = resultWords.get(attr.mAddress);
387                 if (actBigrams.get(word1) == null) {
388                     actBigrams.put(word1, new HashSet<String>());
389                 }
390                 actBigrams.get(word1).add(word2);
391 
392                 if (checkProbability) {
393                     final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency(
394                             unigramFreq, attr.mFrequency);
395                     assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
396                 }
397             }
398         }
399         assertEquals(actBigrams, expBigrams);
400     }
401 
402     private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words,
403             final SparseArray<List<Integer>> bigrams, final int bufferType,
404             final boolean checkProbability) {
405         final TreeMap<Integer, String> resultWords = new TreeMap<>();
406         final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams = new TreeMap<>();
407         final TreeMap<Integer, Integer> resultFreqs = new TreeMap<>();
408 
409         long now = -1, diff = -1;
410         try {
411             final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
412                     bufferType);
413             now = System.currentTimeMillis();
414             dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams);
415             diff = System.currentTimeMillis() - now;
416         } catch (IOException e) {
417             Log.e(TAG, "IOException", e);
418         } catch (UnsupportedFormatException e) {
419             Log.e(TAG, "UnsupportedFormatException", e);
420         }
421 
422         checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams, checkProbability);
423         return diff;
424     }
425 
426     private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words,
427             final SparseArray<List<Integer>> bigrams, final int bufferType,
428             final FormatSpec.FormatOptions formatOptions, final String message) {
429         final String dictName = "runReadUnigrams";
430         final String dictVersion = Long.toString(System.currentTimeMillis());
431         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
432                 getContext().getCacheDir());
433 
434         // making the dictionary from lists of words.
435         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
436                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
437         addUnigrams(words.size(), dict, words, null /* shortcutMap */);
438         addBigrams(dict, words, bigrams);
439 
440         timeWritingDictToFile(file, dict, formatOptions);
441 
442         // Caveat: Currently, the Java code to read a v4 dictionary doesn't calculate the
443         // probability when there's a timestamp for the entry.
444         // TODO: Abandon the Java code, and implement the v4 dictionary reading code in native.
445         long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType,
446                 !formatOptions.mHasTimestamp /* checkProbability */);
447         long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */,
448                 bufferType);
449 
450         return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap
451                 + " : " + message + " : " + outputOptions(bufferType, formatOptions);
452     }
453 
454     private void runReadUnigramsAndBigramsTests(final ArrayList<String> results,
455             final int bufferType, final FormatSpec.FormatOptions formatOptions) {
456         results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType,
457                 formatOptions, "unigram"));
458         results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType,
459                 formatOptions, "chain"));
460         results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType,
461                 formatOptions, "star"));
462     }
463 
464     public void testReadUnigramsAndBigramsBinaryWithByteBuffer() {
465         final ArrayList<String> results = new ArrayList<>();
466 
467         runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
468                 BinaryDictUtils.VERSION2_OPTIONS);
469 
470         for (final String result : results) {
471             Log.d(TAG, result);
472         }
473     }
474 
475     public void testReadUnigramsAndBigramsBinaryWithByteArray() {
476         final ArrayList<String> results = new ArrayList<>();
477 
478         runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
479                 BinaryDictUtils.VERSION2_OPTIONS);
480 
481         for (final String result : results) {
482             Log.d(TAG, result);
483         }
484     }
485 
486     // Tests for getTerminalPosition
487     private String getWordFromBinary(final DictDecoder dictDecoder, final int address) {
488         if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0);
489 
490         DictionaryHeader fileHeader = null;
491         try {
492             fileHeader = dictDecoder.readHeader();
493         } catch (IOException e) {
494             return null;
495         } catch (UnsupportedFormatException e) {
496             return null;
497         }
498         if (fileHeader == null) return null;
499         return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset,
500                 address).mWord;
501     }
502 
503     private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word,
504             final boolean contained) {
505         long diff = -1;
506         int position = -1;
507         try {
508             final long now = System.nanoTime();
509             position = dictDecoder.getTerminalPosition(word);
510             diff = System.nanoTime() - now;
511         } catch (IOException e) {
512             Log.e(TAG, "IOException while getTerminalPosition", e);
513         } catch (UnsupportedFormatException e) {
514             Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e);
515         }
516 
517         assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
518         if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word);
519         return diff;
520     }
521 
522     private void runGetTerminalPosition(final ArrayList<String> words,
523             final SparseArray<List<Integer>> bigrams, final int bufferType,
524             final FormatOptions formatOptions, final String message) {
525         final String dictName = "testGetTerminalPosition";
526         final String dictVersion = Long.toString(System.currentTimeMillis());
527         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
528                 getContext().getCacheDir());
529 
530         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
531                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
532         addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
533         addBigrams(dict, words, bigrams);
534         timeWritingDictToFile(file, dict, formatOptions);
535 
536         final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
537                 DictDecoder.USE_BYTEARRAY);
538         try {
539             dictDecoder.openDictBuffer();
540         } catch (IOException e) {
541             Log.e(TAG, "IOException while opening the buffer", e);
542         } catch (UnsupportedFormatException e) {
543             Log.e(TAG, "IOException while opening the buffer", e);
544         }
545         assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen());
546 
547         try {
548             // too long word
549             final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
550             assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord));
551 
552             // null
553             assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null));
554 
555             // empty string
556             assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(""));
557         } catch (IOException e) {
558         } catch (UnsupportedFormatException e) {
559         }
560 
561         // Test a word that is contained within the dictionary.
562         long sum = 0;
563         for (int i = 0; i < sWords.size(); ++i) {
564             final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), true);
565             sum += time == -1 ? 0 : time;
566         }
567         Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message
568                 + " : " + outputOptions(bufferType, formatOptions));
569 
570         // Test a word that isn't contained within the dictionary.
571         final Random random = new Random((int)System.currentTimeMillis());
572         final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
573                 random);
574         for (int i = 0; i < 1000; ++i) {
575             final String word = CodePointUtils.generateWord(random, codePointSet);
576             if (sWords.indexOf(word) != -1) continue;
577             checkGetTerminalPosition(dictDecoder, word, false);
578         }
579     }
580 
    /**
     * Runs the getTerminalPosition checks on sWords with the empty bigram fixture.
     *
     * @param bufferType the buffer type forwarded to runGetTerminalPosition.
     * @param formatOptions the format options forwarded to runGetTerminalPosition.
     */
    private void runGetTerminalPositionTests(final int bufferType,
            final FormatOptions formatOptions) {
        runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram");
    }
585 
586     public void testGetTerminalPosition() {
587         final ArrayList<String> results = new ArrayList<>();
588 
589         runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
590                 BinaryDictUtils.VERSION2_OPTIONS);
591         runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
592                 BinaryDictUtils.VERSION2_OPTIONS);
593 
594         for (final String result : results) {
595             Log.d(TAG, result);
596         }
597     }
598 
599     public void testVer2DictGetWordProperty() {
600         final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
601         final ArrayList<String> words = sWords;
602         final HashMap<String, List<String>> shortcuts = sShortcuts;
603         final String dictName = "testGetWordProperty";
604         final String dictVersion = Long.toString(System.currentTimeMillis());
605         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
606                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
607         addUnigrams(words.size(), dict, words, shortcuts);
608         addBigrams(dict, words, sEmptyBigrams);
609         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
610                 getContext().getCacheDir());
611         file.delete();
612         timeWritingDictToFile(file, dict, formatOptions);
613         final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
614                 0 /* offset */, file.length(), true /* useFullEditDistance */,
615                 Locale.ENGLISH, dictName, false /* isUpdatable */);
616         for (final String word : words) {
617             final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
618                     false /* isBeginningOfSentence */);
619             assertEquals(word, wordProperty.mWord);
620             assertEquals(UNIGRAM_FREQ, wordProperty.getProbability());
621             if (shortcuts.containsKey(word)) {
622                 assertEquals(shortcuts.get(word).size(), wordProperty.mShortcutTargets.size());
623                 final List<String> shortcutList = shortcuts.get(word);
624                 assertTrue(wordProperty.mHasShortcuts);
625                 for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
626                     assertTrue(shortcutList.contains(shortcutTarget.mWord));
627                     assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability());
628                     shortcutList.remove(shortcutTarget.mWord);
629                 }
630                 assertTrue(shortcutList.isEmpty());
631             }
632         }
633     }
634 
635     public void testVer2DictIteration() {
636         final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS;
637         final ArrayList<String> words = sWords;
638         final HashMap<String, List<String>> shortcuts = sShortcuts;
639         final SparseArray<List<Integer>> bigrams = sEmptyBigrams;
640         final String dictName = "testGetWordProperty";
641         final String dictVersion = Long.toString(System.currentTimeMillis());
642         final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
643                 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
644         addUnigrams(words.size(), dict, words, shortcuts);
645         addBigrams(dict, words, bigrams);
646         final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
647                 getContext().getCacheDir());
648         timeWritingDictToFile(file, dict, formatOptions);
649         Log.d(TAG, file.getAbsolutePath());
650         final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
651                 0 /* offset */, file.length(), true /* useFullEditDistance */,
652                 Locale.ENGLISH, dictName, false /* isUpdatable */);
653 
654         final HashSet<String> wordSet = new HashSet<>(words);
655         final HashSet<Pair<String, String>> bigramSet = new HashSet<>();
656 
657         for (int i = 0; i < words.size(); i++) {
658             final List<Integer> bigramList = bigrams.get(i);
659             if (bigramList != null) {
660                 for (final Integer word1Index : bigramList) {
661                     final String word1 = words.get(word1Index);
662                     bigramSet.add(new Pair<>(words.get(i), word1));
663                 }
664             }
665         }
666         int token = 0;
667         do {
668             final BinaryDictionary.GetNextWordPropertyResult result =
669                     binaryDictionary.getNextWordProperty(token);
670             final WordProperty wordProperty = result.mWordProperty;
671             final String word0 = wordProperty.mWord;
672             assertEquals(UNIGRAM_FREQ, wordProperty.mProbabilityInfo.mProbability);
673             wordSet.remove(word0);
674             if (shortcuts.containsKey(word0)) {
675                 assertEquals(shortcuts.get(word0).size(), wordProperty.mShortcutTargets.size());
676                 final List<String> shortcutList = shortcuts.get(word0);
677                 assertNotNull(wordProperty.mShortcutTargets);
678                 for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
679                     assertTrue(shortcutList.contains(shortcutTarget.mWord));
680                     assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability());
681                     shortcutList.remove(shortcutTarget.mWord);
682                 }
683                 assertTrue(shortcutList.isEmpty());
684             }
685             for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
686                 final String word1 = wordProperty.mBigrams.get(j).mWord;
687                 final Pair<String, String> bigram = new Pair<>(word0, word1);
688                 assertTrue(bigramSet.contains(bigram));
689                 bigramSet.remove(bigram);
690             }
691             token = result.mNextToken;
692         } while (token != 0);
693         assertTrue(wordSet.isEmpty());
694         assertTrue(bigramSet.isEmpty());
695     }
696 }
697