1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin.makedict; 18 19 import android.test.AndroidTestCase; 20 import android.test.suitebuilder.annotation.LargeTest; 21 import android.util.Log; 22 import android.util.Pair; 23 import android.util.SparseArray; 24 25 import com.android.inputmethod.latin.BinaryDictionary; 26 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; 27 import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; 28 import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; 29 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; 30 import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; 31 import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; 32 import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; 33 34 import java.io.File; 35 import java.io.IOException; 36 import java.util.ArrayList; 37 import java.util.Arrays; 38 import java.util.HashMap; 39 import java.util.HashSet; 40 import java.util.List; 41 import java.util.Locale; 42 import java.util.Map.Entry; 43 import java.util.Random; 44 import java.util.Set; 45 import java.util.TreeMap; 46 47 /** 48 * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils. 49 */ 50 @LargeTest 51 public class BinaryDictDecoderEncoderTests extends AndroidTestCase { 52 private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName(); 53 private static final int DEFAULT_MAX_UNIGRAMS = 300; 54 private static final int DEFAULT_CODE_POINT_SET_SIZE = 50; 55 private static final int LARGE_CODE_POINT_SET_SIZE = 300; 56 private static final int UNIGRAM_FREQ = 10; 57 private static final int BIGRAM_FREQ = 50; 58 private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; 59 private static final int NUM_OF_NODES_HAVING_SHORTCUTS = 50; 60 private static final int NUM_OF_SHORTCUTS = 5; 61 62 private static final ArrayList<String> sWords = new ArrayList<>(); 63 private static final ArrayList<String> sWordsWithVariousCodePoints = new ArrayList<>(); 64 private static final SparseArray<List<Integer>> sEmptyBigrams = new SparseArray<>(); 65 private static final SparseArray<List<Integer>> sStarBigrams = new SparseArray<>(); 66 private static final SparseArray<List<Integer>> sChainBigrams = new SparseArray<>(); 67 private static final HashMap<String, List<String>> sShortcuts = new HashMap<>(); 68 BinaryDictDecoderEncoderTests()69 public BinaryDictDecoderEncoderTests() { 70 this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS); 71 } 72 BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams)73 public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) { 74 super(); 75 BinaryDictionaryUtils.setCurrentTimeForTest(0); 76 Log.e(TAG, "Testing dictionary: seed is " + seed); 77 final Random random = new Random(seed); 78 sWords.clear(); 79 sWordsWithVariousCodePoints.clear(); 80 generateWords(maxUnigrams, random); 81 82 for (int i = 0; i < sWords.size(); ++i) { 83 sChainBigrams.put(i, new ArrayList<Integer>()); 84 if (i > 0) { 85 sChainBigrams.get(i - 1).add(i); 86 } 87 } 88 89 sStarBigrams.put(0, new ArrayList<Integer>()); 90 // MAX - 1 because we added one above already 91 final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1); 92 for (int i = 1; i < maxBigrams; ++i) { 93 sStarBigrams.get(0).add(i); 94 } 95 96 sShortcuts.clear(); 97 for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) { 98 final int from = Math.abs(random.nextInt()) % sWords.size(); 99 sShortcuts.put(sWords.get(from), new ArrayList<String>()); 100 for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) { 101 final int to = Math.abs(random.nextInt()) % sWords.size(); 102 sShortcuts.get(sWords.get(from)).add(sWords.get(to)); 103 } 104 } 105 } 106 107 @Override setUp()108 protected void setUp() throws Exception { 109 super.setUp(); 110 BinaryDictionaryUtils.setCurrentTimeForTest(0); 111 } 112 113 @Override tearDown()114 protected void tearDown() throws Exception { 115 // Quit test mode. 116 BinaryDictionaryUtils.setCurrentTimeForTest(-1); 117 super.tearDown(); 118 } 119 generateWords(final int number, final Random random)120 private void generateWords(final int number, final Random random) { 121 final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, 122 random); 123 final Set<String> wordSet = new HashSet<>(); 124 while (wordSet.size() < number) { 125 wordSet.add(CodePointUtils.generateWord(random, codePointSet)); 126 } 127 sWords.addAll(wordSet); 128 129 final int[] largeCodePointSet = CodePointUtils.generateCodePointSet( 130 LARGE_CODE_POINT_SET_SIZE, random); 131 wordSet.clear(); 132 while (wordSet.size() < number) { 133 wordSet.add(CodePointUtils.generateWord(random, largeCodePointSet)); 134 } 135 sWordsWithVariousCodePoints.addAll(wordSet); 136 } 137 138 /** 139 * Adds unigrams to the dictionary. 140 */ addUnigrams(final int number, final FusionDictionary dict, final List<String> words, final HashMap<String, List<String>> shortcutMap)141 private void addUnigrams(final int number, final FusionDictionary dict, 142 final List<String> words, final HashMap<String, List<String>> shortcutMap) { 143 for (int i = 0; i < number; ++i) { 144 final String word = words.get(i); 145 final ArrayList<WeightedString> shortcuts = new ArrayList<>(); 146 if (shortcutMap != null && shortcutMap.containsKey(word)) { 147 for (final String shortcut : shortcutMap.get(word)) { 148 shortcuts.add(new WeightedString(shortcut, UNIGRAM_FREQ)); 149 } 150 } 151 dict.add(word, new ProbabilityInfo(UNIGRAM_FREQ), 152 (shortcutMap == null) ? null : shortcuts, false /* isNotAWord */); 153 } 154 } 155 addBigrams(final FusionDictionary dict, final List<String> words, final SparseArray<List<Integer>> bigrams)156 private void addBigrams(final FusionDictionary dict, 157 final List<String> words, 158 final SparseArray<List<Integer>> bigrams) { 159 for (int i = 0; i < bigrams.size(); ++i) { 160 final int w1 = bigrams.keyAt(i); 161 for (int w2 : bigrams.valueAt(i)) { 162 dict.setBigram(words.get(w1), words.get(w2), new ProbabilityInfo(BIGRAM_FREQ)); 163 } 164 } 165 } 166 167 // The following is useful to dump the dictionary into a textual file, but it can't compile 168 // on-device, so it's commented out. 169 // private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename) 170 // throws IOException { 171 // com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined( 172 // new java.io.FileWriter(new File(filename)), dict); 173 // } 174 timeWritingDictToFile(final File file, final FusionDictionary dict, final FormatSpec.FormatOptions formatOptions)175 private long timeWritingDictToFile(final File file, final FusionDictionary dict, 176 final FormatSpec.FormatOptions formatOptions) { 177 178 long now = -1, diff = -1; 179 180 try { 181 final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions); 182 183 now = System.currentTimeMillis(); 184 // If you need to dump the dict to a textual file, uncomment the line below and the 185 // function above 186 // dumpToCombinedFileForDebug(file, "/tmp/foo"); 187 dictEncoder.writeDictionary(dict, formatOptions); 188 diff = System.currentTimeMillis() - now; 189 } catch (IOException e) { 190 Log.e(TAG, "IO exception while writing file", e); 191 } catch (UnsupportedFormatException e) { 192 Log.e(TAG, "UnsupportedFormatException", e); 193 } 194 195 return diff; 196 } 197 checkDictionary(final FusionDictionary dict, final List<String> words, final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcutMap)198 private void checkDictionary(final FusionDictionary dict, final List<String> words, 199 final SparseArray<List<Integer>> bigrams, 200 final HashMap<String, List<String>> shortcutMap) { 201 assertNotNull(dict); 202 203 // check unigram 204 for (final String word : words) { 205 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word); 206 assertNotNull(ptNode); 207 } 208 209 // check bigram 210 for (int i = 0; i < bigrams.size(); ++i) { 211 final int w1 = bigrams.keyAt(i); 212 for (final int w2 : bigrams.valueAt(i)) { 213 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, 214 words.get(w1)); 215 assertNotNull(words.get(w1) + "," + words.get(w2), ptNode.getBigram(words.get(w2))); 216 } 217 } 218 219 // check shortcut 220 if (shortcutMap != null) { 221 for (final Entry<String, List<String>> entry : shortcutMap.entrySet()) { 222 assertTrue(words.contains(entry.getKey())); 223 final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, 224 entry.getKey()); 225 for (final String word : entry.getValue()) { 226 assertNotNull("shortcut not found: " + entry.getKey() + ", " + word, 227 ptNode.getShortcut(word)); 228 } 229 } 230 } 231 } 232 outputOptions(final int bufferType, final FormatSpec.FormatOptions formatOptions)233 private String outputOptions(final int bufferType, 234 final FormatSpec.FormatOptions formatOptions) { 235 String result = " : buffer type = " 236 + ((bufferType == BinaryDictUtils.USE_BYTE_BUFFER) ? "byte buffer" : "byte array"); 237 return result + " : version = " + formatOptions.mVersion; 238 } 239 240 // Tests for readDictionaryBinary and writeDictionaryBinary 241 timeReadingAndCheckDict(final File file, final List<String> words, final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcutMap, final int bufferType)242 private long timeReadingAndCheckDict(final File file, final List<String> words, 243 final SparseArray<List<Integer>> bigrams, 244 final HashMap<String, List<String>> shortcutMap, final int bufferType) { 245 long now, diff = -1; 246 247 FusionDictionary dict = null; 248 try { 249 final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(), 250 bufferType); 251 now = System.currentTimeMillis(); 252 dict = dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */); 253 diff = System.currentTimeMillis() - now; 254 } catch (IOException e) { 255 Log.e(TAG, "IOException while reading dictionary", e); 256 } catch (UnsupportedFormatException e) { 257 Log.e(TAG, "Unsupported format", e); 258 } 259 260 checkDictionary(dict, words, bigrams, shortcutMap); 261 return diff; 262 } 263 264 // Tests for readDictionaryBinary and writeDictionaryBinary runReadAndWrite(final List<String> words, final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts, final int bufferType, final FormatSpec.FormatOptions formatOptions, final String message)265 private String runReadAndWrite(final List<String> words, 266 final SparseArray<List<Integer>> bigrams, final HashMap<String, List<String>> shortcuts, 267 final int bufferType, final FormatSpec.FormatOptions formatOptions, 268 final String message) { 269 270 final String dictName = "runReadAndWrite"; 271 final String dictVersion = Long.toString(System.currentTimeMillis()); 272 final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, 273 getContext().getCacheDir()); 274 275 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 276 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); 277 addUnigrams(words.size(), dict, words, shortcuts); 278 addBigrams(dict, words, bigrams); 279 checkDictionary(dict, words, bigrams, shortcuts); 280 281 final long write = timeWritingDictToFile(file, dict, formatOptions); 282 final long read = timeReadingAndCheckDict(file, words, bigrams, shortcuts, bufferType); 283 284 return "PROF: read=" + read + "ms, write=" + write + "ms :" + message 285 + " : " + outputOptions(bufferType, formatOptions); 286 } 287 runReadAndWriteTests(final List<String> results, final int bufferType, final FormatSpec.FormatOptions formatOptions)288 private void runReadAndWriteTests(final List<String> results, final int bufferType, 289 final FormatSpec.FormatOptions formatOptions) { 290 results.add(runReadAndWrite(sWords, sEmptyBigrams, null /* shortcuts */, bufferType, 291 formatOptions, "unigram")); 292 results.add(runReadAndWrite(sWords, sChainBigrams, null /* shortcuts */, bufferType, 293 formatOptions, "chain")); 294 results.add(runReadAndWrite(sWords, sStarBigrams, null /* shortcuts */, bufferType, 295 formatOptions, "star")); 296 results.add(runReadAndWrite(sWords, sEmptyBigrams, sShortcuts, bufferType, formatOptions, 297 "unigram with shortcuts")); 298 results.add(runReadAndWrite(sWords, sChainBigrams, sShortcuts, bufferType, formatOptions, 299 "chain with shortcuts")); 300 results.add(runReadAndWrite(sWords, sStarBigrams, sShortcuts, bufferType, formatOptions, 301 "star with shortcuts")); 302 results.add(runReadAndWrite(sWordsWithVariousCodePoints, sEmptyBigrams, 303 null /* shortcuts */, bufferType, formatOptions, 304 "unigram with various code points")); 305 } 306 307 // Unit test for CharEncoding.readString and CharEncoding.writeString. testCharEncoding()308 public void testCharEncoding() { 309 // the max length of a word in sWords is less than 50. 310 // See generateWords. 311 final byte[] buffer = new byte[50 * 3]; 312 final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer); 313 for (final String word : sWords) { 314 Arrays.fill(buffer, (byte) 0); 315 CharEncoding.writeString(buffer, 0, word); 316 dictBuffer.position(0); 317 final String str = CharEncoding.readString(dictBuffer); 318 assertEquals(word, str); 319 } 320 } 321 testReadAndWriteWithByteBuffer()322 public void testReadAndWriteWithByteBuffer() { 323 final List<String> results = new ArrayList<>(); 324 325 runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, 326 BinaryDictUtils.VERSION2_OPTIONS); 327 runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, 328 BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP); 329 runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER, 330 BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP); 331 for (final String result : results) { 332 Log.d(TAG, result); 333 } 334 } 335 testReadAndWriteWithByteArray()336 public void testReadAndWriteWithByteArray() { 337 final List<String> results = new ArrayList<>(); 338 339 runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, 340 BinaryDictUtils.VERSION2_OPTIONS); 341 runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, 342 BinaryDictUtils.VERSION4_OPTIONS_WITHOUT_TIMESTAMP); 343 runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY, 344 BinaryDictUtils.VERSION4_OPTIONS_WITH_TIMESTAMP); 345 346 for (final String result : results) { 347 Log.d(TAG, result); 348 } 349 } 350 351 // Tests for readUnigramsAndBigramsBinary 352 checkWordMap(final List<String> expectedWords, final SparseArray<List<Integer>> expectedBigrams, final TreeMap<Integer, String> resultWords, final TreeMap<Integer, Integer> resultFrequencies, final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams, final boolean checkProbability)353 private void checkWordMap(final List<String> expectedWords, 354 final SparseArray<List<Integer>> expectedBigrams, 355 final TreeMap<Integer, String> resultWords, 356 final TreeMap<Integer, Integer> resultFrequencies, 357 final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams, 358 final boolean checkProbability) { 359 // check unigrams 360 final Set<String> actualWordsSet = new HashSet<>(resultWords.values()); 361 final Set<String> expectedWordsSet = new HashSet<>(expectedWords); 362 assertEquals(actualWordsSet, expectedWordsSet); 363 if (checkProbability) { 364 for (int freq : resultFrequencies.values()) { 365 assertEquals(freq, UNIGRAM_FREQ); 366 } 367 } 368 369 // check bigrams 370 final HashMap<String, Set<String>> expBigrams = new HashMap<>(); 371 for (int i = 0; i < expectedBigrams.size(); ++i) { 372 final String word1 = expectedWords.get(expectedBigrams.keyAt(i)); 373 for (int w2 : expectedBigrams.valueAt(i)) { 374 if (expBigrams.get(word1) == null) { 375 expBigrams.put(word1, new HashSet<String>()); 376 } 377 expBigrams.get(word1).add(expectedWords.get(w2)); 378 } 379 } 380 381 final HashMap<String, Set<String>> actBigrams = new HashMap<>(); 382 for (Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) { 383 final String word1 = resultWords.get(entry.getKey()); 384 final int unigramFreq = resultFrequencies.get(entry.getKey()); 385 for (PendingAttribute attr : entry.getValue()) { 386 final String word2 = resultWords.get(attr.mAddress); 387 if (actBigrams.get(word1) == null) { 388 actBigrams.put(word1, new HashSet<String>()); 389 } 390 actBigrams.get(word1).add(word2); 391 392 if (checkProbability) { 393 final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency( 394 unigramFreq, attr.mFrequency); 395 assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); 396 } 397 } 398 } 399 assertEquals(actBigrams, expBigrams); 400 } 401 402 private long timeAndCheckReadUnigramsAndBigramsBinary(final File file, final List<String> words, 403 final SparseArray<List<Integer>> bigrams, final int bufferType, 404 final boolean checkProbability) { 405 final TreeMap<Integer, String> resultWords = new TreeMap<>(); 406 final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams = new TreeMap<>(); 407 final TreeMap<Integer, Integer> resultFreqs = new TreeMap<>(); 408 409 long now = -1, diff = -1; 410 try { 411 final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(), 412 bufferType); 413 now = System.currentTimeMillis(); 414 dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams); 415 diff = System.currentTimeMillis() - now; 416 } catch (IOException e) { 417 Log.e(TAG, "IOException", e); 418 } catch (UnsupportedFormatException e) { 419 Log.e(TAG, "UnsupportedFormatException", e); 420 } 421 422 checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams, checkProbability); 423 return diff; 424 } 425 426 private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words, 427 final SparseArray<List<Integer>> bigrams, final int bufferType, 428 final FormatSpec.FormatOptions formatOptions, final String message) { 429 final String dictName = "runReadUnigrams"; 430 final String dictVersion = Long.toString(System.currentTimeMillis()); 431 final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, 432 getContext().getCacheDir()); 433 434 // making the dictionary from lists of words. 435 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 436 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); 437 addUnigrams(words.size(), dict, words, null /* shortcutMap */); 438 addBigrams(dict, words, bigrams); 439 440 timeWritingDictToFile(file, dict, formatOptions); 441 442 // Caveat: Currently, the Java code to read a v4 dictionary doesn't calculate the 443 // probability when there's a timestamp for the entry. 444 // TODO: Abandon the Java code, and implement the v4 dictionary reading code in native. 445 long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams, bufferType, 446 !formatOptions.mHasTimestamp /* checkProbability */); 447 long fullReading = timeReadingAndCheckDict(file, words, bigrams, null /* shortcutMap */, 448 bufferType); 449 450 return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap 451 + " : " + message + " : " + outputOptions(bufferType, formatOptions); 452 } 453 454 private void runReadUnigramsAndBigramsTests(final ArrayList<String> results, 455 final int bufferType, final FormatSpec.FormatOptions formatOptions) { 456 results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType, 457 formatOptions, "unigram")); 458 results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType, 459 formatOptions, "chain")); 460 results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType, 461 formatOptions, "star")); 462 } 463 464 public void testReadUnigramsAndBigramsBinaryWithByteBuffer() { 465 final ArrayList<String> results = new ArrayList<>(); 466 467 runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER, 468 BinaryDictUtils.VERSION2_OPTIONS); 469 470 for (final String result : results) { 471 Log.d(TAG, result); 472 } 473 } 474 475 public void testReadUnigramsAndBigramsBinaryWithByteArray() { 476 final ArrayList<String> results = new ArrayList<>(); 477 478 runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY, 479 BinaryDictUtils.VERSION2_OPTIONS); 480 481 for (final String result : results) { 482 Log.d(TAG, result); 483 } 484 } 485 486 // Tests for getTerminalPosition 487 private String getWordFromBinary(final DictDecoder dictDecoder, final int address) { 488 if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0); 489 490 DictionaryHeader fileHeader = null; 491 try { 492 fileHeader = dictDecoder.readHeader(); 493 } catch (IOException e) { 494 return null; 495 } catch (UnsupportedFormatException e) { 496 return null; 497 } 498 if (fileHeader == null) return null; 499 return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset, 500 address).mWord; 501 } 502 503 private long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word, 504 final boolean contained) { 505 long diff = -1; 506 int position = -1; 507 try { 508 final long now = System.nanoTime(); 509 position = dictDecoder.getTerminalPosition(word); 510 diff = System.nanoTime() - now; 511 } catch (IOException e) { 512 Log.e(TAG, "IOException while getTerminalPosition", e); 513 } catch (UnsupportedFormatException e) { 514 Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e); 515 } 516 517 assertEquals(FormatSpec.NOT_VALID_WORD != position, contained); 518 if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word); 519 return diff; 520 } 521 522 private void runGetTerminalPosition(final ArrayList<String> words, 523 final SparseArray<List<Integer>> bigrams, final int bufferType, 524 final FormatOptions formatOptions, final String message) { 525 final String dictName = "testGetTerminalPosition"; 526 final String dictVersion = Long.toString(System.currentTimeMillis()); 527 final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, 528 getContext().getCacheDir()); 529 530 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 531 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); 532 addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); 533 addBigrams(dict, words, bigrams); 534 timeWritingDictToFile(file, dict, formatOptions); 535 536 final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(), 537 DictDecoder.USE_BYTEARRAY); 538 try { 539 dictDecoder.openDictBuffer(); 540 } catch (IOException e) { 541 Log.e(TAG, "IOException while opening the buffer", e); 542 } catch (UnsupportedFormatException e) { 543 Log.e(TAG, "IOException while opening the buffer", e); 544 } 545 assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen()); 546 547 try { 548 // too long word 549 final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; 550 assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord)); 551 552 // null 553 assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null)); 554 555 // empty string 556 assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition("")); 557 } catch (IOException e) { 558 } catch (UnsupportedFormatException e) { 559 } 560 561 // Test a word that is contained within the dictionary. 562 long sum = 0; 563 for (int i = 0; i < sWords.size(); ++i) { 564 final long time = checkGetTerminalPosition(dictDecoder, sWords.get(i), true); 565 sum += time == -1 ? 0 : time; 566 } 567 Log.d(TAG, "per search : " + (((double)sum) / sWords.size() / 1000000) + " : " + message 568 + " : " + outputOptions(bufferType, formatOptions)); 569 570 // Test a word that isn't contained within the dictionary. 571 final Random random = new Random((int)System.currentTimeMillis()); 572 final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, 573 random); 574 for (int i = 0; i < 1000; ++i) { 575 final String word = CodePointUtils.generateWord(random, codePointSet); 576 if (sWords.indexOf(word) != -1) continue; 577 checkGetTerminalPosition(dictDecoder, word, false); 578 } 579 } 580 581 private void runGetTerminalPositionTests(final int bufferType, 582 final FormatOptions formatOptions) { 583 runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram"); 584 } 585 586 public void testGetTerminalPosition() { 587 final ArrayList<String> results = new ArrayList<>(); 588 589 runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY, 590 BinaryDictUtils.VERSION2_OPTIONS); 591 runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER, 592 BinaryDictUtils.VERSION2_OPTIONS); 593 594 for (final String result : results) { 595 Log.d(TAG, result); 596 } 597 } 598 599 public void testVer2DictGetWordProperty() { 600 final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS; 601 final ArrayList<String> words = sWords; 602 final HashMap<String, List<String>> shortcuts = sShortcuts; 603 final String dictName = "testGetWordProperty"; 604 final String dictVersion = Long.toString(System.currentTimeMillis()); 605 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 606 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); 607 addUnigrams(words.size(), dict, words, shortcuts); 608 addBigrams(dict, words, sEmptyBigrams); 609 final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, 610 getContext().getCacheDir()); 611 file.delete(); 612 timeWritingDictToFile(file, dict, formatOptions); 613 final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(), 614 0 /* offset */, file.length(), true /* useFullEditDistance */, 615 Locale.ENGLISH, dictName, false /* isUpdatable */); 616 for (final String word : words) { 617 final WordProperty wordProperty = binaryDictionary.getWordProperty(word, 618 false /* isBeginningOfSentence */); 619 assertEquals(word, wordProperty.mWord); 620 assertEquals(UNIGRAM_FREQ, wordProperty.getProbability()); 621 if (shortcuts.containsKey(word)) { 622 assertEquals(shortcuts.get(word).size(), wordProperty.mShortcutTargets.size()); 623 final List<String> shortcutList = shortcuts.get(word); 624 assertTrue(wordProperty.mHasShortcuts); 625 for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { 626 assertTrue(shortcutList.contains(shortcutTarget.mWord)); 627 assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability()); 628 shortcutList.remove(shortcutTarget.mWord); 629 } 630 assertTrue(shortcutList.isEmpty()); 631 } 632 } 633 } 634 635 public void testVer2DictIteration() { 636 final FormatOptions formatOptions = BinaryDictUtils.VERSION2_OPTIONS; 637 final ArrayList<String> words = sWords; 638 final HashMap<String, List<String>> shortcuts = sShortcuts; 639 final SparseArray<List<Integer>> bigrams = sEmptyBigrams; 640 final String dictName = "testGetWordProperty"; 641 final String dictVersion = Long.toString(System.currentTimeMillis()); 642 final FusionDictionary dict = new FusionDictionary(new PtNodeArray(), 643 BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions)); 644 addUnigrams(words.size(), dict, words, shortcuts); 645 addBigrams(dict, words, bigrams); 646 final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions, 647 getContext().getCacheDir()); 648 timeWritingDictToFile(file, dict, formatOptions); 649 Log.d(TAG, file.getAbsolutePath()); 650 final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(), 651 0 /* offset */, file.length(), true /* useFullEditDistance */, 652 Locale.ENGLISH, dictName, false /* isUpdatable */); 653 654 final HashSet<String> wordSet = new HashSet<>(words); 655 final HashSet<Pair<String, String>> bigramSet = new HashSet<>(); 656 657 for (int i = 0; i < words.size(); i++) { 658 final List<Integer> bigramList = bigrams.get(i); 659 if (bigramList != null) { 660 for (final Integer word1Index : bigramList) { 661 final String word1 = words.get(word1Index); 662 bigramSet.add(new Pair<>(words.get(i), word1)); 663 } 664 } 665 } 666 int token = 0; 667 do { 668 final BinaryDictionary.GetNextWordPropertyResult result = 669 binaryDictionary.getNextWordProperty(token); 670 final WordProperty wordProperty = result.mWordProperty; 671 final String word0 = wordProperty.mWord; 672 assertEquals(UNIGRAM_FREQ, wordProperty.mProbabilityInfo.mProbability); 673 wordSet.remove(word0); 674 if (shortcuts.containsKey(word0)) { 675 assertEquals(shortcuts.get(word0).size(), wordProperty.mShortcutTargets.size()); 676 final List<String> shortcutList = shortcuts.get(word0); 677 assertNotNull(wordProperty.mShortcutTargets); 678 for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { 679 assertTrue(shortcutList.contains(shortcutTarget.mWord)); 680 assertEquals(UNIGRAM_FREQ, shortcutTarget.getProbability()); 681 shortcutList.remove(shortcutTarget.mWord); 682 } 683 assertTrue(shortcutList.isEmpty()); 684 } 685 for (int j = 0; j < wordProperty.mBigrams.size(); j++) { 686 final String word1 = wordProperty.mBigrams.get(j).mWord; 687 final Pair<String, String> bigram = new Pair<>(word0, word1); 688 assertTrue(bigramSet.contains(bigram)); 689 bigramSet.remove(bigram); 690 } 691 token = result.mNextToken; 692 } while (token != 0); 693 assertTrue(wordSet.isEmpty()); 694 assertTrue(bigramSet.isEmpty()); 695 } 696 } 697