/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.inputmethod.latin.makedict;

import android.test.AndroidTestCase;
import android.util.Log;
import android.util.Pair;
import android.util.SparseArray;

import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.common.CodePointUtils;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;

/**
 * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoderUtils.
 *
 * <p>The word lists and bigram tables used by the tests are static and are regenerated from a
 * seeded {@link Random} every time an instance of this class is constructed. The seed is logged
 * so that a failing run can be reproduced with the two-argument constructor.
 */
public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
    private static final String TAG = BinaryDictDecoderEncoderTests.class.getSimpleName();
    private static final int DEFAULT_MAX_UNIGRAMS = 300;
    private static final int DEFAULT_CODE_POINT_SET_SIZE = 50;
    private static final int LARGE_CODE_POINT_SET_SIZE = 300;
    private static final int UNIGRAM_FREQ = 10;
    private static final int BIGRAM_FREQ = 50;
    private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;

    // Words generated from a code point set of DEFAULT_CODE_POINT_SET_SIZE entries.
    private static final ArrayList<String> sWords = new ArrayList<>();
    // Words generated from a code point set of LARGE_CODE_POINT_SET_SIZE entries.
    private static final ArrayList<String> sWordsWithVariousCodePoints = new ArrayList<>();
    // Bigram tables map an index into the word list to the indices of its bigram targets.
    private static final SparseArray<List<Integer>> sEmptyBigrams = new SparseArray<>();
    // Word 0 points to words 1..maxBigrams-1.
    private static final SparseArray<List<Integer>> sStarBigrams = new SparseArray<>();
    // Word i points to word i + 1.
    private static final SparseArray<List<Integer>> sChainBigrams = new SparseArray<>();

    final Random mRandom;

    /**
     * Creates the test case with a time-based seed and {@link #DEFAULT_MAX_UNIGRAMS} words.
     */
    public BinaryDictDecoderEncoderTests() {
        this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
    }

    /**
     * Creates the test case and regenerates the shared static word lists and bigram tables.
     *
     * @param seed the seed for the random generator used to create the test words
     * @param maxUnigrams the number of words to generate for each word list
     */
    public BinaryDictDecoderEncoderTests(final long seed, final int maxUnigrams) {
        super();
        BinaryDictionaryUtils.setCurrentTimeForTest(0);
        Log.e(TAG, "Testing dictionary: seed is " + seed);
        mRandom = new Random(seed);
        sWords.clear();
        sWordsWithVariousCodePoints.clear();
        // The bigram tables are static too: drop entries left over from a previous instance so
        // that no stale index survives when this instance generates fewer words.
        sChainBigrams.clear();
        sStarBigrams.clear();
        generateWords(maxUnigrams, mRandom);

        for (int i = 0; i < sWords.size(); ++i) {
            sChainBigrams.put(i, new ArrayList<Integer>());
            if (i > 0) {
                sChainBigrams.get(i - 1).add(i);
            }
        }

        sStarBigrams.put(0, new ArrayList<Integer>());
        // MAX - 1 because we added one above already
        final int maxBigrams = Math.min(sWords.size(), FormatSpec.MAX_BIGRAMS_IN_A_PTNODE - 1);
        for (int i = 1; i < maxBigrams; ++i) {
            sStarBigrams.get(0).add(i);
        }
    }

    @Override
    protected void setUp() throws Exception {
        super.setUp();
        BinaryDictionaryUtils.setCurrentTimeForTest(0);
    }

    @Override
    protected void tearDown() throws Exception {
        // Quit test mode.
        BinaryDictionaryUtils.setCurrentTimeForTest(-1);
        super.tearDown();
    }

    /**
     * Fills {@link #sWords} and {@link #sWordsWithVariousCodePoints} with {@code number} distinct
     * random words each.
     *
     * @param number the number of distinct words to generate for each list
     * @param random the source of randomness
     */
    private static void generateWords(final int number, final Random random) {
        final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
                random);
        final Set<String> wordSet = new HashSet<>();
        while (wordSet.size() < number) {
            wordSet.add(CodePointUtils.generateWord(random, codePointSet));
        }
        sWords.addAll(wordSet);

        final int[] largeCodePointSet = CodePointUtils.generateCodePointSet(
                LARGE_CODE_POINT_SET_SIZE, random);
        wordSet.clear();
        while (wordSet.size() < number) {
            wordSet.add(CodePointUtils.generateWord(random, largeCodePointSet));
        }
        sWordsWithVariousCodePoints.addAll(wordSet);
    }

    /**
     * Adds unigrams to the dictionary.
     *
     * @param number the number of words to add, taken from the start of {@code words}
     * @param dict the dictionary to add the words to
     * @param words the source word list
     */
    private static void addUnigrams(final int number, final FusionDictionary dict,
            final List<String> words) {
        for (int i = 0; i < number; ++i) {
            dict.add(words.get(i), new ProbabilityInfo(UNIGRAM_FREQ), false /* isNotAWord */,
                    false /* isPossiblyOffensive */);
        }
    }

    /**
     * Adds bigrams to the dictionary.
     *
     * @param dict the dictionary to add the bigrams to
     * @param words the word list that the indices in {@code bigrams} refer to
     * @param bigrams maps the index of a first word to the indices of its second words
     */
    private static void addBigrams(final FusionDictionary dict,
            final List<String> words,
            final SparseArray<List<Integer>> bigrams) {
        for (int i = 0; i < bigrams.size(); ++i) {
            final String word1 = words.get(bigrams.keyAt(i));
            for (final int w2 : bigrams.valueAt(i)) {
                dict.setBigram(word1, words.get(w2), new ProbabilityInfo(BIGRAM_FREQ));
            }
        }
    }

//    The following is useful to dump the dictionary into a textual file, but it can't compile
//    on-device, so it's commented out.
//    private void dumpToCombinedFileForDebug(final FusionDictionary dict, final String filename)
//            throws IOException {
//        com.android.inputmethod.latin.dicttool.CombinedInputOutput.writeDictionaryCombined(
//                new java.io.FileWriter(new File(filename)), dict);
//    }

    /**
     * Writes the dictionary to the file and measures how long the write took.
     *
     * @param file the destination file
     * @param dict the dictionary to write
     * @param formatOptions the format to write the dictionary in
     * @return the time spent writing in milliseconds, or -1 if writing threw an exception
     */
    private static long timeWritingDictToFile(final File file, final FusionDictionary dict,
            final FormatSpec.FormatOptions formatOptions) {
        long diff = -1;

        try {
            final DictEncoder dictEncoder = BinaryDictUtils.getDictEncoder(file, formatOptions);

            final long now = System.currentTimeMillis();
            // If you need to dump the dict to a textual file, uncomment the line below and the
            // function above
            // dumpToCombinedFileForDebug(dict, "/tmp/foo");
            dictEncoder.writeDictionary(dict, formatOptions);
            diff = System.currentTimeMillis() - now;
        } catch (IOException e) {
            Log.e(TAG, "IO exception while writing file", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "UnsupportedFormatException", e);
        }

        return diff;
    }

    /**
     * Asserts that the dictionary contains every word and every bigram that is expected.
     *
     * @param dict the dictionary to check; must not be null
     * @param words the words expected to be in the dictionary
     * @param bigrams the bigrams expected to be in the dictionary, as indices into {@code words}
     */
    private static void checkDictionary(final FusionDictionary dict, final List<String> words,
            final SparseArray<List<Integer>> bigrams) {
        assertNotNull(dict);

        // check unigram
        for (final String word : words) {
            final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word);
            assertNotNull(ptNode);
        }

        // check bigram
        for (int i = 0; i < bigrams.size(); ++i) {
            final List<Integer> targets = bigrams.valueAt(i);
            if (targets.isEmpty()) {
                // Nothing to verify for this word; the original code did no lookup either.
                continue;
            }
            final String word1 = words.get(bigrams.keyAt(i));
            // The node of the first word is the same for all of its targets: look it up once.
            final PtNode ptNode = FusionDictionary.findWordInTree(dict.mRootNodeArray, word1);
            assertNotNull(word1, ptNode);
            for (final int w2 : targets) {
                final String word2 = words.get(w2);
                assertNotNull(word1 + "," + word2, ptNode.getBigram(word2));
            }
        }
    }

    /**
     * Builds the suffix describing the buffer type and format version used in log messages.
     */
    private static String outputOptions(final int bufferType,
            final FormatSpec.FormatOptions formatOptions) {
        final String result = " : buffer type = "
                + ((bufferType == BinaryDictUtils.USE_BYTE_BUFFER) ? "byte buffer" : "byte array");
        return result + " : version = " + formatOptions.mVersion;
    }

    // Tests for readDictionaryBinary and writeDictionaryBinary

    /**
     * Reads the whole dictionary from the file, checks its contents and measures the read time.
     *
     * @param file the dictionary file to read
     * @param words the words expected to be in the dictionary
     * @param bigrams the bigrams expected to be in the dictionary, as indices into {@code words}
     * @param bufferType the buffer type passed to the decoder factory
     * @return the time spent reading in milliseconds, or -1 if reading threw an exception
     */
    private static long timeReadingAndCheckDict(final File file, final List<String> words,
            final SparseArray<List<Integer>> bigrams, final int bufferType) {
        long diff = -1;

        FusionDictionary dict = null;
        try {
            final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
                    bufferType);
            final long now = System.currentTimeMillis();
            dict = dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
            diff = System.currentTimeMillis() - now;
        } catch (IOException e) {
            Log.e(TAG, "IOException while reading dictionary", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "Unsupported format", e);
        }

        // If reading failed, dict is still null and the check below fails the test.
        checkDictionary(dict, words, bigrams);
        return diff;
    }

    // Tests for readDictionaryBinary and writeDictionaryBinary

    /**
     * Builds a dictionary from the words and bigrams, writes it to a file in the cache
     * directory, reads it back and checks the contents.
     *
     * @return a profiling message with the read and write times
     */
    private String runReadAndWrite(final List<String> words,
            final SparseArray<List<Integer>> bigrams,
            final int bufferType, final FormatSpec.FormatOptions formatOptions,
            final String message) {

        final String dictName = "runReadAndWrite";
        final String dictVersion = Long.toString(System.currentTimeMillis());
        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
                getContext().getCacheDir());

        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
        addUnigrams(words.size(), dict, words);
        addBigrams(dict, words, bigrams);
        checkDictionary(dict, words, bigrams);

        final long write = timeWritingDictToFile(file, dict, formatOptions);
        final long read = timeReadingAndCheckDict(file, words, bigrams, bufferType);

        return "PROF: read=" + read + "ms, write=" + write + "ms :" + message
                + " : " + outputOptions(bufferType, formatOptions);
    }

    /**
     * Runs {@link #runReadAndWrite} for each combination of word list and bigram table and
     * appends the profiling messages to {@code results}.
     */
    private void runReadAndWriteTests(final List<String> results, final int bufferType,
            final FormatSpec.FormatOptions formatOptions) {
        results.add(runReadAndWrite(sWords, sEmptyBigrams, bufferType,
                formatOptions, "unigram"));
        results.add(runReadAndWrite(sWords, sChainBigrams, bufferType,
                formatOptions, "chain"));
        results.add(runReadAndWrite(sWords, sStarBigrams, bufferType,
                formatOptions, "star"));
        results.add(runReadAndWrite(sWords, sEmptyBigrams, bufferType, formatOptions,
                "unigram with shortcuts"));
        results.add(runReadAndWrite(sWords, sChainBigrams, bufferType, formatOptions,
                "chain with shortcuts"));
        results.add(runReadAndWrite(sWords, sStarBigrams, bufferType, formatOptions,
                "star with shortcuts"));
        results.add(runReadAndWrite(sWordsWithVariousCodePoints, sEmptyBigrams,
                bufferType, formatOptions,
                "unigram with various code points"));
    }

    /**
     * Checks that a dictionary written with the code point table enabled carries the expected
     * code point table in its header attributes.
     */
    public void testCharacterTableIsPresent() throws IOException, UnsupportedFormatException {
        final String[] wordSource = {"words", "used", "for", "testing", "a", "code point", "table"};
        final List<String> words = Arrays.asList(wordSource);
        final String correctCodePointTable = "toesdrniawuplgfcb ";
        final String dictName = "codePointTableTest";
        final String dictVersion = Long.toString(System.currentTimeMillis());
        final String codePointTableAttribute = DictionaryHeader.CODE_POINT_TABLE_KEY;
        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion,
                BinaryDictUtils.STATIC_OPTIONS, getContext().getCacheDir());

        // Write a test dictionary
        final DictEncoder dictEncoder = new Ver2DictEncoder(file,
                Ver2DictEncoder.CODE_POINT_TABLE_ON);
        final FormatSpec.FormatOptions formatOptions =
                new FormatSpec.FormatOptions(
                        FormatSpec.MINIMUM_SUPPORTED_STATIC_VERSION);
        final FusionDictionary sourcedict = new FusionDictionary(new PtNodeArray(),
                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
        addUnigrams(words.size(), sourcedict, words);
        dictEncoder.writeDictionary(sourcedict, formatOptions);

        // Read the dictionary
        final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
                DictDecoder.USE_BYTEARRAY);
        final DictionaryHeader fileHeader = dictDecoder.readHeader();
        // Check if codePointTable is present
        assertTrue("codePointTable is not present",
                fileHeader.mDictionaryOptions.mAttributes.containsKey(codePointTableAttribute));
        final String codePointTable =
                fileHeader.mDictionaryOptions.mAttributes.get(codePointTableAttribute);
        // Check if codePointTable is correct
        assertEquals("codePointTable is incorrect", correctCodePointTable, codePointTable);
    }

    // Unit test for CharEncoding.readString and CharEncoding.writeString.
    public void testCharEncoding() {
        // the max length of a word in sWords is less than 50.
        // See generateWords.
        final byte[] buffer = new byte[50 * 3];
        final DictBuffer dictBuffer = new ByteArrayDictBuffer(buffer);
        for (final String word : sWords) {
            Arrays.fill(buffer, (byte) 0);
            CharEncoding.writeString(buffer, 0, word, null);
            dictBuffer.position(0);
            final String str = CharEncoding.readString(dictBuffer);
            assertEquals(word, str);
        }
    }

    public void testReadAndWriteWithByteBuffer() {
        final List<String> results = new ArrayList<>();

        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
                BinaryDictUtils.STATIC_OPTIONS);
        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
                BinaryDictUtils.DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP);
        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
                BinaryDictUtils.DYNAMIC_OPTIONS_WITH_TIMESTAMP);
        for (final String result : results) {
            Log.d(TAG, result);
        }
    }

    public void testReadAndWriteWithByteArray() {
        final List<String> results = new ArrayList<>();

        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
                BinaryDictUtils.STATIC_OPTIONS);
        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
                BinaryDictUtils.DYNAMIC_OPTIONS_WITHOUT_TIMESTAMP);
        runReadAndWriteTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
                BinaryDictUtils.DYNAMIC_OPTIONS_WITH_TIMESTAMP);

        for (final String result : results) {
            Log.d(TAG, result);
        }
    }

    // Tests for readUnigramsAndBigramsBinary

    /**
     * Asserts that the maps read from a dictionary hold exactly the expected words and bigrams.
     *
     * @param expectedWords the words that were put in the dictionary
     * @param expectedBigrams the bigrams that were put in the dictionary, as indices into
     *        {@code expectedWords}
     * @param resultWords the words that were read, by key
     * @param resultFrequencies the unigram frequencies that were read, by key
     * @param resultBigrams the bigram attributes that were read, by key of the first word
     * @param checkProbability whether unigram and bigram frequencies should also be checked
     */
    private static void checkWordMap(final List<String> expectedWords,
            final SparseArray<List<Integer>> expectedBigrams,
            final TreeMap<Integer, String> resultWords,
            final TreeMap<Integer, Integer> resultFrequencies,
            final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams,
            final boolean checkProbability) {
        // check unigrams
        final Set<String> actualWordsSet = new HashSet<>(resultWords.values());
        final Set<String> expectedWordsSet = new HashSet<>(expectedWords);
        assertEquals(expectedWordsSet, actualWordsSet);
        if (checkProbability) {
            for (final int freq : resultFrequencies.values()) {
                assertEquals(UNIGRAM_FREQ, freq);
            }
        }

        // check bigrams
        final HashMap<String, Set<String>> expBigrams = new HashMap<>();
        for (int i = 0; i < expectedBigrams.size(); ++i) {
            final String word1 = expectedWords.get(expectedBigrams.keyAt(i));
            for (final int w2 : expectedBigrams.valueAt(i)) {
                Set<String> targets = expBigrams.get(word1);
                if (targets == null) {
                    targets = new HashSet<>();
                    expBigrams.put(word1, targets);
                }
                targets.add(expectedWords.get(w2));
            }
        }

        final HashMap<String, Set<String>> actBigrams = new HashMap<>();
        for (final Entry<Integer, ArrayList<PendingAttribute>> entry : resultBigrams.entrySet()) {
            final String word1 = resultWords.get(entry.getKey());
            final int unigramFreq = resultFrequencies.get(entry.getKey());
            for (final PendingAttribute attr : entry.getValue()) {
                final String word2 = resultWords.get(attr.mAddress);
                Set<String> targets = actBigrams.get(word1);
                if (targets == null) {
                    targets = new HashSet<>();
                    actBigrams.put(word1, targets);
                }
                targets.add(word2);

                if (checkProbability) {
                    final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency(
                            unigramFreq, attr.mFrequency);
                    assertTrue("bigram frequency " + bigramFreq + " for " + word1 + "," + word2
                            + " is too far from " + BIGRAM_FREQ,
                            Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
                }
            }
        }
        assertEquals(expBigrams, actBigrams);
    }

    /**
     * Reads the unigrams and bigrams from the file, checks them and measures the read time.
     *
     * @return the time spent reading in milliseconds, or -1 if reading threw an exception
     */
    private static long timeAndCheckReadUnigramsAndBigramsBinary(final File file,
            final List<String> words, final SparseArray<List<Integer>> bigrams,
            final int bufferType, final boolean checkProbability) {
        final TreeMap<Integer, String> resultWords = new TreeMap<>();
        final TreeMap<Integer, ArrayList<PendingAttribute>> resultBigrams = new TreeMap<>();
        final TreeMap<Integer, Integer> resultFreqs = new TreeMap<>();

        long diff = -1;
        try {
            final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
                    bufferType);
            final long now = System.currentTimeMillis();
            dictDecoder.readUnigramsAndBigramsBinary(resultWords, resultFreqs, resultBigrams);
            diff = System.currentTimeMillis() - now;
        } catch (IOException e) {
            Log.e(TAG, "IOException", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "UnsupportedFormatException", e);
        }

        checkWordMap(words, bigrams, resultWords, resultFreqs, resultBigrams, checkProbability);
        return diff;
    }

    /**
     * Builds a dictionary from the words and bigrams, writes it to a file, then reads it back
     * both through readUnigramsAndBigramsBinary and through a full read, checking the contents.
     *
     * @return a message with the time both kinds of read took
     */
    private String runReadUnigramsAndBigramsBinary(final ArrayList<String> words,
            final SparseArray<List<Integer>> bigrams, final int bufferType,
            final FormatSpec.FormatOptions formatOptions, final String message) {
        final String dictName = "runReadUnigrams";
        final String dictVersion = Long.toString(System.currentTimeMillis());
        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
                getContext().getCacheDir());

        // making the dictionary from lists of words.
        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
        addUnigrams(words.size(), dict, words);
        addBigrams(dict, words, bigrams);

        timeWritingDictToFile(file, dict, formatOptions);

        // Caveat: Currently, the Java code to read a v4 dictionary doesn't calculate the
        // probability when there's a timestamp for the entry.
        // TODO: Abandon the Java code, and implement the v4 dictionary reading code in native.
        final long wordMap = timeAndCheckReadUnigramsAndBigramsBinary(file, words, bigrams,
                bufferType, !formatOptions.mHasTimestamp /* checkProbability */);
        final long fullReading = timeReadingAndCheckDict(file, words, bigrams,
                bufferType);

        return "readDictionaryBinary=" + fullReading + ", readUnigramsAndBigramsBinary=" + wordMap
                + " : " + message + " : " + outputOptions(bufferType, formatOptions);
    }

    /**
     * Runs {@link #runReadUnigramsAndBigramsBinary} with each bigram table and appends the
     * resulting messages to {@code results}.
     */
    private void runReadUnigramsAndBigramsTests(final ArrayList<String> results,
            final int bufferType, final FormatSpec.FormatOptions formatOptions) {
        results.add(runReadUnigramsAndBigramsBinary(sWords, sEmptyBigrams, bufferType,
                formatOptions, "unigram"));
        results.add(runReadUnigramsAndBigramsBinary(sWords, sChainBigrams, bufferType,
                formatOptions, "chain"));
        results.add(runReadUnigramsAndBigramsBinary(sWords, sStarBigrams, bufferType,
                formatOptions, "star"));
    }

    public void testReadUnigramsAndBigramsBinaryWithByteBuffer() {
        final ArrayList<String> results = new ArrayList<>();

        runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_BUFFER,
                BinaryDictUtils.STATIC_OPTIONS);

        for (final String result : results) {
            Log.d(TAG, result);
        }
    }

    public void testReadUnigramsAndBigramsBinaryWithByteArray() {
        final ArrayList<String> results = new ArrayList<>();

        runReadUnigramsAndBigramsTests(results, BinaryDictUtils.USE_BYTE_ARRAY,
                BinaryDictUtils.STATIC_OPTIONS);

        for (final String result : results) {
            Log.d(TAG, result);
        }
    }

    // Tests for getTerminalPosition

    /**
     * Reads the word stored at the given address of the dictionary.
     *
     * @param dictDecoder the decoder to read from; its position is reset to 0 first
     * @param address the address of the word, passed on to getWordAtPosition
     * @return the word, or null if the header could not be read
     */
    private static String getWordFromBinary(final DictDecoder dictDecoder, final int address) {
        if (dictDecoder.getPosition() != 0) dictDecoder.setPosition(0);

        final DictionaryHeader fileHeader;
        try {
            fileHeader = dictDecoder.readHeader();
        } catch (IOException | UnsupportedFormatException e) {
            return null;
        }
        if (fileHeader == null) return null;
        return BinaryDictDecoderUtils.getWordAtPosition(dictDecoder, fileHeader.mBodyOffset,
                address).mWord;
    }

    /**
     * Looks the word up with getTerminalPosition and asserts that it is found if and only if
     * it is expected to be contained in the dictionary.
     *
     * @param dictDecoder the decoder of the dictionary to search
     * @param word the word to look up
     * @param contained whether the word is expected to be in the dictionary
     * @return the lookup time in nanoseconds, or -1 if the lookup threw an exception
     */
    private static long checkGetTerminalPosition(final DictDecoder dictDecoder, final String word,
            final boolean contained) {
        long diff = -1;
        int position = -1;
        try {
            final long now = System.nanoTime();
            position = dictDecoder.getTerminalPosition(word);
            diff = System.nanoTime() - now;
        } catch (IOException e) {
            Log.e(TAG, "IOException while getTerminalPosition", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e);
        }

        assertEquals(contained, FormatSpec.NOT_VALID_WORD != position);
        if (contained) assertEquals(word, getWordFromBinary(dictDecoder, position));
        return diff;
    }

    /**
     * Writes a dictionary made of the words and bigrams, then checks getTerminalPosition
     * against invalid words, every contained word, and random words that are not contained.
     */
    private void runGetTerminalPosition(final ArrayList<String> words,
            final SparseArray<List<Integer>> bigrams, final int bufferType,
            final FormatOptions formatOptions, final String message) {
        final String dictName = "testGetTerminalPosition";
        final String dictVersion = Long.toString(System.currentTimeMillis());
        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
                getContext().getCacheDir());

        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
        addUnigrams(words.size(), dict, words);
        addBigrams(dict, words, bigrams);
        timeWritingDictToFile(file, dict, formatOptions);

        // NOTE(review): the decoder is always opened with DictDecoder.USE_BYTEARRAY; bufferType
        // only ends up in the log message below. Confirm whether that is intended.
        final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(file, 0, file.length(),
                DictDecoder.USE_BYTEARRAY);
        try {
            dictDecoder.openDictBuffer();
        } catch (IOException e) {
            Log.e(TAG, "IOException while opening the buffer", e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "UnsupportedFormatException while opening the buffer", e);
        }
        assertTrue("Can't get the buffer", dictDecoder.isDictBufferOpen());

        try {
            // too long word
            final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(longWord));

            // null
            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(null));

            // empty string
            assertEquals(FormatSpec.NOT_VALID_WORD, dictDecoder.getTerminalPosition(""));
        } catch (IOException e) {
            // An exception here would otherwise skip the assertions above without any trace.
            Log.e(TAG, "IOException while getTerminalPosition", e);
            fail("IOException while looking up invalid words: " + e);
        } catch (UnsupportedFormatException e) {
            Log.e(TAG, "UnsupportedFormatException while getTerminalPosition", e);
            fail("UnsupportedFormatException while looking up invalid words: " + e);
        }

        // Test a word that is contained within the dictionary.
        long sum = 0;
        for (final String word : words) {
            final long time = checkGetTerminalPosition(dictDecoder, word, true);
            sum += time == -1 ? 0 : time;
        }
        Log.d(TAG, "per search : " + (((double)sum) / words.size() / 1000000) + " : " + message
                + " : " + outputOptions(bufferType, formatOptions));

        // Test a word that isn't contained within the dictionary.
        final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
                mRandom);
        for (int i = 0; i < 1000; ++i) {
            final String word = CodePointUtils.generateWord(mRandom, codePointSet);
            if (words.contains(word)) continue;
            checkGetTerminalPosition(dictDecoder, word, false);
        }
    }

    private void runGetTerminalPositionTests(final int bufferType,
            final FormatOptions formatOptions) {
        runGetTerminalPosition(sWords, sEmptyBigrams, bufferType, formatOptions, "unigram");
    }

    public void testGetTerminalPosition() {
        // runGetTerminalPositionTests logs its own timings and returns nothing to collect.
        runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_ARRAY,
                BinaryDictUtils.STATIC_OPTIONS);
        runGetTerminalPositionTests(BinaryDictUtils.USE_BYTE_BUFFER,
                BinaryDictUtils.STATIC_OPTIONS);
    }

    /**
     * Checks that every word written to a static dictionary can be fetched back with
     * getWordProperty, with the probability it was written with.
     */
    public void testVer2DictGetWordProperty() {
        final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
        final ArrayList<String> words = sWords;
        final String dictName = "testGetWordProperty";
        final String dictVersion = Long.toString(System.currentTimeMillis());
        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
        addUnigrams(words.size(), dict, words);
        addBigrams(dict, words, sEmptyBigrams);
        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
                getContext().getCacheDir());
        file.delete();
        timeWritingDictToFile(file, dict, formatOptions);
        final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
                0 /* offset */, file.length(), true /* useFullEditDistance */,
                Locale.ENGLISH, dictName, false /* isUpdatable */);
        try {
            for (final String word : words) {
                final WordProperty wordProperty = binaryDictionary.getWordProperty(word,
                        false /* isBeginningOfSentence */);
                assertEquals(word, wordProperty.mWord);
                assertEquals(UNIGRAM_FREQ, wordProperty.getProbability());
            }
        } finally {
            // Release the dictionary even when an assertion fails.
            binaryDictionary.close();
        }
    }

    /**
     * Iterates over a static dictionary with getNextWordProperty and checks that exactly the
     * words and bigrams that were written are visited.
     */
    public void testVer2DictIteration() {
        final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
        final ArrayList<String> words = sWords;
        final SparseArray<List<Integer>> bigrams = sEmptyBigrams;
        // NOTE(review): same dictionary name as testVer2DictGetWordProperty; looks like a
        // copy-paste leftover. Kept as is because it determines the file that gets written.
        final String dictName = "testGetWordProperty";
        final String dictVersion = Long.toString(System.currentTimeMillis());
        final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
                BinaryDictUtils.makeDictionaryOptions(dictName, dictVersion, formatOptions));
        addUnigrams(words.size(), dict, words);
        addBigrams(dict, words, bigrams);
        final File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
                getContext().getCacheDir());
        timeWritingDictToFile(file, dict, formatOptions);
        Log.d(TAG, file.getAbsolutePath());
        final BinaryDictionary binaryDictionary = new BinaryDictionary(file.getAbsolutePath(),
                0 /* offset */, file.length(), true /* useFullEditDistance */,
                Locale.ENGLISH, dictName, false /* isUpdatable */);

        // Everything that is expected; entries are removed as the iteration visits them.
        final HashSet<String> wordSet = new HashSet<>(words);
        final HashSet<Pair<String, String>> bigramSet = new HashSet<>();

        for (int i = 0; i < words.size(); i++) {
            final List<Integer> bigramList = bigrams.get(i);
            if (bigramList != null) {
                for (final Integer word1Index : bigramList) {
                    final String word1 = words.get(word1Index);
                    bigramSet.add(new Pair<>(words.get(i), word1));
                }
            }
        }
        try {
            int token = 0;
            do {
                final BinaryDictionary.GetNextWordPropertyResult result =
                        binaryDictionary.getNextWordProperty(token);
                final WordProperty wordProperty = result.mWordProperty;
                final String word0 = wordProperty.mWord;
                assertEquals(UNIGRAM_FREQ, wordProperty.mProbabilityInfo.mProbability);
                wordSet.remove(word0);
                if (wordProperty.mHasNgrams) {
                    for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
                        final String word1 = bigramTarget.mWord;
                        final Pair<String, String> bigram = new Pair<>(word0, word1);
                        assertTrue(bigramSet.contains(bigram));
                        bigramSet.remove(bigram);
                    }
                }
                token = result.mNextToken;
            } while (token != 0);
        } finally {
            // Release the dictionary even when an assertion fails.
            binaryDictionary.close();
        }
        assertTrue(wordSet.isEmpty());
        assertTrue(bigramSet.isEmpty());
    }
}