1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package com.android.inputmethod.latin; 18 19 import android.content.Context; 20 21 import com.android.inputmethod.keyboard.KeyDetector; 22 import com.android.inputmethod.keyboard.Keyboard; 23 import com.android.inputmethod.keyboard.ProximityInfo; 24 import com.android.inputmethod.latin.UserHistoryForgettingCurveUtils.ForgettingCurveParams; 25 26 import java.util.ArrayList; 27 import java.util.LinkedList; 28 29 /** 30 * Base class for an in-memory dictionary that can grow dynamically and can 31 * be searched for suggestions and valid words. 32 */ 33 public class ExpandableDictionary extends Dictionary { 34 35 // Bigram frequency is a fixed point number with 1 meaning 1.2 and 255 meaning 1.8. 36 protected static final int BIGRAM_MAX_FREQUENCY = 255; 37 38 private Context mContext; 39 private char[] mWordBuilder = new char[BinaryDictionary.MAX_WORD_LENGTH]; 40 private int mDicTypeId; 41 private int mMaxDepth; 42 private int mInputLength; 43 44 private boolean mRequiresReload; 45 46 private boolean mUpdatingDictionary; 47 48 // Use this lock before touching mUpdatingDictionary & mRequiresDownload 49 private Object mUpdatingLock = new Object(); 50 51 private static class Node { Node()52 Node() {} 53 char mCode; 54 int mFrequency; 55 boolean mTerminal; 56 Node mParent; 57 NodeArray mChildren; 58 ArrayList<char[]> mShortcutTargets; 59 boolean mShortcutOnly; 60 LinkedList<NextWord> mNGrams; // Supports ngram 61 } 62 63 private static class NodeArray { 64 Node[] mData; 65 int mLength = 0; 66 private static final int INCREMENT = 2; 67 NodeArray()68 NodeArray() { 69 mData = new Node[INCREMENT]; 70 } 71 add(Node n)72 void add(Node n) { 73 if (mLength + 1 > mData.length) { 74 Node[] tempData = new Node[mLength + INCREMENT]; 75 if (mLength > 0) { 76 System.arraycopy(mData, 0, tempData, 0, mLength); 77 } 78 mData = tempData; 79 } 80 mData[mLength++] = n; 81 } 82 } 83 84 protected interface NextWord { getWordNode()85 public Node getWordNode(); getFrequency()86 public int getFrequency(); getFcParams()87 public ForgettingCurveParams getFcParams(); notifyTypedAgainAndGetFrequency()88 public int notifyTypedAgainAndGetFrequency(); 89 } 90 91 private static class NextStaticWord implements NextWord { 92 public final Node mWord; 93 private final int mFrequency; NextStaticWord(Node word, int frequency)94 public NextStaticWord(Node word, int frequency) { 95 mWord = word; 96 mFrequency = frequency; 97 } 98 99 @Override getWordNode()100 public Node getWordNode() { 101 return mWord; 102 } 103 104 @Override getFrequency()105 public int getFrequency() { 106 return mFrequency; 107 } 108 109 @Override getFcParams()110 public ForgettingCurveParams getFcParams() { 111 return null; 112 } 113 114 @Override notifyTypedAgainAndGetFrequency()115 public int notifyTypedAgainAndGetFrequency() { 116 return mFrequency; 117 } 118 } 119 120 private static class NextHistoryWord implements NextWord { 121 public final Node mWord; 122 public final ForgettingCurveParams mFcp; 123 NextHistoryWord(Node word, ForgettingCurveParams fcp)124 public NextHistoryWord(Node word, ForgettingCurveParams fcp) { 125 mWord = word; 126 mFcp = fcp; 127 } 128 129 @Override getWordNode()130 public Node getWordNode() { 131 return mWord; 132 } 133 134 @Override getFrequency()135 public int getFrequency() { 136 return mFcp.getFrequency(); 137 } 138 139 @Override getFcParams()140 public ForgettingCurveParams getFcParams() { 141 return mFcp; 142 } 143 144 @Override notifyTypedAgainAndGetFrequency()145 public int notifyTypedAgainAndGetFrequency() { 146 return mFcp.notifyTypedAgainAndGetFrequency(); 147 } 148 } 149 150 private NodeArray mRoots; 151 152 private int[][] mCodes; 153 ExpandableDictionary(Context context, int dicTypeId)154 public ExpandableDictionary(Context context, int dicTypeId) { 155 mContext = context; 156 clearDictionary(); 157 mCodes = new int[BinaryDictionary.MAX_WORD_LENGTH][]; 158 mDicTypeId = dicTypeId; 159 } 160 loadDictionary()161 public void loadDictionary() { 162 synchronized (mUpdatingLock) { 163 startDictionaryLoadingTaskLocked(); 164 } 165 } 166 startDictionaryLoadingTaskLocked()167 public void startDictionaryLoadingTaskLocked() { 168 if (!mUpdatingDictionary) { 169 mUpdatingDictionary = true; 170 mRequiresReload = false; 171 new LoadDictionaryTask().start(); 172 } 173 } 174 setRequiresReload(boolean reload)175 public void setRequiresReload(boolean reload) { 176 synchronized (mUpdatingLock) { 177 mRequiresReload = reload; 178 } 179 } 180 getRequiresReload()181 public boolean getRequiresReload() { 182 return mRequiresReload; 183 } 184 185 /** Override to load your dictionary here, on a background thread. */ loadDictionaryAsync()186 public void loadDictionaryAsync() { 187 // empty base implementation 188 } 189 getContext()190 public Context getContext() { 191 return mContext; 192 } 193 getMaxWordLength()194 public int getMaxWordLength() { 195 return BinaryDictionary.MAX_WORD_LENGTH; 196 } 197 addWord(final String word, final String shortcutTarget, final int frequency)198 public void addWord(final String word, final String shortcutTarget, final int frequency) { 199 if (word.length() >= BinaryDictionary.MAX_WORD_LENGTH) { 200 return; 201 } 202 addWordRec(mRoots, word, 0, shortcutTarget, frequency, null); 203 } 204 addWordRec(NodeArray children, final String word, final int depth, final String shortcutTarget, final int frequency, Node parentNode)205 private void addWordRec(NodeArray children, final String word, final int depth, 206 final String shortcutTarget, final int frequency, Node parentNode) { 207 final int wordLength = word.length(); 208 if (wordLength <= depth) return; 209 final char c = word.charAt(depth); 210 // Does children have the current character? 211 final int childrenLength = children.mLength; 212 Node childNode = null; 213 for (int i = 0; i < childrenLength; i++) { 214 final Node node = children.mData[i]; 215 if (node.mCode == c) { 216 childNode = node; 217 break; 218 } 219 } 220 final boolean isShortcutOnly = (null != shortcutTarget); 221 if (childNode == null) { 222 childNode = new Node(); 223 childNode.mCode = c; 224 childNode.mParent = parentNode; 225 childNode.mShortcutOnly = isShortcutOnly; 226 children.add(childNode); 227 } 228 if (wordLength == depth + 1 && shortcutTarget != null) { 229 // Terminate this word 230 childNode.mTerminal = true; 231 if (isShortcutOnly) { 232 if (null == childNode.mShortcutTargets) { 233 childNode.mShortcutTargets = new ArrayList<char[]>(); 234 } 235 childNode.mShortcutTargets.add(shortcutTarget.toCharArray()); 236 } else { 237 childNode.mShortcutOnly = false; 238 } 239 childNode.mFrequency = Math.max(frequency, childNode.mFrequency); 240 if (childNode.mFrequency > 255) childNode.mFrequency = 255; 241 return; 242 } 243 if (childNode.mChildren == null) { 244 childNode.mChildren = new NodeArray(); 245 } 246 addWordRec(childNode.mChildren, word, depth + 1, shortcutTarget, frequency, childNode); 247 } 248 249 @Override getWords(final WordComposer codes, final CharSequence prevWordForBigrams, final WordCallback callback, final ProximityInfo proximityInfo)250 public void getWords(final WordComposer codes, final CharSequence prevWordForBigrams, 251 final WordCallback callback, final ProximityInfo proximityInfo) { 252 synchronized (mUpdatingLock) { 253 // If we need to update, start off a background task 254 if (mRequiresReload) startDictionaryLoadingTaskLocked(); 255 // Currently updating contacts, don't return any results. 256 if (mUpdatingDictionary) return; 257 } 258 if (codes.size() >= BinaryDictionary.MAX_WORD_LENGTH) { 259 return; 260 } 261 getWordsInner(codes, prevWordForBigrams, callback, proximityInfo); 262 } 263 getWordsInner(final WordComposer codes, final CharSequence prevWordForBigrams, final WordCallback callback, final ProximityInfo proximityInfo)264 protected final void getWordsInner(final WordComposer codes, 265 final CharSequence prevWordForBigrams, final WordCallback callback, 266 final ProximityInfo proximityInfo) { 267 mInputLength = codes.size(); 268 if (mCodes.length < mInputLength) mCodes = new int[mInputLength][]; 269 final int[] xCoordinates = codes.getXCoordinates(); 270 final int[] yCoordinates = codes.getYCoordinates(); 271 // Cache the codes so that we don't have to lookup an array list 272 for (int i = 0; i < mInputLength; i++) { 273 // TODO: Calculate proximity info here. 274 if (mCodes[i] == null || mCodes[i].length < 1) { 275 mCodes[i] = new int[ProximityInfo.MAX_PROXIMITY_CHARS_SIZE]; 276 } 277 final int x = xCoordinates != null && i < xCoordinates.length ? 278 xCoordinates[i] : WordComposer.NOT_A_COORDINATE; 279 final int y = xCoordinates != null && i < yCoordinates.length ? 280 yCoordinates[i] : WordComposer.NOT_A_COORDINATE; 281 proximityInfo.fillArrayWithNearestKeyCodes(x, y, codes.getCodeAt(i), mCodes[i]); 282 } 283 mMaxDepth = mInputLength * 3; 284 getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, -1, callback); 285 for (int i = 0; i < mInputLength; i++) { 286 getWordsRec(mRoots, codes, mWordBuilder, 0, false, 1, 0, i, callback); 287 } 288 } 289 290 @Override 291 public synchronized boolean isValidWord(CharSequence word) { 292 synchronized (mUpdatingLock) { 293 // If we need to update, start off a background task 294 if (mRequiresReload) startDictionaryLoadingTaskLocked(); 295 if (mUpdatingDictionary) return false; 296 } 297 final Node node = searchNode(mRoots, word, 0, word.length()); 298 // If node is null, we didn't find the word, so it's not valid. 299 // If node.mShortcutOnly is true, then it exists as a shortcut but not as a word, 300 // so that means it's not a valid word. 301 // If node.mShortcutOnly is false, then it exists as a word (it may also exist as 302 // a shortcut, but this does not matter), so it's a valid word. 303 return (node == null) ? false : !node.mShortcutOnly; 304 } 305 306 protected boolean removeBigram(String word1, String word2) { 307 // Refer to addOrSetBigram() about word1.toLowerCase() 308 final Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null); 309 final Node secondWord = searchWord(mRoots, word2, 0, null); 310 LinkedList<NextWord> bigrams = firstWord.mNGrams; 311 NextWord bigramNode = null; 312 if (bigrams == null || bigrams.size() == 0) { 313 return false; 314 } else { 315 for (NextWord nw : bigrams) { 316 if (nw.getWordNode() == secondWord) { 317 bigramNode = nw; 318 break; 319 } 320 } 321 } 322 if (bigramNode == null) { 323 return false; 324 } 325 return bigrams.remove(bigramNode); 326 } 327 328 /** 329 * Returns the word's frequency or -1 if not found 330 */ 331 protected int getWordFrequency(CharSequence word) { 332 // Case-sensitive search 333 final Node node = searchNode(mRoots, word, 0, word.length()); 334 return (node == null) ? -1 : node.mFrequency; 335 } 336 337 protected NextWord getBigramWord(String word1, String word2) { 338 // Refer to addOrSetBigram() about word1.toLowerCase() 339 final Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null); 340 final Node secondWord = searchWord(mRoots, word2, 0, null); 341 LinkedList<NextWord> bigrams = firstWord.mNGrams; 342 if (bigrams == null || bigrams.size() == 0) { 343 return null; 344 } else { 345 for (NextWord nw : bigrams) { 346 if (nw.getWordNode() == secondWord) { 347 return nw; 348 } 349 } 350 } 351 return null; 352 } 353 354 private static int computeSkippedWordFinalFreq(int freq, int snr, int inputLength) { 355 // The computation itself makes sense for >= 2, but the == 2 case returns 0 356 // anyway so we may as well test against 3 instead and return the constant 357 if (inputLength >= 3) { 358 return (freq * snr * (inputLength - 2)) / (inputLength - 1); 359 } else { 360 return 0; 361 } 362 } 363 364 /** 365 * Helper method to add a word and its shortcuts. 366 * 367 * @param node the terminal node 368 * @param word the word to insert, as an array of code points 369 * @param depth the depth of the node in the tree 370 * @param finalFreq the frequency for this word 371 * @return whether there is still space for more words. 372 * @see Dictionary.WordCallback#addWord(char[], int, int, int, int, int) 373 */ 374 private boolean addWordAndShortcutsFromNode(final Node node, final char[] word, final int depth, 375 final int finalFreq, final WordCallback callback) { 376 if (finalFreq > 0 && !node.mShortcutOnly) { 377 if (!callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId, Dictionary.UNIGRAM)) { 378 return false; 379 } 380 } 381 if (null != node.mShortcutTargets) { 382 final int length = node.mShortcutTargets.size(); 383 for (int shortcutIndex = 0; shortcutIndex < length; ++shortcutIndex) { 384 final char[] shortcut = node.mShortcutTargets.get(shortcutIndex); 385 if (!callback.addWord(shortcut, 0, shortcut.length, finalFreq, mDicTypeId, 386 Dictionary.UNIGRAM)) { 387 return false; 388 } 389 } 390 } 391 return true; 392 } 393 394 /** 395 * Recursively traverse the tree for words that match the input. Input consists of 396 * a list of arrays. Each item in the list is one input character position. An input 397 * character is actually an array of multiple possible candidates. This function is not 398 * optimized for speed, assuming that the user dictionary will only be a few hundred words in 399 * size. 400 * @param roots node whose children have to be search for matches 401 * @param codes the input character codes 402 * @param word the word being composed as a possible match 403 * @param depth the depth of traversal - the length of the word being composed thus far 404 * @param completion whether the traversal is now in completion mode - meaning that we've 405 * exhausted the input and we're looking for all possible suffixes. 406 * @param snr current weight of the word being formed 407 * @param inputIndex position in the input characters. This can be off from the depth in 408 * case we skip over some punctuations such as apostrophe in the traversal. That is, if you type 409 * "wouldve", it could be matching "would've", so the depth will be one more than the 410 * inputIndex 411 * @param callback the callback class for adding a word 412 */ 413 // TODO: Share this routine with the native code for BinaryDictionary 414 protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word, 415 final int depth, final boolean completion, int snr, int inputIndex, int skipPos, 416 WordCallback callback) { 417 final int count = roots.mLength; 418 final int codeSize = mInputLength; 419 // Optimization: Prune out words that are too long compared to how much was typed. 420 if (depth > mMaxDepth) { 421 return; 422 } 423 final int[] currentChars; 424 if (codeSize <= inputIndex) { 425 currentChars = null; 426 } else { 427 currentChars = mCodes[inputIndex]; 428 } 429 430 for (int i = 0; i < count; i++) { 431 final Node node = roots.mData[i]; 432 final char c = node.mCode; 433 final char lowerC = toLowerCase(c); 434 final boolean terminal = node.mTerminal; 435 final NodeArray children = node.mChildren; 436 final int freq = node.mFrequency; 437 if (completion || currentChars == null) { 438 word[depth] = c; 439 if (terminal) { 440 final int finalFreq; 441 if (skipPos < 0) { 442 finalFreq = freq * snr; 443 } else { 444 finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength); 445 } 446 if (!addWordAndShortcutsFromNode(node, word, depth, finalFreq, callback)) { 447 // No space left in the queue, bail out 448 return; 449 } 450 } 451 if (children != null) { 452 getWordsRec(children, codes, word, depth + 1, true, snr, inputIndex, 453 skipPos, callback); 454 } 455 } else if ((c == Keyboard.CODE_SINGLE_QUOTE 456 && currentChars[0] != Keyboard.CODE_SINGLE_QUOTE) || depth == skipPos) { 457 // Skip the ' and continue deeper 458 word[depth] = c; 459 if (children != null) { 460 getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex, 461 skipPos, callback); 462 } 463 } else { 464 // Don't use alternatives if we're looking for missing characters 465 final int alternativesSize = skipPos >= 0 ? 1 : currentChars.length; 466 for (int j = 0; j < alternativesSize; j++) { 467 final int addedAttenuation = (j > 0 ? 1 : 2); 468 final int currentChar = currentChars[j]; 469 if (currentChar == KeyDetector.NOT_A_CODE) { 470 break; 471 } 472 if (currentChar == lowerC || currentChar == c) { 473 word[depth] = c; 474 475 if (codeSize == inputIndex + 1) { 476 if (terminal) { 477 final int finalFreq; 478 if (skipPos < 0) { 479 finalFreq = freq * snr * addedAttenuation 480 * FULL_WORD_SCORE_MULTIPLIER; 481 } else { 482 finalFreq = computeSkippedWordFinalFreq(freq, 483 snr * addedAttenuation, mInputLength); 484 } 485 if (!addWordAndShortcutsFromNode(node, word, depth, finalFreq, 486 callback)) { 487 // No space left in the queue, bail out 488 return; 489 } 490 } 491 if (children != null) { 492 getWordsRec(children, codes, word, depth + 1, 493 true, snr * addedAttenuation, inputIndex + 1, 494 skipPos, callback); 495 } 496 } else if (children != null) { 497 getWordsRec(children, codes, word, depth + 1, 498 false, snr * addedAttenuation, inputIndex + 1, 499 skipPos, callback); 500 } 501 } 502 } 503 } 504 } 505 } 506 507 public int setBigramAndGetFrequency(String word1, String word2, int frequency) { 508 return setBigramAndGetFrequency(word1, word2, frequency, null /* unused */); 509 } 510 511 public int setBigramAndGetFrequency(String word1, String word2, ForgettingCurveParams fcp) { 512 return setBigramAndGetFrequency(word1, word2, 0 /* unused */, fcp); 513 } 514 515 /** 516 * Adds bigrams to the in-memory trie structure that is being used to retrieve any word 517 * @param frequency frequency for this bigram 518 * @param addFrequency if true, it adds to current frequency, else it overwrites the old value 519 * @return returns the final bigram frequency 520 */ 521 private int setBigramAndGetFrequency( 522 String word1, String word2, int frequency, ForgettingCurveParams fcp) { 523 // We don't want results to be different according to case of the looked up left hand side 524 // word. We do want however to return the correct case for the right hand side. 525 // So we want to squash the case of the left hand side, and preserve that of the right 526 // hand side word. 527 Node firstWord = searchWord(mRoots, word1.toLowerCase(), 0, null); 528 Node secondWord = searchWord(mRoots, word2, 0, null); 529 LinkedList<NextWord> bigrams = firstWord.mNGrams; 530 if (bigrams == null || bigrams.size() == 0) { 531 firstWord.mNGrams = new LinkedList<NextWord>(); 532 bigrams = firstWord.mNGrams; 533 } else { 534 for (NextWord nw : bigrams) { 535 if (nw.getWordNode() == secondWord) { 536 return nw.notifyTypedAgainAndGetFrequency(); 537 } 538 } 539 } 540 if (fcp != null) { 541 // history 542 firstWord.mNGrams.add(new NextHistoryWord(secondWord, fcp)); 543 } else { 544 firstWord.mNGrams.add(new NextStaticWord(secondWord, frequency)); 545 } 546 return frequency; 547 } 548 549 /** 550 * Searches for the word and add the word if it does not exist. 551 * @return Returns the terminal node of the word we are searching for. 552 */ 553 private Node searchWord(NodeArray children, String word, int depth, Node parentNode) { 554 final int wordLength = word.length(); 555 final char c = word.charAt(depth); 556 // Does children have the current character? 557 final int childrenLength = children.mLength; 558 Node childNode = null; 559 for (int i = 0; i < childrenLength; i++) { 560 final Node node = children.mData[i]; 561 if (node.mCode == c) { 562 childNode = node; 563 break; 564 } 565 } 566 if (childNode == null) { 567 childNode = new Node(); 568 childNode.mCode = c; 569 childNode.mParent = parentNode; 570 children.add(childNode); 571 } 572 if (wordLength == depth + 1) { 573 // Terminate this word 574 childNode.mTerminal = true; 575 return childNode; 576 } 577 if (childNode.mChildren == null) { 578 childNode.mChildren = new NodeArray(); 579 } 580 return searchWord(childNode.mChildren, word, depth + 1, childNode); 581 } 582 583 // @VisibleForTesting 584 boolean reloadDictionaryIfRequired() { 585 synchronized (mUpdatingLock) { 586 // If we need to update, start off a background task 587 if (mRequiresReload) startDictionaryLoadingTaskLocked(); 588 // Currently updating contacts, don't return any results. 589 return mUpdatingDictionary; 590 } 591 } 592 593 private void runBigramReverseLookUp(final CharSequence previousWord, 594 final WordCallback callback) { 595 // Search for the lowercase version of the word only, because that's where bigrams 596 // store their sons. 597 Node prevWord = searchNode(mRoots, previousWord.toString().toLowerCase(), 0, 598 previousWord.length()); 599 if (prevWord != null && prevWord.mNGrams != null) { 600 reverseLookUp(prevWord.mNGrams, callback); 601 } 602 } 603 604 @Override 605 public void getBigrams(final WordComposer codes, final CharSequence previousWord, 606 final WordCallback callback) { 607 if (!reloadDictionaryIfRequired()) { 608 runBigramReverseLookUp(previousWord, callback); 609 } 610 } 611 612 /** 613 * Used for testing purposes and in the spell checker 614 * This function will wait for loading from database to be done 615 */ 616 void waitForDictionaryLoading() { 617 while (mUpdatingDictionary) { 618 try { 619 Thread.sleep(100); 620 } catch (InterruptedException e) { 621 // 622 } 623 } 624 } 625 626 protected final void blockingReloadDictionaryIfRequired() { 627 reloadDictionaryIfRequired(); 628 waitForDictionaryLoading(); 629 } 630 631 // Local to reverseLookUp, but do not allocate each time. 632 private final char[] mLookedUpString = new char[BinaryDictionary.MAX_WORD_LENGTH]; 633 634 /** 635 * reverseLookUp retrieves the full word given a list of terminal nodes and adds those words 636 * through callback. 637 * @param terminalNodes list of terminal nodes we want to add 638 */ 639 private void reverseLookUp(LinkedList<NextWord> terminalNodes, 640 final WordCallback callback) { 641 Node node; 642 int freq; 643 for (NextWord nextWord : terminalNodes) { 644 node = nextWord.getWordNode(); 645 freq = nextWord.getFrequency(); 646 int index = BinaryDictionary.MAX_WORD_LENGTH; 647 do { 648 --index; 649 mLookedUpString[index] = node.mCode; 650 node = node.mParent; 651 } while (node != null); 652 653 if (freq >= 0) { 654 callback.addWord(mLookedUpString, index, BinaryDictionary.MAX_WORD_LENGTH - index, 655 freq, mDicTypeId, Dictionary.BIGRAM); 656 } 657 } 658 } 659 660 /** 661 * Recursively search for the terminal node of the word. 662 * 663 * One iteration takes the full word to search for and the current index of the recursion. 664 * 665 * @param children the node of the trie to search under. 666 * @param word the word to search for. Only read [offset..length] so there may be trailing chars 667 * @param offset the index in {@code word} this recursion should operate on. 668 * @param length the length of the input word. 669 * @return Returns the terminal node of the word if the word exists 670 */ 671 private Node searchNode(final NodeArray children, final CharSequence word, final int offset, 672 final int length) { 673 final int count = children.mLength; 674 final char currentChar = word.charAt(offset); 675 for (int j = 0; j < count; j++) { 676 final Node node = children.mData[j]; 677 if (node.mCode == currentChar) { 678 if (offset == length - 1) { 679 if (node.mTerminal) { 680 return node; 681 } 682 } else { 683 if (node.mChildren != null) { 684 Node returnNode = searchNode(node.mChildren, word, offset + 1, length); 685 if (returnNode != null) return returnNode; 686 } 687 } 688 } 689 } 690 return null; 691 } 692 693 protected void clearDictionary() { 694 mRoots = new NodeArray(); 695 } 696 697 private class LoadDictionaryTask extends Thread { 698 LoadDictionaryTask() {} 699 @Override 700 public void run() { 701 loadDictionaryAsync(); 702 synchronized (mUpdatingLock) { 703 mUpdatingDictionary = false; 704 } 705 } 706 } 707 708 private static char toLowerCase(char c) { 709 char baseChar = c; 710 if (c < BASE_CHARS.length) { 711 baseChar = BASE_CHARS[c]; 712 } 713 if (baseChar >= 'A' && baseChar <= 'Z') { 714 return (char)(baseChar | 32); 715 } else if (baseChar > 127) { 716 return Character.toLowerCase(baseChar); 717 } 718 return baseChar; 719 } 720 721 /** 722 * Table mapping most combined Latin, Greek, and Cyrillic characters 723 * to their base characters. If c is in range, BASE_CHARS[c] == c 724 * if c is not a combined character, or the base character if it 725 * is combined. 726 */ 727 private static final char BASE_CHARS[] = { 728 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 729 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 730 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 731 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 732 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 733 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, 734 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 735 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 736 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 737 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 738 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 739 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, 740 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 741 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 742 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 743 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, 744 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 745 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 746 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 747 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, 748 0x0020, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 749 0x0020, 0x00a9, 0x0061, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0020, 750 0x00b0, 0x00b1, 0x0032, 0x0033, 0x0020, 0x03bc, 0x00b6, 0x00b7, 751 0x0020, 0x0031, 0x006f, 0x00bb, 0x0031, 0x0031, 0x0033, 0x00bf, 752 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x0041, 0x00c6, 0x0043, 753 0x0045, 0x0045, 0x0045, 0x0045, 0x0049, 0x0049, 0x0049, 0x0049, 754 0x00d0, 0x004e, 0x004f, 0x004f, 0x004f, 0x004f, 0x004f, 0x00d7, 755 0x004f, 0x0055, 0x0055, 0x0055, 0x0055, 0x0059, 0x00de, 0x0073, // Manually changed d8 to 4f 756 // Manually changed df to 73 757 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x0061, 0x00e6, 0x0063, 758 0x0065, 0x0065, 0x0065, 0x0065, 0x0069, 0x0069, 0x0069, 0x0069, 759 0x00f0, 0x006e, 0x006f, 0x006f, 0x006f, 0x006f, 0x006f, 0x00f7, 760 0x006f, 0x0075, 0x0075, 0x0075, 0x0075, 0x0079, 0x00fe, 0x0079, // Manually changed f8 to 6f 761 0x0041, 0x0061, 0x0041, 0x0061, 0x0041, 0x0061, 0x0043, 0x0063, 762 0x0043, 0x0063, 0x0043, 0x0063, 0x0043, 0x0063, 0x0044, 0x0064, 763 0x0110, 0x0111, 0x0045, 0x0065, 0x0045, 0x0065, 0x0045, 0x0065, 764 0x0045, 0x0065, 0x0045, 0x0065, 0x0047, 0x0067, 0x0047, 0x0067, 765 0x0047, 0x0067, 0x0047, 0x0067, 0x0048, 0x0068, 0x0126, 0x0127, 766 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 0x0049, 0x0069, 767 0x0049, 0x0131, 0x0049, 0x0069, 0x004a, 0x006a, 0x004b, 0x006b, 768 0x0138, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 0x006c, 0x004c, 769 0x006c, 0x0141, 0x0142, 0x004e, 0x006e, 0x004e, 0x006e, 0x004e, 770 0x006e, 0x02bc, 0x014a, 0x014b, 0x004f, 0x006f, 0x004f, 0x006f, 771 0x004f, 0x006f, 0x0152, 0x0153, 0x0052, 0x0072, 0x0052, 0x0072, 772 0x0052, 0x0072, 0x0053, 0x0073, 0x0053, 0x0073, 0x0053, 0x0073, 773 0x0053, 0x0073, 0x0054, 0x0074, 0x0054, 0x0074, 0x0166, 0x0167, 774 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 0x0055, 0x0075, 775 0x0055, 0x0075, 0x0055, 0x0075, 0x0057, 0x0077, 0x0059, 0x0079, 776 0x0059, 0x005a, 0x007a, 0x005a, 0x007a, 0x005a, 0x007a, 0x0073, 777 0x0180, 0x0181, 0x0182, 0x0183, 0x0184, 0x0185, 0x0186, 0x0187, 778 0x0188, 0x0189, 0x018a, 0x018b, 0x018c, 0x018d, 0x018e, 0x018f, 779 0x0190, 0x0191, 0x0192, 0x0193, 0x0194, 0x0195, 0x0196, 0x0197, 780 0x0198, 0x0199, 0x019a, 0x019b, 0x019c, 0x019d, 0x019e, 0x019f, 781 0x004f, 0x006f, 0x01a2, 0x01a3, 0x01a4, 0x01a5, 0x01a6, 0x01a7, 782 0x01a8, 0x01a9, 0x01aa, 0x01ab, 0x01ac, 0x01ad, 0x01ae, 0x0055, 783 0x0075, 0x01b1, 0x01b2, 0x01b3, 0x01b4, 0x01b5, 0x01b6, 0x01b7, 784 0x01b8, 0x01b9, 0x01ba, 0x01bb, 0x01bc, 0x01bd, 0x01be, 0x01bf, 785 0x01c0, 0x01c1, 0x01c2, 0x01c3, 0x0044, 0x0044, 0x0064, 0x004c, 786 0x004c, 0x006c, 0x004e, 0x004e, 0x006e, 0x0041, 0x0061, 0x0049, 787 0x0069, 0x004f, 0x006f, 0x0055, 0x0075, 0x00dc, 0x00fc, 0x00dc, 788 0x00fc, 0x00dc, 0x00fc, 0x00dc, 0x00fc, 0x01dd, 0x00c4, 0x00e4, 789 0x0226, 0x0227, 0x00c6, 0x00e6, 0x01e4, 0x01e5, 0x0047, 0x0067, 790 0x004b, 0x006b, 0x004f, 0x006f, 0x01ea, 0x01eb, 0x01b7, 0x0292, 791 0x006a, 0x0044, 0x0044, 0x0064, 0x0047, 0x0067, 0x01f6, 0x01f7, 792 0x004e, 0x006e, 0x00c5, 0x00e5, 0x00c6, 0x00e6, 0x00d8, 0x00f8, 793 0x0041, 0x0061, 0x0041, 0x0061, 0x0045, 0x0065, 0x0045, 0x0065, 794 0x0049, 0x0069, 0x0049, 0x0069, 0x004f, 0x006f, 0x004f, 0x006f, 795 0x0052, 0x0072, 0x0052, 0x0072, 0x0055, 0x0075, 0x0055, 0x0075, 796 0x0053, 0x0073, 0x0054, 0x0074, 0x021c, 0x021d, 0x0048, 0x0068, 797 0x0220, 0x0221, 0x0222, 0x0223, 0x0224, 0x0225, 0x0041, 0x0061, 798 0x0045, 0x0065, 0x00d6, 0x00f6, 0x00d5, 0x00f5, 0x004f, 0x006f, 799 0x022e, 0x022f, 0x0059, 0x0079, 0x0234, 0x0235, 0x0236, 0x0237, 800 0x0238, 0x0239, 0x023a, 0x023b, 0x023c, 0x023d, 0x023e, 0x023f, 801 0x0240, 0x0241, 0x0242, 0x0243, 0x0244, 0x0245, 0x0246, 0x0247, 802 0x0248, 0x0249, 0x024a, 0x024b, 0x024c, 0x024d, 0x024e, 0x024f, 803 0x0250, 0x0251, 0x0252, 0x0253, 0x0254, 0x0255, 0x0256, 0x0257, 804 0x0258, 0x0259, 0x025a, 0x025b, 0x025c, 0x025d, 0x025e, 0x025f, 805 0x0260, 0x0261, 0x0262, 0x0263, 0x0264, 0x0265, 0x0266, 0x0267, 806 0x0268, 0x0269, 0x026a, 0x026b, 0x026c, 0x026d, 0x026e, 0x026f, 807 0x0270, 0x0271, 0x0272, 0x0273, 0x0274, 0x0275, 0x0276, 0x0277, 808 0x0278, 0x0279, 0x027a, 0x027b, 0x027c, 0x027d, 0x027e, 0x027f, 809 0x0280, 0x0281, 0x0282, 0x0283, 0x0284, 0x0285, 0x0286, 0x0287, 810 0x0288, 0x0289, 0x028a, 0x028b, 0x028c, 0x028d, 0x028e, 0x028f, 811 0x0290, 0x0291, 0x0292, 0x0293, 0x0294, 0x0295, 0x0296, 0x0297, 812 0x0298, 0x0299, 0x029a, 0x029b, 0x029c, 0x029d, 0x029e, 0x029f, 813 0x02a0, 0x02a1, 0x02a2, 0x02a3, 0x02a4, 0x02a5, 0x02a6, 0x02a7, 814 0x02a8, 0x02a9, 0x02aa, 0x02ab, 0x02ac, 0x02ad, 0x02ae, 0x02af, 815 0x0068, 0x0266, 0x006a, 0x0072, 0x0279, 0x027b, 0x0281, 0x0077, 816 0x0079, 0x02b9, 0x02ba, 0x02bb, 0x02bc, 0x02bd, 0x02be, 0x02bf, 817 0x02c0, 0x02c1, 0x02c2, 0x02c3, 0x02c4, 0x02c5, 0x02c6, 0x02c7, 818 0x02c8, 0x02c9, 0x02ca, 0x02cb, 0x02cc, 0x02cd, 0x02ce, 0x02cf, 819 0x02d0, 0x02d1, 0x02d2, 0x02d3, 0x02d4, 0x02d5, 0x02d6, 0x02d7, 820 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x0020, 0x02de, 0x02df, 821 0x0263, 0x006c, 0x0073, 0x0078, 0x0295, 0x02e5, 0x02e6, 0x02e7, 822 0x02e8, 0x02e9, 0x02ea, 0x02eb, 0x02ec, 0x02ed, 0x02ee, 0x02ef, 823 0x02f0, 0x02f1, 0x02f2, 0x02f3, 0x02f4, 0x02f5, 0x02f6, 0x02f7, 824 0x02f8, 0x02f9, 0x02fa, 0x02fb, 0x02fc, 0x02fd, 0x02fe, 0x02ff, 825 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 826 0x0308, 0x0309, 0x030a, 0x030b, 0x030c, 0x030d, 0x030e, 0x030f, 827 0x0310, 0x0311, 0x0312, 0x0313, 0x0314, 0x0315, 0x0316, 0x0317, 828 0x0318, 0x0319, 0x031a, 0x031b, 0x031c, 0x031d, 0x031e, 0x031f, 829 0x0320, 0x0321, 0x0322, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, 830 0x0328, 0x0329, 0x032a, 0x032b, 0x032c, 0x032d, 0x032e, 0x032f, 831 0x0330, 0x0331, 0x0332, 0x0333, 0x0334, 0x0335, 0x0336, 0x0337, 832 0x0338, 0x0339, 0x033a, 0x033b, 0x033c, 0x033d, 0x033e, 0x033f, 833 0x0300, 0x0301, 0x0342, 0x0313, 0x0308, 0x0345, 0x0346, 0x0347, 834 0x0348, 0x0349, 0x034a, 0x034b, 0x034c, 0x034d, 0x034e, 0x034f, 835 0x0350, 0x0351, 0x0352, 0x0353, 0x0354, 0x0355, 0x0356, 0x0357, 836 0x0358, 0x0359, 0x035a, 0x035b, 0x035c, 0x035d, 0x035e, 0x035f, 837 0x0360, 0x0361, 0x0362, 0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 838 0x0368, 0x0369, 0x036a, 0x036b, 0x036c, 0x036d, 0x036e, 0x036f, 839 0x0370, 0x0371, 0x0372, 0x0373, 0x02b9, 0x0375, 0x0376, 0x0377, 840 0x0378, 0x0379, 0x0020, 0x037b, 0x037c, 0x037d, 0x003b, 0x037f, 841 0x0380, 0x0381, 0x0382, 0x0383, 0x0020, 0x00a8, 0x0391, 0x00b7, 842 0x0395, 0x0397, 0x0399, 0x038b, 0x039f, 0x038d, 0x03a5, 0x03a9, 843 0x03ca, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 844 0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 845 0x03a0, 0x03a1, 0x03a2, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7, 846 0x03a8, 0x03a9, 0x0399, 0x03a5, 0x03b1, 0x03b5, 0x03b7, 0x03b9, 847 0x03cb, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, 848 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 849 0x03c0, 0x03c1, 0x03c2, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 850 0x03c8, 0x03c9, 0x03b9, 0x03c5, 0x03bf, 0x03c5, 0x03c9, 0x03cf, 851 0x03b2, 0x03b8, 0x03a5, 0x03d2, 0x03d2, 0x03c6, 0x03c0, 0x03d7, 852 0x03d8, 0x03d9, 0x03da, 0x03db, 0x03dc, 0x03dd, 0x03de, 0x03df, 853 0x03e0, 0x03e1, 0x03e2, 0x03e3, 0x03e4, 0x03e5, 0x03e6, 0x03e7, 854 0x03e8, 0x03e9, 0x03ea, 0x03eb, 0x03ec, 0x03ed, 0x03ee, 0x03ef, 855 0x03ba, 0x03c1, 0x03c2, 0x03f3, 0x0398, 0x03b5, 0x03f6, 0x03f7, 856 0x03f8, 0x03a3, 0x03fa, 0x03fb, 0x03fc, 0x03fd, 0x03fe, 0x03ff, 857 0x0415, 0x0415, 0x0402, 0x0413, 0x0404, 0x0405, 0x0406, 0x0406, 858 0x0408, 0x0409, 0x040a, 0x040b, 0x041a, 0x0418, 0x0423, 0x040f, 859 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 860 0x0418, 0x0418, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, 861 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 862 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 863 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 864 0x0438, 0x0438, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 865 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 866 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, 867 0x0435, 0x0435, 0x0452, 0x0433, 0x0454, 0x0455, 0x0456, 0x0456, 868 0x0458, 0x0459, 0x045a, 0x045b, 0x043a, 0x0438, 0x0443, 0x045f, 869 0x0460, 0x0461, 0x0462, 0x0463, 0x0464, 0x0465, 0x0466, 0x0467, 870 0x0468, 0x0469, 0x046a, 0x046b, 0x046c, 0x046d, 0x046e, 0x046f, 871 0x0470, 0x0471, 0x0472, 0x0473, 0x0474, 0x0475, 0x0474, 0x0475, 872 0x0478, 0x0479, 0x047a, 0x047b, 0x047c, 0x047d, 0x047e, 0x047f, 873 0x0480, 0x0481, 0x0482, 0x0483, 0x0484, 0x0485, 0x0486, 0x0487, 874 0x0488, 0x0489, 0x048a, 0x048b, 0x048c, 0x048d, 0x048e, 0x048f, 875 0x0490, 0x0491, 0x0492, 0x0493, 0x0494, 0x0495, 0x0496, 0x0497, 876 0x0498, 0x0499, 0x049a, 0x049b, 0x049c, 0x049d, 0x049e, 0x049f, 877 0x04a0, 0x04a1, 0x04a2, 0x04a3, 0x04a4, 0x04a5, 0x04a6, 0x04a7, 878 0x04a8, 0x04a9, 0x04aa, 0x04ab, 0x04ac, 0x04ad, 0x04ae, 0x04af, 879 0x04b0, 0x04b1, 0x04b2, 0x04b3, 0x04b4, 0x04b5, 0x04b6, 0x04b7, 880 0x04b8, 0x04b9, 0x04ba, 0x04bb, 0x04bc, 0x04bd, 0x04be, 0x04bf, 881 0x04c0, 0x0416, 0x0436, 0x04c3, 0x04c4, 0x04c5, 0x04c6, 0x04c7, 882 0x04c8, 0x04c9, 0x04ca, 0x04cb, 0x04cc, 0x04cd, 0x04ce, 0x04cf, 883 0x0410, 0x0430, 0x0410, 0x0430, 0x04d4, 0x04d5, 0x0415, 0x0435, 884 0x04d8, 0x04d9, 0x04d8, 0x04d9, 0x0416, 0x0436, 0x0417, 0x0437, 885 0x04e0, 0x04e1, 0x0418, 0x0438, 0x0418, 0x0438, 0x041e, 0x043e, 886 0x04e8, 0x04e9, 0x04e8, 0x04e9, 0x042d, 0x044d, 0x0423, 0x0443, 887 0x0423, 0x0443, 0x0423, 0x0443, 0x0427, 0x0447, 0x04f6, 0x04f7, 888 0x042b, 0x044b, 0x04fa, 0x04fb, 0x04fc, 0x04fd, 0x04fe, 0x04ff, 889 }; 890 891 // generated with: 892 // cat UnicodeData.txt | perl -e 'while (<>) { @foo = split(/;/); $foo[5] =~ s/<.*> //; $base[hex($foo[0])] = hex($foo[5]);} for ($i = 0; $i < 0x500; $i += 8) { for ($j = $i; $j < $i + 8; $j++) { printf("0x%04x, ", $base[$j] ? $base[$j] : $j)}; print "\n"; }' 893 894 } 895