1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef LATINIME_UNIGRAM_DICTIONARY_H 18 #define LATINIME_UNIGRAM_DICTIONARY_H 19 20 #include <map> 21 #include <stdint.h> 22 #include "defines.h" 23 #include "digraph_utils.h" 24 25 namespace latinime { 26 27 class Correction; 28 class ProximityInfo; 29 class TerminalAttributes; 30 class WordsPriorityQueuePool; 31 32 class UnigramDictionary { 33 public: 34 // Error tolerances 35 static const int DEFAULT_MAX_ERRORS = 2; 36 static const int MAX_ERRORS_FOR_TWO_WORDS = 1; 37 38 static const int FLAG_MULTIPLE_SUGGEST_ABORT = 0; 39 static const int FLAG_MULTIPLE_SUGGEST_SKIP = 1; 40 static const int FLAG_MULTIPLE_SUGGEST_CONTINUE = 2; 41 UnigramDictionary(const uint8_t *const streamStart, const unsigned int dictFlags); 42 int getProbability(const int *const inWord, const int length) const; 43 int getBigramPosition(int pos, int *word, int offset, int length) const; 44 int getSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, 45 const int *ycoordinates, const int *inputCodePoints, const int inputSize, 46 const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, 47 const bool useFullEditDistance, int *outWords, int *frequencies, 48 int *outputTypes) const; getDictFlags()49 int getDictFlags() const { return DICT_FLAGS; } 50 virtual ~UnigramDictionary(); 51 52 private: 53 DISALLOW_IMPLICIT_CONSTRUCTORS(UnigramDictionary); 54 void getWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, 55 const int *ycoordinates, const int *inputCodePoints, const int inputSize, 56 const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, 57 const bool useFullEditDistance, Correction *correction, 58 WordsPriorityQueuePool *queuePool) const; 59 int getDigraphReplacement(const int *codes, const int i, const int inputSize, 60 const DigraphUtils::digraph_t *const digraphs, const unsigned int digraphsSize) const; 61 void getWordWithDigraphSuggestionsRec(ProximityInfo *proximityInfo, const int *xcoordinates, 62 const int *ycoordinates, const int *codesBuffer, int *xCoordinatesBuffer, 63 int *yCoordinatesBuffer, const int codesBufferSize, const std::map<int, int> *bigramMap, 64 const uint8_t *bigramFilter, const bool useFullEditDistance, const int *codesSrc, 65 const int codesRemain, const int currentDepth, int *codesDest, Correction *correction, 66 WordsPriorityQueuePool *queuePool, const DigraphUtils::digraph_t *const digraphs, 67 const unsigned int digraphsSize) const; 68 void initSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, 69 const int *ycoordinates, const int *codes, const int inputSize, 70 Correction *correction) const; 71 void getOneWordSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, 72 const int *ycoordinates, const int *codes, const std::map<int, int> *bigramMap, 73 const uint8_t *bigramFilter, const bool useFullEditDistance, const int inputSize, 74 Correction *correction, WordsPriorityQueuePool *queuePool) const; 75 void getSuggestionCandidates( 76 const bool useFullEditDistance, const int inputSize, 77 const std::map<int, int> *bigramMap, const uint8_t *bigramFilter, 78 Correction *correction, WordsPriorityQueuePool *queuePool, const bool doAutoCompletion, 79 const int maxErrors, const int currentWordIndex) const; 80 void getSplitMultipleWordsSuggestions(ProximityInfo *proximityInfo, const int *xcoordinates, 81 const int *ycoordinates, const int *codes, const bool useFullEditDistance, 82 const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool, 83 const bool hasAutoCorrectionCandidate) const; 84 void onTerminal(const int freq, const TerminalAttributes &terminalAttributes, 85 Correction *correction, WordsPriorityQueuePool *queuePool, const bool addToMasterQueue, 86 const int currentWordIndex) const; 87 // Process a node by considering proximity, missing and excessive character 88 bool processCurrentNode(const int initialPos, const std::map<int, int> *bigramMap, 89 const uint8_t *bigramFilter, Correction *correction, int *newCount, 90 int *newChildPosition, int *nextSiblingPosition, WordsPriorityQueuePool *queuePool, 91 const int currentWordIndex) const; 92 int getMostProbableWordLike(const int startInputIndex, const int inputSize, 93 Correction *correction, int *word) const; 94 int getMostProbableWordLikeInner(const int *const inWord, const int inputSize, 95 int *outWord) const; 96 int getSubStringSuggestion(ProximityInfo *proximityInfo, const int *xcoordinates, 97 const int *ycoordinates, const int *codes, const bool useFullEditDistance, 98 Correction *correction, WordsPriorityQueuePool *queuePool, const int inputSize, 99 const bool hasAutoCorrectionCandidate, const int currentWordIndex, 100 const int inputWordStartPos, const int inputWordLength, const int outputWordStartPos, 101 const bool isSpaceProximity, int *freqArray, int *wordLengthArray, int *outputWord, 102 int *outputWordLength) const; 103 void getMultiWordsSuggestionRec(ProximityInfo *proximityInfo, const int *xcoordinates, 104 const int *ycoordinates, const int *codes, const bool useFullEditDistance, 105 const int inputSize, Correction *correction, WordsPriorityQueuePool *queuePool, 106 const bool hasAutoCorrectionCandidate, const int startPos, const int startWordIndex, 107 const int outputWordLength, int *freqArray, int *wordLengthArray, 108 int *outputWord) const; 109 110 const uint8_t *const DICT_ROOT; 111 const int ROOT_POS; 112 const int MAX_DIGRAPH_SEARCH_DEPTH; 113 const int DICT_FLAGS; 114 }; 115 } // namespace latinime 116 #endif // LATINIME_UNIGRAM_DICTIONARY_H 117