• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LATINIME_CORRECTION_H
18 #define LATINIME_CORRECTION_H
19 
20 #include <cassert>
21 #include <cstring> // for memset()
22 #include <stdint.h>
23 
24 #include "correction_state.h"
25 #include "defines.h"
26 #include "proximity_info_state.h"
27 
28 namespace latinime {
29 
30 class ProximityInfo;
31 
32 class Correction {
33  public:
34     typedef enum {
35         TRAVERSE_ALL_ON_TERMINAL,
36         TRAVERSE_ALL_NOT_ON_TERMINAL,
37         UNRELATED,
38         ON_TERMINAL,
39         NOT_ON_TERMINAL
40     } CorrectionType;
41 
Correction()42     Correction()
43             : mProximityInfo(0), mUseFullEditDistance(false), mDoAutoCompletion(false),
44               mMaxEditDistance(0), mMaxDepth(0), mInputSize(0), mSpaceProximityPos(0),
45               mMissingSpacePos(0), mTerminalInputIndex(0), mTerminalOutputIndex(0), mMaxErrors(0),
46               mTotalTraverseCount(0), mNeedsToTraverseAllNodes(false), mOutputIndex(0),
47               mInputIndex(0), mEquivalentCharCount(0), mProximityCount(0), mExcessiveCount(0),
48               mTransposedCount(0), mSkippedCount(0), mTransposedPos(0), mExcessivePos(0),
49               mSkipPos(0), mLastCharExceeded(false), mMatching(false), mProximityMatching(false),
50               mAdditionalProximityMatching(false), mExceeding(false), mTransposing(false),
51               mSkipping(false), mProximityInfoState() {
52         memset(mWord, 0, sizeof(mWord));
53         memset(mDistances, 0, sizeof(mDistances));
54         memset(mEditDistanceTable, 0, sizeof(mEditDistanceTable));
55         // NOTE: mCorrectionStates is an array of instances.
56         // No need to initialize it explicitly here.
57     }
58 
~Correction()59     virtual ~Correction() {}
60     void resetCorrection();
61     void initCorrection(
62             const ProximityInfo *pi, const int inputSize, const int maxWordLength);
63     void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll);
64 
65     // TODO: remove
66     void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
67             const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance,
68             const bool doAutoCompletion, const int maxErrors);
69     void checkState();
70     bool sameAsTyped();
71     bool initProcessState(const int index);
72 
73     int getInputIndex() const;
74 
75     bool needsToPrune() const;
76 
pushAndGetTotalTraverseCount()77     int pushAndGetTotalTraverseCount() {
78         return ++mTotalTraverseCount;
79     }
80 
81     int getFreqForSplitMultipleWords(
82             const int *freqArray, const int *wordLengthArray, const int wordCount,
83             const bool isSpaceProximity, const unsigned short *word);
84     int getFinalProbability(const int probability, unsigned short **word, int *wordLength);
85     int getFinalProbabilityForSubQueue(const int probability, unsigned short **word,
86             int *wordLength, const int inputSize);
87 
88     CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
89 
90     /////////////////////////
91     // Tree helper methods
92     int goDownTree(const int parentIndex, const int childCount, const int firstChildPos);
93 
getTreeSiblingPos(const int index)94     inline int getTreeSiblingPos(const int index) const {
95         return mCorrectionStates[index].mSiblingPos;
96     }
97 
setTreeSiblingPos(const int index,const int pos)98     inline void setTreeSiblingPos(const int index, const int pos) {
99         mCorrectionStates[index].mSiblingPos = pos;
100     }
101 
getTreeParentIndex(const int index)102     inline int getTreeParentIndex(const int index) const {
103         return mCorrectionStates[index].mParentIndex;
104     }
105 
106     class RankingAlgorithm {
107      public:
108         static int calculateFinalProbability(const int inputIndex, const int depth,
109                 const int probability, int *editDistanceTable, const Correction *correction,
110                 const int inputSize);
111         static int calcFreqForSplitMultipleWords(const int *freqArray, const int *wordLengthArray,
112                 const int wordCount, const Correction *correction, const bool isSpaceProximity,
113                 const unsigned short *word);
114         static float calcNormalizedScore(const unsigned short *before, const int beforeLength,
115                 const unsigned short *after, const int afterLength, const int score);
116         static int editDistance(const unsigned short *before,
117                 const int beforeLength, const unsigned short *after, const int afterLength);
118      private:
119         static const int CODE_SPACE = ' ';
120         static const int MAX_INITIAL_SCORE = 255;
121     };
122 
123     // proximity info state
initInputParams(const ProximityInfo * proximityInfo,const int32_t * inputCodes,const int inputSize,const int * xCoordinates,const int * yCoordinates)124     void initInputParams(const ProximityInfo *proximityInfo, const int32_t *inputCodes,
125             const int inputSize, const int *xCoordinates, const int *yCoordinates) {
126         mProximityInfoState.initInputParams(0, MAX_POINT_TO_KEY_LENGTH,
127                 proximityInfo, inputCodes, inputSize, xCoordinates, yCoordinates, 0, 0, false);
128     }
129 
getPrimaryInputWord()130     const unsigned short *getPrimaryInputWord() const {
131         return mProximityInfoState.getPrimaryInputWord();
132     }
133 
getPrimaryCharAt(const int index)134     unsigned short getPrimaryCharAt(const int index) const {
135         return mProximityInfoState.getPrimaryCharAt(index);
136     }
137 
138  private:
139     DISALLOW_COPY_AND_ASSIGN(Correction);
140 
141     /////////////////////////
142     // static inline utils //
143     /////////////////////////
144     static const int TWO_31ST_DIV_255 = S_INT_MAX / 255;
capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num)145     static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(const int num) {
146         return (num < TWO_31ST_DIV_255 ? 255 * num : S_INT_MAX);
147     }
148 
149     static const int TWO_31ST_DIV_2 = S_INT_MAX / 2;
multiplyIntCapped(const int multiplier,int * base)150     inline static void multiplyIntCapped(const int multiplier, int *base) {
151         const int temp = *base;
152         if (temp != S_INT_MAX) {
153             // Branch if multiplier == 2 for the optimization
154             if (multiplier < 0) {
155                 if (DEBUG_DICT) {
156                     assert(false);
157                 }
158                 AKLOGI("--- Invalid multiplier: %d", multiplier);
159             } else if (multiplier == 0) {
160                 *base = 0;
161             } else if (multiplier == 2) {
162                 *base = TWO_31ST_DIV_2 >= temp ? temp << 1 : S_INT_MAX;
163             } else {
164                 // TODO: This overflow check gives a wrong answer when, for example,
165                 //       temp = 2^16 + 1 and multiplier = 2^17 + 1.
166                 //       Fix this behavior.
167                 const int tempRetval = temp * multiplier;
168                 *base = tempRetval >= temp ? tempRetval : S_INT_MAX;
169             }
170         }
171     }
172 
powerIntCapped(const int base,const int n)173     inline static int powerIntCapped(const int base, const int n) {
174         if (n <= 0) return 1;
175         if (base == 2) {
176             return n < 31 ? 1 << n : S_INT_MAX;
177         } else {
178             int ret = base;
179             for (int i = 1; i < n; ++i) multiplyIntCapped(base, &ret);
180             return ret;
181         }
182     }
183 
multiplyRate(const int rate,int * freq)184     inline static void multiplyRate(const int rate, int *freq) {
185         if (*freq != S_INT_MAX) {
186             if (*freq > 1000000) {
187                 *freq /= 100;
188                 multiplyIntCapped(rate, freq);
189             } else {
190                 multiplyIntCapped(rate, freq);
191                 *freq /= 100;
192             }
193         }
194     }
195 
getSpaceProximityPos()196     inline int getSpaceProximityPos() const {
197         return mSpaceProximityPos;
198     }
getMissingSpacePos()199     inline int getMissingSpacePos() const {
200         return mMissingSpacePos;
201     }
202 
getSkipPos()203     inline int getSkipPos() const {
204         return mSkipPos;
205     }
206 
getExcessivePos()207     inline int getExcessivePos() const {
208         return mExcessivePos;
209     }
210 
getTransposedPos()211     inline int getTransposedPos() const {
212         return mTransposedPos;
213     }
214 
215     inline void incrementInputIndex();
216     inline void incrementOutputIndex();
217     inline void startToTraverseAllNodes();
218     inline bool isSingleQuote(const unsigned short c);
219     inline CorrectionType processSkipChar(
220             const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
221     inline CorrectionType processUnrelatedCorrectionType();
222     inline void addCharToCurrentWord(const int32_t c);
223     inline int getFinalProbabilityInternal(const int probability, unsigned short **word,
224             int *wordLength, const int inputSize);
225 
226     static const int TYPED_LETTER_MULTIPLIER = 2;
227     static const int FULL_WORD_MULTIPLIER = 2;
228     const ProximityInfo *mProximityInfo;
229 
230     bool mUseFullEditDistance;
231     bool mDoAutoCompletion;
232     int mMaxEditDistance;
233     int mMaxDepth;
234     int mInputSize;
235     int mSpaceProximityPos;
236     int mMissingSpacePos;
237     int mTerminalInputIndex;
238     int mTerminalOutputIndex;
239     int mMaxErrors;
240 
241     uint8_t mTotalTraverseCount;
242 
243     // The following arrays are state buffer.
244     unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
245     int mDistances[MAX_WORD_LENGTH_INTERNAL];
246 
247     // Edit distance calculation requires a buffer with (N+1)^2 length for the input length N.
248     // Caveat: Do not create multiple tables per thread as this table eats up RAM a lot.
249     int mEditDistanceTable[(MAX_WORD_LENGTH_INTERNAL + 1) * (MAX_WORD_LENGTH_INTERNAL + 1)];
250 
251     CorrectionState mCorrectionStates[MAX_WORD_LENGTH_INTERNAL];
252 
253     // The following member variables are being used as cache values of the correction state.
254     bool mNeedsToTraverseAllNodes;
255     int mOutputIndex;
256     int mInputIndex;
257 
258     int mEquivalentCharCount;
259     int mProximityCount;
260     int mExcessiveCount;
261     int mTransposedCount;
262     int mSkippedCount;
263 
264     int mTransposedPos;
265     int mExcessivePos;
266     int mSkipPos;
267 
268     bool mLastCharExceeded;
269 
270     bool mMatching;
271     bool mProximityMatching;
272     bool mAdditionalProximityMatching;
273     bool mExceeding;
274     bool mTransposing;
275     bool mSkipping;
276     ProximityInfoState mProximityInfoState;
277 };
278 } // namespace latinime
279 #endif // LATINIME_CORRECTION_H
280