• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2011 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LATINIME_CORRECTION_H
18 #define LATINIME_CORRECTION_H
19 
20 #include <stdint.h>
21 #include "correction_state.h"
22 
23 #include "defines.h"
24 
25 namespace latinime {
26 
27 class ProximityInfo;
28 
29 class Correction {
30 
31 public:
32     typedef enum {
33         TRAVERSE_ALL_ON_TERMINAL,
34         TRAVERSE_ALL_NOT_ON_TERMINAL,
35         UNRELATED,
36         ON_TERMINAL,
37         NOT_ON_TERMINAL
38     } CorrectionType;
39 
40     Correction(const int typedLetterMultiplier, const int fullWordMultiplier);
41     void initCorrection(
42             const ProximityInfo *pi, const int inputLength, const int maxWordLength);
43     void initCorrectionState(const int rootPos, const int childCount, const bool traverseAll);
44 
45     // TODO: remove
46     void setCorrectionParams(const int skipPos, const int excessivePos, const int transposedPos,
47             const int spaceProximityPos, const int missingSpacePos, const bool useFullEditDistance);
48     void checkState();
49     bool initProcessState(const int index);
50 
51     int getOutputIndex();
52     int getInputIndex();
53 
54     virtual ~Correction();
getSpaceProximityPos()55     int getSpaceProximityPos() const {
56         return mSpaceProximityPos;
57     }
getMissingSpacePos()58     int getMissingSpacePos() const {
59         return mMissingSpacePos;
60     }
61 
getSkipPos()62     int getSkipPos() const {
63         return mSkipPos;
64     }
65 
getExcessivePos()66     int getExcessivePos() const {
67         return mExcessivePos;
68     }
69 
getTransposedPos()70     int getTransposedPos() const {
71         return mTransposedPos;
72     }
73 
74     bool needsToPrune() const;
75 
76     int getFreqForSplitTwoWords(
77             const int firstFreq, const int secondFreq, const unsigned short *word);
78     int getFinalFreq(const int freq, unsigned short **word, int* wordLength);
79 
80     CorrectionType processCharAndCalcState(const int32_t c, const bool isTerminal);
81 
82     /////////////////////////
83     // Tree helper methods
84     int goDownTree(const int parentIndex, const int childCount, const int firstChildPos);
85 
getTreeSiblingPos(const int index)86     inline int getTreeSiblingPos(const int index) const {
87         return mCorrectionStates[index].mSiblingPos;
88     }
89 
setTreeSiblingPos(const int index,const int pos)90     inline void setTreeSiblingPos(const int index, const int pos) {
91         mCorrectionStates[index].mSiblingPos = pos;
92     }
93 
getTreeParentIndex(const int index)94     inline int getTreeParentIndex(const int index) const {
95         return mCorrectionStates[index].mParentIndex;
96     }
97 private:
98     inline void incrementInputIndex();
99     inline void incrementOutputIndex();
100     inline bool needsToTraverseAllNodes();
101     inline void startToTraverseAllNodes();
102     inline bool isQuote(const unsigned short c);
103     inline CorrectionType processSkipChar(
104             const int32_t c, const bool isTerminal, const bool inputIndexIncremented);
105 
106     const int TYPED_LETTER_MULTIPLIER;
107     const int FULL_WORD_MULTIPLIER;
108     const ProximityInfo *mProximityInfo;
109 
110     bool mUseFullEditDistance;
111     int mMaxEditDistance;
112     int mMaxDepth;
113     int mInputLength;
114     int mSpaceProximityPos;
115     int mMissingSpacePos;
116     int mTerminalInputIndex;
117     int mTerminalOutputIndex;
118 
119     // The following arrays are state buffer.
120     unsigned short mWord[MAX_WORD_LENGTH_INTERNAL];
121     int mDistances[MAX_WORD_LENGTH_INTERNAL];
122 
123     // Edit distance calculation requires a buffer with (N+1)^2 length for the input length N.
124     // Caveat: Do not create multiple tables per thread as this table eats up RAM a lot.
125     int mEditDistanceTable[(MAX_WORD_LENGTH_INTERNAL + 1) * (MAX_WORD_LENGTH_INTERNAL + 1)];
126 
127     CorrectionState mCorrectionStates[MAX_WORD_LENGTH_INTERNAL];
128 
129     // The following member variables are being used as cache values of the correction state.
130     bool mNeedsToTraverseAllNodes;
131     int mOutputIndex;
132     int mInputIndex;
133 
134     int mEquivalentCharCount;
135     int mProximityCount;
136     int mExcessiveCount;
137     int mTransposedCount;
138     int mSkippedCount;
139 
140     int mTransposedPos;
141     int mExcessivePos;
142     int mSkipPos;
143 
144     bool mLastCharExceeded;
145 
146     bool mMatching;
147     bool mProximityMatching;
148     bool mExceeding;
149     bool mTransposing;
150     bool mSkipping;
151 
152     class RankingAlgorithm {
153     public:
154         static int calculateFinalFreq(const int inputIndex, const int depth,
155                 const int freq, int *editDistanceTable, const Correction* correction);
156         static int calcFreqForSplitTwoWords(const int firstFreq, const int secondFreq,
157                 const Correction* correction, const unsigned short *word);
158     };
159 };
160 } // namespace latinime
161 #endif // LATINIME_CORRECTION_H
162