/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef LATINIME_DICTIONARY_H
#define LATINIME_DICTIONARY_H

namespace latinime {

// NOTE: the names below are preprocessor macros, so they are visible to every translation unit
// that includes this header and are not scoped by the enclosing namespace.

// 22-bit address = ~4MB dictionary size limit, which on average would be about 200k-300k words
#define ADDRESS_MASK 0x3FFFFF

// The bit that decides if an address follows in the next 22 bits
#define FLAG_ADDRESS_MASK 0x40
// The bit that decides if this is a terminal node for a word. The node could still have children,
// if the word has other endings.
#define FLAG_TERMINAL_MASK 0x80

// Bit flags and masks used when reading bigram data. FLAG_BIGRAM_READ and FLAG_BIGRAM_CONTINUED
// share the value 0x80; presumably they are applied to different bytes of a bigram entry --
// TODO confirm against the implementation file.
#define FLAG_BIGRAM_READ 0x80
#define FLAG_BIGRAM_CHILDEXIST 0x40
#define FLAG_BIGRAM_CONTINUED 0x80
#define FLAG_BIGRAM_FREQ 0x7F

// Reader for a dictionary held in memory as a byte buffer (see mDict). All non-inline member
// functions are defined out of line; descriptions of them in this header are limited to what
// their signatures show.
class Dictionary {
public:
    // dict is the dictionary data; the two multipliers are presumably scoring weights stored in
    // mTypedLetterMultiplier / mFullWordMultiplier -- TODO confirm in the implementation.
    Dictionary(void *dict, int typedLetterMultiplier, int fullWordMultiplier);

    // Looks up suggestions for the given input codes, writing results into the caller-provided
    // outWords / frequencies buffers. The meaning of the return value is defined by the
    // implementation (presumably the number of suggestions found -- TODO confirm).
    int getSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies,
            int maxWordLength, int maxWords, int maxAlternatives, int skipPos,
            int *nextLetters, int nextLettersSize);

    // Looks up bigrams for the given word, writing results into the caller-provided
    // outWords / frequencies buffers. Return value semantics are defined by the implementation.
    int getBigrams(unsigned short *word, int length, int *codes, int codesSize,
            unsigned short *outWords, int *frequencies, int maxWordLength, int maxBigrams,
            int maxAlternatives);

    // Reports whether the given word of the given length is considered valid.
    bool isValidWord(unsigned short *word, int length);

    // Stores an opaque handle alongside the dictionary. This header does not show who owns it
    // or when it is released.
    void setAsset(void *asset) { mAsset = asset; }

    // Returns the handle last passed to setAsset().
    void *getAsset() const { return mAsset; }

    ~Dictionary();

private:

    void getVersionNumber();
    bool checkIfDictVersionIsLatest();
    int getAddress(int *pos);
    int getBigramAddress(int *pos, bool advance);
    int getFreq(int *pos);
    int getBigramFreq(int *pos);
    void searchForTerminalNode(int address, int frequency);

    // The three predicates below inspect the byte at mDict[*pos] without advancing *pos.

    // True if bit 0x80 (the most significant bit) of the byte at *pos is set.
    bool getFirstBitOfByte(int *pos) const { return (mDict[*pos] & 0x80) > 0; }
    // True if bit 0x40 of the byte at *pos is set.
    bool getSecondBitOfByte(int *pos) const { return (mDict[*pos] & 0x40) > 0; }
    // True if the FLAG_TERMINAL_MASK bit of the byte at *pos is set.
    bool getTerminal(int *pos) const { return (mDict[*pos] & FLAG_TERMINAL_MASK) > 0; }
    // Returns the byte at *pos as a value in 0..255 and advances *pos by one.
    int getCount(int *pos) const { return mDict[(*pos)++] & 0xFF; }
    unsigned short getChar(int *pos);
    int wideStrLen(unsigned short *str);

    bool sameAsTyped(unsigned short *word, int length);
    bool checkFirstCharacter(unsigned short *word);
    bool addWord(unsigned short *word, int length, int frequency);
    bool addWordBigram(unsigned short *word, int length, int frequency);
    unsigned short toLowerCase(unsigned short c);
    void getWordsRec(int pos, int depth, int maxDepth, bool completion, int frequency,
            int inputIndex, int diffs);
    int isValidWordRec(int pos, unsigned short *word, int offset, int length);
    void registerNextLetter(unsigned short c);

    // Dictionary bytes, indexed by the position values passed to the helpers above.
    unsigned char *mDict;
    // Opaque handle managed through setAsset() / getAsset().
    void *mAsset;

    // The members below have names mirroring the parameters of getSuggestions() / getBigrams();
    // presumably they cache those arguments for the duration of a lookup -- TODO confirm.
    int *mFrequencies;
    int *mBigramFreq;
    int mMaxWords;
    int mMaxBigrams;
    int mMaxWordLength;
    unsigned short *mOutputChars;
    unsigned short *mBigramChars;
    int *mInputCodes;
    int mInputLength;
    int mMaxAlternatives;
    // Fixed-size scratch buffer of 128 UTF-16 code units.
    unsigned short mWord[128];
    int mSkipPos;
    int mMaxEditDistance;

    int mFullWordMultiplier;
    int mTypedLetterMultiplier;
    int *mNextLettersFrequencies;
    int mNextLettersSize;
    int mVersion;
    int mBigram;
};

// ----------------------------------------------------------------------------

} // namespace latinime

#endif // LATINIME_DICTIONARY_H