/*
 **********************************************************************
 *   Copyright (C) 2005-2015, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
7 
8 #include "unicode/utypes.h"
9 
10 #include "cmemory.h"
11 
12 #if !UCONFIG_NO_CONVERSION
13 #include "csrsbcs.h"
14 #include "csmatch.h"
15 
// Number of bytes in one n-gram; the 24-bit mask below corresponds to 3 bytes.
#define N_GRAM_SIZE 3
// Keeps the low 24 bits of the rolling n-gram value (three 8-bit codes).
#define N_GRAM_MASK 0xFFFFFF
// Element count of a real array (not a pointer). The argument is fully
// parenthesized so that passing an expression cannot change the grouping.
#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
19 
20 U_NAMESPACE_BEGIN
21 
NGramParser(const int32_t * theNgramList,const uint8_t * theCharMap)22 NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap)
23  : ngram(0), byteIndex(0)
24 {
25     ngramList = theNgramList;
26     charMap   = theCharMap;
27 
28     ngramCount = hitCount = 0;
29 }
30 
~NGramParser()31 NGramParser::~NGramParser()
32 {
33 }
34 
35 /*
36  * Binary search for value in table, which must have exactly 64 entries.
37  */
38 
search(const int32_t * table,int32_t value)39 int32_t NGramParser::search(const int32_t *table, int32_t value)
40 {
41     int32_t index = 0;
42 
43     if (table[index + 32] <= value) {
44         index += 32;
45     }
46 
47     if (table[index + 16] <= value) {
48         index += 16;
49     }
50 
51     if (table[index + 8] <= value) {
52         index += 8;
53     }
54 
55     if (table[index + 4] <= value) {
56         index += 4;
57     }
58 
59     if (table[index + 2] <= value) {
60         index += 2;
61     }
62 
63     if (table[index + 1] <= value) {
64         index += 1;
65     }
66 
67     if (table[index] > value) {
68         index -= 1;
69     }
70 
71     if (index < 0 || table[index] != value) {
72         return -1;
73     }
74 
75     return index;
76 }
77 
lookup(int32_t thisNgram)78 void NGramParser::lookup(int32_t thisNgram)
79 {
80     ngramCount += 1;
81 
82     if (search(ngramList, thisNgram) >= 0) {
83         hitCount += 1;
84     }
85 
86 }
87 
addByte(int32_t b)88 void NGramParser::addByte(int32_t b)
89 {
90     ngram = ((ngram << 8) + b) & N_GRAM_MASK;
91     lookup(ngram);
92 }
93 
nextByte(InputText * det)94 int32_t NGramParser::nextByte(InputText *det)
95 {
96     if (byteIndex >= det->fInputLen) {
97         return -1;
98     }
99 
100     return det->fInputBytes[byteIndex++];
101 }
102 
parseCharacters(InputText * det)103 void NGramParser::parseCharacters(InputText *det)
104 {
105     int32_t b;
106     bool ignoreSpace = FALSE;
107 
108     while ((b = nextByte(det)) >= 0) {
109         uint8_t mb = charMap[b];
110 
111         // TODO: 0x20 might not be a space in all character sets...
112         if (mb != 0) {
113             if (!(mb == 0x20 && ignoreSpace)) {
114                 addByte(mb);
115             }
116 
117             ignoreSpace = (mb == 0x20);
118         }
119     }
120 }
121 
parse(InputText * det)122 int32_t NGramParser::parse(InputText *det)
123 {
124     parseCharacters(det);
125 
126     // TODO: Is this OK? The buffer could have ended in the middle of a word...
127     addByte(0x20);
128 
129     double rawPercent = (double) hitCount / (double) ngramCount;
130 
131     //            if (rawPercent <= 2.0) {
132     //                return 0;
133     //            }
134 
135     // TODO - This is a bit of a hack to take care of a case
136     // were we were getting a confidence of 135...
137     if (rawPercent > 0.33) {
138         return 98;
139     }
140 
141     return (int32_t) (rawPercent * 300.0);
142 }
143 
144 #if !UCONFIG_ONLY_HTML_CONVERSION
/*
 * 256-entry byte-to-byte table used by NGramParser_IBM420::nextByte() for every
 * byte that isLamAlef() does not recognise. Bytes 0x00-0x41 all become 0x40;
 * from 0x42 on most bytes map to themselves and a number of codes are folded
 * onto a neighbouring code (e.g. 0x43 -> 0x42, 0x48 -> 0x47, 0x51 -> 0x49).
 * NOTE(review): going by the name, the folded codes are presumably positional
 * (shaped) variants of the same Arabic letter - confirm against the IBM420
 * code chart.
 */
static const uint8_t unshapeMap_IBM420[] = {
/*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
/* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 4- */    0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
/* 5- */    0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
/* 6- */    0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
/* 7- */    0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
/* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x80, 0x8B, 0x8B, 0x8D, 0x8D, 0x8F,
/* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9A, 0x9A, 0x9A, 0x9E, 0x9E,
/* A- */    0x9E, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x9E, 0xAB, 0xAB, 0xAD, 0xAD, 0xAF,
/* B- */    0xAF, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xB1, 0xBB, 0xBB, 0xBD, 0xBD, 0xBF,
/* C- */    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xBF, 0xCC, 0xBF, 0xCE, 0xCF,
/* D- */    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDA, 0xDC, 0xDC, 0xDC, 0xDF,
/* E- */    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
/* F- */    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
164 
NGramParser_IBM420(const int32_t * theNgramList,const uint8_t * theCharMap)165 NGramParser_IBM420::NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap):NGramParser(theNgramList, theCharMap)
166 {
167 	alef = 0x00;
168 }
169 
~NGramParser_IBM420()170 NGramParser_IBM420::~NGramParser_IBM420() {}
171 
isLamAlef(int32_t b)172 int32_t NGramParser_IBM420::isLamAlef(int32_t b)
173 {
174 	if(b == 0xB2 || b == 0xB3){
175          	return 0x47;
176         }else if(b == 0xB4 || b == 0xB5){
177          	return 0x49;
178         }else if(b == 0xB8 || b == 0xB9){
179          	return 0x56;
180         }else
181          	return 0x00;
182 }
183 
184 /*
185 * Arabic shaping needs to be done manually. Cannot call ArabicShaping class
186 * because CharsetDetector is dealing with bytes not Unicode code points. We could
187 * convert the bytes to Unicode code points but that would leave us dependent
188 * on CharsetICU which we try to avoid. IBM420 converter amongst different versions
189 * of JDK can produce different results and therefore is also avoided.
190 */
nextByte(InputText * det)191 int32_t NGramParser_IBM420::nextByte(InputText *det)
192 {
193 
194     if (byteIndex >= det->fInputLen || det->fInputBytes[byteIndex] == 0) {
195         return -1;
196     }
197     int next;
198 
199     alef = isLamAlef(det->fInputBytes[byteIndex]);
200     if(alef != 0x00)
201         next = 0xB1 & 0xFF;
202     else
203         next = unshapeMap_IBM420[det->fInputBytes[byteIndex]& 0xFF] & 0xFF;
204 
205     byteIndex++;
206 
207     return next;
208 }
209 
parseCharacters(InputText * det)210 void NGramParser_IBM420::parseCharacters(InputText *det)
211 {
212 	int32_t b;
213     bool ignoreSpace = FALSE;
214 
215     while ((b = nextByte(det)) >= 0) {
216         uint8_t mb = charMap[b];
217 
218         // TODO: 0x20 might not be a space in all character sets...
219         if (mb != 0) {
220             if (!(mb == 0x20 && ignoreSpace)) {
221                 addByte(mb);
222             }
223             ignoreSpace = (mb == 0x20);
224         }
225 
226 		if(alef != 0x00){
227             mb = charMap[alef & 0xFF];
228 
229             // TODO: 0x20 might not be a space in all character sets...
230             if (mb != 0) {
231                 if (!(mb == 0x20 && ignoreSpace)) {
232                     addByte(mb);
233                 }
234 
235                 ignoreSpace = (mb == 0x20);
236             }
237 
238         }
239     }
240 }
241 #endif
242 
CharsetRecog_sbcs()243 CharsetRecog_sbcs::CharsetRecog_sbcs()
244 {
245     // nothing else to do
246 }
247 
~CharsetRecog_sbcs()248 CharsetRecog_sbcs::~CharsetRecog_sbcs()
249 {
250     // nothing to do
251 }
252 
match_sbcs(InputText * det,const int32_t ngrams[],const uint8_t byteMap[]) const253 int32_t CharsetRecog_sbcs::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
254 {
255     NGramParser parser(ngrams, byteMap);
256     int32_t result;
257 
258     result = parser.parse(det);
259 
260     return result;
261 }
262 
/*
 * 256-entry byte map named for ISO-8859-1, in the form NGramParser expects of
 * a charMap: 0x20 entries are treated as (collapsible) spaces and 0x00 entries
 * are skipped.
 *
 *  - 0x27 maps to 0x00.
 *  - 'A'-'Z' (0x41-0x5A) fold to 'a'-'z'; 0xC0-0xDE fold to 0xE0-0xFE,
 *    except 0xD7 (and 0xF7), which map to 0x20.
 *  - 0xAA, 0xB5, 0xBA, 0xDF and 0xE0-0xFF (bar 0xF7) map to themselves.
 *  - Everything else maps to 0x20.
 */
static const uint8_t charMap_8859_1[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
    0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
297 
/*
 * 256-entry byte map named for ISO-8859-2, in the form NGramParser expects of
 * a charMap: 0x20 entries are treated as (collapsible) spaces and 0x00 entries
 * are skipped.
 *
 *  - 0x27 maps to 0x00; 'A'-'Z' fold to 'a'-'z'.
 *  - Several codes in 0xA0-0xAF fold onto the code 0x10 higher
 *    (e.g. 0xA1 -> 0xB1, 0xA9 -> 0xB9); 0xC0-0xDE fold to 0xE0-0xFE.
 *  - 0xD7, 0xF7 and 0xFF map to 0x20; 0xDF maps to itself.
 */
static const uint8_t charMap_8859_2[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20,
    0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
    0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7,
    0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
};
332 
/*
 * 256-entry byte map named for ISO-8859-5, in the form NGramParser expects of
 * a charMap: 0x20 entries are treated as (collapsible) spaces and 0x00 entries
 * are skipped.
 *
 *  - 0x27 maps to 0x00; 'A'-'Z' fold to 'a'-'z'.
 *  - 0xA1-0xAF fold onto 0xF1-0xFF (0xAD -> 0x20); 0xB0-0xBF fold onto
 *    0xD0-0xDF and 0xC0-0xCF onto 0xE0-0xEF.
 *  - 0xD0-0xEF and 0xF1-0xFF (bar 0xFD) map to themselves; 0xF0 and 0xFD map
 *    to 0x20.
 */
static const uint8_t charMap_8859_5[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
};
367 
/*
 * 256-entry byte map named for ISO-8859-6, in the form NGramParser expects of
 * a charMap: 0x20 entries are treated as (collapsible) spaces and 0x00 entries
 * are skipped.
 *
 *  - 0x27 maps to 0x00; 'A'-'Z' fold to 'a'-'z'.
 *  - 0xC1-0xDA and 0xE0-0xEA map to themselves.
 *  - Every other byte from 0x80 up maps to 0x20.
 */
static const uint8_t charMap_8859_6[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
};
402 
/*
 * 256-entry byte map named for ISO-8859-7, in the form NGramParser expects of
 * a charMap: 0x20 entries are treated as (collapsible) spaces and 0x00 entries
 * are skipped.
 *
 *  - 0x27 maps to 0x00; 'A'-'Z' fold to 'a'-'z'.
 *  - 0xA1 and 0xA2 map to themselves; 0xB6, 0xB8-0xBA, 0xBC, 0xBE and 0xBF
 *    fold onto 0xDC-0xDF / 0xFC-0xFE.
 *  - 0xC1-0xD1 and 0xD3-0xDB fold onto the code 0x20 higher (0xC0 maps to
 *    itself, 0xD2 maps to 0x20); 0xDC-0xFE map to themselves; 0xFF maps to 0x20.
 */
static const uint8_t charMap_8859_7[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20,
    0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE,
    0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
};
437 
/*
 * 256-entry byte map named for ISO-8859-8, in the form NGramParser expects of
 * a charMap: 0x20 entries are treated as (collapsible) spaces and 0x00 entries
 * are skipped.
 *
 *  - 0x27 maps to 0x00; 'A'-'Z' fold to 'a'-'z'.
 *  - 0xB5 and 0xE0-0xFA map to themselves.
 *  - Every other byte from 0x80 up maps to 0x20.
 */
static const uint8_t charMap_8859_8[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20,
};
472 
/*
 * 256-entry byte map named for ISO-8859-9, in the form NGramParser expects of
 * a charMap: 0x20 entries are treated as (collapsible) spaces and 0x00 entries
 * are skipped.
 *
 *  - 0x27 maps to 0x00; 'A'-'Z' fold to 'a'-'z'.
 *  - 0xC0-0xDE fold to 0xE0-0xFE, except 0xD7 (-> 0x20) and 0xDD, which maps
 *    to 0x69 ('i') rather than to 0xFD.
 *  - 0xAA, 0xB5, 0xBA, 0xDF and 0xE0-0xFF (bar 0xF7 -> 0x20) map to themselves.
 */
static const uint8_t charMap_8859_9[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
    0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
507 
/*
 * N-gram table named for windows-1251: 64 values in ascending order, which is
 * the layout NGramParser::search() requires. Each value packs three 8-bit
 * codes, most recent in the low byte, matching how NGramParser::addByte()
 * builds the rolling n-gram.
 * NOTE(review): presumably the most frequent trigrams of the target language
 * after mapping through charMap_windows_1251 - how the list was produced is
 * not visible here.
 */
static const int32_t ngrams_windows_1251[] = {
    0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
    0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
    0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
    0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
};
514 
/*
 * 256-entry byte map named for windows-1251, in the form NGramParser expects
 * of a charMap: 0x20 entries are treated as (collapsible) spaces and 0x00
 * entries are skipped.
 *
 *  - 0x27 maps to 0x00; 'A'-'Z' fold to 'a'-'z'.
 *  - 0xC0-0xDF fold onto 0xE0-0xFF, which map to themselves.
 *  - In 0x80-0xBF a handful of codes map to themselves or fold onto a partner
 *    code (e.g. 0x80 -> 0x90, 0x81 -> 0x83, 0xA3 -> 0xBC, 0xBD -> 0xBE); the
 *    rest map to 0x20.
 */
static const uint8_t charMap_windows_1251[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
    0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
    0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20,
    0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF,
    0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20,
    0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
    0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
};
549 
/*
 * N-gram table named for windows-1256: 64 values in ascending order, which is
 * the layout NGramParser::search() requires. Each value packs three 8-bit
 * codes, most recent in the low byte, matching how NGramParser::addByte()
 * builds the rolling n-gram.
 * NOTE(review): presumably the most frequent trigrams of the target language
 * after mapping through charMap_windows_1256 - how the list was produced is
 * not visible here.
 */
static const int32_t ngrams_windows_1256[] = {
    0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
    0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
    0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
    0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
};
556 
/*
 * 256-entry byte map named for windows-1256, in the form NGramParser expects
 * of a charMap: 0x20 entries are treated as (collapsible) spaces and 0x00
 * entries are skipped.
 *
 *  - 0x27 maps to 0x00; 'A'-'Z' fold to 'a'-'z'.
 *  - 0xC0-0xEF map to themselves, except 0xD7 which maps to 0x20.
 *  - A few codes in 0x80-0xBF and 0xF0-0xFF map to themselves (0x8C folds
 *    onto 0x9C); the remainder map to 0x20.
 */
static const uint8_t charMap_windows_1256[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
    0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F,
    0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20,
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
    0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
    0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20,
    0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF,
};
591 
/*
 * N-gram table named for KOI8-R: 64 values in ascending order, which is the
 * layout NGramParser::search() requires. Each value packs three 8-bit codes,
 * most recent in the low byte, matching how NGramParser::addByte() builds the
 * rolling n-gram.
 * NOTE(review): presumably the most frequent trigrams of the target language
 * after mapping through charMap_KOI8_R - how the list was produced is not
 * visible here.
 */
static const int32_t ngrams_KOI8_R[] = {
    0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
    0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
    0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
    0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
};
598 
/*
 * 256-entry byte map named for KOI8-R, in the form NGramParser expects of a
 * charMap: 0x20 entries are treated as (collapsible) spaces and 0x00 entries
 * are skipped.
 *
 *  - 0x27 maps to 0x00; 'A'-'Z' fold to 'a'-'z'.
 *  - 0xA3 maps to itself and 0xB3 folds onto 0xA3; the rest of 0x80-0xBF maps
 *    to 0x20.
 *  - 0xC0-0xDF map to themselves and 0xE0-0xFF fold onto 0xC0-0xDF.
 */
static const uint8_t charMap_KOI8_R[] = {
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
    0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
    0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
    0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
};
633 
634 #if !UCONFIG_ONLY_HTML_CONVERSION
/*
 * N-gram table named for IBM424 Hebrew, right-to-left: 64 values in ascending
 * order, which is the layout NGramParser::search() requires. Each value packs
 * three 8-bit codes, most recent in the low byte, matching how
 * NGramParser::addByte() builds the rolling n-gram.
 * NOTE(review): presumably frequent trigrams after mapping through
 * charMap_IBM424_he, in the byte order of RTL-stored text - not verifiable
 * from this file alone.
 */
static const int32_t ngrams_IBM424_he_rtl[] = {
    0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
    0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
    0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056,
    0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069,
};
641 
/*
 * N-gram table named for IBM424 Hebrew, left-to-right: 64 values in ascending
 * order, which is the layout NGramParser::search() requires. Each value packs
 * three 8-bit codes, most recent in the low byte, matching how
 * NGramParser::addByte() builds the rolling n-gram.
 * NOTE(review): presumably frequent trigrams after mapping through
 * charMap_IBM424_he, in the byte order of LTR-stored text - not verifiable
 * from this file alone.
 */
static const int32_t ngrams_IBM424_he_ltr[] = {
    0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
    0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
    0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940,
    0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651,
};
648 
/*
 * 256-entry byte map named for IBM424 (Hebrew). The filler value here is 0x40
 * rather than 0x20, and 0x7D is the one entry that maps to 0x00 (skipped by
 * the parser).
 *
 *  - 0x41-0x49, 0x51-0x59, 0x62-0x69 and 0x71 map to themselves.
 *  - 0x81-0x89, 0x91-0x99 and 0xA2-0xA9 map to themselves, and 0xC1-0xC9,
 *    0xD1-0xD9 and 0xE2-0xE9 fold onto those same ranges.
 *  - 0xA0 maps to itself; everything else maps to 0x40.
 *
 * NOTE(review): NGramParser::parseCharacters() only collapses runs of 0x20,
 * so with this map runs of 0x40 are not collapsed by the visible code, and
 * NGramParser::parse() still appends a trailing 0x20 - confirm this is
 * intended (see the "0x20 might not be a space" TODOs).
 */
static const uint8_t charMap_IBM424_he[] = {
/*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
/* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 4- */    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 5- */    0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 7- */    0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40,
/* 8- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 9- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* B- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
};
668 
/*
 * Trigram table handed to match_sbcs() by CharsetRecog_IBM420_ar_rtl::match().
 * It holds 64 values, each a 24-bit number (three packed bytes, cf. N_GRAM_SIZE).
 * NOTE(review): presumably must stay at 64 entries in ascending order for the
 * parser's binary search -- confirm against NGramParser_IBM420.
 */
static const int32_t ngrams_IBM420_ar_rtl[] = {
    0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158,
    0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB,
    0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40,
    0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40,
};
675 
/*
 * Trigram table handed to match_sbcs() by CharsetRecog_IBM420_ar_ltr::match().
 * It holds 64 values, each a 24-bit number (three packed bytes, cf. N_GRAM_SIZE).
 * NOTE(review): presumably must stay at 64 entries in ascending order for the
 * parser's binary search -- confirm against NGramParser_IBM420.
 */
static const int32_t ngrams_IBM420_ar_ltr[] = {
    0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF,
    0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD,
    0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156,
    0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156
};
682 
/*
 * 256-entry byte map passed as the byteMap argument of match_sbcs() by both
 * IBM420 recognizers (rtl and ltr). Laid out as 16 rows of 16: the row label
 * is the high nibble of the input byte, the column header the low nibble.
 * Rows 8- and 9- map every byte to itself; columns 1-9 of rows C- and D- and
 * columns 2-9 of row E- carry the same values as the corresponding cells of
 * rows 8-, 9- and A- (presumably case folding -- confirm against the code
 * page). Many remaining bytes map to 0x40.
 */
static const uint8_t charMap_IBM420_ar[]= {
/*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
/* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 4- */    0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 5- */    0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 7- */    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
/* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
/* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
/* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
/* B- */    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
/* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF,
/* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
/* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
/* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
};
702 #endif
703 
704 //ISO-8859-1,2,5,6,7,8,9 Ngrams
705 
/*
 * Pairs one 64-entry trigram table with the language tag it belongs to.
 * Used for the ngrams_8859_1 and ngrams_8859_2 arrays, whose recognizers try
 * several languages for the same charset.
 */
struct NGramsPlusLang {
    const int32_t ngrams[64];   // trigram values, passed to match_sbcs()
    const char *  lang;         // language tag reported on a match, e.g. "en"
};
710 
/*
 * Per-language trigram tables for CharsetRecog_8859_1::match(), which scores
 * the input against every entry and keeps the best one. Languages, in order:
 * en, da, de, es, fr, it, nl, no, pt, sv.
 * NOTE(review): each 64-entry list is presumably binary-searched by
 * NGramParser::search(), so it should stay in ascending order.
 */
static const NGramsPlusLang ngrams_8859_1[] =  {
  {
    {
    0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
    0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
    0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
    0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
    },
    "en"
  },
  {
    {
    0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
    0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
    0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
    0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
    },
    "da"
  },
  {
    {
    0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
    0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
    0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
    0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
    },
    "de"
  },
  {
    {
    0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
    0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
    0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
    0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
    },
    "es"
  },
  {
    {
    0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
    0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
    0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
    0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
    },
    "fr"
  },
  {
    {
    0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
    0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
    0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
    0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
    },
    "it"
  },
  {
    {
    0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
    0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
    0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
    0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
    },
    "nl"
  },
  {
    {
    0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
    0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
    0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
    0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
    },
    "no"
  },
  {
    {
    0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
    0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
    0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
    0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
    },
    "pt"
  },
  {
    {
    0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
    0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
    0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
    0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
    },
    "sv"
  }
};
803 
804 
/*
 * Per-language trigram tables for CharsetRecog_8859_2::match(), which scores
 * the input against every entry and keeps the best one. Languages, in order:
 * cs, hu, pl, ro.
 * NOTE(review): each 64-entry list is presumably binary-searched by
 * NGramParser::search(), so it should stay in ascending order.
 */
static const NGramsPlusLang ngrams_8859_2[] =  {
  {
    {
    0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
    0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
    0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
    0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
    },
    "cs"
  },
  {
    {
    0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
    0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
    0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
    0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
    },
    "hu"
  },
  {
    {
    0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
    0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
    0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
    0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
    },
    "pl"
  },
  {
    {
    0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
    0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
    0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
    0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
    },
    "ro"
  }
};
843 
/*
 * Trigram table handed to match_sbcs() by CharsetRecog_8859_5_ru::match().
 * It holds 64 values, each a 24-bit number (three packed bytes, cf. N_GRAM_SIZE).
 * NOTE(review): NGramParser::search() binary-searches a table of exactly 64
 * entries, so presumably this list must keep its size and ascending order.
 */
static const int32_t ngrams_8859_5_ru[] = {
    0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
    0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
    0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
    0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
};
850 
/*
 * Trigram table handed to match_sbcs() by CharsetRecog_8859_6_ar::match().
 * It holds 64 values, each a 24-bit number (three packed bytes, cf. N_GRAM_SIZE).
 * NOTE(review): NGramParser::search() binary-searches a table of exactly 64
 * entries, so presumably this list must keep its size and ascending order.
 */
static const int32_t ngrams_8859_6_ar[] = {
    0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
    0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
    0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
    0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
};
857 
/*
 * Trigram table handed to match_sbcs() by CharsetRecog_8859_7_el::match().
 * It holds 64 values, each a 24-bit number (three packed bytes, cf. N_GRAM_SIZE).
 * NOTE(review): NGramParser::search() binary-searches a table of exactly 64
 * entries, so presumably this list must keep its size and ascending order.
 */
static const int32_t ngrams_8859_7_el[] = {
    0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
    0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
    0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
    0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
};
864 
/*
 * Trigram table handed to match_sbcs() by CharsetRecog_8859_8_I_he::match().
 * It holds 64 values, each a 24-bit number (three packed bytes, cf. N_GRAM_SIZE).
 * NOTE(review): NGramParser::search() binary-searches a table of exactly 64
 * entries, so presumably this list must keep its size and ascending order.
 */
static const int32_t ngrams_8859_8_I_he[] = {
    0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
    0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
    0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
    0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
};
871 
/*
 * Trigram table handed to match_sbcs() by CharsetRecog_8859_8_he::match().
 * It holds 64 values, each a 24-bit number (three packed bytes, cf. N_GRAM_SIZE).
 * NOTE(review): NGramParser::search() binary-searches a table of exactly 64
 * entries, so presumably this list must keep its size and ascending order.
 */
static const int32_t ngrams_8859_8_he[] = {
    0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
    0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
    0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
    0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
};
878 
/*
 * Trigram table handed to match_sbcs() by CharsetRecog_8859_9_tr::match().
 * It holds 64 values, each a 24-bit number (three packed bytes, cf. N_GRAM_SIZE).
 * NOTE(review): NGramParser::search() binary-searches a table of exactly 64
 * entries, so presumably this list must keep its size and ascending order.
 */
static const int32_t ngrams_8859_9_tr[] = {
    0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
    0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
    0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
    0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
};
885 
~CharsetRecog_8859_1()886 CharsetRecog_8859_1::~CharsetRecog_8859_1()
887 {
888     // nothing to do
889 }
890 
match(InputText * textIn,CharsetMatch * results) const891 UBool CharsetRecog_8859_1::match(InputText *textIn, CharsetMatch *results) const {
892     const char *name = textIn->fC1Bytes? "windows-1252" : "ISO-8859-1";
893     uint32_t i;
894     int32_t bestConfidenceSoFar = -1;
895     for (i=0; i < ARRAY_SIZE(ngrams_8859_1) ; i++) {
896         const int32_t *ngrams = ngrams_8859_1[i].ngrams;
897         const char    *lang   = ngrams_8859_1[i].lang;
898         int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_1);
899         if (confidence > bestConfidenceSoFar) {
900             results->set(textIn, this, confidence, name, lang);
901             bestConfidenceSoFar = confidence;
902         }
903     }
904     return (bestConfidenceSoFar > 0);
905 }
906 
getName() const907 const char *CharsetRecog_8859_1::getName() const
908 {
909     return "ISO-8859-1";
910 }
911 
912 
~CharsetRecog_8859_2()913 CharsetRecog_8859_2::~CharsetRecog_8859_2()
914 {
915     // nothing to do
916 }
917 
match(InputText * textIn,CharsetMatch * results) const918 UBool CharsetRecog_8859_2::match(InputText *textIn, CharsetMatch *results) const {
919     const char *name = textIn->fC1Bytes? "windows-1250" : "ISO-8859-2";
920     uint32_t i;
921     int32_t bestConfidenceSoFar = -1;
922     for (i=0; i < ARRAY_SIZE(ngrams_8859_2) ; i++) {
923         const int32_t *ngrams = ngrams_8859_2[i].ngrams;
924         const char    *lang   = ngrams_8859_2[i].lang;
925         int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_2);
926         if (confidence > bestConfidenceSoFar) {
927             results->set(textIn, this, confidence, name, lang);
928             bestConfidenceSoFar = confidence;
929         }
930     }
931     return (bestConfidenceSoFar > 0);
932 }
933 
getName() const934 const char *CharsetRecog_8859_2::getName() const
935 {
936     return "ISO-8859-2";
937 }
938 
939 
~CharsetRecog_8859_5()940 CharsetRecog_8859_5::~CharsetRecog_8859_5()
941 {
942     // nothing to do
943 }
944 
getName() const945 const char *CharsetRecog_8859_5::getName() const
946 {
947     return "ISO-8859-5";
948 }
949 
~CharsetRecog_8859_5_ru()950 CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
951 {
952     // nothing to do
953 }
954 
getLanguage() const955 const char *CharsetRecog_8859_5_ru::getLanguage() const
956 {
957     return "ru";
958 }
959 
match(InputText * textIn,CharsetMatch * results) const960 UBool CharsetRecog_8859_5_ru::match(InputText *textIn, CharsetMatch *results) const
961 {
962     int32_t confidence = match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);
963     results->set(textIn, this, confidence);
964     return (confidence > 0);
965 }
966 
~CharsetRecog_8859_6()967 CharsetRecog_8859_6::~CharsetRecog_8859_6()
968 {
969     // nothing to do
970 }
971 
getName() const972 const char *CharsetRecog_8859_6::getName() const
973 {
974     return "ISO-8859-6";
975 }
976 
~CharsetRecog_8859_6_ar()977 CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
978 {
979     // nothing to do
980 }
981 
getLanguage() const982 const char *CharsetRecog_8859_6_ar::getLanguage() const
983 {
984     return "ar";
985 }
986 
match(InputText * textIn,CharsetMatch * results) const987 UBool CharsetRecog_8859_6_ar::match(InputText *textIn, CharsetMatch *results) const
988 {
989     int32_t confidence = match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);
990     results->set(textIn, this, confidence);
991     return (confidence > 0);
992 }
993 
~CharsetRecog_8859_7()994 CharsetRecog_8859_7::~CharsetRecog_8859_7()
995 {
996     // nothing to do
997 }
998 
getName() const999 const char *CharsetRecog_8859_7::getName() const
1000 {
1001     return "ISO-8859-7";
1002 }
1003 
~CharsetRecog_8859_7_el()1004 CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
1005 {
1006     // nothing to do
1007 }
1008 
getLanguage() const1009 const char *CharsetRecog_8859_7_el::getLanguage() const
1010 {
1011     return "el";
1012 }
1013 
match(InputText * textIn,CharsetMatch * results) const1014 UBool CharsetRecog_8859_7_el::match(InputText *textIn, CharsetMatch *results) const
1015 {
1016     const char *name = textIn->fC1Bytes? "windows-1253" : "ISO-8859-7";
1017     int32_t confidence = match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);
1018     results->set(textIn, this, confidence, name, "el");
1019     return (confidence > 0);
1020 }
1021 
~CharsetRecog_8859_8()1022 CharsetRecog_8859_8::~CharsetRecog_8859_8()
1023 {
1024     // nothing to do
1025 }
1026 
getName() const1027 const char *CharsetRecog_8859_8::getName() const
1028 {
1029     return "ISO-8859-8";
1030 }
1031 
~CharsetRecog_8859_8_I_he()1032 CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he ()
1033 {
1034     // nothing to do
1035 }
1036 
getName() const1037 const char *CharsetRecog_8859_8_I_he::getName() const
1038 {
1039     return "ISO-8859-8-I";
1040 }
1041 
getLanguage() const1042 const char *CharsetRecog_8859_8_I_he::getLanguage() const
1043 {
1044     return "he";
1045 }
1046 
match(InputText * textIn,CharsetMatch * results) const1047 UBool CharsetRecog_8859_8_I_he::match(InputText *textIn, CharsetMatch *results) const
1048 {
1049     const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8-I";
1050     int32_t confidence = match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);
1051     results->set(textIn, this, confidence, name, "he");
1052     return (confidence > 0);
1053 }
1054 
~CharsetRecog_8859_8_he()1055 CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
1056 {
1057     // od ot gnihton
1058 }
1059 
getLanguage() const1060 const char *CharsetRecog_8859_8_he::getLanguage() const
1061 {
1062     return "he";
1063 }
1064 
match(InputText * textIn,CharsetMatch * results) const1065 UBool CharsetRecog_8859_8_he::match(InputText *textIn, CharsetMatch *results) const
1066 {
1067     const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8";
1068     int32_t confidence = match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);
1069     results->set(textIn, this, confidence, name, "he");
1070     return (confidence > 0);
1071 }
1072 
~CharsetRecog_8859_9()1073 CharsetRecog_8859_9::~CharsetRecog_8859_9()
1074 {
1075     // nothing to do
1076 }
1077 
getName() const1078 const char *CharsetRecog_8859_9::getName() const
1079 {
1080     return "ISO-8859-9";
1081 }
1082 
~CharsetRecog_8859_9_tr()1083 CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
1084 {
1085     // nothing to do
1086 }
1087 
getLanguage() const1088 const char *CharsetRecog_8859_9_tr::getLanguage() const
1089 {
1090     return "tr";
1091 }
1092 
match(InputText * textIn,CharsetMatch * results) const1093 UBool CharsetRecog_8859_9_tr::match(InputText *textIn, CharsetMatch *results) const
1094 {
1095     const char *name = textIn->fC1Bytes? "windows-1254" : "ISO-8859-9";
1096     int32_t confidence = match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);
1097     results->set(textIn, this, confidence, name, "tr");
1098     return (confidence > 0);
1099 }
1100 
~CharsetRecog_windows_1256()1101 CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
1102 {
1103     // nothing to do
1104 }
1105 
getName() const1106 const char *CharsetRecog_windows_1256::getName() const
1107 {
1108     return  "windows-1256";
1109 }
1110 
getLanguage() const1111 const char *CharsetRecog_windows_1256::getLanguage() const
1112 {
1113     return "ar";
1114 }
1115 
match(InputText * textIn,CharsetMatch * results) const1116 UBool CharsetRecog_windows_1256::match(InputText *textIn, CharsetMatch *results) const
1117 {
1118     int32_t confidence = match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);
1119     results->set(textIn, this, confidence);
1120     return (confidence > 0);
1121 }
1122 
~CharsetRecog_windows_1251()1123 CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
1124 {
1125     // nothing to do
1126 }
1127 
getName() const1128 const char *CharsetRecog_windows_1251::getName() const
1129 {
1130     return  "windows-1251";
1131 }
1132 
getLanguage() const1133 const char *CharsetRecog_windows_1251::getLanguage() const
1134 {
1135     return "ru";
1136 }
1137 
match(InputText * textIn,CharsetMatch * results) const1138 UBool CharsetRecog_windows_1251::match(InputText *textIn, CharsetMatch *results) const
1139 {
1140     int32_t confidence = match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);
1141     results->set(textIn, this, confidence);
1142     return (confidence > 0);
1143 }
1144 
~CharsetRecog_KOI8_R()1145 CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
1146 {
1147     // nothing to do
1148 }
1149 
getName() const1150 const char *CharsetRecog_KOI8_R::getName() const
1151 {
1152     return  "KOI8-R";
1153 }
1154 
getLanguage() const1155 const char *CharsetRecog_KOI8_R::getLanguage() const
1156 {
1157     return "ru";
1158 }
1159 
match(InputText * textIn,CharsetMatch * results) const1160 UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
1161 {
1162     int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
1163     results->set(textIn, this, confidence);
1164     return (confidence > 0);
1165 }
1166 
1167 #if !UCONFIG_ONLY_HTML_CONVERSION
~CharsetRecog_IBM424_he()1168 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
1169 {
1170     // nothing to do
1171 }
1172 
getLanguage() const1173 const char *CharsetRecog_IBM424_he::getLanguage() const
1174 {
1175     return "he";
1176 }
1177 
~CharsetRecog_IBM424_he_rtl()1178 CharsetRecog_IBM424_he_rtl::~CharsetRecog_IBM424_he_rtl()
1179 {
1180     // nothing to do
1181 }
1182 
getName() const1183 const char *CharsetRecog_IBM424_he_rtl::getName() const
1184 {
1185     return  "IBM424_rtl";
1186 }
1187 
match(InputText * textIn,CharsetMatch * results) const1188 UBool CharsetRecog_IBM424_he_rtl::match(InputText *textIn, CharsetMatch *results) const
1189 {
1190     int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_rtl, charMap_IBM424_he);
1191     results->set(textIn, this, confidence);
1192     return (confidence > 0);
1193 }
1194 
~CharsetRecog_IBM424_he_ltr()1195 CharsetRecog_IBM424_he_ltr::~CharsetRecog_IBM424_he_ltr()
1196 {
1197     // nothing to do
1198 }
1199 
getName() const1200 const char *CharsetRecog_IBM424_he_ltr::getName() const
1201 {
1202     return  "IBM424_ltr";
1203 }
1204 
match(InputText * textIn,CharsetMatch * results) const1205 UBool CharsetRecog_IBM424_he_ltr::match(InputText *textIn, CharsetMatch *results) const
1206 {
1207     int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_ltr, charMap_IBM424_he);
1208     results->set(textIn, this, confidence);
1209     return (confidence > 0);
1210 }
1211 
~CharsetRecog_IBM420_ar()1212 CharsetRecog_IBM420_ar::~CharsetRecog_IBM420_ar()
1213 {
1214     // nothing to do
1215 }
1216 
getLanguage() const1217 const char *CharsetRecog_IBM420_ar::getLanguage() const
1218 {
1219     return "ar";
1220 }
1221 
1222 
match_sbcs(InputText * det,const int32_t ngrams[],const uint8_t byteMap[]) const1223 int32_t CharsetRecog_IBM420_ar::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
1224 {
1225     NGramParser_IBM420 parser(ngrams, byteMap);
1226     int32_t result;
1227 
1228     result = parser.parse(det);
1229 
1230     return result;
1231 }
1232 
~CharsetRecog_IBM420_ar_rtl()1233 CharsetRecog_IBM420_ar_rtl::~CharsetRecog_IBM420_ar_rtl()
1234 {
1235     // nothing to do
1236 }
1237 
getName() const1238 const char *CharsetRecog_IBM420_ar_rtl::getName() const
1239 {
1240     return  "IBM420_rtl";
1241 }
1242 
match(InputText * textIn,CharsetMatch * results) const1243 UBool CharsetRecog_IBM420_ar_rtl::match(InputText *textIn, CharsetMatch *results) const
1244 {
1245     int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_rtl, charMap_IBM420_ar);
1246     results->set(textIn, this, confidence);
1247     return (confidence > 0);
1248 }
1249 
~CharsetRecog_IBM420_ar_ltr()1250 CharsetRecog_IBM420_ar_ltr::~CharsetRecog_IBM420_ar_ltr()
1251 {
1252     // nothing to do
1253 }
1254 
getName() const1255 const char *CharsetRecog_IBM420_ar_ltr::getName() const
1256 {
1257     return  "IBM420_ltr";
1258 }
1259 
match(InputText * textIn,CharsetMatch * results) const1260 UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results) const
1261 {
1262     int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar);
1263     results->set(textIn, this, confidence);
1264     return (confidence > 0);
1265 }
1266 #endif
1267 
1268 U_NAMESPACE_END
1269 #endif
1270 
1271