• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  *   Copyright (C) 2005-2013, International Business Machines
4  *   Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  */
7 
8 #include "unicode/utypes.h"
9 
10 #include "cmemory.h"
11 
12 #if !UCONFIG_NO_CONVERSION
13 #include "csrsbcs.h"
14 #include "csmatch.h"
15 
16 #define N_GRAM_SIZE 3
17 #define N_GRAM_MASK 0xFFFFFF
18 #define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
19 
20 U_NAMESPACE_BEGIN
21 
NGramParser(const int32_t * theNgramList,const uint8_t * theCharMap)22 NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap)
23  : ngram(0), byteIndex(0)
24 {
25     ngramList = theNgramList;
26     charMap   = theCharMap;
27 
28     ngramCount = hitCount = 0;
29 }
30 
31 /*
32  * Binary search for value in table, which must have exactly 64 entries.
33  */
34 
search(const int32_t * table,int32_t value)35 int32_t NGramParser::search(const int32_t *table, int32_t value)
36 {
37     int32_t index = 0;
38 
39     if (table[index + 32] <= value) {
40         index += 32;
41     }
42 
43     if (table[index + 16] <= value) {
44         index += 16;
45     }
46 
47     if (table[index + 8] <= value) {
48         index += 8;
49     }
50 
51     if (table[index + 4] <= value) {
52         index += 4;
53     }
54 
55     if (table[index + 2] <= value) {
56         index += 2;
57     }
58 
59     if (table[index + 1] <= value) {
60         index += 1;
61     }
62 
63     if (table[index] > value) {
64         index -= 1;
65     }
66 
67     if (index < 0 || table[index] != value) {
68         return -1;
69     }
70 
71     return index;
72 }
73 
lookup(int32_t thisNgram)74 void NGramParser::lookup(int32_t thisNgram)
75 {
76     ngramCount += 1;
77 
78     if (search(ngramList, thisNgram) >= 0) {
79         hitCount += 1;
80     }
81 
82 }
83 
addByte(int32_t b)84 void NGramParser::addByte(int32_t b)
85 {
86     ngram = ((ngram << 8) + b) & N_GRAM_MASK;
87     lookup(ngram);
88 }
89 
nextByte(InputText * det)90 int32_t NGramParser::nextByte(InputText *det)
91 {
92     if (byteIndex >= det->fInputLen) {
93         return -1;
94     }
95 
96     return det->fInputBytes[byteIndex++];
97 }
98 
parseCharacters(InputText * det)99 void NGramParser::parseCharacters(InputText *det)
100 {
101     int32_t b;
102     bool ignoreSpace = FALSE;
103 
104     while ((b = nextByte(det)) >= 0) {
105         uint8_t mb = charMap[b];
106 
107         // TODO: 0x20 might not be a space in all character sets...
108         if (mb != 0) {
109             if (!(mb == 0x20 && ignoreSpace)) {
110                 addByte(mb);
111             }
112 
113             ignoreSpace = (mb == 0x20);
114         }
115     }
116 }
117 
parse(InputText * det)118 int32_t NGramParser::parse(InputText *det)
119 {
120     parseCharacters(det);
121 
122     // TODO: Is this OK? The buffer could have ended in the middle of a word...
123     addByte(0x20);
124 
125     double rawPercent = (double) hitCount / (double) ngramCount;
126 
127     //            if (rawPercent <= 2.0) {
128     //                return 0;
129     //            }
130 
131     // TODO - This is a bit of a hack to take care of a case
132     // were we were getting a confidence of 135...
133     if (rawPercent > 0.33) {
134         return 98;
135     }
136 
137     return (int32_t) (rawPercent * 300.0);
138 }
139 
/*
 * 256-entry table, one output byte per input byte, used by
 * NGramParser_IBM420::nextByte() to replace each raw input byte before
 * it is handed to the character map.
 *
 * Bytes 0x00-0x3F all become 0x40 and bytes 0xE0-0xFF map to
 * themselves. In between, most bytes map to themselves while some are
 * folded onto a different code (0x43 -> 0x42, 0x48 -> 0x47, ...),
 * presumably to collapse the contextual shapes of an Arabic letter
 * onto a single form - TODO confirm against the IBM420 code page.
 */
140 static const uint8_t unshapeMap_IBM420[] = {
141 /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
142 /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
143 /* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
144 /* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
145 /* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
146 /* 4- */    0x40, 0x40, 0x42, 0x42, 0x44, 0x45, 0x46, 0x47, 0x47, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
147 /* 5- */    0x50, 0x49, 0x52, 0x53, 0x54, 0x55, 0x56, 0x56, 0x58, 0x58, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
148 /* 6- */    0x60, 0x61, 0x62, 0x63, 0x63, 0x65, 0x65, 0x67, 0x67, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
149 /* 7- */    0x69, 0x71, 0x71, 0x73, 0x74, 0x75, 0x76, 0x77, 0x77, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
150 /* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x80, 0x8B, 0x8B, 0x8D, 0x8D, 0x8F,
151 /* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9A, 0x9A, 0x9A, 0x9E, 0x9E,
152 /* A- */    0x9E, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x9E, 0xAB, 0xAB, 0xAD, 0xAD, 0xAF,
153 /* B- */    0xAF, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xB1, 0xBB, 0xBB, 0xBD, 0xBD, 0xBF,
154 /* C- */    0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xBF, 0xCC, 0xBF, 0xCE, 0xCF,
155 /* D- */    0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDA, 0xDC, 0xDC, 0xDC, 0xDF,
156 /* E- */    0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
157 /* F- */    0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
158 };
159 
NGramParser_IBM420(const int32_t * theNgramList,const uint8_t * theCharMap)160 NGramParser_IBM420::NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap):NGramParser(theNgramList, theCharMap)
161 {
162 	alef = 0x00;
163 }
164 
165 
isLamAlef(int32_t b)166 int32_t NGramParser_IBM420::isLamAlef(int32_t b)
167 {
168 	if(b == 0xB2 || b == 0xB3){
169          	return 0x47;
170         }else if(b == 0xB4 || b == 0xB5){
171          	return 0x49;
172         }else if(b == 0xB8 || b == 0xB9){
173          	return 0x56;
174         }else
175          	return 0x00;
176 }
177 
178 /*
179 * Arabic shaping needs to be done manually. Cannot call ArabicShaping class
180 * because CharsetDetector is dealing with bytes not Unicode code points. We could
181 * convert the bytes to Unicode code points but that would leave us dependent
182 * on CharsetICU which we try to avoid. IBM420 converter amongst different versions
183 * of JDK can produce different results and therefore is also avoided.
184 */
nextByte(InputText * det)185 int32_t NGramParser_IBM420::nextByte(InputText *det)
186 {
187 
188     if (byteIndex >= det->fInputLen || det->fInputBytes[byteIndex] == 0) {
189         return -1;
190     }
191     int next;
192 
193     alef = isLamAlef(det->fInputBytes[byteIndex]);
194     if(alef != 0x00)
195         next = 0xB1 & 0xFF;
196     else
197         next = unshapeMap_IBM420[det->fInputBytes[byteIndex]& 0xFF] & 0xFF;
198 
199     byteIndex++;
200 
201     return next;
202 }
203 
parseCharacters(InputText * det)204 void NGramParser_IBM420::parseCharacters(InputText *det)
205 {
206 	int32_t b;
207     bool ignoreSpace = FALSE;
208 
209     while ((b = nextByte(det)) >= 0) {
210         uint8_t mb = charMap[b];
211 
212         // TODO: 0x20 might not be a space in all character sets...
213         if (mb != 0) {
214             if (!(mb == 0x20 && ignoreSpace)) {
215                 addByte(mb);
216             }
217             ignoreSpace = (mb == 0x20);
218         }
219 
220 		if(alef != 0x00){
221             mb = charMap[alef & 0xFF];
222 
223             // TODO: 0x20 might not be a space in all character sets...
224             if (mb != 0) {
225                 if (!(mb == 0x20 && ignoreSpace)) {
226                     addByte(mb);
227                 }
228 
229                 ignoreSpace = (mb == 0x20);
230             }
231 
232         }
233     }
234 }
235 
CharsetRecog_sbcs()236 CharsetRecog_sbcs::CharsetRecog_sbcs()
237 {
238     // nothing else to do
239 }
240 
~CharsetRecog_sbcs()241 CharsetRecog_sbcs::~CharsetRecog_sbcs()
242 {
243     // nothing to do
244 }
245 
match_sbcs(InputText * det,const int32_t ngrams[],const uint8_t byteMap[]) const246 int32_t CharsetRecog_sbcs::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
247 {
248     NGramParser parser(ngrams, byteMap);
249     int32_t result;
250 
251     result = parser.parse(det);
252 
253     return result;
254 }
255 
/*
 * 256-entry byte map (32 rows of 8), indexed by input byte.
 *
 * 0x00-0x7F: the letters 0x41-0x5A and 0x61-0x7A both map to
 * 0x61-0x7A, 0x27 maps to 0x00 and every other byte maps to 0x20.
 * 0x80-0xBF: 0x20, except 0xAA, 0xB5 and 0xBA, which map to themselves.
 * 0xC0-0xDE: map to 0xE0-0xFE (byte + 0x20), except 0xD7 -> 0x20.
 * 0xDF-0xFF: map to themselves, except 0xF7 -> 0x20.
 *
 * When used as an NGramParser charMap, entries of 0x00 are dropped and
 * runs of 0x20 are collapsed by parseCharacters().
 */
256 static const uint8_t charMap_8859_1[] = {
257     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
258     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
259     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
260     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
261     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
262     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
263     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
264     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
265     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
266     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
267     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
268     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
269     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
270     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
271     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
272     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
273     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
274     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
275     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
276     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
277     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
278     0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
279     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
280     0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
281     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
282     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
283     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
284     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
285     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
286     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
287     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
288     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
289 };
290 
/*
 * 256-entry byte map (32 rows of 8), indexed by input byte.
 *
 * 0x00-0x7F: the letters 0x41-0x5A and 0x61-0x7A both map to
 * 0x61-0x7A, 0x27 maps to 0x00 and every other byte maps to 0x20.
 * 0x80-0x9F: 0x20.
 * 0xA0-0xBF: selected bytes map to a code in 0xB1-0xBF
 * (0xA1 -> 0xB1, 0xA9 -> 0xB9, 0xB1 -> 0xB1, ...); the rest map to 0x20.
 * 0xC0-0xDE: map to 0xE0-0xFE (byte + 0x20), except 0xD7 -> 0x20.
 * 0xDF-0xFF: map to themselves, except 0xF7 and 0xFF -> 0x20.
 */
291 static const uint8_t charMap_8859_2[] = {
292     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
293     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
294     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
295     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
296     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
297     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
298     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
299     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
300     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
301     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
302     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
303     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
304     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
305     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
306     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
307     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
308     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
309     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
310     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
311     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
312     0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20,
313     0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
314     0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7,
315     0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
316     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
317     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
318     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
319     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
320     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
321     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
322     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
323     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
324 };
325 
/*
 * 256-entry byte map (32 rows of 8), indexed by input byte.
 *
 * 0x00-0x7F: the letters 0x41-0x5A and 0x61-0x7A both map to
 * 0x61-0x7A, 0x27 maps to 0x00 and every other byte maps to 0x20.
 * 0x80-0xA0: 0x20.
 * 0xA1-0xAF: map to 0xF1-0xFF (byte + 0x50), except 0xAD -> 0x20.
 * 0xB0-0xCF: map to 0xD0-0xEF (byte + 0x20).
 * 0xD0-0xEF: map to themselves.
 * 0xF0-0xFF: map to themselves, except 0xF0 and 0xFD -> 0x20.
 */
326 static const uint8_t charMap_8859_5[] = {
327     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
328     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
329     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
330     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
331     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
332     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
333     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
334     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
335     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
336     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
337     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
338     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
339     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
340     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
341     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
342     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
343     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
344     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
345     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
346     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
347     0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
348     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
349     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
350     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
351     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
352     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
353     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
354     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
355     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
356     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
357     0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
358     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
359 };
360 
/*
 * 256-entry byte map (32 rows of 8), indexed by input byte.
 *
 * 0x00-0x7F: the letters 0x41-0x5A and 0x61-0x7A both map to
 * 0x61-0x7A, 0x27 maps to 0x00 and every other byte maps to 0x20.
 * 0x80-0xFF: 0xC1-0xDA and 0xE0-0xEA map to themselves; every other
 * byte maps to 0x20.
 */
361 static const uint8_t charMap_8859_6[] = {
362     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
363     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
364     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
365     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
366     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
367     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
368     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
369     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
370     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
371     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
372     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
373     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
374     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
375     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
376     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
377     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
378     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
379     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
380     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
381     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
382     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
383     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
384     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
385     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
386     0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
387     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
388     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
389     0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20,
390     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
391     0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20,
392     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
393     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
394 };
395 
/*
 * 256-entry byte map (32 rows of 8), indexed by input byte.
 *
 * 0x00-0x7F: the letters 0x41-0x5A and 0x61-0x7A both map to
 * 0x61-0x7A, 0x27 maps to 0x00 and every other byte maps to 0x20.
 * 0x80-0xBF: 0x20, except 0xA1 and 0xA2 (unchanged) and
 * 0xB6 -> 0xDC, 0xB8 -> 0xDD, 0xB9 -> 0xDE, 0xBA -> 0xDF,
 * 0xBC -> 0xFC, 0xBE -> 0xFD, 0xBF -> 0xFE.
 * 0xC0: unchanged. 0xC1-0xDB: map to byte + 0x20, except 0xD2 -> 0x20.
 * 0xDC-0xFE: map to themselves. 0xFF: 0x20.
 */
396 static const uint8_t charMap_8859_7[] = {
397     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
398     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
399     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
400     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
401     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
402     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
403     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
404     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
405     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
406     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
407     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
408     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
409     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
410     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
411     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
412     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
413     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
414     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
415     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
416     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
417     0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20,
418     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
419     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20,
420     0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE,
421     0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
422     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
423     0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
424     0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF,
425     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
426     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
427     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
428     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
429 };
430 
/*
 * 256-entry byte map (32 rows of 8), indexed by input byte.
 *
 * 0x00-0x7F: the letters 0x41-0x5A and 0x61-0x7A both map to
 * 0x61-0x7A, 0x27 maps to 0x00 and every other byte maps to 0x20.
 * 0x80-0xFF: 0xB5 and 0xE0-0xFA map to themselves; every other byte
 * maps to 0x20.
 */
431 static const uint8_t charMap_8859_8[] = {
432     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
433     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
434     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
435     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
436     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
437     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
438     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
439     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
440     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
441     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
442     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
443     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
444     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
445     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
446     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
447     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
448     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
449     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
450     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
451     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
452     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
453     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
454     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
455     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
456     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
457     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
458     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
459     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
460     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
461     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
462     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
463     0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20,
464 };
465 
/*
 * 256-entry byte map (32 rows of 8), indexed by input byte.
 *
 * 0x00-0x7F: the letters 0x41-0x5A and 0x61-0x7A both map to
 * 0x61-0x7A, 0x27 maps to 0x00 and every other byte maps to 0x20.
 * 0x80-0xBF: 0x20, except 0xAA, 0xB5 and 0xBA, which map to themselves.
 * 0xC0-0xDE: map to 0xE0-0xFE (byte + 0x20), except 0xD7 -> 0x20 and
 * 0xDD -> 0x69 (the same value the map gives 'I' and 'i').
 * 0xDF-0xFF: map to themselves, except 0xF7 -> 0x20.
 */
466 static const uint8_t charMap_8859_9[] = {
467     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
468     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
469     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
470     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
471     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
472     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
473     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
474     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
475     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
476     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
477     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
478     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
479     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
480     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
481     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
482     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
483     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
484     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
485     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
486     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
487     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
488     0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
489     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
490     0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
491     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
492     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
493     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
494     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF,
495     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
496     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
497     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
498     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
499 };
500 
/*
 * 64 three-byte n-grams (4 rows of 16), each packed as 0xAABBCC with
 * the oldest byte in the top position, matching how
 * NGramParser::addByte() builds its rolling n-gram.
 *
 * Keep the list at exactly 64 entries and in ascending order:
 * NGramParser::search() binary-searches a sorted 64-entry table.
 * Presumably this list is paired with charMap_windows_1251 - the
 * pairing happens outside this part of the file.
 */
501 static const int32_t ngrams_windows_1251[] = {
502     0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
503     0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
504     0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
505     0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
506 };
507 
/*
 * 256-entry byte map (32 rows of 8), indexed by input byte.
 *
 * 0x00-0x7F: the letters 0x41-0x5A and 0x61-0x7A both map to
 * 0x61-0x7A, 0x27 maps to 0x00 and every other byte maps to 0x20.
 * 0x80-0xBF: handled byte by byte - some bytes are folded onto another
 * code in this range (0x80 -> 0x90, 0xA3 -> 0xBC, 0xBD -> 0xBE, ...),
 * some map to themselves, and the rest map to 0x20.
 * 0xC0-0xDF: map to 0xE0-0xFF (byte + 0x20).
 * 0xE0-0xFF: map to themselves.
 */
508 static const uint8_t charMap_windows_1251[] = {
509     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
510     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
511     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
512     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
513     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
514     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
515     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
516     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
517     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
518     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
519     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
520     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
521     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
522     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
523     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
524     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
525     0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
526     0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
527     0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
528     0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
529     0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20,
530     0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF,
531     0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20,
532     0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF,
533     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
534     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
535     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
536     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
537     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
538     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
539     0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
540     0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
541 };
542 
/*
 * 64 three-byte n-grams (4 rows of 16), each packed as 0xAABBCC with
 * the oldest byte in the top position, matching how
 * NGramParser::addByte() builds its rolling n-gram.
 *
 * Keep the list at exactly 64 entries and in ascending order:
 * NGramParser::search() binary-searches a sorted 64-entry table.
 * Presumably this list is paired with charMap_windows_1256 - the
 * pairing happens outside this part of the file.
 */
543 static const int32_t ngrams_windows_1256[] = {
544     0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
545     0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
546     0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
547     0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
548 };
549 
/*
 * 256-entry byte map (32 rows of 8), indexed by input byte.
 *
 * 0x00-0x7F: the letters 0x41-0x5A and 0x61-0x7A both map to
 * 0x61-0x7A, 0x27 maps to 0x00 and every other byte maps to 0x20.
 * 0x80-0xBF: selected bytes map to themselves (0x81, 0x83, 0x88, ...,
 * 0xAA, 0xB5), 0x8C maps to 0x9C, and the rest map to 0x20.
 * 0xC0-0xEF: map to themselves, except 0xD7 -> 0x20.
 * 0xF0-0xFF: 0x20, except 0xF4, 0xF9, 0xFB, 0xFC and 0xFF, which map
 * to themselves.
 */
550 static const uint8_t charMap_windows_1256[] = {
551     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
552     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
553     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
554     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
555     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
556     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
557     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
558     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
559     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
560     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
561     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
562     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
563     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
564     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
565     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
566     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
567     0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
568     0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F,
569     0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
570     0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F,
571     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
572     0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
573     0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
574     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
575     0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
576     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
577     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20,
578     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
579     0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
580     0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
581     0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20,
582     0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF,
583 };
584 
/*
 * 64 three-byte n-grams (4 rows of 16), each packed as 0xAABBCC with
 * the oldest byte in the top position, matching how
 * NGramParser::addByte() builds its rolling n-gram.
 *
 * Keep the list at exactly 64 entries and in ascending order:
 * NGramParser::search() binary-searches a sorted 64-entry table.
 * Presumably this list is paired with charMap_KOI8_R - the pairing
 * happens outside this part of the file.
 */
585 static const int32_t ngrams_KOI8_R[] = {
586     0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
587     0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
588     0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
589     0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
590 };
591 
/*
 * 256-entry byte map (32 rows of 8), indexed by input byte.
 *
 * 0x00-0x7F: the letters 0x41-0x5A and 0x61-0x7A both map to
 * 0x61-0x7A, 0x27 maps to 0x00 and every other byte maps to 0x20.
 * 0x80-0xBF: 0x20, except 0xA3 and 0xB3, which both map to 0xA3.
 * 0xC0-0xDF: map to themselves.
 * 0xE0-0xFF: map to 0xC0-0xDF (byte - 0x20).
 */
592 static const uint8_t charMap_KOI8_R[] = {
593     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
594     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
595     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
596     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
597     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
598     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
599     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
600     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
601     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
602     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
603     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
604     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
605     0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
606     0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
607     0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
608     0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
609     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
610     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
611     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
612     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
613     0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
614     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
615     0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
616     0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
617     0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
618     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
619     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
620     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
621     0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
622     0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
623     0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
624     0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
625 };
626 
/*
 * 64 three-byte n-grams (4 rows of 16), each packed as 0xAABBCC with
 * the oldest byte in the top position, matching how
 * NGramParser::addByte() builds its rolling n-gram.
 *
 * Keep the list at exactly 64 entries and in ascending order:
 * NGramParser::search() binary-searches a sorted 64-entry table.
 * Judging by the name this is the right-to-left variant of the IBM424
 * Hebrew list; where it is used is outside this part of the file.
 */
627 static const int32_t ngrams_IBM424_he_rtl[] = {
628     0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641,
629     0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045,
630     0x514540, 0x514671, 0x515155, 0x515540, 0x515740, 0x516840, 0x517140, 0x544041, 0x544045, 0x544140, 0x544540, 0x554041, 0x554042, 0x554045, 0x554054, 0x554056,
631     0x554069, 0x564540, 0x574045, 0x584540, 0x585140, 0x585155, 0x625440, 0x684045, 0x685155, 0x695440, 0x714041, 0x714042, 0x714045, 0x714054, 0x714056, 0x714069,
632 };
633 
/*
 * 64 three-byte n-grams (4 rows of 16), each packed as 0xAABBCC with
 * the oldest byte in the top position, matching how
 * NGramParser::addByte() builds its rolling n-gram.
 *
 * Keep the list at exactly 64 entries and in ascending order:
 * NGramParser::search() binary-searches a sorted 64-entry table.
 * Judging by the name this is the left-to-right variant of the IBM424
 * Hebrew list; where it is used is outside this part of the file.
 */
634 static const int32_t ngrams_IBM424_he_ltr[] = {
635     0x404146, 0x404154, 0x404551, 0x404554, 0x404556, 0x404558, 0x405158, 0x405462, 0x405469, 0x405546, 0x405551, 0x405746, 0x405751, 0x406846, 0x406851, 0x407141,
636     0x407146, 0x407151, 0x414045, 0x414054, 0x414055, 0x414071, 0x414540, 0x414645, 0x415440, 0x415640, 0x424045, 0x424055, 0x424071, 0x454045, 0x454051, 0x454054,
637     0x454055, 0x454057, 0x454068, 0x454071, 0x455440, 0x464140, 0x464540, 0x484140, 0x514140, 0x514240, 0x514540, 0x544045, 0x544055, 0x544071, 0x546240, 0x546940,
638     0x555151, 0x555158, 0x555168, 0x564045, 0x564055, 0x564071, 0x564240, 0x564540, 0x624540, 0x694045, 0x694055, 0x694071, 0x694540, 0x714140, 0x714540, 0x714651,
639 };
640 
/*
 * 256-entry byte map (16 rows of 16), indexed by input byte.
 *
 * The filler value is 0x40 rather than 0x20. Bytes 0x00-0x3F, all of
 * row B-, all of row F- and the -A to -F columns of the other rows map
 * to 0x40, with one exception: 0x7D maps to 0x00.
 * 0x41-0x49, 0x51-0x59, 0x62-0x69 and 0x71 map to themselves.
 * 0x81-0x89, 0x91-0x99, 0xA0 and 0xA2-0xA9 map to themselves;
 * 0xC1-0xC9, 0xD1-0xD9 and 0xE2-0xE9 are folded onto 0x81-0x89,
 * 0x91-0x99 and 0xA2-0xA9 (byte - 0x40).
 *
 * NOTE(review): NGramParser::parseCharacters() only collapses repeated
 * 0x20 values, so if this map is fed to it, consecutive 0x40 entries
 * are each added - see the TODO in that function.
 */
641 static const uint8_t charMap_IBM424_he[] = {
642 /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
643 /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
644 /* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
645 /* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
646 /* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
647 /* 4- */    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
648 /* 5- */    0x40, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
649 /* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
650 /* 7- */    0x40, 0x71, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x00, 0x40, 0x40,
651 /* 8- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
652 /* 9- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
653 /* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
654 /* B- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
655 /* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
656 /* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
657 /* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
658 /* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
659 };
660 
/*
 * N-gram table used by CharsetRecog_IBM420_ar_rtl::match() (IBM420, Arabic,
 * "rtl" variant).  Four rows of 16 values = 64 entries.
 * NGramParser::search() is a fixed-depth binary search that requires exactly
 * 64 entries, so the values must stay in ascending order and the count must
 * not change.  Each value appears to pack a three-byte sequence into 24 bits
 * (cf. N_GRAM_SIZE / N_GRAM_MASK at the top of the file) -- TODO confirm
 * against the parser.
 */
661 static const int32_t ngrams_IBM420_ar_rtl[] = {
662     0x4056B1, 0x4056BD, 0x405856, 0x409AB1, 0x40ABDC, 0x40B1B1, 0x40BBBD, 0x40CF56, 0x564056, 0x564640, 0x566340, 0x567540, 0x56B140, 0x56B149, 0x56B156, 0x56B158,
663     0x56B163, 0x56B167, 0x56B169, 0x56B173, 0x56B178, 0x56B19A, 0x56B1AD, 0x56B1BB, 0x56B1CF, 0x56B1DC, 0x56BB40, 0x56BD40, 0x56BD63, 0x584056, 0x624056, 0x6240AB,
664     0x6240B1, 0x6240BB, 0x6240CF, 0x634056, 0x734056, 0x736240, 0x754056, 0x756240, 0x784056, 0x9A4056, 0x9AB1DA, 0xABDC40, 0xB14056, 0xB16240, 0xB1DA40, 0xB1DC40,
665     0xBB4056, 0xBB5640, 0xBB6240, 0xBBBD40, 0xBD4056, 0xBF4056, 0xBF5640, 0xCF56B1, 0xCFBD40, 0xDA4056, 0xDC4056, 0xDC40BB, 0xDC40CF, 0xDC6240, 0xDC7540, 0xDCBD40,
666 };
667 
/*
 * N-gram table used by CharsetRecog_IBM420_ar_ltr::match() (IBM420, Arabic,
 * "ltr" variant).  Four rows of 16 values = 64 entries.
 * NGramParser::search() is a fixed-depth binary search that requires exactly
 * 64 entries, so the values must stay in ascending order and the count must
 * not change.
 */
668 static const int32_t ngrams_IBM420_ar_ltr[] = {
669     0x404656, 0x4056BB, 0x4056BF, 0x406273, 0x406275, 0x4062B1, 0x4062BB, 0x4062DC, 0x406356, 0x407556, 0x4075DC, 0x40B156, 0x40BB56, 0x40BD56, 0x40BDBB, 0x40BDCF,
670     0x40BDDC, 0x40DAB1, 0x40DCAB, 0x40DCB1, 0x49B156, 0x564056, 0x564058, 0x564062, 0x564063, 0x564073, 0x564075, 0x564078, 0x56409A, 0x5640B1, 0x5640BB, 0x5640BD,
671     0x5640BF, 0x5640DA, 0x5640DC, 0x565840, 0x56B156, 0x56CF40, 0x58B156, 0x63B156, 0x63BD56, 0x67B156, 0x69B156, 0x73B156, 0x78B156, 0x9AB156, 0xAB4062, 0xADB156,
672     0xB14062, 0xB15640, 0xB156CF, 0xB19A40, 0xB1B140, 0xBB4062, 0xBB40DC, 0xBBB156, 0xBD5640, 0xBDBB40, 0xCF4062, 0xCF40DC, 0xCFB156, 0xDAB19A, 0xDCAB40, 0xDCB156
673 };
674 
/*
 * Byte-mapping table for the IBM420 Arabic recognizers.  It is passed as the
 * byte-map argument of match_sbcs() by both CharsetRecog_IBM420_ar_rtl::match()
 * and CharsetRecog_IBM420_ar_ltr::match().
 *
 * Layout: 16 rows of 16 entries, i.e. one entry per possible input byte value;
 * the row label is the high nibble and the column label is the low nibble.
 * Bytes that are not of interest collapse to 0x40 (presumably the EBCDIC
 * space -- TODO confirm).  Columns 1-9 of rows C- and D- repeat the values of
 * rows 8- and 9-, and columns 2-9 of row E- repeat those of row A-
 * (presumably a case fold -- TODO confirm).
 */
675 static const uint8_t charMap_IBM420_ar[]= {
676 /*           -0    -1    -2    -3    -4    -5    -6    -7    -8    -9    -A    -B    -C    -D    -E    -F   */
677 /* 0- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
678 /* 1- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
679 /* 2- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
680 /* 3- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
681 /* 4- */    0x40, 0x40, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
682 /* 5- */    0x40, 0x51, 0x52, 0x40, 0x40, 0x55, 0x56, 0x57, 0x58, 0x59, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
683 /* 6- */    0x40, 0x40, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
684 /* 7- */    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
685 /* 8- */    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
686 /* 9- */    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
687 /* A- */    0xA0, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
688 /* B- */    0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0x40, 0x40, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
689 /* C- */    0x40, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x40, 0xCB, 0x40, 0xCD, 0x40, 0xCF,
690 /* D- */    0x40, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
691 /* E- */    0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF,
692 /* F- */    0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40,
693 };
694 
695 //ISO-8859-1,2,5,6,7,8,9 Ngrams
696 
/**
 * One language-specific n-gram table bundled with the language code it
 * stands for.
 *
 * Instances in this file are written as positional aggregate initializers
 * ({ { ...64 values... }, "xx" }), so the member order must not change.
 */
struct NGramsPlusLang
{
    const int32_t ngrams[64];  // n-gram values; handed to match_sbcs() as the table
    const char   *lang;        // language code string reported with a match
};
701 
/*
 * Per-language n-gram tables tried by CharsetRecog_8859_1::match().
 * Each element pairs a 64-entry table with the language code that is reported
 * when that table yields the best confidence: en, da, de, es, fr, it, nl, no,
 * pt, sv.
 * NGramParser::search() is a fixed-depth binary search that requires exactly
 * 64 entries, so each table must stay in ascending order and keep its size.
 */
702 static const NGramsPlusLang ngrams_8859_1[] =  {
703   {
704     {
705     0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
706     0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
707     0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
708     0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
709     },
710     "en"
711   },
712   {
713     {
714     0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
715     0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
716     0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
717     0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
718     },
719     "da"
720   },
721   {
722     {
723     0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
724     0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
725     0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
726     0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
727     },
728     "de"
729   },
730   {
731     {
732     0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
733     0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
734     0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
735     0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
736     },
737     "es"
738   },
739   {
740     {
741     0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
742     0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
743     0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
744     0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
745     },
746     "fr"
747   },
748   {
749     {
750     0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
751     0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
752     0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
753     0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
754     },
755     "it"
756   },
757   {
758     {
759     0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
760     0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
761     0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
762     0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
763     },
764     "nl"
765   },
766   {
767     {
768     0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
769     0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
770     0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
771     0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
772     },
773     "no"
774   },
775   {
776     {
777     0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
778     0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
779     0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
780     0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
781     },
782     "pt"
783   },
784   {
785     {
786     0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
787     0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
788     0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
789     0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
790     },
791     "sv"
792   }
793 };
794 
795 
/*
 * Per-language n-gram tables tried by CharsetRecog_8859_2::match().
 * Each element pairs a 64-entry table with the language code that is reported
 * when that table yields the best confidence: cs, hu, pl, ro.
 * NGramParser::search() is a fixed-depth binary search that requires exactly
 * 64 entries, so each table must stay in ascending order and keep its size.
 */
796 static const NGramsPlusLang ngrams_8859_2[] =  {
797   {
798     {
799     0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
800     0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
801     0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
802     0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
803     },
804     "cs"
805   },
806   {
807     {
808     0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
809     0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
810     0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
811     0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
812     },
813     "hu"
814   },
815   {
816     {
817     0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
818     0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
819     0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
820     0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
821     },
822     "pl"
823   },
824   {
825     {
826     0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
827     0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
828     0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
829     0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
830     },
831     "ro"
832   }
833 };
834 
/*
 * N-gram table used by CharsetRecog_8859_5_ru::match() (ISO-8859-5, "ru").
 * Four rows of 16 values = 64 entries.  NGramParser::search() is a
 * fixed-depth binary search that requires exactly 64 entries, so the values
 * must stay in ascending order and the count must not change.
 */
835 static const int32_t ngrams_8859_5_ru[] = {
836     0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
837     0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
838     0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
839     0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
840 };
841 
/*
 * N-gram table used by CharsetRecog_8859_6_ar::match() (ISO-8859-6, "ar").
 * Four rows of 16 values = 64 entries.  NGramParser::search() is a
 * fixed-depth binary search that requires exactly 64 entries, so the values
 * must stay in ascending order and the count must not change.
 */
842 static const int32_t ngrams_8859_6_ar[] = {
843     0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
844     0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
845     0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
846     0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
847 };
848 
/*
 * N-gram table used by CharsetRecog_8859_7_el::match() (ISO-8859-7, "el").
 * Four rows of 16 values = 64 entries.  NGramParser::search() is a
 * fixed-depth binary search that requires exactly 64 entries, so the values
 * must stay in ascending order and the count must not change.
 */
849 static const int32_t ngrams_8859_7_el[] = {
850     0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
851     0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
852     0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
853     0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
854 };
855 
/*
 * N-gram table used by CharsetRecog_8859_8_I_he::match() (ISO-8859-8-I, "he").
 * Four rows of 16 values = 64 entries.  NGramParser::search() is a
 * fixed-depth binary search that requires exactly 64 entries, so the values
 * must stay in ascending order and the count must not change.
 */
856 static const int32_t ngrams_8859_8_I_he[] = {
857     0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
858     0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
859     0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
860     0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
861 };
862 
/*
 * N-gram table used by CharsetRecog_8859_8_he::match() (ISO-8859-8, "he").
 * Four rows of 16 values = 64 entries.  NGramParser::search() is a
 * fixed-depth binary search that requires exactly 64 entries, so the values
 * must stay in ascending order and the count must not change.
 */
863 static const int32_t ngrams_8859_8_he[] = {
864     0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
865     0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
866     0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
867     0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
868 };
869 
/*
 * N-gram table used by CharsetRecog_8859_9_tr::match() (ISO-8859-9, "tr").
 * Four rows of 16 values = 64 entries.  NGramParser::search() is a
 * fixed-depth binary search that requires exactly 64 entries, so the values
 * must stay in ascending order and the count must not change.
 */
870 static const int32_t ngrams_8859_9_tr[] = {
871     0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
872     0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
873     0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
874     0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
875 };
876 
~CharsetRecog_8859_1()877 CharsetRecog_8859_1::~CharsetRecog_8859_1()
878 {
879     // nothing to do
880 }
881 
match(InputText * textIn,CharsetMatch * results) const882 UBool CharsetRecog_8859_1::match(InputText *textIn, CharsetMatch *results) const {
883     const char *name = textIn->fC1Bytes? "windows-1252" : "ISO-8859-1";
884     uint32_t i;
885     int32_t bestConfidenceSoFar = -1;
886     for (i=0; i < ARRAY_SIZE(ngrams_8859_1) ; i++) {
887         const int32_t *ngrams = ngrams_8859_1[i].ngrams;
888         const char    *lang   = ngrams_8859_1[i].lang;
889         int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_1);
890         if (confidence > bestConfidenceSoFar) {
891             results->set(textIn, this, confidence, name, lang);
892             bestConfidenceSoFar = confidence;
893         }
894     }
895     return (bestConfidenceSoFar > 0);
896 }
897 
getName() const898 const char *CharsetRecog_8859_1::getName() const
899 {
900     return "ISO-8859-1";
901 }
902 
903 
~CharsetRecog_8859_2()904 CharsetRecog_8859_2::~CharsetRecog_8859_2()
905 {
906     // nothing to do
907 }
908 
match(InputText * textIn,CharsetMatch * results) const909 UBool CharsetRecog_8859_2::match(InputText *textIn, CharsetMatch *results) const {
910     const char *name = textIn->fC1Bytes? "windows-1250" : "ISO-8859-2";
911     uint32_t i;
912     int32_t bestConfidenceSoFar = -1;
913     for (i=0; i < ARRAY_SIZE(ngrams_8859_2) ; i++) {
914         const int32_t *ngrams = ngrams_8859_2[i].ngrams;
915         const char    *lang   = ngrams_8859_2[i].lang;
916         int32_t confidence = match_sbcs(textIn, ngrams, charMap_8859_2);
917         if (confidence > bestConfidenceSoFar) {
918             results->set(textIn, this, confidence, name, lang);
919             bestConfidenceSoFar = confidence;
920         }
921     }
922     return (bestConfidenceSoFar > 0);
923 }
924 
getName() const925 const char *CharsetRecog_8859_2::getName() const
926 {
927     return "ISO-8859-2";
928 }
929 
930 
~CharsetRecog_8859_5()931 CharsetRecog_8859_5::~CharsetRecog_8859_5()
932 {
933     // nothing to do
934 }
935 
getName() const936 const char *CharsetRecog_8859_5::getName() const
937 {
938     return "ISO-8859-5";
939 }
940 
~CharsetRecog_8859_5_ru()941 CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
942 {
943     // nothing to do
944 }
945 
getLanguage() const946 const char *CharsetRecog_8859_5_ru::getLanguage() const
947 {
948     return "ru";
949 }
950 
match(InputText * textIn,CharsetMatch * results) const951 UBool CharsetRecog_8859_5_ru::match(InputText *textIn, CharsetMatch *results) const
952 {
953     int32_t confidence = match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);
954     results->set(textIn, this, confidence);
955     return (confidence > 0);
956 }
957 
~CharsetRecog_8859_6()958 CharsetRecog_8859_6::~CharsetRecog_8859_6()
959 {
960     // nothing to do
961 }
962 
getName() const963 const char *CharsetRecog_8859_6::getName() const
964 {
965     return "ISO-8859-6";
966 }
967 
~CharsetRecog_8859_6_ar()968 CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
969 {
970     // nothing to do
971 }
972 
getLanguage() const973 const char *CharsetRecog_8859_6_ar::getLanguage() const
974 {
975     return "ar";
976 }
977 
match(InputText * textIn,CharsetMatch * results) const978 UBool CharsetRecog_8859_6_ar::match(InputText *textIn, CharsetMatch *results) const
979 {
980     int32_t confidence = match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);
981     results->set(textIn, this, confidence);
982     return (confidence > 0);
983 }
984 
~CharsetRecog_8859_7()985 CharsetRecog_8859_7::~CharsetRecog_8859_7()
986 {
987     // nothing to do
988 }
989 
getName() const990 const char *CharsetRecog_8859_7::getName() const
991 {
992     return "ISO-8859-7";
993 }
994 
~CharsetRecog_8859_7_el()995 CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
996 {
997     // nothing to do
998 }
999 
getLanguage() const1000 const char *CharsetRecog_8859_7_el::getLanguage() const
1001 {
1002     return "el";
1003 }
1004 
match(InputText * textIn,CharsetMatch * results) const1005 UBool CharsetRecog_8859_7_el::match(InputText *textIn, CharsetMatch *results) const
1006 {
1007     const char *name = textIn->fC1Bytes? "windows-1253" : "ISO-8859-7";
1008     int32_t confidence = match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);
1009     results->set(textIn, this, confidence, name, "el");
1010     return (confidence > 0);
1011 }
1012 
~CharsetRecog_8859_8()1013 CharsetRecog_8859_8::~CharsetRecog_8859_8()
1014 {
1015     // nothing to do
1016 }
1017 
getName() const1018 const char *CharsetRecog_8859_8::getName() const
1019 {
1020     return "ISO-8859-8";
1021 }
1022 
~CharsetRecog_8859_8_I_he()1023 CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he ()
1024 {
1025     // nothing to do
1026 }
1027 
getName() const1028 const char *CharsetRecog_8859_8_I_he::getName() const
1029 {
1030     return "ISO-8859-8-I";
1031 }
1032 
getLanguage() const1033 const char *CharsetRecog_8859_8_I_he::getLanguage() const
1034 {
1035     return "he";
1036 }
1037 
match(InputText * textIn,CharsetMatch * results) const1038 UBool CharsetRecog_8859_8_I_he::match(InputText *textIn, CharsetMatch *results) const
1039 {
1040     const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8-I";
1041     int32_t confidence = match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);
1042     results->set(textIn, this, confidence, name, "he");
1043     return (confidence > 0);
1044 }
1045 
~CharsetRecog_8859_8_he()1046 CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
1047 {
1048     // od ot gnihton
1049 }
1050 
getLanguage() const1051 const char *CharsetRecog_8859_8_he::getLanguage() const
1052 {
1053     return "he";
1054 }
1055 
match(InputText * textIn,CharsetMatch * results) const1056 UBool CharsetRecog_8859_8_he::match(InputText *textIn, CharsetMatch *results) const
1057 {
1058     const char *name = textIn->fC1Bytes? "windows-1255" : "ISO-8859-8";
1059     int32_t confidence = match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);
1060     results->set(textIn, this, confidence, name, "he");
1061     return (confidence > 0);
1062 }
1063 
~CharsetRecog_8859_9()1064 CharsetRecog_8859_9::~CharsetRecog_8859_9()
1065 {
1066     // nothing to do
1067 }
1068 
getName() const1069 const char *CharsetRecog_8859_9::getName() const
1070 {
1071     return "ISO-8859-9";
1072 }
1073 
~CharsetRecog_8859_9_tr()1074 CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
1075 {
1076     // nothing to do
1077 }
1078 
getLanguage() const1079 const char *CharsetRecog_8859_9_tr::getLanguage() const
1080 {
1081     return "tr";
1082 }
1083 
match(InputText * textIn,CharsetMatch * results) const1084 UBool CharsetRecog_8859_9_tr::match(InputText *textIn, CharsetMatch *results) const
1085 {
1086     const char *name = textIn->fC1Bytes? "windows-1254" : "ISO-8859-9";
1087     int32_t confidence = match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);
1088     results->set(textIn, this, confidence, name, "tr");
1089     return (confidence > 0);
1090 }
1091 
~CharsetRecog_windows_1256()1092 CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
1093 {
1094     // nothing to do
1095 }
1096 
getName() const1097 const char *CharsetRecog_windows_1256::getName() const
1098 {
1099     return  "windows-1256";
1100 }
1101 
getLanguage() const1102 const char *CharsetRecog_windows_1256::getLanguage() const
1103 {
1104     return "ar";
1105 }
1106 
match(InputText * textIn,CharsetMatch * results) const1107 UBool CharsetRecog_windows_1256::match(InputText *textIn, CharsetMatch *results) const
1108 {
1109     int32_t confidence = match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);
1110     results->set(textIn, this, confidence);
1111     return (confidence > 0);
1112 }
1113 
~CharsetRecog_windows_1251()1114 CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
1115 {
1116     // nothing to do
1117 }
1118 
getName() const1119 const char *CharsetRecog_windows_1251::getName() const
1120 {
1121     return  "windows-1251";
1122 }
1123 
getLanguage() const1124 const char *CharsetRecog_windows_1251::getLanguage() const
1125 {
1126     return "ru";
1127 }
1128 
match(InputText * textIn,CharsetMatch * results) const1129 UBool CharsetRecog_windows_1251::match(InputText *textIn, CharsetMatch *results) const
1130 {
1131     int32_t confidence = match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);
1132     results->set(textIn, this, confidence);
1133     return (confidence > 0);
1134 }
1135 
~CharsetRecog_KOI8_R()1136 CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
1137 {
1138     // nothing to do
1139 }
1140 
getName() const1141 const char *CharsetRecog_KOI8_R::getName() const
1142 {
1143     return  "KOI8-R";
1144 }
1145 
getLanguage() const1146 const char *CharsetRecog_KOI8_R::getLanguage() const
1147 {
1148     return "ru";
1149 }
1150 
match(InputText * textIn,CharsetMatch * results) const1151 UBool CharsetRecog_KOI8_R::match(InputText *textIn, CharsetMatch *results) const
1152 {
1153     int32_t confidence = match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
1154     results->set(textIn, this, confidence);
1155     return (confidence > 0);
1156 }
1157 
~CharsetRecog_IBM424_he()1158 CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he()
1159 {
1160     // nothing to do
1161 }
1162 
getLanguage() const1163 const char *CharsetRecog_IBM424_he::getLanguage() const
1164 {
1165     return "he";
1166 }
1167 
~CharsetRecog_IBM424_he_rtl()1168 CharsetRecog_IBM424_he_rtl::~CharsetRecog_IBM424_he_rtl()
1169 {
1170     // nothing to do
1171 }
1172 
getName() const1173 const char *CharsetRecog_IBM424_he_rtl::getName() const
1174 {
1175     return  "IBM424_rtl";
1176 }
1177 
match(InputText * textIn,CharsetMatch * results) const1178 UBool CharsetRecog_IBM424_he_rtl::match(InputText *textIn, CharsetMatch *results) const
1179 {
1180     int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_rtl, charMap_IBM424_he);
1181     results->set(textIn, this, confidence);
1182     return (confidence > 0);
1183 }
1184 
~CharsetRecog_IBM424_he_ltr()1185 CharsetRecog_IBM424_he_ltr::~CharsetRecog_IBM424_he_ltr()
1186 {
1187     // nothing to do
1188 }
1189 
getName() const1190 const char *CharsetRecog_IBM424_he_ltr::getName() const
1191 {
1192     return  "IBM424_ltr";
1193 }
1194 
match(InputText * textIn,CharsetMatch * results) const1195 UBool CharsetRecog_IBM424_he_ltr::match(InputText *textIn, CharsetMatch *results) const
1196 {
1197     int32_t confidence = match_sbcs(textIn, ngrams_IBM424_he_ltr, charMap_IBM424_he);
1198     results->set(textIn, this, confidence);
1199     return (confidence > 0);
1200 }
1201 
~CharsetRecog_IBM420_ar()1202 CharsetRecog_IBM420_ar::~CharsetRecog_IBM420_ar()
1203 {
1204     // nothing to do
1205 }
1206 
getLanguage() const1207 const char *CharsetRecog_IBM420_ar::getLanguage() const
1208 {
1209     return "ar";
1210 }
1211 
1212 
match_sbcs(InputText * det,const int32_t ngrams[],const uint8_t byteMap[]) const1213 int32_t CharsetRecog_IBM420_ar::match_sbcs(InputText *det, const int32_t ngrams[],  const uint8_t byteMap[]) const
1214 {
1215     NGramParser_IBM420 parser(ngrams, byteMap);
1216     int32_t result;
1217 
1218     result = parser.parse(det);
1219 
1220     return result;
1221 }
1222 
~CharsetRecog_IBM420_ar_rtl()1223 CharsetRecog_IBM420_ar_rtl::~CharsetRecog_IBM420_ar_rtl()
1224 {
1225     // nothing to do
1226 }
1227 
getName() const1228 const char *CharsetRecog_IBM420_ar_rtl::getName() const
1229 {
1230     return  "IBM420_rtl";
1231 }
1232 
match(InputText * textIn,CharsetMatch * results) const1233 UBool CharsetRecog_IBM420_ar_rtl::match(InputText *textIn, CharsetMatch *results) const
1234 {
1235     int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_rtl, charMap_IBM420_ar);
1236     results->set(textIn, this, confidence);
1237     return (confidence > 0);
1238 }
1239 
~CharsetRecog_IBM420_ar_ltr()1240 CharsetRecog_IBM420_ar_ltr::~CharsetRecog_IBM420_ar_ltr()
1241 {
1242     // nothing to do
1243 }
1244 
getName() const1245 const char *CharsetRecog_IBM420_ar_ltr::getName() const
1246 {
1247     return  "IBM420_ltr";
1248 }
1249 
match(InputText * textIn,CharsetMatch * results) const1250 UBool CharsetRecog_IBM420_ar_ltr::match(InputText *textIn, CharsetMatch *results) const
1251 {
1252     int32_t confidence = match_sbcs(textIn, ngrams_IBM420_ar_ltr, charMap_IBM420_ar);
1253     results->set(textIn, this, confidence);
1254     return (confidence > 0);
1255 }
1256 
1257 U_NAMESPACE_END
1258 #endif
1259 
1260