1 /*
2 **********************************************************************
3 * Copyright (C) 2005-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */
7
8 #include "unicode/utypes.h"
9
10 #if !UCONFIG_NO_CONVERSION
11 #include "csrsbcs.h"
12
13 #include <stdio.h>
14
15 #define N_GRAM_SIZE 3
16 #define N_GRAM_MASK 0xFFFFFF
17
18 U_NAMESPACE_BEGIN
19
NGramParser(const int32_t * theNgramList,const uint8_t * theCharMap)20 NGramParser::NGramParser(const int32_t *theNgramList, const uint8_t *theCharMap)
21 :byteIndex(0), ngram(0)
22 {
23 ngramList = theNgramList;
24 charMap = theCharMap;
25
26 ngramCount = hitCount = 0;
27 }
28
29 /*
30 * Binary search for value in table, which must have exactly 64 entries.
31 */
32
search(const int32_t * table,int32_t value)33 int32_t NGramParser::search(const int32_t *table, int32_t value)
34 {
35 int32_t index = 0;
36
37 if (table[index + 32] <= value) {
38 index += 32;
39 }
40
41 if (table[index + 16] <= value) {
42 index += 16;
43 }
44
45 if (table[index + 8] <= value) {
46 index += 8;
47 }
48
49 if (table[index + 4] <= value) {
50 index += 4;
51 }
52
53 if (table[index + 2] <= value) {
54 index += 2;
55 }
56
57 if (table[index + 1] <= value) {
58 index += 1;
59 }
60
61 if (table[index] > value) {
62 index -= 1;
63 }
64
65 if (index < 0 || table[index] != value) {
66 return -1;
67 }
68
69 return index;
70 }
71
lookup(int32_t thisNgram)72 void NGramParser::lookup(int32_t thisNgram)
73 {
74 ngramCount += 1;
75
76 if (search(ngramList, thisNgram) >= 0) {
77 hitCount += 1;
78 }
79
80 }
81
addByte(int32_t b)82 void NGramParser::addByte(int32_t b)
83 {
84 ngram = ((ngram << 8) + b) & N_GRAM_MASK;
85 lookup(ngram);
86 }
87
nextByte(InputText * det)88 int32_t NGramParser::nextByte(InputText *det)
89 {
90 if (byteIndex >= det->fInputLen) {
91 return -1;
92 }
93
94 return det->fInputBytes[byteIndex++];
95 }
96
parse(InputText * det)97 int32_t NGramParser::parse(InputText *det)
98 {
99 int32_t b;
100 bool ignoreSpace = FALSE;
101
102 while ((b = nextByte(det)) >= 0) {
103 uint8_t mb = charMap[b];
104
105 // TODO: 0x20 might not be a space in all character sets...
106 if (mb != 0) {
107 if (!(mb == 0x20 && ignoreSpace)) {
108 addByte(mb);
109 }
110
111 ignoreSpace = (mb == 0x20);
112 }
113 }
114
115 // TODO: Is this OK? The buffer could have ended in the middle of a word...
116 addByte(0x20);
117
118 double rawPercent = (double) hitCount / (double) ngramCount;
119
120 // if (rawPercent <= 2.0) {
121 // return 0;
122 // }
123
124 // TODO - This is a bit of a hack to take care of a case
125 // were we were getting a confidence of 135...
126 if (rawPercent > 0.33) {
127 return 98;
128 }
129
130 return (int32_t) (rawPercent * 300.0);
131 }
132
CharsetRecog_sbcs()133 CharsetRecog_sbcs::CharsetRecog_sbcs()
134 : haveC1Bytes(FALSE)
135 {
136 // nothing else to do
137 }
138
~CharsetRecog_sbcs()139 CharsetRecog_sbcs::~CharsetRecog_sbcs()
140 {
141 // nothing to do
142 }
143
match_sbcs(InputText * det,const int32_t ngrams[],const uint8_t byteMap[])144 int32_t CharsetRecog_sbcs::match_sbcs(InputText *det, const int32_t ngrams[], const uint8_t byteMap[])
145 {
146 NGramParser *parser = new NGramParser(ngrams, byteMap);
147 int32_t result;
148
149 haveC1Bytes = det->fC1Bytes;
150 result = parser->parse(det);
151 delete parser;
152
153 return result;
154 }
155
/*
 * Byte mapping table, indexed by input byte (32 rows of 8 entries).
 * ASCII letters 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, byte 0x27
 * maps to 0x00 (NGramParser::parse() skips bytes mapped to 0), and most
 * other bytes below 0xC0 map to 0x20. In the 0xC0-0xFF range the upper row
 * group mostly maps onto the values of the lower one.
 * Presumably the ISO-8859-1 map (inferred from the name).
 */
156 static const uint8_t charMap_8859_1[] = {
157 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
158 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
159 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
160 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
161 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
162 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
163 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
164 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
165 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
166 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
167 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
168 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
169 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
170 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
171 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
172 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
173 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
174 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
175 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
176 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
177 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
178 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
179 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
180 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
181 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
182 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
183 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
184 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
185 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
186 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
187 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
188 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
189 };
190
/*
 * Byte mapping table, indexed by input byte (32 rows of 8 entries).
 * ASCII letters 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, byte 0x27
 * maps to 0x00 (NGramParser::parse() skips bytes mapped to 0), and the
 * remaining bytes below 0xA0 map to 0x20. Several 0xA0-0xAF bytes map to
 * values in 0xB0-0xBF, and 0xC0-0xDF mostly map onto 0xE0-0xFF values.
 * Presumably the ISO-8859-2 map (inferred from the name).
 */
191 static const uint8_t charMap_8859_2[] = {
192 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
193 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
194 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
195 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
196 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
197 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
198 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
199 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
200 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
201 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
202 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
203 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
204 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
205 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
206 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
207 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
208 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
209 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
210 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
211 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
212 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0x20,
213 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
214 0x20, 0xB1, 0x20, 0xB3, 0x20, 0xB5, 0xB6, 0xB7,
215 0x20, 0xB9, 0xBA, 0xBB, 0xBC, 0x20, 0xBE, 0xBF,
216 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
217 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
218 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
219 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xDF,
220 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
221 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
222 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
223 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
224 };
225
/*
 * Byte mapping table, indexed by input byte (32 rows of 8 entries).
 * ASCII letters 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, byte 0x27
 * maps to 0x00 (NGramParser::parse() skips bytes mapped to 0), and the
 * remaining bytes below 0xA0 map to 0x20. Bytes 0xA0-0xAF map to values in
 * 0xF0-0xFF and bytes 0xB0-0xCF map onto 0xD0-0xEF.
 * Presumably the ISO-8859-5 map (inferred from the name).
 */
226 static const uint8_t charMap_8859_5[] = {
227 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
228 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
229 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
230 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
231 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
232 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
233 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
234 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
235 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
236 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
237 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
238 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
239 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
240 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
241 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
242 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
243 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
244 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
245 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
246 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
247 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
248 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
249 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
250 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
251 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
252 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
253 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
254 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
255 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
256 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
257 0x20, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
258 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x20, 0xFE, 0xFF,
259 };
260
/*
 * Byte mapping table, indexed by input byte (32 rows of 8 entries).
 * ASCII letters 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, byte 0x27
 * maps to 0x00 (NGramParser::parse() skips bytes mapped to 0), and the
 * remaining bytes below 0xC0 map to 0x20. Bytes 0xC1-0xDA and 0xE0-0xEA map
 * to themselves; everything else in the upper range maps to 0x20.
 * Presumably the ISO-8859-6 map (inferred from the name).
 */
261 static const uint8_t charMap_8859_6[] = {
262 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
263 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
264 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
265 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
266 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
267 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
268 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
269 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
270 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
271 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
272 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
273 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
274 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
275 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
276 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
277 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
278 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
279 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
280 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
281 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
282 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
283 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
284 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
285 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
286 0x20, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
287 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
288 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
289 0xD8, 0xD9, 0xDA, 0x20, 0x20, 0x20, 0x20, 0x20,
290 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
291 0xE8, 0xE9, 0xEA, 0x20, 0x20, 0x20, 0x20, 0x20,
292 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
293 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
294 };
295
/*
 * Byte mapping table, indexed by input byte (32 rows of 8 entries).
 * ASCII letters 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, byte 0x27
 * maps to 0x00 (NGramParser::parse() skips bytes mapped to 0), and the
 * remaining bytes below 0xA0 map to 0x20. A few bytes in 0xA0-0xBF map to
 * non-space values, and 0xC0-0xDF mostly map onto 0xE0-0xFF values.
 * Presumably the ISO-8859-7 map (inferred from the name).
 */
296 static const uint8_t charMap_8859_7[] = {
297 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
298 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
299 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
300 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
301 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
302 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
303 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
304 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
305 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
306 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
307 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
308 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
309 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
310 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
311 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
312 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
313 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
314 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
315 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
316 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
317 0x20, 0xA1, 0xA2, 0x20, 0x20, 0x20, 0x20, 0x20,
318 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
319 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0xDC, 0x20,
320 0xDD, 0xDE, 0xDF, 0x20, 0xFC, 0x20, 0xFD, 0xFE,
321 0xC0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
322 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
323 0xF0, 0xF1, 0x20, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
324 0xF8, 0xF9, 0xFA, 0xFB, 0xDC, 0xDD, 0xDE, 0xDF,
325 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
326 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
327 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
328 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0x20,
329 };
330
/*
 * Byte mapping table, indexed by input byte (32 rows of 8 entries).
 * ASCII letters 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, byte 0x27
 * maps to 0x00 (NGramParser::parse() skips bytes mapped to 0), and the
 * remaining bytes below 0xE0 map to 0x20 except 0xB5. Bytes 0xE0-0xFA map
 * to themselves.
 * Presumably the ISO-8859-8 map (inferred from the name).
 */
331 static const uint8_t charMap_8859_8[] = {
332 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
333 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
334 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
335 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
336 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
337 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
338 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
339 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
340 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
341 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
342 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
343 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
344 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
345 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
346 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
347 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
348 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
349 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
350 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
351 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
352 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
353 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
354 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
355 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
356 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
357 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
358 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
359 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
360 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
361 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
362 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
363 0xF8, 0xF9, 0xFA, 0x20, 0x20, 0x20, 0x20, 0x20,
364 };
365
/*
 * Byte mapping table, indexed by input byte (32 rows of 8 entries).
 * ASCII letters 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, byte 0x27
 * maps to 0x00 (NGramParser::parse() skips bytes mapped to 0), and most
 * other bytes below 0xC0 map to 0x20. 0xC0-0xDF mostly map onto 0xE0-0xFF
 * values; note that byte 0xDD maps to 0x69.
 * Presumably the ISO-8859-9 map (inferred from the name).
 */
366 static const uint8_t charMap_8859_9[] = {
367 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
368 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
369 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
370 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
371 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
372 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
373 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
374 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
375 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
376 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
377 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
378 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
379 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
380 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
381 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
382 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
383 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
384 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
385 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
386 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
387 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
388 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
389 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
390 0x20, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0x20,
391 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
392 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
393 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
394 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0x69, 0xFE, 0xDF,
395 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
396 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
397 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0x20,
398 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
399 };
400
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably the trigrams for windows-1251 text (inferred from the name).
 */
401 static const int32_t ngrams_windows_1251[] = {
402 0x20E220, 0x20E2EE, 0x20E4EE, 0x20E7E0, 0x20E820, 0x20EAE0, 0x20EAEE, 0x20EDE0, 0x20EDE5, 0x20EEE1, 0x20EFEE, 0x20EFF0, 0x20F0E0, 0x20F1EE, 0x20F1F2, 0x20F2EE,
403 0x20F7F2, 0x20FDF2, 0xE0EDE8, 0xE0F2FC, 0xE3EE20, 0xE5EBFC, 0xE5EDE8, 0xE5F1F2, 0xE5F220, 0xE820EF, 0xE8E520, 0xE8E820, 0xE8FF20, 0xEBE5ED, 0xEBE820, 0xEBFCED,
404 0xEDE020, 0xEDE520, 0xEDE8E5, 0xEDE8FF, 0xEDEE20, 0xEDEEE2, 0xEE20E2, 0xEE20EF, 0xEE20F1, 0xEEE220, 0xEEE2E0, 0xEEE3EE, 0xEEE920, 0xEEEBFC, 0xEEEC20, 0xEEF1F2,
405 0xEFEEEB, 0xEFF0E5, 0xEFF0E8, 0xEFF0EE, 0xF0E0E2, 0xF0E5E4, 0xF1F2E0, 0xF1F2E2, 0xF1F2E8, 0xF1FF20, 0xF2E5EB, 0xF2EE20, 0xF2EEF0, 0xF2FC20, 0xF7F2EE, 0xFBF520,
406 };
407
/*
 * Byte mapping table, indexed by input byte (32 rows of 8 entries).
 * ASCII letters 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, byte 0x27
 * maps to 0x00 (NGramParser::parse() skips bytes mapped to 0), and the
 * remaining bytes below 0x80 map to 0x20. Bytes 0x80-0xBF map to a mix of
 * 0x20 and non-space values; bytes 0xC0-0xDF map onto 0xE0-0xFF, which map
 * to themselves.
 * Presumably the windows-1251 map (inferred from the name).
 */
408 static const uint8_t charMap_windows_1251[] = {
409 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
410 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
411 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
412 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
413 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
414 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
415 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
416 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
417 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
418 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
419 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
420 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
421 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
422 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
423 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
424 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
425 0x90, 0x83, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
426 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
427 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
428 0x20, 0x20, 0x9A, 0x20, 0x9C, 0x9D, 0x9E, 0x9F,
429 0x20, 0xA2, 0xA2, 0xBC, 0x20, 0xB4, 0x20, 0x20,
430 0xB8, 0x20, 0xBA, 0x20, 0x20, 0x20, 0x20, 0xBF,
431 0x20, 0x20, 0xB3, 0xB3, 0xB4, 0xB5, 0x20, 0x20,
432 0xB8, 0x20, 0xBA, 0x20, 0xBC, 0xBE, 0xBE, 0xBF,
433 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
434 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
435 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
436 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
437 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
438 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
439 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
440 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
441 };
442
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably the trigrams for windows-1256 text (inferred from the name).
 */
443 static const int32_t ngrams_windows_1256[] = {
444 0x20C7E1, 0x20C7E4, 0x20C8C7, 0x20DAE1, 0x20DDED, 0x20E1E1, 0x20E3E4, 0x20E6C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E120, 0xC7E1C3, 0xC7E1C7, 0xC7E1C8,
445 0xC7E1CA, 0xC7E1CC, 0xC7E1CD, 0xC7E1CF, 0xC7E1D3, 0xC7E1DA, 0xC7E1DE, 0xC7E1E3, 0xC7E1E6, 0xC7E1ED, 0xC7E320, 0xC7E420, 0xC7E4CA, 0xC820C7, 0xC920C7, 0xC920DD,
446 0xC920E1, 0xC920E3, 0xC920E6, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xDA20C7, 0xDAE1EC, 0xDDED20, 0xE120C7, 0xE1C920, 0xE1EC20, 0xE1ED20,
447 0xE320C7, 0xE3C720, 0xE3C920, 0xE3E420, 0xE420C7, 0xE520C7, 0xE5C720, 0xE6C7E1, 0xE6E420, 0xEC20C7, 0xED20C7, 0xED20E3, 0xED20E6, 0xEDC920, 0xEDD120, 0xEDE420,
448 };
449
/*
 * Byte mapping table, indexed by input byte (32 rows of 8 entries).
 * ASCII letters 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, byte 0x27
 * maps to 0x00 (NGramParser::parse() skips bytes mapped to 0), and the
 * remaining bytes below 0x80 map to 0x20. Bytes 0x80-0xBF map to a mix of
 * 0x20 and non-space values; most bytes in 0xC0-0xEF map to themselves, and
 * most bytes in 0xF0-0xFF map to 0x20.
 * Presumably the windows-1256 map (inferred from the name).
 */
450 static const uint8_t charMap_windows_1256[] = {
451 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
452 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
453 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
454 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
455 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
456 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
457 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
458 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
459 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
460 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
461 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
462 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
463 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
464 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
465 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
466 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
467 0x20, 0x81, 0x20, 0x83, 0x20, 0x20, 0x20, 0x20,
468 0x88, 0x20, 0x8A, 0x20, 0x9C, 0x8D, 0x8E, 0x8F,
469 0x90, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
470 0x98, 0x20, 0x9A, 0x20, 0x9C, 0x20, 0x20, 0x9F,
471 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
472 0x20, 0x20, 0xAA, 0x20, 0x20, 0x20, 0x20, 0x20,
473 0x20, 0x20, 0x20, 0x20, 0x20, 0xB5, 0x20, 0x20,
474 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
475 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
476 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
477 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0x20,
478 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
479 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
480 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
481 0x20, 0x20, 0x20, 0x20, 0xF4, 0x20, 0x20, 0x20,
482 0x20, 0xF9, 0x20, 0xFB, 0xFC, 0x20, 0x20, 0xFF,
483 };
484
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably the trigrams for KOI8-R text (inferred from the name).
 */
485 static const int32_t ngrams_KOI8_R[] = {
486 0x20C4CF, 0x20C920, 0x20CBC1, 0x20CBCF, 0x20CEC1, 0x20CEC5, 0x20CFC2, 0x20D0CF, 0x20D0D2, 0x20D2C1, 0x20D3CF, 0x20D3D4, 0x20D4CF, 0x20D720, 0x20D7CF, 0x20DAC1,
487 0x20DCD4, 0x20DED4, 0xC1CEC9, 0xC1D4D8, 0xC5CCD8, 0xC5CEC9, 0xC5D3D4, 0xC5D420, 0xC7CF20, 0xC920D0, 0xC9C520, 0xC9C920, 0xC9D120, 0xCCC5CE, 0xCCC920, 0xCCD8CE,
488 0xCEC120, 0xCEC520, 0xCEC9C5, 0xCEC9D1, 0xCECF20, 0xCECFD7, 0xCF20D0, 0xCF20D3, 0xCF20D7, 0xCFC7CF, 0xCFCA20, 0xCFCCD8, 0xCFCD20, 0xCFD3D4, 0xCFD720, 0xCFD7C1,
489 0xD0CFCC, 0xD0D2C5, 0xD0D2C9, 0xD0D2CF, 0xD2C1D7, 0xD2C5C4, 0xD3D120, 0xD3D4C1, 0xD3D4C9, 0xD3D4D7, 0xD4C5CC, 0xD4CF20, 0xD4CFD2, 0xD4D820, 0xD9C820, 0xDED4CF,
490 };
491
/*
 * Byte mapping table, indexed by input byte (32 rows of 8 entries).
 * ASCII letters 0x41-0x5A and 0x61-0x7A both map to 0x61-0x7A, byte 0x27
 * maps to 0x00 (NGramParser::parse() skips bytes mapped to 0), and the
 * remaining bytes below 0xC0 map to 0x20 except 0xA3 and 0xB3, which both
 * map to 0xA3. Bytes 0xC0-0xDF map to themselves and bytes 0xE0-0xFF map
 * onto 0xC0-0xDF.
 * Presumably the KOI8-R map (inferred from the name).
 */
492 static const uint8_t charMap_KOI8_R[] = {
493 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
494 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
495 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
496 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
497 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00,
498 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
499 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
500 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
501 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
502 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
503 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
504 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
505 0x20, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
506 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
507 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
508 0x78, 0x79, 0x7A, 0x20, 0x20, 0x20, 0x20, 0x20,
509 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
510 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
511 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
512 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
513 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
514 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
515 0x20, 0x20, 0x20, 0xA3, 0x20, 0x20, 0x20, 0x20,
516 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
517 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
518 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
519 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
520 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
521 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
522 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
523 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
524 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
525 };
526
527 // N-gram tables for ISO-8859-1, -2, -5, -6, -7, -8 and -9.
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably English trigrams for ISO-8859-1 (inferred from the name).
 */
528 static const int32_t ngrams_8859_1_en[] = {
529 0x206120, 0x20616E, 0x206265, 0x20636F, 0x20666F, 0x206861, 0x206865, 0x20696E, 0x206D61, 0x206F66, 0x207072, 0x207265, 0x207361, 0x207374, 0x207468, 0x20746F,
530 0x207768, 0x616964, 0x616C20, 0x616E20, 0x616E64, 0x617320, 0x617420, 0x617465, 0x617469, 0x642061, 0x642074, 0x652061, 0x652073, 0x652074, 0x656420, 0x656E74,
531 0x657220, 0x657320, 0x666F72, 0x686174, 0x686520, 0x686572, 0x696420, 0x696E20, 0x696E67, 0x696F6E, 0x697320, 0x6E2061, 0x6E2074, 0x6E6420, 0x6E6720, 0x6E7420,
532 0x6F6620, 0x6F6E20, 0x6F7220, 0x726520, 0x727320, 0x732061, 0x732074, 0x736169, 0x737420, 0x742074, 0x746572, 0x746861, 0x746865, 0x74696F, 0x746F20, 0x747320,
533 };
534
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably Danish trigrams for ISO-8859-1 (inferred from the name).
 */
535 static const int32_t ngrams_8859_1_da[] = {
536 0x206166, 0x206174, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207369, 0x207374, 0x207469, 0x207669, 0x616620,
537 0x616E20, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646572, 0x646574, 0x652073, 0x656420, 0x656465, 0x656E20, 0x656E64, 0x657220, 0x657265, 0x657320,
538 0x657420, 0x666F72, 0x676520, 0x67656E, 0x676572, 0x696765, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6572, 0x6C6967, 0x6C6C65, 0x6D6564, 0x6E6465, 0x6E6520,
539 0x6E6720, 0x6E6765, 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722064, 0x722065, 0x722073, 0x726520, 0x737465, 0x742073, 0x746520, 0x746572, 0x74696C, 0x766572,
540 };
541
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably German trigrams for ISO-8859-1 (inferred from the name).
 */
542 static const int32_t ngrams_8859_1_de[] = {
543 0x20616E, 0x206175, 0x206265, 0x206461, 0x206465, 0x206469, 0x206569, 0x206765, 0x206861, 0x20696E, 0x206D69, 0x207363, 0x207365, 0x20756E, 0x207665, 0x20766F,
544 0x207765, 0x207A75, 0x626572, 0x636820, 0x636865, 0x636874, 0x646173, 0x64656E, 0x646572, 0x646965, 0x652064, 0x652073, 0x65696E, 0x656974, 0x656E20, 0x657220,
545 0x657320, 0x67656E, 0x68656E, 0x687420, 0x696368, 0x696520, 0x696E20, 0x696E65, 0x697420, 0x6C6963, 0x6C6C65, 0x6E2061, 0x6E2064, 0x6E2073, 0x6E6420, 0x6E6465,
546 0x6E6520, 0x6E6720, 0x6E6765, 0x6E7465, 0x722064, 0x726465, 0x726569, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x756E64, 0x756E67, 0x766572,
547 };
548
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably Spanish trigrams for ISO-8859-1 (inferred from the name).
 */
549 static const int32_t ngrams_8859_1_es[] = {
550 0x206120, 0x206361, 0x20636F, 0x206465, 0x20656C, 0x20656E, 0x206573, 0x20696E, 0x206C61, 0x206C6F, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
551 0x20756E, 0x207920, 0x612063, 0x612064, 0x612065, 0x61206C, 0x612070, 0x616369, 0x61646F, 0x616C20, 0x617220, 0x617320, 0x6369F3, 0x636F6E, 0x646520, 0x64656C,
552 0x646F20, 0x652064, 0x652065, 0x65206C, 0x656C20, 0x656E20, 0x656E74, 0x657320, 0x657374, 0x69656E, 0x69F36E, 0x6C6120, 0x6C6F73, 0x6E2065, 0x6E7465, 0x6F2064,
553 0x6F2065, 0x6F6E20, 0x6F7220, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732064, 0x732065, 0x732070, 0x736520, 0x746520, 0x746F20, 0x756520, 0xF36E20,
554 };
555
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably French trigrams for ISO-8859-1 (inferred from the name).
 */
556 static const int32_t ngrams_8859_1_fr[] = {
557 0x206175, 0x20636F, 0x206461, 0x206465, 0x206475, 0x20656E, 0x206574, 0x206C61, 0x206C65, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207365, 0x20736F, 0x20756E,
558 0x20E020, 0x616E74, 0x617469, 0x636520, 0x636F6E, 0x646520, 0x646573, 0x647520, 0x652061, 0x652063, 0x652064, 0x652065, 0x65206C, 0x652070, 0x652073, 0x656E20,
559 0x656E74, 0x657220, 0x657320, 0x657420, 0x657572, 0x696F6E, 0x697320, 0x697420, 0x6C6120, 0x6C6520, 0x6C6573, 0x6D656E, 0x6E2064, 0x6E6520, 0x6E7320, 0x6E7420,
560 0x6F6E20, 0x6F6E74, 0x6F7572, 0x717565, 0x72206C, 0x726520, 0x732061, 0x732064, 0x732065, 0x73206C, 0x732070, 0x742064, 0x746520, 0x74696F, 0x756520, 0x757220,
561 };
562
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably Italian trigrams for ISO-8859-1 (inferred from the name).
 */
563 static const int32_t ngrams_8859_1_it[] = {
564 0x20616C, 0x206368, 0x20636F, 0x206465, 0x206469, 0x206520, 0x20696C, 0x20696E, 0x206C61, 0x207065, 0x207072, 0x20756E, 0x612063, 0x612064, 0x612070, 0x612073,
565 0x61746F, 0x636865, 0x636F6E, 0x64656C, 0x646920, 0x652061, 0x652063, 0x652064, 0x652069, 0x65206C, 0x652070, 0x652073, 0x656C20, 0x656C6C, 0x656E74, 0x657220,
566 0x686520, 0x692061, 0x692063, 0x692064, 0x692073, 0x696120, 0x696C20, 0x696E20, 0x696F6E, 0x6C6120, 0x6C6520, 0x6C6920, 0x6C6C61, 0x6E6520, 0x6E6920, 0x6E6F20,
567 0x6E7465, 0x6F2061, 0x6F2064, 0x6F2069, 0x6F2073, 0x6F6E20, 0x6F6E65, 0x706572, 0x726120, 0x726520, 0x736920, 0x746120, 0x746520, 0x746920, 0x746F20, 0x7A696F,
568 };
569
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably Dutch trigrams for ISO-8859-1 (inferred from the name).
 */
570 static const int32_t ngrams_8859_1_nl[] = {
571 0x20616C, 0x206265, 0x206461, 0x206465, 0x206469, 0x206565, 0x20656E, 0x206765, 0x206865, 0x20696E, 0x206D61, 0x206D65, 0x206F70, 0x207465, 0x207661, 0x207665,
572 0x20766F, 0x207765, 0x207A69, 0x61616E, 0x616172, 0x616E20, 0x616E64, 0x617220, 0x617420, 0x636874, 0x646520, 0x64656E, 0x646572, 0x652062, 0x652076, 0x65656E,
573 0x656572, 0x656E20, 0x657220, 0x657273, 0x657420, 0x67656E, 0x686574, 0x696520, 0x696E20, 0x696E67, 0x697320, 0x6E2062, 0x6E2064, 0x6E2065, 0x6E2068, 0x6E206F,
574 0x6E2076, 0x6E6465, 0x6E6720, 0x6F6E64, 0x6F6F72, 0x6F7020, 0x6F7220, 0x736368, 0x737465, 0x742064, 0x746520, 0x74656E, 0x746572, 0x76616E, 0x766572, 0x766F6F,
575 };
576
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably Norwegian trigrams for ISO-8859-1 (inferred from the name).
 */
577 static const int32_t ngrams_8859_1_no[] = {
578 0x206174, 0x206176, 0x206465, 0x20656E, 0x206572, 0x20666F, 0x206861, 0x206920, 0x206D65, 0x206F67, 0x2070E5, 0x207365, 0x20736B, 0x20736F, 0x207374, 0x207469,
579 0x207669, 0x20E520, 0x616E64, 0x617220, 0x617420, 0x646520, 0x64656E, 0x646574, 0x652073, 0x656420, 0x656E20, 0x656E65, 0x657220, 0x657265, 0x657420, 0x657474,
580 0x666F72, 0x67656E, 0x696B6B, 0x696C20, 0x696E67, 0x6B6520, 0x6B6B65, 0x6C6520, 0x6C6C65, 0x6D6564, 0x6D656E, 0x6E2073, 0x6E6520, 0x6E6720, 0x6E6765, 0x6E6E65,
581 0x6F6720, 0x6F6D20, 0x6F7220, 0x70E520, 0x722073, 0x726520, 0x736F6D, 0x737465, 0x742073, 0x746520, 0x74656E, 0x746572, 0x74696C, 0x747420, 0x747465, 0x766572,
582 };
583
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably Portuguese trigrams for ISO-8859-1 (inferred from the name).
 */
584 static const int32_t ngrams_8859_1_pt[] = {
585 0x206120, 0x20636F, 0x206461, 0x206465, 0x20646F, 0x206520, 0x206573, 0x206D61, 0x206E6F, 0x206F20, 0x207061, 0x20706F, 0x207072, 0x207175, 0x207265, 0x207365,
586 0x20756D, 0x612061, 0x612063, 0x612064, 0x612070, 0x616465, 0x61646F, 0x616C20, 0x617220, 0x617261, 0x617320, 0x636F6D, 0x636F6E, 0x646120, 0x646520, 0x646F20,
587 0x646F73, 0x652061, 0x652064, 0x656D20, 0x656E74, 0x657320, 0x657374, 0x696120, 0x696361, 0x6D656E, 0x6E7465, 0x6E746F, 0x6F2061, 0x6F2063, 0x6F2064, 0x6F2065,
588 0x6F2070, 0x6F7320, 0x706172, 0x717565, 0x726120, 0x726573, 0x732061, 0x732064, 0x732065, 0x732070, 0x737461, 0x746520, 0x746F20, 0x756520, 0xE36F20, 0xE7E36F,
589 };
590
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably Swedish trigrams for ISO-8859-1 (inferred from the name).
 */
591 static const int32_t ngrams_8859_1_sv[] = {
592 0x206174, 0x206176, 0x206465, 0x20656E, 0x2066F6, 0x206861, 0x206920, 0x20696E, 0x206B6F, 0x206D65, 0x206F63, 0x2070E5, 0x20736B, 0x20736F, 0x207374, 0x207469,
593 0x207661, 0x207669, 0x20E472, 0x616465, 0x616E20, 0x616E64, 0x617220, 0x617474, 0x636820, 0x646520, 0x64656E, 0x646572, 0x646574, 0x656420, 0x656E20, 0x657220,
594 0x657420, 0x66F672, 0x67656E, 0x696C6C, 0x696E67, 0x6B6120, 0x6C6C20, 0x6D6564, 0x6E2073, 0x6E6120, 0x6E6465, 0x6E6720, 0x6E6765, 0x6E696E, 0x6F6368, 0x6F6D20,
595 0x6F6E20, 0x70E520, 0x722061, 0x722073, 0x726120, 0x736B61, 0x736F6D, 0x742073, 0x746120, 0x746520, 0x746572, 0x74696C, 0x747420, 0x766172, 0xE47220, 0xF67220,
596 };
597
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably Czech trigrams for ISO-8859-2 (inferred from the name).
 */
598 static const int32_t ngrams_8859_2_cs[] = {
599 0x206120, 0x206279, 0x20646F, 0x206A65, 0x206E61, 0x206E65, 0x206F20, 0x206F64, 0x20706F, 0x207072, 0x2070F8, 0x20726F, 0x207365, 0x20736F, 0x207374, 0x20746F,
600 0x207620, 0x207679, 0x207A61, 0x612070, 0x636520, 0x636820, 0x652070, 0x652073, 0x652076, 0x656D20, 0x656EED, 0x686F20, 0x686F64, 0x697374, 0x6A6520, 0x6B7465,
601 0x6C6520, 0x6C6920, 0x6E6120, 0x6EE920, 0x6EEC20, 0x6EED20, 0x6F2070, 0x6F646E, 0x6F6A69, 0x6F7374, 0x6F7520, 0x6F7661, 0x706F64, 0x706F6A, 0x70726F, 0x70F865,
602 0x736520, 0x736F75, 0x737461, 0x737469, 0x73746E, 0x746572, 0x746EED, 0x746F20, 0x752070, 0xBE6520, 0xE16EED, 0xE9686F, 0xED2070, 0xED2073, 0xED6D20, 0xF86564,
603 };
604
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably Hungarian trigrams for ISO-8859-2 (inferred from the name).
 */
605 static const int32_t ngrams_8859_2_hu[] = {
606 0x206120, 0x20617A, 0x206265, 0x206567, 0x20656C, 0x206665, 0x206861, 0x20686F, 0x206973, 0x206B65, 0x206B69, 0x206BF6, 0x206C65, 0x206D61, 0x206D65, 0x206D69,
607 0x206E65, 0x20737A, 0x207465, 0x20E973, 0x612061, 0x61206B, 0x61206D, 0x612073, 0x616B20, 0x616E20, 0x617A20, 0x62616E, 0x62656E, 0x656779, 0x656B20, 0x656C20,
608 0x656C65, 0x656D20, 0x656E20, 0x657265, 0x657420, 0x657465, 0x657474, 0x677920, 0x686F67, 0x696E74, 0x697320, 0x6B2061, 0x6BF67A, 0x6D6567, 0x6D696E, 0x6E2061,
609 0x6E616B, 0x6E656B, 0x6E656D, 0x6E7420, 0x6F6779, 0x732061, 0x737A65, 0x737A74, 0x737AE1, 0x73E967, 0x742061, 0x747420, 0x74E173, 0x7A6572, 0xE16E20, 0xE97320,
610 };
611
/*
 * Trigram table. Each value packs three mapped bytes, oldest in the high
 * byte (the layout NGramParser::addByte() builds). NGramParser::search()
 * requires such a table to hold exactly 64 entries in ascending order.
 * Presumably Polish trigrams for ISO-8859-2 (inferred from the name).
 */
612 static const int32_t ngrams_8859_2_pl[] = {
613 0x20637A, 0x20646F, 0x206920, 0x206A65, 0x206B6F, 0x206D61, 0x206D69, 0x206E61, 0x206E69, 0x206F64, 0x20706F, 0x207072, 0x207369, 0x207720, 0x207769, 0x207779,
614 0x207A20, 0x207A61, 0x612070, 0x612077, 0x616E69, 0x636820, 0x637A65, 0x637A79, 0x646F20, 0x647A69, 0x652070, 0x652073, 0x652077, 0x65207A, 0x65676F, 0x656A20,
615 0x656D20, 0x656E69, 0x676F20, 0x696120, 0x696520, 0x69656A, 0x6B6120, 0x6B6920, 0x6B6965, 0x6D6965, 0x6E6120, 0x6E6961, 0x6E6965, 0x6F2070, 0x6F7761, 0x6F7769,
616 0x706F6C, 0x707261, 0x70726F, 0x70727A, 0x727A65, 0x727A79, 0x7369EA, 0x736B69, 0x737461, 0x776965, 0x796368, 0x796D20, 0x7A6520, 0x7A6965, 0x7A7920, 0xF37720,
617 };
618
/*
 * Trigram table passed to match_sbcs by CharsetRecog_8859_2_ro::match,
 * together with charMap_8859_2. Each entry holds three byte values packed
 * into the low 24 bits. There are 64 entries, listed in ascending order.
 * NOTE(review): presumably looked up through NGramParser::search, which
 * requires a table of exactly 64 entries - keep the count and ordering intact.
 */
static const int32_t ngrams_8859_2_ro[] = {
    0x206120, 0x206163, 0x206361, 0x206365, 0x20636F, 0x206375, 0x206465, 0x206469, 0x206C61, 0x206D61, 0x207065, 0x207072, 0x207365, 0x2073E3, 0x20756E, 0x20BA69,
    0x20EE6E, 0x612063, 0x612064, 0x617265, 0x617420, 0x617465, 0x617520, 0x636172, 0x636F6E, 0x637520, 0x63E320, 0x646520, 0x652061, 0x652063, 0x652064, 0x652070,
    0x652073, 0x656120, 0x656920, 0x656C65, 0x656E74, 0x657374, 0x692061, 0x692063, 0x692064, 0x692070, 0x696520, 0x696920, 0x696E20, 0x6C6120, 0x6C6520, 0x6C6F72,
    0x6C7569, 0x6E6520, 0x6E7472, 0x6F7220, 0x70656E, 0x726520, 0x726561, 0x727520, 0x73E320, 0x746520, 0x747275, 0x74E320, 0x756920, 0x756C20, 0xBA6920, 0xEE6E20,
};
625
/*
 * Trigram table passed to match_sbcs by CharsetRecog_8859_5_ru::match,
 * together with charMap_8859_5. Each entry holds three byte values packed
 * into the low 24 bits. There are 64 entries, listed in ascending order.
 * NOTE(review): presumably looked up through NGramParser::search, which
 * requires a table of exactly 64 entries - keep the count and ordering intact.
 */
static const int32_t ngrams_8859_5_ru[] = {
    0x20D220, 0x20D2DE, 0x20D4DE, 0x20D7D0, 0x20D820, 0x20DAD0, 0x20DADE, 0x20DDD0, 0x20DDD5, 0x20DED1, 0x20DFDE, 0x20DFE0, 0x20E0D0, 0x20E1DE, 0x20E1E2, 0x20E2DE,
    0x20E7E2, 0x20EDE2, 0xD0DDD8, 0xD0E2EC, 0xD3DE20, 0xD5DBEC, 0xD5DDD8, 0xD5E1E2, 0xD5E220, 0xD820DF, 0xD8D520, 0xD8D820, 0xD8EF20, 0xDBD5DD, 0xDBD820, 0xDBECDD,
    0xDDD020, 0xDDD520, 0xDDD8D5, 0xDDD8EF, 0xDDDE20, 0xDDDED2, 0xDE20D2, 0xDE20DF, 0xDE20E1, 0xDED220, 0xDED2D0, 0xDED3DE, 0xDED920, 0xDEDBEC, 0xDEDC20, 0xDEE1E2,
    0xDFDEDB, 0xDFE0D5, 0xDFE0D8, 0xDFE0DE, 0xE0D0D2, 0xE0D5D4, 0xE1E2D0, 0xE1E2D2, 0xE1E2D8, 0xE1EF20, 0xE2D5DB, 0xE2DE20, 0xE2DEE0, 0xE2EC20, 0xE7E2DE, 0xEBE520,
};
632
/*
 * Trigram table passed to match_sbcs by CharsetRecog_8859_6_ar::match,
 * together with charMap_8859_6. Each entry holds three byte values packed
 * into the low 24 bits. There are 64 entries, listed in ascending order.
 * NOTE(review): presumably looked up through NGramParser::search, which
 * requires a table of exactly 64 entries - keep the count and ordering intact.
 */
static const int32_t ngrams_8859_6_ar[] = {
    0x20C7E4, 0x20C7E6, 0x20C8C7, 0x20D9E4, 0x20E1EA, 0x20E4E4, 0x20E5E6, 0x20E8C7, 0xC720C7, 0xC7C120, 0xC7CA20, 0xC7D120, 0xC7E420, 0xC7E4C3, 0xC7E4C7, 0xC7E4C8,
    0xC7E4CA, 0xC7E4CC, 0xC7E4CD, 0xC7E4CF, 0xC7E4D3, 0xC7E4D9, 0xC7E4E2, 0xC7E4E5, 0xC7E4E8, 0xC7E4EA, 0xC7E520, 0xC7E620, 0xC7E6CA, 0xC820C7, 0xC920C7, 0xC920E1,
    0xC920E4, 0xC920E5, 0xC920E8, 0xCA20C7, 0xCF20C7, 0xCFC920, 0xD120C7, 0xD1C920, 0xD320C7, 0xD920C7, 0xD9E4E9, 0xE1EA20, 0xE420C7, 0xE4C920, 0xE4E920, 0xE4EA20,
    0xE520C7, 0xE5C720, 0xE5C920, 0xE5E620, 0xE620C7, 0xE720C7, 0xE7C720, 0xE8C7E4, 0xE8E620, 0xE920C7, 0xEA20C7, 0xEA20E5, 0xEA20E8, 0xEAC920, 0xEAD120, 0xEAE620,
};
639
/*
 * Trigram table passed to match_sbcs by CharsetRecog_8859_7_el::match,
 * together with charMap_8859_7. Each entry holds three byte values packed
 * into the low 24 bits. There are 64 entries, listed in ascending order.
 * NOTE(review): presumably looked up through NGramParser::search, which
 * requires a table of exactly 64 entries - keep the count and ordering intact.
 */
static const int32_t ngrams_8859_7_el[] = {
    0x20E1ED, 0x20E1F0, 0x20E3E9, 0x20E4E9, 0x20E5F0, 0x20E720, 0x20EAE1, 0x20ECE5, 0x20EDE1, 0x20EF20, 0x20F0E1, 0x20F0EF, 0x20F0F1, 0x20F3F4, 0x20F3F5, 0x20F4E7,
    0x20F4EF, 0xDFE120, 0xE120E1, 0xE120F4, 0xE1E920, 0xE1ED20, 0xE1F0FC, 0xE1F220, 0xE3E9E1, 0xE5E920, 0xE5F220, 0xE720F4, 0xE7ED20, 0xE7F220, 0xE920F4, 0xE9E120,
    0xE9EADE, 0xE9F220, 0xEAE1E9, 0xEAE1F4, 0xECE520, 0xED20E1, 0xED20E5, 0xED20F0, 0xEDE120, 0xEFF220, 0xEFF520, 0xF0EFF5, 0xF0F1EF, 0xF0FC20, 0xF220E1, 0xF220E5,
    0xF220EA, 0xF220F0, 0xF220F4, 0xF3E520, 0xF3E720, 0xF3F4EF, 0xF4E120, 0xF4E1E9, 0xF4E7ED, 0xF4E7F2, 0xF4E9EA, 0xF4EF20, 0xF4EFF5, 0xF4F9ED, 0xF9ED20, 0xFEED20,
};
646
/*
 * Trigram table passed to match_sbcs by CharsetRecog_8859_8_I_he::match,
 * together with charMap_8859_8. Each entry holds three byte values packed
 * into the low 24 bits. There are 64 entries, listed in ascending order.
 * NOTE(review): presumably looked up through NGramParser::search, which
 * requires a table of exactly 64 entries - keep the count and ordering intact.
 */
static const int32_t ngrams_8859_8_I_he[] = {
    0x20E0E5, 0x20E0E7, 0x20E0E9, 0x20E0FA, 0x20E1E9, 0x20E1EE, 0x20E4E0, 0x20E4E5, 0x20E4E9, 0x20E4EE, 0x20E4F2, 0x20E4F9, 0x20E4FA, 0x20ECE0, 0x20ECE4, 0x20EEE0,
    0x20F2EC, 0x20F9EC, 0xE0FA20, 0xE420E0, 0xE420E1, 0xE420E4, 0xE420EC, 0xE420EE, 0xE420F9, 0xE4E5E0, 0xE5E020, 0xE5ED20, 0xE5EF20, 0xE5F820, 0xE5FA20, 0xE920E4,
    0xE9E420, 0xE9E5FA, 0xE9E9ED, 0xE9ED20, 0xE9EF20, 0xE9F820, 0xE9FA20, 0xEC20E0, 0xEC20E4, 0xECE020, 0xECE420, 0xED20E0, 0xED20E1, 0xED20E4, 0xED20EC, 0xED20EE,
    0xED20F9, 0xEEE420, 0xEF20E4, 0xF0E420, 0xF0E920, 0xF0E9ED, 0xF2EC20, 0xF820E4, 0xF8E9ED, 0xF9EC20, 0xFA20E0, 0xFA20E1, 0xFA20E4, 0xFA20EC, 0xFA20EE, 0xFA20F9,
};
653
/*
 * Trigram table passed to match_sbcs by CharsetRecog_8859_8_he::match,
 * together with charMap_8859_8. Each entry holds three byte values packed
 * into the low 24 bits. There are 64 entries, listed in ascending order.
 * NOTE(review): presumably looked up through NGramParser::search, which
 * requires a table of exactly 64 entries - keep the count and ordering intact.
 */
static const int32_t ngrams_8859_8_he[] = {
    0x20E0E5, 0x20E0EC, 0x20E4E9, 0x20E4EC, 0x20E4EE, 0x20E4F0, 0x20E9F0, 0x20ECF2, 0x20ECF9, 0x20EDE5, 0x20EDE9, 0x20EFE5, 0x20EFE9, 0x20F8E5, 0x20F8E9, 0x20FAE0,
    0x20FAE5, 0x20FAE9, 0xE020E4, 0xE020EC, 0xE020ED, 0xE020FA, 0xE0E420, 0xE0E5E4, 0xE0EC20, 0xE0EE20, 0xE120E4, 0xE120ED, 0xE120FA, 0xE420E4, 0xE420E9, 0xE420EC,
    0xE420ED, 0xE420EF, 0xE420F8, 0xE420FA, 0xE4EC20, 0xE5E020, 0xE5E420, 0xE7E020, 0xE9E020, 0xE9E120, 0xE9E420, 0xEC20E4, 0xEC20ED, 0xEC20FA, 0xECF220, 0xECF920,
    0xEDE9E9, 0xEDE9F0, 0xEDE9F8, 0xEE20E4, 0xEE20ED, 0xEE20FA, 0xEEE120, 0xEEE420, 0xF2E420, 0xF920E4, 0xF920ED, 0xF920FA, 0xF9E420, 0xFAE020, 0xFAE420, 0xFAE5E9,
};
660
/*
 * Trigram table passed to match_sbcs by CharsetRecog_8859_9_tr::match,
 * together with charMap_8859_9. Each entry holds three byte values packed
 * into the low 24 bits. There are 64 entries, listed in ascending order.
 * NOTE(review): presumably looked up through NGramParser::search, which
 * requires a table of exactly 64 entries - keep the count and ordering intact.
 */
static const int32_t ngrams_8859_9_tr[] = {
    0x206261, 0x206269, 0x206275, 0x206461, 0x206465, 0x206765, 0x206861, 0x20696C, 0x206B61, 0x206B6F, 0x206D61, 0x206F6C, 0x207361, 0x207461, 0x207665, 0x207961,
    0x612062, 0x616B20, 0x616C61, 0x616D61, 0x616E20, 0x616EFD, 0x617220, 0x617261, 0x6172FD, 0x6173FD, 0x617961, 0x626972, 0x646120, 0x646520, 0x646920, 0x652062,
    0x65206B, 0x656469, 0x656E20, 0x657220, 0x657269, 0x657369, 0x696C65, 0x696E20, 0x696E69, 0x697220, 0x6C616E, 0x6C6172, 0x6C6520, 0x6C6572, 0x6E2061, 0x6E2062,
    0x6E206B, 0x6E6461, 0x6E6465, 0x6E6520, 0x6E6920, 0x6E696E, 0x6EFD20, 0x72696E, 0x72FD6E, 0x766520, 0x796120, 0x796F72, 0xFD6E20, 0xFD6E64, 0xFD6EFD, 0xFDF0FD,
};
667
~CharsetRecog_8859_1()668 CharsetRecog_8859_1::~CharsetRecog_8859_1()
669 {
670 // nothing to do
671 }
672
getName() const673 const char *CharsetRecog_8859_1::getName() const
674 {
675 return haveC1Bytes? "windows-1252" : "ISO-8859-1";
676 }
677
getLanguage() const678 const char *CharsetRecog_8859_1_en::getLanguage() const
679 {
680 return "en";
681 }
682
~CharsetRecog_8859_1_en()683 CharsetRecog_8859_1_en::~CharsetRecog_8859_1_en()
684 {
685 // nothing to do
686 }
687
match(InputText * textIn)688 int32_t CharsetRecog_8859_1_en::match(InputText *textIn)
689 {
690 int32_t result = match_sbcs(textIn, ngrams_8859_1_en, charMap_8859_1);
691
692 // printf("8859_1_en: result = %d\n", result);
693 return result; //match_sbcs(textIn, ngrams, charMap);
694 }
695
~CharsetRecog_8859_1_da()696 CharsetRecog_8859_1_da::~CharsetRecog_8859_1_da()
697 {
698 // nothing to do
699 }
700
getLanguage() const701 const char *CharsetRecog_8859_1_da::getLanguage() const
702 {
703 return "da";
704 }
705
match(InputText * textIn)706 int32_t CharsetRecog_8859_1_da::match(InputText *textIn)
707 {
708 return match_sbcs(textIn, ngrams_8859_1_da, charMap_8859_1);
709 }
710
~CharsetRecog_8859_1_de()711 CharsetRecog_8859_1_de::~CharsetRecog_8859_1_de() {}
712
getLanguage() const713 const char *CharsetRecog_8859_1_de::getLanguage() const
714 {
715 return "de";
716 }
717
match(InputText * textIn)718 int32_t CharsetRecog_8859_1_de::match(InputText *textIn)
719 {
720 return match_sbcs(textIn, ngrams_8859_1_de, charMap_8859_1);
721 }
722
~CharsetRecog_8859_1_es()723 CharsetRecog_8859_1_es::~CharsetRecog_8859_1_es()
724 {
725 // nothing to do
726 }
727
getLanguage() const728 const char *CharsetRecog_8859_1_es::getLanguage() const
729 {
730 return "es";
731 }
732
match(InputText * textIn)733 int32_t CharsetRecog_8859_1_es::match(InputText *textIn)
734 {
735 return match_sbcs(textIn, ngrams_8859_1_es, charMap_8859_1);
736 }
737
~CharsetRecog_8859_1_fr()738 CharsetRecog_8859_1_fr::~CharsetRecog_8859_1_fr()
739 {
740 // nothing to do
741 }
742
getLanguage() const743 const char *CharsetRecog_8859_1_fr::getLanguage() const
744 {
745 return "fr";
746 }
747
match(InputText * textIn)748 int32_t CharsetRecog_8859_1_fr::match(InputText *textIn)
749 {
750 return match_sbcs(textIn, ngrams_8859_1_fr, charMap_8859_1);
751 }
752
~CharsetRecog_8859_1_it()753 CharsetRecog_8859_1_it::~CharsetRecog_8859_1_it()
754 {
755 // nothing to do
756 }
757
getLanguage() const758 const char *CharsetRecog_8859_1_it::getLanguage() const
759 {
760 return "it";
761 }
762
match(InputText * textIn)763 int32_t CharsetRecog_8859_1_it::match(InputText *textIn)
764 {
765 return match_sbcs(textIn, ngrams_8859_1_it, charMap_8859_1);
766 }
767
~CharsetRecog_8859_1_nl()768 CharsetRecog_8859_1_nl::~CharsetRecog_8859_1_nl()
769 {
770 // nothing to do
771 }
772
getLanguage() const773 const char *CharsetRecog_8859_1_nl::getLanguage() const
774 {
775 return "nl";
776 }
777
match(InputText * textIn)778 int32_t CharsetRecog_8859_1_nl::match(InputText *textIn)
779 {
780 return match_sbcs(textIn, ngrams_8859_1_nl, charMap_8859_1);
781 }
782
~CharsetRecog_8859_1_no()783 CharsetRecog_8859_1_no::~CharsetRecog_8859_1_no() {}
784
getLanguage() const785 const char *CharsetRecog_8859_1_no::getLanguage() const
786 {
787 return "no";
788 }
789
match(InputText * textIn)790 int32_t CharsetRecog_8859_1_no::match(InputText *textIn)
791 {
792 return match_sbcs(textIn, ngrams_8859_1_no, charMap_8859_1);
793 }
794
~CharsetRecog_8859_1_pt()795 CharsetRecog_8859_1_pt::~CharsetRecog_8859_1_pt()
796 {
797 // nothing to do
798 }
799
getLanguage() const800 const char *CharsetRecog_8859_1_pt::getLanguage() const
801 {
802 return "pt";
803 }
804
match(InputText * textIn)805 int32_t CharsetRecog_8859_1_pt::match(InputText *textIn)
806 {
807 return match_sbcs(textIn, ngrams_8859_1_pt, charMap_8859_1);
808 }
809
~CharsetRecog_8859_1_sv()810 CharsetRecog_8859_1_sv::~CharsetRecog_8859_1_sv() {}
811
getLanguage() const812 const char *CharsetRecog_8859_1_sv::getLanguage() const
813 {
814 return "sv";
815 }
816
match(InputText * textIn)817 int32_t CharsetRecog_8859_1_sv::match(InputText *textIn)
818 {
819 return match_sbcs(textIn, ngrams_8859_1_sv, charMap_8859_1);
820 }
821
~CharsetRecog_8859_2()822 CharsetRecog_8859_2::~CharsetRecog_8859_2()
823 {
824 // nothing to do
825 }
826
getName() const827 const char *CharsetRecog_8859_2::getName() const
828 {
829 return haveC1Bytes? "windows-1250" : "ISO-8859-2";
830 }
831
~CharsetRecog_8859_2_cs()832 CharsetRecog_8859_2_cs::~CharsetRecog_8859_2_cs()
833 {
834 // nothing to do
835 }
836
getLanguage() const837 const char *CharsetRecog_8859_2_cs::getLanguage() const
838 {
839 return "cs";
840 }
841
match(InputText * textIn)842 int32_t CharsetRecog_8859_2_cs::match(InputText *textIn)
843 {
844 return match_sbcs(textIn, ngrams_8859_2_cs, charMap_8859_2);
845 }
846
~CharsetRecog_8859_2_hu()847 CharsetRecog_8859_2_hu::~CharsetRecog_8859_2_hu()
848 {
849 // nothing to do
850 }
851
getLanguage() const852 const char *CharsetRecog_8859_2_hu::getLanguage() const
853 {
854 return "hu";
855 }
856
match(InputText * textIn)857 int32_t CharsetRecog_8859_2_hu::match(InputText *textIn)
858 {
859 return match_sbcs(textIn, ngrams_8859_2_hu, charMap_8859_2);
860 }
861
~CharsetRecog_8859_2_pl()862 CharsetRecog_8859_2_pl::~CharsetRecog_8859_2_pl()
863 {
864 // nothing to do
865 }
866
getLanguage() const867 const char *CharsetRecog_8859_2_pl::getLanguage() const
868 {
869 return "pl";
870 }
871
match(InputText * textIn)872 int32_t CharsetRecog_8859_2_pl::match(InputText *textIn)
873 {
874 return match_sbcs(textIn, ngrams_8859_2_pl, charMap_8859_2);
875 }
876
~CharsetRecog_8859_2_ro()877 CharsetRecog_8859_2_ro::~CharsetRecog_8859_2_ro()
878 {
879 // nothing to do
880 }
881
getLanguage() const882 const char *CharsetRecog_8859_2_ro::getLanguage() const
883 {
884 return "ro";
885 }
886
match(InputText * textIn)887 int32_t CharsetRecog_8859_2_ro::match(InputText *textIn)
888 {
889 return match_sbcs(textIn, ngrams_8859_2_ro, charMap_8859_2);
890 }
891
~CharsetRecog_8859_5()892 CharsetRecog_8859_5::~CharsetRecog_8859_5()
893 {
894 // nothing to do
895 }
896
getName() const897 const char *CharsetRecog_8859_5::getName() const
898 {
899 return "ISO-8859-5";
900 }
901
~CharsetRecog_8859_5_ru()902 CharsetRecog_8859_5_ru::~CharsetRecog_8859_5_ru()
903 {
904 // nothing to do
905 }
906
getLanguage() const907 const char *CharsetRecog_8859_5_ru::getLanguage() const
908 {
909 return "ru";
910 }
911
match(InputText * textIn)912 int32_t CharsetRecog_8859_5_ru::match(InputText *textIn)
913 {
914 return match_sbcs(textIn, ngrams_8859_5_ru, charMap_8859_5);
915 }
916
~CharsetRecog_8859_6()917 CharsetRecog_8859_6::~CharsetRecog_8859_6()
918 {
919 // nothing to do
920 }
921
getName() const922 const char *CharsetRecog_8859_6::getName() const
923 {
924 return "ISO-8859-6";
925 }
926
~CharsetRecog_8859_6_ar()927 CharsetRecog_8859_6_ar::~CharsetRecog_8859_6_ar()
928 {
929 // nothing to do
930 }
931
getLanguage() const932 const char *CharsetRecog_8859_6_ar::getLanguage() const
933 {
934 return "ar";
935 }
936
match(InputText * textIn)937 int32_t CharsetRecog_8859_6_ar::match(InputText *textIn)
938 {
939 return match_sbcs(textIn, ngrams_8859_6_ar, charMap_8859_6);
940 }
941
~CharsetRecog_8859_7()942 CharsetRecog_8859_7::~CharsetRecog_8859_7()
943 {
944 // nothing to do
945 }
946
getName() const947 const char *CharsetRecog_8859_7::getName() const
948 {
949 return haveC1Bytes? "windows-1253" : "ISO-8859-7";
950 }
951
~CharsetRecog_8859_7_el()952 CharsetRecog_8859_7_el::~CharsetRecog_8859_7_el()
953 {
954 // nothing to do
955 }
956
getLanguage() const957 const char *CharsetRecog_8859_7_el::getLanguage() const
958 {
959 return "el";
960 }
961
match(InputText * textIn)962 int32_t CharsetRecog_8859_7_el::match(InputText *textIn)
963 {
964 return match_sbcs(textIn, ngrams_8859_7_el, charMap_8859_7);
965 }
966
~CharsetRecog_8859_8()967 CharsetRecog_8859_8::~CharsetRecog_8859_8()
968 {
969 // nothing to do
970 }
971
getName() const972 const char *CharsetRecog_8859_8::getName() const
973 {
974 return haveC1Bytes? "windows-1255" : "ISO-8859-8";
975 }
976
~CharsetRecog_8859_8_I_he()977 CharsetRecog_8859_8_I_he::~CharsetRecog_8859_8_I_he ()
978 {
979 // nothing to do
980 }
981
getName() const982 const char *CharsetRecog_8859_8_I_he::getName() const
983 {
984 return haveC1Bytes? "windows-1255" : "ISO-8859-8-I";
985 }
986
getLanguage() const987 const char *CharsetRecog_8859_8_I_he::getLanguage() const
988 {
989 return "he";
990 }
991
match(InputText * textIn)992 int32_t CharsetRecog_8859_8_I_he::match(InputText *textIn)
993 {
994 return match_sbcs(textIn, ngrams_8859_8_I_he, charMap_8859_8);
995 }
996
~CharsetRecog_8859_8_he()997 CharsetRecog_8859_8_he::~CharsetRecog_8859_8_he()
998 {
999 // od ot gnihton
1000 }
1001
getLanguage() const1002 const char *CharsetRecog_8859_8_he::getLanguage() const
1003 {
1004 return "he";
1005 }
1006
match(InputText * textIn)1007 int32_t CharsetRecog_8859_8_he::match(InputText *textIn)
1008 {
1009 return match_sbcs(textIn, ngrams_8859_8_he, charMap_8859_8);
1010 }
1011
~CharsetRecog_8859_9()1012 CharsetRecog_8859_9::~CharsetRecog_8859_9()
1013 {
1014 // nothing to do
1015 }
1016
getName() const1017 const char *CharsetRecog_8859_9::getName() const
1018 {
1019 return haveC1Bytes? "windows-1254" : "ISO-8859-9";
1020 }
1021
~CharsetRecog_8859_9_tr()1022 CharsetRecog_8859_9_tr::~CharsetRecog_8859_9_tr ()
1023 {
1024 // nothing to do
1025 }
1026
getLanguage() const1027 const char *CharsetRecog_8859_9_tr::getLanguage() const
1028 {
1029 return "tr";
1030 }
1031
match(InputText * textIn)1032 int32_t CharsetRecog_8859_9_tr::match(InputText *textIn)
1033 {
1034 return match_sbcs(textIn, ngrams_8859_9_tr, charMap_8859_9);
1035 }
1036
~CharsetRecog_windows_1256()1037 CharsetRecog_windows_1256::~CharsetRecog_windows_1256()
1038 {
1039 // nothing to do
1040 }
1041
getName() const1042 const char *CharsetRecog_windows_1256::getName() const
1043 {
1044 return "windows-1256";
1045 }
1046
getLanguage() const1047 const char *CharsetRecog_windows_1256::getLanguage() const
1048 {
1049 return "ar";
1050 }
1051
match(InputText * textIn)1052 int32_t CharsetRecog_windows_1256::match(InputText *textIn)
1053 {
1054 return match_sbcs(textIn, ngrams_windows_1256, charMap_windows_1256);
1055 }
1056
~CharsetRecog_windows_1251()1057 CharsetRecog_windows_1251::~CharsetRecog_windows_1251()
1058 {
1059 // nothing to do
1060 }
1061
getName() const1062 const char *CharsetRecog_windows_1251::getName() const
1063 {
1064 return "windows-1251";
1065 }
1066
getLanguage() const1067 const char *CharsetRecog_windows_1251::getLanguage() const
1068 {
1069 return "ru";
1070 }
1071
match(InputText * textIn)1072 int32_t CharsetRecog_windows_1251::match(InputText *textIn)
1073 {
1074 return match_sbcs(textIn, ngrams_windows_1251, charMap_windows_1251);
1075 }
1076
~CharsetRecog_KOI8_R()1077 CharsetRecog_KOI8_R::~CharsetRecog_KOI8_R()
1078 {
1079 // nothing to do
1080 }
1081
getName() const1082 const char *CharsetRecog_KOI8_R::getName() const
1083 {
1084 return "KOI8-R";
1085 }
1086
getLanguage() const1087 const char *CharsetRecog_KOI8_R::getLanguage() const
1088 {
1089 return "ru";
1090 }
1091
match(InputText * textIn)1092 int32_t CharsetRecog_KOI8_R::match(InputText *textIn)
1093 {
1094 return match_sbcs(textIn, ngrams_KOI8_R, charMap_KOI8_R);
1095 }
1096
1097 U_NAMESPACE_END
1098 #endif
1099
1100