/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "autodetect.h"

#include <stdint.h>

// One contiguous run of 16-bit character codes.  Both ends are inclusive:
// charMatchesEncoding() accepts ch when first <= ch <= last.
struct CharRange {
    uint16_t first;  // lowest code in the range
    uint16_t last;   // highest code in the range
};

// Number of elements in a true array (not a pointer).  The argument is
// parenthesized so that any array-valued expression expands safely.
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

26 // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT
27 static const CharRange kShiftJISRanges[] = {
28 { 0x8140, 0x817E },
29 { 0x8180, 0x81AC },
30 { 0x81B8, 0x81BF },
31 { 0x81C8, 0x81CE },
32 { 0x81DA, 0x81E8 },
33 { 0x81F0, 0x81F7 },
34 { 0x81FC, 0x81FC },
35 { 0x824F, 0x8258 },
36 { 0x8260, 0x8279 },
37 { 0x8281, 0x829A },
38 { 0x829F, 0x82F1 },
39 { 0x8340, 0x837E },
40 { 0x8380, 0x8396 },
41 { 0x839F, 0x83B6 },
42 { 0x83BF, 0x83D6 },
43 { 0x8440, 0x8460 },
44 { 0x8470, 0x847E },
45 { 0x8480, 0x8491 },
46 { 0x849F, 0x84BE },
47 { 0x8740, 0x875D },
48 { 0x875F, 0x8775 },
49 { 0x877E, 0x877E },
50 { 0x8780, 0x879C },
51 { 0x889F, 0x88FC },
52 { 0x8940, 0x897E },
53 { 0x8980, 0x89FC },
54 { 0x8A40, 0x8A7E },
55 { 0x8A80, 0x8AFC },
56 { 0x8B40, 0x8B7E },
57 { 0x8B80, 0x8BFC },
58 { 0x8C40, 0x8C7E },
59 { 0x8C80, 0x8CFC },
60 { 0x8D40, 0x8D7E },
61 { 0x8D80, 0x8DFC },
62 { 0x8E40, 0x8E7E },
63 { 0x8E80, 0x8EFC },
64 { 0x8F40, 0x8F7E },
65 { 0x8F80, 0x8FFC },
66 { 0x9040, 0x907E },
67 { 0x9080, 0x90FC },
68 { 0x9140, 0x917E },
69 { 0x9180, 0x91FC },
70 { 0x9240, 0x927E },
71 { 0x9280, 0x92FC },
72 { 0x9340, 0x937E },
73 { 0x9380, 0x93FC },
74 { 0x9440, 0x947E },
75 { 0x9480, 0x94FC },
76 { 0x9540, 0x957E },
77 { 0x9580, 0x95FC },
78 { 0x9640, 0x967E },
79 { 0x9680, 0x96FC },
80 { 0x9740, 0x977E },
81 { 0x9780, 0x97FC },
82 { 0x9840, 0x9872 },
83 { 0x989F, 0x98FC },
84 { 0x9940, 0x997E },
85 { 0x9980, 0x99FC },
86 { 0x9A40, 0x9A7E },
87 { 0x9A80, 0x9AFC },
88 { 0x9B40, 0x9B7E },
89 { 0x9B80, 0x9BFC },
90 { 0x9C40, 0x9C7E },
91 { 0x9C80, 0x9CFC },
92 { 0x9D40, 0x9D7E },
93 { 0x9D80, 0x9DFC },
94 { 0x9E40, 0x9E7E },
95 { 0x9E80, 0x9EFC },
96 { 0x9F40, 0x9F7E },
97 { 0x9F80, 0x9FFC },
98 { 0xE040, 0xE07E },
99 { 0xE080, 0xE0FC },
100 { 0xE140, 0xE17E },
101 { 0xE180, 0xE1FC },
102 { 0xE240, 0xE27E },
103 { 0xE280, 0xE2FC },
104 { 0xE340, 0xE37E },
105 { 0xE380, 0xE3FC },
106 { 0xE440, 0xE47E },
107 { 0xE480, 0xE4FC },
108 { 0xE540, 0xE57E },
109 { 0xE580, 0xE5FC },
110 { 0xE640, 0xE67E },
111 { 0xE680, 0xE6FC },
112 { 0xE740, 0xE77E },
113 { 0xE780, 0xE7FC },
114 { 0xE840, 0xE87E },
115 { 0xE880, 0xE8FC },
116 { 0xE940, 0xE97E },
117 { 0xE980, 0xE9FC },
118 { 0xEA40, 0xEA7E },
119 { 0xEA80, 0xEAA4 },
120 { 0xED40, 0xED7E },
121 { 0xED80, 0xEDFC },
122 { 0xEE40, 0xEE7E },
123 { 0xEE80, 0xEEEC },
124 { 0xEEEF, 0xEEFC },
125 { 0xFA40, 0xFA7E },
126 { 0xFA80, 0xFAFC },
127 { 0xFB40, 0xFB7E },
128 { 0xFB80, 0xFBFC },
129 { 0xFC40, 0xFC4B },
130 };
131
132 // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT
133 static const CharRange kGBKRanges[] = {
134 { 0x8140, 0x817E },
135 { 0x8180, 0x81FE },
136 { 0x8240, 0x827E },
137 { 0x8280, 0x82FE },
138 { 0x8340, 0x837E },
139 { 0x8380, 0x83FE },
140 { 0x8440, 0x847E },
141 { 0x8480, 0x84FE },
142 { 0x8540, 0x857E },
143 { 0x8580, 0x85FE },
144 { 0x8640, 0x867E },
145 { 0x8680, 0x86FE },
146 { 0x8740, 0x877E },
147 { 0x8780, 0x87FE },
148 { 0x8840, 0x887E },
149 { 0x8880, 0x88FE },
150 { 0x8940, 0x897E },
151 { 0x8980, 0x89FE },
152 { 0x8A40, 0x8A7E },
153 { 0x8A80, 0x8AFE },
154 { 0x8B40, 0x8B7E },
155 { 0x8B80, 0x8BFE },
156 { 0x8C40, 0x8C7E },
157 { 0x8C80, 0x8CFE },
158 { 0x8D40, 0x8D7E },
159 { 0x8D80, 0x8DFE },
160 { 0x8E40, 0x8E7E },
161 { 0x8E80, 0x8EFE },
162 { 0x8F40, 0x8F7E },
163 { 0x8F80, 0x8FFE },
164 { 0x9040, 0x907E },
165 { 0x9080, 0x90FE },
166 { 0x9140, 0x917E },
167 { 0x9180, 0x91FE },
168 { 0x9240, 0x927E },
169 { 0x9280, 0x92FE },
170 { 0x9340, 0x937E },
171 { 0x9380, 0x93FE },
172 { 0x9440, 0x947E },
173 { 0x9480, 0x94FE },
174 { 0x9540, 0x957E },
175 { 0x9580, 0x95FE },
176 { 0x9640, 0x967E },
177 { 0x9680, 0x96FE },
178 { 0x9740, 0x977E },
179 { 0x9780, 0x97FE },
180 { 0x9840, 0x987E },
181 { 0x9880, 0x98FE },
182 { 0x9940, 0x997E },
183 { 0x9980, 0x99FE },
184 { 0x9A40, 0x9A7E },
185 { 0x9A80, 0x9AFE },
186 { 0x9B40, 0x9B7E },
187 { 0x9B80, 0x9BFE },
188 { 0x9C40, 0x9C7E },
189 { 0x9C80, 0x9CFE },
190 { 0x9D40, 0x9D7E },
191 { 0x9D80, 0x9DFE },
192 { 0x9E40, 0x9E7E },
193 { 0x9E80, 0x9EFE },
194 { 0x9F40, 0x9F7E },
195 { 0x9F80, 0x9FFE },
196 { 0xA040, 0xA07E },
197 { 0xA080, 0xA0FE },
198 { 0xA1A1, 0xA1FE },
199 { 0xA2A1, 0xA2AA },
200 { 0xA2B1, 0xA2E2 },
201 { 0xA2E5, 0xA2EE },
202 { 0xA2F1, 0xA2FC },
203 { 0xA3A1, 0xA3FE },
204 { 0xA4A1, 0xA4F3 },
205 { 0xA5A1, 0xA5F6 },
206 { 0xA6A1, 0xA6B8 },
207 { 0xA6C1, 0xA6D8 },
208 { 0xA6E0, 0xA6EB },
209 { 0xA6EE, 0xA6F2 },
210 { 0xA6F4, 0xA6F5 },
211 { 0xA7A1, 0xA7C1 },
212 { 0xA7D1, 0xA7F1 },
213 { 0xA840, 0xA87E },
214 { 0xA880, 0xA895 },
215 { 0xA8A1, 0xA8BB },
216 { 0xA8BD, 0xA8BE },
217 { 0xA8C0, 0xA8C0 },
218 { 0xA8C5, 0xA8E9 },
219 { 0xA940, 0xA957 },
220 { 0xA959, 0xA95A },
221 { 0xA95C, 0xA95C },
222 { 0xA960, 0xA97E },
223 { 0xA980, 0xA988 },
224 { 0xA996, 0xA996 },
225 { 0xA9A4, 0xA9EF },
226 { 0xAA40, 0xAA7E },
227 { 0xAA80, 0xAAA0 },
228 { 0xAB40, 0xAB7E },
229 { 0xAB80, 0xABA0 },
230 { 0xAC40, 0xAC7E },
231 { 0xAC80, 0xACA0 },
232 { 0xAD40, 0xAD7E },
233 { 0xAD80, 0xADA0 },
234 { 0xAE40, 0xAE7E },
235 { 0xAE80, 0xAEA0 },
236 { 0xAF40, 0xAF7E },
237 { 0xAF80, 0xAFA0 },
238 { 0xB040, 0xB07E },
239 { 0xB080, 0xB0FE },
240 { 0xB140, 0xB17E },
241 { 0xB180, 0xB1FE },
242 { 0xB240, 0xB27E },
243 { 0xB280, 0xB2FE },
244 { 0xB340, 0xB37E },
245 { 0xB380, 0xB3FE },
246 { 0xB440, 0xB47E },
247 { 0xB480, 0xB4FE },
248 { 0xB540, 0xB57E },
249 { 0xB580, 0xB5FE },
250 { 0xB640, 0xB67E },
251 { 0xB680, 0xB6FE },
252 { 0xB740, 0xB77E },
253 { 0xB780, 0xB7FE },
254 { 0xB840, 0xB87E },
255 { 0xB880, 0xB8FE },
256 { 0xB940, 0xB97E },
257 { 0xB980, 0xB9FE },
258 { 0xBA40, 0xBA7E },
259 { 0xBA80, 0xBAFE },
260 { 0xBB40, 0xBB7E },
261 { 0xBB80, 0xBBFE },
262 { 0xBC40, 0xBC7E },
263 { 0xBC80, 0xBCFE },
264 { 0xBD40, 0xBD7E },
265 { 0xBD80, 0xBDFE },
266 { 0xBE40, 0xBE7E },
267 { 0xBE80, 0xBEFE },
268 { 0xBF40, 0xBF7E },
269 { 0xBF80, 0xBFFE },
270 { 0xC040, 0xC07E },
271 { 0xC080, 0xC0FE },
272 { 0xC140, 0xC17E },
273 { 0xC180, 0xC1FE },
274 { 0xC240, 0xC27E },
275 { 0xC280, 0xC2FE },
276 { 0xC340, 0xC37E },
277 { 0xC380, 0xC3FE },
278 { 0xC440, 0xC47E },
279 { 0xC480, 0xC4FE },
280 { 0xC540, 0xC57E },
281 { 0xC580, 0xC5FE },
282 { 0xC640, 0xC67E },
283 { 0xC680, 0xC6FE },
284 { 0xC740, 0xC77E },
285 { 0xC780, 0xC7FE },
286 { 0xC840, 0xC87E },
287 { 0xC880, 0xC8FE },
288 { 0xC940, 0xC97E },
289 { 0xC980, 0xC9FE },
290 { 0xCA40, 0xCA7E },
291 { 0xCA80, 0xCAFE },
292 { 0xCB40, 0xCB7E },
293 { 0xCB80, 0xCBFE },
294 { 0xCC40, 0xCC7E },
295 { 0xCC80, 0xCCFE },
296 { 0xCD40, 0xCD7E },
297 { 0xCD80, 0xCDFE },
298 { 0xCE40, 0xCE7E },
299 { 0xCE80, 0xCEFE },
300 { 0xCF40, 0xCF7E },
301 { 0xCF80, 0xCFFE },
302 { 0xD040, 0xD07E },
303 { 0xD080, 0xD0FE },
304 { 0xD140, 0xD17E },
305 { 0xD180, 0xD1FE },
306 { 0xD240, 0xD27E },
307 { 0xD280, 0xD2FE },
308 { 0xD340, 0xD37E },
309 { 0xD380, 0xD3FE },
310 { 0xD440, 0xD47E },
311 { 0xD480, 0xD4FE },
312 { 0xD540, 0xD57E },
313 { 0xD580, 0xD5FE },
314 { 0xD640, 0xD67E },
315 { 0xD680, 0xD6FE },
316 { 0xD740, 0xD77E },
317 { 0xD780, 0xD7F9 },
318 { 0xD840, 0xD87E },
319 { 0xD880, 0xD8FE },
320 { 0xD940, 0xD97E },
321 { 0xD980, 0xD9FE },
322 { 0xDA40, 0xDA7E },
323 { 0xDA80, 0xDAFE },
324 { 0xDB40, 0xDB7E },
325 { 0xDB80, 0xDBFE },
326 { 0xDC40, 0xDC7E },
327 { 0xDC80, 0xDCFE },
328 { 0xDD40, 0xDD7E },
329 { 0xDD80, 0xDDFE },
330 { 0xDE40, 0xDE7E },
331 { 0xDE80, 0xDEFE },
332 { 0xDF40, 0xDF7E },
333 { 0xDF80, 0xDFFE },
334 { 0xE040, 0xE07E },
335 { 0xE080, 0xE0FE },
336 { 0xE140, 0xE17E },
337 { 0xE180, 0xE1FE },
338 { 0xE240, 0xE27E },
339 { 0xE280, 0xE2FE },
340 { 0xE340, 0xE37E },
341 { 0xE380, 0xE3FE },
342 { 0xE440, 0xE47E },
343 { 0xE480, 0xE4FE },
344 { 0xE540, 0xE57E },
345 { 0xE580, 0xE5FE },
346 { 0xE640, 0xE67E },
347 { 0xE680, 0xE6FE },
348 { 0xE740, 0xE77E },
349 { 0xE780, 0xE7FE },
350 { 0xE840, 0xE87E },
351 { 0xE880, 0xE8FE },
352 { 0xE940, 0xE97E },
353 { 0xE980, 0xE9FE },
354 { 0xEA40, 0xEA7E },
355 { 0xEA80, 0xEAFE },
356 { 0xEB40, 0xEB7E },
357 { 0xEB80, 0xEBFE },
358 { 0xEC40, 0xEC7E },
359 { 0xEC80, 0xECFE },
360 { 0xED40, 0xED7E },
361 { 0xED80, 0xEDFE },
362 { 0xEE40, 0xEE7E },
363 { 0xEE80, 0xEEFE },
364 { 0xEF40, 0xEF7E },
365 { 0xEF80, 0xEFFE },
366 { 0xF040, 0xF07E },
367 { 0xF080, 0xF0FE },
368 { 0xF140, 0xF17E },
369 { 0xF180, 0xF1FE },
370 { 0xF240, 0xF27E },
371 { 0xF280, 0xF2FE },
372 { 0xF340, 0xF37E },
373 { 0xF380, 0xF3FE },
374 { 0xF440, 0xF47E },
375 { 0xF480, 0xF4FE },
376 { 0xF540, 0xF57E },
377 { 0xF580, 0xF5FE },
378 { 0xF640, 0xF67E },
379 { 0xF680, 0xF6FE },
380 { 0xF740, 0xF77E },
381 { 0xF780, 0xF7FE },
382 { 0xF840, 0xF87E },
383 { 0xF880, 0xF8A0 },
384 { 0xF940, 0xF97E },
385 { 0xF980, 0xF9A0 },
386 { 0xFA40, 0xFA7E },
387 { 0xFA80, 0xFAA0 },
388 { 0xFB40, 0xFB7E },
389 { 0xFB80, 0xFBA0 },
390 { 0xFC40, 0xFC7E },
391 { 0xFC80, 0xFCA0 },
392 { 0xFD40, 0xFD7E },
393 { 0xFD80, 0xFDA0 },
394 { 0xFE40, 0xFE4F },
395 };
396
397 // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT
398 static const CharRange kEUCKRRanges[] = {
399 { 0x8141, 0x815A },
400 { 0x8161, 0x817A },
401 { 0x8181, 0x81FE },
402 { 0x8241, 0x825A },
403 { 0x8261, 0x827A },
404 { 0x8281, 0x82FE },
405 { 0x8341, 0x835A },
406 { 0x8361, 0x837A },
407 { 0x8381, 0x83FE },
408 { 0x8441, 0x845A },
409 { 0x8461, 0x847A },
410 { 0x8481, 0x84FE },
411 { 0x8541, 0x855A },
412 { 0x8561, 0x857A },
413 { 0x8581, 0x85FE },
414 { 0x8641, 0x865A },
415 { 0x8661, 0x867A },
416 { 0x8681, 0x86FE },
417 { 0x8741, 0x875A },
418 { 0x8761, 0x877A },
419 { 0x8781, 0x87FE },
420 { 0x8841, 0x885A },
421 { 0x8861, 0x887A },
422 { 0x8881, 0x88FE },
423 { 0x8941, 0x895A },
424 { 0x8961, 0x897A },
425 { 0x8981, 0x89FE },
426 { 0x8A41, 0x8A5A },
427 { 0x8A61, 0x8A7A },
428 { 0x8A81, 0x8AFE },
429 { 0x8B41, 0x8B5A },
430 { 0x8B61, 0x8B7A },
431 { 0x8B81, 0x8BFE },
432 { 0x8C41, 0x8C5A },
433 { 0x8C61, 0x8C7A },
434 { 0x8C81, 0x8CFE },
435 { 0x8D41, 0x8D5A },
436 { 0x8D61, 0x8D7A },
437 { 0x8D81, 0x8DFE },
438 { 0x8E41, 0x8E5A },
439 { 0x8E61, 0x8E7A },
440 { 0x8E81, 0x8EFE },
441 { 0x8F41, 0x8F5A },
442 { 0x8F61, 0x8F7A },
443 { 0x8F81, 0x8FFE },
444 { 0x9041, 0x905A },
445 { 0x9061, 0x907A },
446 { 0x9081, 0x90FE },
447 { 0x9141, 0x915A },
448 { 0x9161, 0x917A },
449 { 0x9181, 0x91FE },
450 { 0x9241, 0x925A },
451 { 0x9261, 0x927A },
452 { 0x9281, 0x92FE },
453 { 0x9341, 0x935A },
454 { 0x9361, 0x937A },
455 { 0x9381, 0x93FE },
456 { 0x9441, 0x945A },
457 { 0x9461, 0x947A },
458 { 0x9481, 0x94FE },
459 { 0x9541, 0x955A },
460 { 0x9561, 0x957A },
461 { 0x9581, 0x95FE },
462 { 0x9641, 0x965A },
463 { 0x9661, 0x967A },
464 { 0x9681, 0x96FE },
465 { 0x9741, 0x975A },
466 { 0x9761, 0x977A },
467 { 0x9781, 0x97FE },
468 { 0x9841, 0x985A },
469 { 0x9861, 0x987A },
470 { 0x9881, 0x98FE },
471 { 0x9941, 0x995A },
472 { 0x9961, 0x997A },
473 { 0x9981, 0x99FE },
474 { 0x9A41, 0x9A5A },
475 { 0x9A61, 0x9A7A },
476 { 0x9A81, 0x9AFE },
477 { 0x9B41, 0x9B5A },
478 { 0x9B61, 0x9B7A },
479 { 0x9B81, 0x9BFE },
480 { 0x9C41, 0x9C5A },
481 { 0x9C61, 0x9C7A },
482 { 0x9C81, 0x9CFE },
483 { 0x9D41, 0x9D5A },
484 { 0x9D61, 0x9D7A },
485 { 0x9D81, 0x9DFE },
486 { 0x9E41, 0x9E5A },
487 { 0x9E61, 0x9E7A },
488 { 0x9E81, 0x9EFE },
489 { 0x9F41, 0x9F5A },
490 { 0x9F61, 0x9F7A },
491 { 0x9F81, 0x9FFE },
492 { 0xA041, 0xA05A },
493 { 0xA061, 0xA07A },
494 { 0xA081, 0xA0FE },
495 { 0xA141, 0xA15A },
496 { 0xA161, 0xA17A },
497 { 0xA181, 0xA1FE },
498 { 0xA241, 0xA25A },
499 { 0xA261, 0xA27A },
500 { 0xA281, 0xA2E7 },
501 { 0xA341, 0xA35A },
502 { 0xA361, 0xA37A },
503 { 0xA381, 0xA3FE },
504 { 0xA441, 0xA45A },
505 { 0xA461, 0xA47A },
506 { 0xA481, 0xA4FE },
507 { 0xA541, 0xA55A },
508 { 0xA561, 0xA57A },
509 { 0xA581, 0xA5AA },
510 { 0xA5B0, 0xA5B9 },
511 { 0xA5C1, 0xA5D8 },
512 { 0xA5E1, 0xA5F8 },
513 { 0xA641, 0xA65A },
514 { 0xA661, 0xA67A },
515 { 0xA681, 0xA6E4 },
516 { 0xA741, 0xA75A },
517 { 0xA761, 0xA77A },
518 { 0xA781, 0xA7EF },
519 { 0xA841, 0xA85A },
520 { 0xA861, 0xA87A },
521 { 0xA881, 0xA8A4 },
522 { 0xA8A6, 0xA8A6 },
523 { 0xA8A8, 0xA8AF },
524 { 0xA8B1, 0xA8FE },
525 { 0xA941, 0xA95A },
526 { 0xA961, 0xA97A },
527 { 0xA981, 0xA9FE },
528 { 0xAA41, 0xAA5A },
529 { 0xAA61, 0xAA7A },
530 { 0xAA81, 0xAAF3 },
531 { 0xAB41, 0xAB5A },
532 { 0xAB61, 0xAB7A },
533 { 0xAB81, 0xABF6 },
534 { 0xAC41, 0xAC5A },
535 { 0xAC61, 0xAC7A },
536 { 0xAC81, 0xACC1 },
537 { 0xACD1, 0xACF1 },
538 { 0xAD41, 0xAD5A },
539 { 0xAD61, 0xAD7A },
540 { 0xAD81, 0xADA0 },
541 { 0xAE41, 0xAE5A },
542 { 0xAE61, 0xAE7A },
543 { 0xAE81, 0xAEA0 },
544 { 0xAF41, 0xAF5A },
545 { 0xAF61, 0xAF7A },
546 { 0xAF81, 0xAFA0 },
547 { 0xB041, 0xB05A },
548 { 0xB061, 0xB07A },
549 { 0xB081, 0xB0FE },
550 { 0xB141, 0xB15A },
551 { 0xB161, 0xB17A },
552 { 0xB181, 0xB1FE },
553 { 0xB241, 0xB25A },
554 { 0xB261, 0xB27A },
555 { 0xB281, 0xB2FE },
556 { 0xB341, 0xB35A },
557 { 0xB361, 0xB37A },
558 { 0xB381, 0xB3FE },
559 { 0xB441, 0xB45A },
560 { 0xB461, 0xB47A },
561 { 0xB481, 0xB4FE },
562 { 0xB541, 0xB55A },
563 { 0xB561, 0xB57A },
564 { 0xB581, 0xB5FE },
565 { 0xB641, 0xB65A },
566 { 0xB661, 0xB67A },
567 { 0xB681, 0xB6FE },
568 { 0xB741, 0xB75A },
569 { 0xB761, 0xB77A },
570 { 0xB781, 0xB7FE },
571 { 0xB841, 0xB85A },
572 { 0xB861, 0xB87A },
573 { 0xB881, 0xB8FE },
574 { 0xB941, 0xB95A },
575 { 0xB961, 0xB97A },
576 { 0xB981, 0xB9FE },
577 { 0xBA41, 0xBA5A },
578 { 0xBA61, 0xBA7A },
579 { 0xBA81, 0xBAFE },
580 { 0xBB41, 0xBB5A },
581 { 0xBB61, 0xBB7A },
582 { 0xBB81, 0xBBFE },
583 { 0xBC41, 0xBC5A },
584 { 0xBC61, 0xBC7A },
585 { 0xBC81, 0xBCFE },
586 { 0xBD41, 0xBD5A },
587 { 0xBD61, 0xBD7A },
588 { 0xBD81, 0xBDFE },
589 { 0xBE41, 0xBE5A },
590 { 0xBE61, 0xBE7A },
591 { 0xBE81, 0xBEFE },
592 { 0xBF41, 0xBF5A },
593 { 0xBF61, 0xBF7A },
594 { 0xBF81, 0xBFFE },
595 { 0xC041, 0xC05A },
596 { 0xC061, 0xC07A },
597 { 0xC081, 0xC0FE },
598 { 0xC141, 0xC15A },
599 { 0xC161, 0xC17A },
600 { 0xC181, 0xC1FE },
601 { 0xC241, 0xC25A },
602 { 0xC261, 0xC27A },
603 { 0xC281, 0xC2FE },
604 { 0xC341, 0xC35A },
605 { 0xC361, 0xC37A },
606 { 0xC381, 0xC3FE },
607 { 0xC441, 0xC45A },
608 { 0xC461, 0xC47A },
609 { 0xC481, 0xC4FE },
610 { 0xC541, 0xC55A },
611 { 0xC561, 0xC57A },
612 { 0xC581, 0xC5FE },
613 { 0xC641, 0xC652 },
614 { 0xC6A1, 0xC6FE },
615 { 0xC7A1, 0xC7FE },
616 { 0xC8A1, 0xC8FE },
617 { 0xCAA1, 0xCAFE },
618 { 0xCBA1, 0xCBFE },
619 { 0xCCA1, 0xCCFE },
620 { 0xCDA1, 0xCDFE },
621 { 0xCEA1, 0xCEFE },
622 { 0xCFA1, 0xCFFE },
623 { 0xD0A1, 0xD0FE },
624 { 0xD1A1, 0xD1FE },
625 { 0xD2A1, 0xD2FE },
626 { 0xD3A1, 0xD3FE },
627 { 0xD4A1, 0xD4FE },
628 { 0xD5A1, 0xD5FE },
629 { 0xD6A1, 0xD6FE },
630 { 0xD7A1, 0xD7FE },
631 { 0xD8A1, 0xD8FE },
632 { 0xD9A1, 0xD9FE },
633 { 0xDAA1, 0xDAFE },
634 { 0xDBA1, 0xDBFE },
635 { 0xDCA1, 0xDCFE },
636 { 0xDDA1, 0xDDFE },
637 { 0xDEA1, 0xDEFE },
638 { 0xDFA1, 0xDFFE },
639 { 0xE0A1, 0xE0FE },
640 { 0xE1A1, 0xE1FE },
641 { 0xE2A1, 0xE2FE },
642 { 0xE3A1, 0xE3FE },
643 { 0xE4A1, 0xE4FE },
644 { 0xE5A1, 0xE5FE },
645 { 0xE6A1, 0xE6FE },
646 { 0xE7A1, 0xE7FE },
647 { 0xE8A1, 0xE8FE },
648 { 0xE9A1, 0xE9FE },
649 { 0xEAA1, 0xEAFE },
650 { 0xEBA1, 0xEBFE },
651 { 0xECA1, 0xECFE },
652 { 0xEDA1, 0xEDFE },
653 { 0xEEA1, 0xEEFE },
654 { 0xEFA1, 0xEFFE },
655 { 0xF0A1, 0xF0FE },
656 { 0xF1A1, 0xF1FE },
657 { 0xF2A1, 0xF2FE },
658 { 0xF3A1, 0xF3FE },
659 { 0xF4A1, 0xF4FE },
660 { 0xF5A1, 0xF5FE },
661 { 0xF6A1, 0xF6FE },
662 { 0xF7A1, 0xF7FE },
663 { 0xF8A1, 0xF8FE },
664 { 0xF9A1, 0xF9FE },
665 { 0xFAA1, 0xFAFE },
666 { 0xFBA1, 0xFBFE },
667 { 0xFCA1, 0xFCFE },
668 { 0xFDA1, 0xFDFE },
669 };
670
671 // generated from http://unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT
672 static const CharRange kBig5Ranges[] = {
673 { 0xA140, 0xA17E },
674 { 0xA1A1, 0xA1FE },
675 { 0xA240, 0xA27E },
676 { 0xA2A1, 0xA2FE },
677 { 0xA340, 0xA37E },
678 { 0xA3A1, 0xA3BF },
679 { 0xA3E1, 0xA3E1 },
680 { 0xA440, 0xA47E },
681 { 0xA4A1, 0xA4FE },
682 { 0xA540, 0xA57E },
683 { 0xA5A1, 0xA5FE },
684 { 0xA640, 0xA67E },
685 { 0xA6A1, 0xA6FE },
686 { 0xA740, 0xA77E },
687 { 0xA7A1, 0xA7FE },
688 { 0xA840, 0xA87E },
689 { 0xA8A1, 0xA8FE },
690 { 0xA940, 0xA97E },
691 { 0xA9A1, 0xA9FE },
692 { 0xAA40, 0xAA7E },
693 { 0xAAA1, 0xAAFE },
694 { 0xAB40, 0xAB7E },
695 { 0xABA1, 0xABFE },
696 { 0xAC40, 0xAC7E },
697 { 0xACA1, 0xACFE },
698 { 0xAD40, 0xAD7E },
699 { 0xADA1, 0xADFE },
700 { 0xAE40, 0xAE7E },
701 { 0xAEA1, 0xAEFE },
702 { 0xAF40, 0xAF7E },
703 { 0xAFA1, 0xAFFE },
704 { 0xB040, 0xB07E },
705 { 0xB0A1, 0xB0FE },
706 { 0xB140, 0xB17E },
707 { 0xB1A1, 0xB1FE },
708 { 0xB240, 0xB27E },
709 { 0xB2A1, 0xB2FE },
710 { 0xB340, 0xB37E },
711 { 0xB3A1, 0xB3FE },
712 { 0xB440, 0xB47E },
713 { 0xB4A1, 0xB4FE },
714 { 0xB540, 0xB57E },
715 { 0xB5A1, 0xB5FE },
716 { 0xB640, 0xB67E },
717 { 0xB6A1, 0xB6FE },
718 { 0xB740, 0xB77E },
719 { 0xB7A1, 0xB7FE },
720 { 0xB840, 0xB87E },
721 { 0xB8A1, 0xB8FE },
722 { 0xB940, 0xB97E },
723 { 0xB9A1, 0xB9FE },
724 { 0xBA40, 0xBA7E },
725 { 0xBAA1, 0xBAFE },
726 { 0xBB40, 0xBB7E },
727 { 0xBBA1, 0xBBFE },
728 { 0xBC40, 0xBC7E },
729 { 0xBCA1, 0xBCFE },
730 { 0xBD40, 0xBD7E },
731 { 0xBDA1, 0xBDFE },
732 { 0xBE40, 0xBE7E },
733 { 0xBEA1, 0xBEFE },
734 { 0xBF40, 0xBF7E },
735 { 0xBFA1, 0xBFFE },
736 { 0xC040, 0xC07E },
737 { 0xC0A1, 0xC0FE },
738 { 0xC140, 0xC17E },
739 { 0xC1A1, 0xC1FE },
740 { 0xC240, 0xC27E },
741 { 0xC2A1, 0xC2FE },
742 { 0xC340, 0xC37E },
743 { 0xC3A1, 0xC3FE },
744 { 0xC440, 0xC47E },
745 { 0xC4A1, 0xC4FE },
746 { 0xC540, 0xC57E },
747 { 0xC5A1, 0xC5FE },
748 { 0xC640, 0xC67E },
749 { 0xC940, 0xC97E },
750 { 0xC9A1, 0xC9FE },
751 { 0xCA40, 0xCA7E },
752 { 0xCAA1, 0xCAFE },
753 { 0xCB40, 0xCB7E },
754 { 0xCBA1, 0xCBFE },
755 { 0xCC40, 0xCC7E },
756 { 0xCCA1, 0xCCFE },
757 { 0xCD40, 0xCD7E },
758 { 0xCDA1, 0xCDFE },
759 { 0xCE40, 0xCE7E },
760 { 0xCEA1, 0xCEFE },
761 { 0xCF40, 0xCF7E },
762 { 0xCFA1, 0xCFFE },
763 { 0xD040, 0xD07E },
764 { 0xD0A1, 0xD0FE },
765 { 0xD140, 0xD17E },
766 { 0xD1A1, 0xD1FE },
767 { 0xD240, 0xD27E },
768 { 0xD2A1, 0xD2FE },
769 { 0xD340, 0xD37E },
770 { 0xD3A1, 0xD3FE },
771 { 0xD440, 0xD47E },
772 { 0xD4A1, 0xD4FE },
773 { 0xD540, 0xD57E },
774 { 0xD5A1, 0xD5FE },
775 { 0xD640, 0xD67E },
776 { 0xD6A1, 0xD6FE },
777 { 0xD740, 0xD77E },
778 { 0xD7A1, 0xD7FE },
779 { 0xD840, 0xD87E },
780 { 0xD8A1, 0xD8FE },
781 { 0xD940, 0xD97E },
782 { 0xD9A1, 0xD9FE },
783 { 0xDA40, 0xDA7E },
784 { 0xDAA1, 0xDAFE },
785 { 0xDB40, 0xDB7E },
786 { 0xDBA1, 0xDBFE },
787 { 0xDC40, 0xDC7E },
788 { 0xDCA1, 0xDCFE },
789 { 0xDD40, 0xDD7E },
790 { 0xDDA1, 0xDDFE },
791 { 0xDE40, 0xDE7E },
792 { 0xDEA1, 0xDEFE },
793 { 0xDF40, 0xDF7E },
794 { 0xDFA1, 0xDFFE },
795 { 0xE040, 0xE07E },
796 { 0xE0A1, 0xE0FE },
797 { 0xE140, 0xE17E },
798 { 0xE1A1, 0xE1FE },
799 { 0xE240, 0xE27E },
800 { 0xE2A1, 0xE2FE },
801 { 0xE340, 0xE37E },
802 { 0xE3A1, 0xE3FE },
803 { 0xE440, 0xE47E },
804 { 0xE4A1, 0xE4FE },
805 { 0xE540, 0xE57E },
806 { 0xE5A1, 0xE5FE },
807 { 0xE640, 0xE67E },
808 { 0xE6A1, 0xE6FE },
809 { 0xE740, 0xE77E },
810 { 0xE7A1, 0xE7FE },
811 { 0xE840, 0xE87E },
812 { 0xE8A1, 0xE8FE },
813 { 0xE940, 0xE97E },
814 { 0xE9A1, 0xE9FE },
815 { 0xEA40, 0xEA7E },
816 { 0xEAA1, 0xEAFE },
817 { 0xEB40, 0xEB7E },
818 { 0xEBA1, 0xEBFE },
819 { 0xEC40, 0xEC7E },
820 { 0xECA1, 0xECFE },
821 { 0xED40, 0xED7E },
822 { 0xEDA1, 0xEDFE },
823 { 0xEE40, 0xEE7E },
824 { 0xEEA1, 0xEEFE },
825 { 0xEF40, 0xEF7E },
826 { 0xEFA1, 0xEFFE },
827 { 0xF040, 0xF07E },
828 { 0xF0A1, 0xF0FE },
829 { 0xF140, 0xF17E },
830 { 0xF1A1, 0xF1FE },
831 { 0xF240, 0xF27E },
832 { 0xF2A1, 0xF2FE },
833 { 0xF340, 0xF37E },
834 { 0xF3A1, 0xF3FE },
835 { 0xF440, 0xF47E },
836 { 0xF4A1, 0xF4FE },
837 { 0xF540, 0xF57E },
838 { 0xF5A1, 0xF5FE },
839 { 0xF640, 0xF67E },
840 { 0xF6A1, 0xF6FE },
841 { 0xF740, 0xF77E },
842 { 0xF7A1, 0xF7FE },
843 { 0xF840, 0xF87E },
844 { 0xF8A1, 0xF8FE },
845 { 0xF940, 0xF97E },
846 { 0xF9A1, 0xF9FE },
847 };
848
charMatchesEncoding(int ch,const CharRange * encodingRanges,int rangeCount)849 static bool charMatchesEncoding(int ch, const CharRange* encodingRanges, int rangeCount) {
850 // Use binary search to see if the character is contained in the encoding
851 int low = 0;
852 int high = rangeCount;
853
854 while (low < high) {
855 int i = (low + high) / 2;
856 const CharRange* range = &encodingRanges[i];
857 if (ch >= range->first && ch <= range->last)
858 return true;
859 if (ch > range->last)
860 low = i + 1;
861 else
862 high = i;
863 }
864
865 return false;
866 }
867
findPossibleEncodings(int ch)868 extern uint32_t findPossibleEncodings(int ch)
869 {
870 // ASCII matches everything
871 if (ch < 256) return kEncodingAll;
872
873 int result = kEncodingNone;
874
875 if (charMatchesEncoding(ch, kShiftJISRanges, ARRAY_SIZE(kShiftJISRanges)))
876 result |= kEncodingShiftJIS;
877 if (charMatchesEncoding(ch, kGBKRanges, ARRAY_SIZE(kGBKRanges)))
878 result |= kEncodingGBK;
879 if (charMatchesEncoding(ch, kBig5Ranges, ARRAY_SIZE(kBig5Ranges)))
880 result |= kEncodingBig5;
881 if (charMatchesEncoding(ch, kEUCKRRanges, ARRAY_SIZE(kEUCKRRanges)))
882 result |= kEncodingEUCKR;
883
884 return result;
885 }
886