1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1996-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * Provides functionality for mapping between
10 * LCID and Posix IDs or ICU locale to codepage
11 *
12 * Note: All classes and code in this file are
13 * intended for internal use only.
14 *
15 * Methods of interest:
16 * unsigned long convertToLCID(const char*);
17 * const char* convertToPosix(unsigned long);
18 *
19 * Kathleen Wilson, 4/30/96
20 *
21 * Date Name Description
22 * 3/11/97 aliu Fixed off-by-one bug in assignment operator. Added
23 * setId() method and safety check against
24 * MAX_ID_LENGTH.
25 * 04/23/99 stephen Added C wrapper for convertToPosix.
26 * 09/18/00 george Removed the memory leaks.
27 * 08/23/01 george Convert to C
28 */
29
30 #include "locmap.h"
31 #include "cstring.h"
32 #include "cmemory.h"
33
34 #if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
35 /*
36 * TODO: It seems like we should widen this to
37 * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
38 * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
39 * but those use gcc and won't have defined(_MSC_VER).
40 * We might need to #include some Windows header and test for some version macro from there.
41 * Or call some Windows function and see what it returns.
42 */
43 #define USE_WINDOWS_LOCALE_API
44 #endif
45
46 #ifdef USE_WINDOWS_LOCALE_API
47 #include <windows.h>
48 #include <winnls.h>
49 #endif
50
51 /*
52 * Note:
53 * The mapping from Win32 locale ID numbers to POSIX locale strings should
54 * be the faster one.
55 *
56 * Many LCID values come from winnt.h
57 * Some also come from http://www.microsoft.com/globaldev/reference/lcid-all.mspx
58 */
59
60 /*
61 ////////////////////////////////////////////////
62 //
63 // Internal Classes for LCID <--> POSIX Mapping
64 //
65 /////////////////////////////////////////////////
66 */
67
/** One row of a subtable: pairs a Windows LCID with a POSIX-style locale ID string. */
typedef struct ILcidPosixElement
{
    const uint32_t hostID;       /* LCID value, e.g. 0x0409 */
    const char * const posixID;  /* POSIX ID, e.g. "en_US"; some rows carry an "@collation=..." keyword */
} ILcidPosixElement;
73
/** Descriptor for one subtable: an array of ILcidPosixElement together with its length. */
typedef struct ILcidPosixMap
{
    const uint32_t numRegions;                         /* number of elements in regionMaps */
    const struct ILcidPosixElement* const regionMaps;  /* the rows; element 0 is what the lookups fall back to */
} ILcidPosixMap;
79
80
81 /*
82 /////////////////////////////////////////////////
83 //
84 // Easy macros to make the LCID <--> POSIX Mapping
85 //
86 /////////////////////////////////////////////////
87 */
88
/**
 * The standard one language/one country mapping for LCID.
 * Defines a two-element static array named locmap_<languageID>:
 * the first element is the language alone, the second is the
 * language with the country.
 *
 * The expansion already ends with a semicolon, so invocations are
 * written without one. languageID and posixID are stringized, and
 * languageID is also pasted into the array name, so both must be
 * bare tokens rather than string literals.
 *
 * @param hostID LCID in host format such as 0x044d
 * @param languageID posix ID of just the language such as 'de'
 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
static const ILcidPosixElement locmap_ ## languageID [] = { \
    {LANGUAGE_LCID(hostID), #languageID}, /* parent locale; LANGUAGE_LCID is defined outside this file section -- presumably it reduces the LCID to its language part */ \
    {hostID, #posixID}, \
};
102
/**
 * Define a subtable by ID.
 * Expands to the declaration of a static array named locmap_<id>, up to and
 * including the '='; the invocation must be followed by a brace-enclosed
 * list of {hostID, "posixID"} rows and a terminating semicolon.
 * @param id the POSIX ID, either a language or language_TERRITORY;
 *           it is pasted into the array name, so it must be a bare token.
 */
#define ILCID_POSIX_SUBTABLE(id) \
static const ILcidPosixElement locmap_ ## id [] =
109
110
/**
 * Create the ILcidPosixMap initializer for a subtable: its element count
 * followed by a pointer to its first element. This macro supposes that the
 * subtable was declared as locmap_<_posixID> (by one of the macros above),
 * and that the first element in that array is the one to fall back to.
 * @param _posixID the identifier suffix of the subtable, i.e. the same token
 *                 that was passed when the subtable was defined.
 */
#define ILCID_POSIX_MAP(_posixID) \
    {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}
119
120 /*
121 ////////////////////////////////////////////
122 //
123 // Create the table of LCID to POSIX Mapping
124 // None of it should be dynamically created.
125 //
126 // Keep static locale variables inside the function so that
127 // it can be created properly during static init.
128 //
// Note: This table should be updated periodically. Check the National Language Support API Reference website.
130 // Microsoft is moving away from LCID in favor of locale name as of Vista. This table needs to be
131 // maintained for support of older Windows version.
132 // Update: Windows 7 (091130)
133 //
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
135 // @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
136 // called from uloc_getLCID(), keywords other than collation are already removed. If we really need
137 // to support other keywords in this mapping data, we must update the implementation.
138 ////////////////////////////////////////////
139 */
140
141 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
142
/*
 * Arabic. LCID 0x1801 is listed for both "ar_MA" and "ar_MO": either string
 * resolves to 0x1801, while a first-match scan by LCID (as in getPosixID)
 * yields "ar_MA" because it comes first.
 */
ILCID_POSIX_SUBTABLE(ar) {
    {0x01, "ar"},
    {0x3801, "ar_AE"},
    {0x3c01, "ar_BH"},
    {0x1401, "ar_DZ"},
    {0x0c01, "ar_EG"},
    {0x0801, "ar_IQ"},
    {0x2c01, "ar_JO"},
    {0x3401, "ar_KW"},
    {0x3001, "ar_LB"},
    {0x1001, "ar_LY"},
    {0x1801, "ar_MA"},
    {0x1801, "ar_MO"},
    {0x2001, "ar_OM"},
    {0x4001, "ar_QA"},
    {0x0401, "ar_SA"},
    {0x2801, "ar_SY"},
    {0x1c01, "ar_TN"},
    {0x2401, "ar_YE"}
};
163
164 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
165 ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
166 ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
167
/*
 * Azerbaijani. LCID 0x042c is listed twice; a first-match scan by LCID
 * (as in getPosixID) yields "az_Latn_AZ", and "az_AZ" is kept so that the
 * script-less ID still resolves to the same LCID.
 */
ILCID_POSIX_SUBTABLE(az) {
    {0x2c, "az"},
    {0x082c, "az_Cyrl_AZ"}, /* Cyrillic based */
    {0x742c, "az_Cyrl"}, /* Cyrillic based */
    {0x042c, "az_Latn_AZ"}, /* Latin based */
    {0x782c, "az_Latn"}, /* Latin based */
    {0x042c, "az_AZ"} /* Latin based */
};
176
177 ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
178 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
179
180 /*ILCID_POSIX_SUBTABLE(ber) {
181 {0x5f, "ber"},
182 {0x045f, "ber_Arab_DZ"},
183 {0x045f, "ber_Arab"},
184 {0x085f, "ber_Latn_DZ"},
185 {0x085f, "ber_Latn"}
186 };*/
187
188 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
189
190 ILCID_POSIX_ELEMENT_ARRAY(0x0466, bin, bin_NG)
191
ILCID_POSIX_SUBTABLE(bn)192 ILCID_POSIX_SUBTABLE(bn) {
193 {0x45, "bn"},
194 {0x0845, "bn_BD"},
195 {0x0445, "bn_IN"}
196 };
197
/*
 * Tibetan. "dz_BT" (0x0c51) is listed here because its LCID ends in the
 * same low byte, 0x51, as the other rows of this subtable.
 */
ILCID_POSIX_SUBTABLE(bo) {
    {0x51, "bo"},
    {0x0851, "bo_BT"},
    {0x0451, "bo_CN"},
    {0x0c51, "dz_BT"}
};
204
205 ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
206
ILCID_POSIX_SUBTABLE(ca)207 ILCID_POSIX_SUBTABLE(ca) {
208 {0x03, "ca"},
209 {0x0403, "ca_ES"},
210 {0x0803, "ca_ES_VALENCIA"}
211 };
212
213 ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
214 ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)
215
ILCID_POSIX_SUBTABLE(ckb)216 ILCID_POSIX_SUBTABLE(ckb) {
217 {0x92, "ckb"},
218 {0x7c92, "ckb_Arab"},
219 {0x0492, "ckb_Arab_IQ"}
220 };
221
/* z/OS defines cs as a function. The array generated below is named locmap_cs, so it does not clash with that symbol. */
223 ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
224
225 ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
226 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
227
/*
 * German. The two phonebook rows share LCID 0x10407: a first-match scan by
 * LCID (as in getPosixID) yields "de_DE@collation=phonebook"; the second row
 * lets the region-less ID resolve to the same LCID.
 */
ILCID_POSIX_SUBTABLE(de) {
    {0x07, "de"},
    {0x0c07, "de_AT"},
    {0x0807, "de_CH"},
    {0x0407, "de_DE"},
    {0x1407, "de_LI"},
    {0x1007, "de_LU"},
    {0x10407,"de_DE@collation=phonebook"}, /*This is really de_DE_PHONEBOOK on Windows*/
    {0x10407,"de@collation=phonebook"} /*This is really de_DE_PHONEBOOK on Windows*/
};
238
239 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
240 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
241
ILCID_POSIX_SUBTABLE(en)242 ILCID_POSIX_SUBTABLE(en) {
243 {0x09, "en"},
244 {0x0c09, "en_AU"},
245 {0x2809, "en_BZ"},
246 {0x1009, "en_CA"},
247 {0x0809, "en_GB"},
248 {0x3c09, "en_HK"},
249 {0x3809, "en_ID"},
250 {0x1809, "en_IE"},
251 {0x4009, "en_IN"},
252 {0x2009, "en_JM"},
253 {0x4409, "en_MY"},
254 {0x1409, "en_NZ"},
255 {0x3409, "en_PH"},
256 {0x4809, "en_SG"},
257 {0x2C09, "en_TT"},
258 {0x0409, "en_US"},
259 {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
260 {0x2409, "en_VI"}, /* Virgin Islands AKA Caribbean Islands (en_CB). */
261 {0x1c09, "en_ZA"},
262 {0x3009, "en_ZW"},
263 {0x2409, "en_029"},
264 {0x0409, "en_AS"}, /* Alias for en_US. Leave last. */
265 {0x0409, "en_GU"}, /* Alias for en_US. Leave last. */
266 {0x0409, "en_MH"}, /* Alias for en_US. Leave last. */
267 {0x0409, "en_MP"}, /* Alias for en_US. Leave last. */
268 {0x0409, "en_UM"} /* Alias for en_US. Leave last. */
269 };
270
/*
 * Single-row subtable for LCID 0x007f. The same row also appears in the
 * "en" subtable; as the only element it is also this subtable's fallback.
 */
ILCID_POSIX_SUBTABLE(en_US_POSIX) {
    {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
};
274
ILCID_POSIX_SUBTABLE(es)275 ILCID_POSIX_SUBTABLE(es) {
276 {0x0a, "es"},
277 {0x2c0a, "es_AR"},
278 {0x400a, "es_BO"},
279 {0x340a, "es_CL"},
280 {0x240a, "es_CO"},
281 {0x140a, "es_CR"},
282 {0x5c0a, "es_CU"},
283 {0x1c0a, "es_DO"},
284 {0x300a, "es_EC"},
285 {0x0c0a, "es_ES"}, /*Modern sort.*/
286 {0x100a, "es_GT"},
287 {0x480a, "es_HN"},
288 {0x080a, "es_MX"},
289 {0x4c0a, "es_NI"},
290 {0x180a, "es_PA"},
291 {0x280a, "es_PE"},
292 {0x500a, "es_PR"},
293 {0x3c0a, "es_PY"},
294 {0x440a, "es_SV"},
295 {0x540a, "es_US"},
296 {0x380a, "es_UY"},
297 {0x200a, "es_VE"},
298 {0x580a, "es_419"},
299 {0x040a, "es_ES@collation=traditional"},
300 {0x040a, "es@collation=traditional"}
301 };
302
303 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
304 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
305
306 /* ISO-639 doesn't distinguish between Persian and Dari.*/
ILCID_POSIX_SUBTABLE(fa)307 ILCID_POSIX_SUBTABLE(fa) {
308 {0x29, "fa"},
309 {0x0429, "fa_IR"}, /* Persian/Farsi (Iran) */
310 {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
311 };
312
/*
 * Duplicate for roundtripping: LCID 0x048c is also listed in the "fa"
 * subtable. Both rows here carry the same POSIX ID, so this subtable
 * yields "fa_AF" whether a lookup matches 0x8c, 0x048c, or falls back
 * to element 0.
 */
ILCID_POSIX_SUBTABLE(fa_AF) {
    {0x8c, "fa_AF"}, /* Persian/Dari (Afghanistan) */
    {0x048c, "fa_AF"} /* Persian/Dari (Afghanistan) */
};
318
ILCID_POSIX_SUBTABLE(ff)319 ILCID_POSIX_SUBTABLE(ff) {
320 {0x67, "ff"},
321 {0x7c67, "ff_Latn"},
322 {0x0867, "ff_Latn_SN"},
323 {0x0467, "ff_NG"}
324 };
325
326 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
327 ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
328 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
329
ILCID_POSIX_SUBTABLE(fr)330 ILCID_POSIX_SUBTABLE(fr) {
331 {0x0c, "fr"},
332 {0x080c, "fr_BE"},
333 {0x0c0c, "fr_CA"},
334 {0x240c, "fr_CD"},
335 {0x240c, "fr_CG"},
336 {0x100c, "fr_CH"},
337 {0x300c, "fr_CI"},
338 {0x2c0c, "fr_CM"},
339 {0x040c, "fr_FR"},
340 {0x3c0c, "fr_HT"},
341 {0x140c, "fr_LU"},
342 {0x380c, "fr_MA"},
343 {0x180c, "fr_MC"},
344 {0x340c, "fr_ML"},
345 {0x200c, "fr_RE"},
346 {0x280c, "fr_SN"},
347 {0xe40c, "fr_015"},
348 {0x1c0c, "fr_029"}
349 };
350
351 ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
352
353 ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
354
/*
 * Gaelic (Ireland). "gd_GB" is listed here under 0x043c and also in the
 * separate "gd" subtable under 0x0491.
 */
ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
    {0x3c, "ga"},
    {0x083c, "ga_IE"},
    {0x043c, "gd_GB"}
};
360
ILCID_POSIX_SUBTABLE(gd)361 ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
362 {0x91, "gd"},
363 {0x0491, "gd_GB"}
364 };
365
366 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
367 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
368 ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
369 ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
370
ILCID_POSIX_SUBTABLE(ha)371 ILCID_POSIX_SUBTABLE(ha) {
372 {0x68, "ha"},
373 {0x7c68, "ha_Latn"},
374 {0x0468, "ha_Latn_NG"},
375 };
376
377 ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
378 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
379 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
380
381 /* This LCID is really four different locales.*/
ILCID_POSIX_SUBTABLE(hr)382 ILCID_POSIX_SUBTABLE(hr) {
383 {0x1a, "hr"},
384 {0x141a, "bs_Latn_BA"}, /* Bosnian, Bosnia and Herzegovina */
385 {0x681a, "bs_Latn"}, /* Bosnian, Bosnia and Herzegovina */
386 {0x141a, "bs_BA"}, /* Bosnian, Bosnia and Herzegovina */
387 {0x781a, "bs"}, /* Bosnian */
388 {0x201a, "bs_Cyrl_BA"}, /* Bosnian, Bosnia and Herzegovina */
389 {0x641a, "bs_Cyrl"}, /* Bosnian, Bosnia and Herzegovina */
390 {0x101a, "hr_BA"}, /* Croatian in Bosnia */
391 {0x041a, "hr_HR"}, /* Croatian*/
392 {0x2c1a, "sr_Latn_ME"},
393 {0x241a, "sr_Latn_RS"},
394 {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
395 {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
396 {0x701a, "sr_Latn"}, /* It's 0x1a or 0x081a, pick one to make the test program happy. */
397 {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
398 {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
399 {0x301a, "sr_Cyrl_ME"},
400 {0x281a, "sr_Cyrl_RS"},
401 {0x6c1a, "sr_Cyrl"}, /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
402 {0x7c1a, "sr"} /* In CLDR sr is sr_Cyrl. */
403 };
404
ILCID_POSIX_SUBTABLE(hsb)405 ILCID_POSIX_SUBTABLE(hsb) {
406 {0x2E, "hsb"},
407 {0x042E, "hsb_DE"},
408 {0x082E, "dsb_DE"},
409 {0x7C2E, "dsb"},
410 };
411
412 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
413 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
414 ILCID_POSIX_ELEMENT_ARRAY(0x0469, ibb, ibb_NG)
415 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
416 ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
417 ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
418 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
419
ILCID_POSIX_SUBTABLE(it)420 ILCID_POSIX_SUBTABLE(it) {
421 {0x10, "it"},
422 {0x0810, "it_CH"},
423 {0x0410, "it_IT"}
424 };
425
ILCID_POSIX_SUBTABLE(iu)426 ILCID_POSIX_SUBTABLE(iu) {
427 {0x5d, "iu"},
428 {0x045d, "iu_Cans_CA"},
429 {0x785d, "iu_Cans"},
430 {0x085d, "iu_Latn_CA"},
431 {0x7c5d, "iu_Latn"}
432 };
433
434 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL) /*Left in for compatibility*/
435 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
436 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
437 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
438 ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
439 ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
440 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
441
ILCID_POSIX_SUBTABLE(ko)442 ILCID_POSIX_SUBTABLE(ko) {
443 {0x12, "ko"},
444 {0x0812, "ko_KP"},
445 {0x0412, "ko_KR"}
446 };
447
448 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
449 ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr, kr_NG)
450
ILCID_POSIX_SUBTABLE(ks)451 ILCID_POSIX_SUBTABLE(ks) { /* We could add PK and CN too */
452 {0x60, "ks"},
453 {0x0860, "ks_IN"}, /* Documentation doesn't mention script */
454 {0x0460, "ks_Arab_IN"},
455 {0x0860, "ks_Deva_IN"}
456 };
457
458 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG) /* Kyrgyz is spoken in Kyrgyzstan */
459 ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT) /* TODO: Verify the country */
460 ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
461 ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
462 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
463 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
464 ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
465 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
466 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
467
ILCID_POSIX_SUBTABLE(mn)468 ILCID_POSIX_SUBTABLE(mn) {
469 {0x50, "mn"},
470 {0x0450, "mn_MN"},
471 {0x7c50, "mn_Mong"},
472 {0x0850, "mn_Mong_CN"},
473 {0x0850, "mn_CN"},
474 {0x7850, "mn_Cyrl"},
475 {0x0c50, "mn_Mong_MN"}
476 };
477
478 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
479 ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
480 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
481
ILCID_POSIX_SUBTABLE(ms)482 ILCID_POSIX_SUBTABLE(ms) {
483 {0x3e, "ms"},
484 {0x083e, "ms_BN"}, /* Brunei Darussalam*/
485 {0x043e, "ms_MY"} /* Malaysia*/
486 };
487
488 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
489 ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
490
ILCID_POSIX_SUBTABLE(ne)491 ILCID_POSIX_SUBTABLE(ne) {
492 {0x61, "ne"},
493 {0x0861, "ne_IN"}, /* India*/
494 {0x0461, "ne_NP"} /* Nepal*/
495 };
496
ILCID_POSIX_SUBTABLE(nl)497 ILCID_POSIX_SUBTABLE(nl) {
498 {0x13, "nl"},
499 {0x0813, "nl_BE"},
500 {0x0413, "nl_NL"}
501 };
502
/*
 * The "no" locale split into nb and nn. By default in ICU, "no" is nb.
 * Row order matters: LCIDs 0x0414 and 0x0814 each appear more than once, and
 * a first-match scan by LCID (as in getPosixID) returns the earliest row,
 * hence the "Keep first" notes below.
 */
ILCID_POSIX_SUBTABLE(no) {
    {0x14, "no"}, /* really nb_NO */
    {0x7c14, "nb"}, /* really nb */
    {0x0414, "nb_NO"}, /* really nb_NO. Keep first in the 414 list. */
    {0x0414, "no_NO"}, /* really nb_NO */
    {0x0814, "nn_NO"}, /* really nn_NO. Keep first in the 814 list. */
    {0x7814, "nn"}, /* It's 0x14 or 0x814, pick one to make the test program happy. */
    {0x0814, "no_NO_NY"}/* really nn_NO */
};
513
514 ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA) /* TODO: Verify the ISO-639 code */
515 ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
516
ILCID_POSIX_SUBTABLE(om)517 ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
518 {0x72, "om"},
519 {0x0472, "om_ET"},
520 {0x0472, "gaz_ET"}
521 };
522
/*
 * Declared as or_IN (array name locmap_or_IN) to get around compiler errors.
 * NOTE(review): presumably because "or" is an alternative operator token in
 * C++ and a macro in <iso646.h> -- confirm. The language row still uses the
 * string "or".
 */
ILCID_POSIX_SUBTABLE(or_IN) {
    {0x48, "or"},
    {0x0448, "or_IN"},
};
528
529
ILCID_POSIX_SUBTABLE(pa)530 ILCID_POSIX_SUBTABLE(pa) {
531 {0x46, "pa"},
532 {0x0446, "pa_IN"},
533 {0x0846, "pa_PK"},
534 {0x0846, "pa_Arab_PK"}
535 };
536
537 ILCID_POSIX_ELEMENT_ARRAY(0x0479, pap, pap_AN)
538 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
539 ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
540
ILCID_POSIX_SUBTABLE(pt)541 ILCID_POSIX_SUBTABLE(pt) {
542 {0x16, "pt"},
543 {0x0416, "pt_BR"},
544 {0x0816, "pt_PT"}
545 };
546
ILCID_POSIX_SUBTABLE(qu)547 ILCID_POSIX_SUBTABLE(qu) {
548 {0x6b, "qu"},
549 {0x046b, "qu_BO"},
550 {0x086b, "qu_EC"},
551 {0x0C6b, "qu_PE"},
552 {0x046b, "quz_BO"},
553 {0x086b, "quz_EC"},
554 {0x0C6b, "quz_PE"}
555 };
556
ILCID_POSIX_SUBTABLE(quc)557 ILCID_POSIX_SUBTABLE(quc) {
558 {0x93, "quc"},
559 {0x0493, "quc_CO"},
560 /*
561 "quc_Latn_GT" is an exceptional case. Language ID of "quc"
562 is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
563 under the group of "qut". "qut" is a retired ISO 639-3 language
564 code for West Central Quiche, and merged to "quc".
565 It looks Windows previously reserved "qut" for K'iche', but,
566 decided to use "quc" when adding a locale for K'iche' (Guatemala).
567
568 This data structure used here assumes language ID bits in
569 LCID is unique for alphabetic language code. But this is not true
570 for "quc_Latn_GT". If we don't have the data below, LCID look up
571 by alphabetic locale ID (POSIX) will fail. The same entry is found
572 under "qut" below, which is required for reverse look up.
573 */
574 {0x0486, "quc_Latn_GT"}
575 };
576
ILCID_POSIX_SUBTABLE(qut)577 ILCID_POSIX_SUBTABLE(qut) {
578 {0x86, "qut"},
579 {0x0486, "qut_GT"},
580 /*
581 See the note in "quc" above.
582 */
583 {0x0486, "quc_Latn_GT"}
584 };
585
586 ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
587
ILCID_POSIX_SUBTABLE(ro)588 ILCID_POSIX_SUBTABLE(ro) {
589 {0x18, "ro"},
590 {0x0418, "ro_RO"},
591 {0x0818, "ro_MD"}
592 };
593
/* Single-row subtable mapping LCID 0x00 to the "root" locale. */
ILCID_POSIX_SUBTABLE(root) {
    {0x00, "root"}
};
597
ILCID_POSIX_SUBTABLE(ru)598 ILCID_POSIX_SUBTABLE(ru) {
599 {0x19, "ru"},
600 {0x0419, "ru_RU"},
601 {0x0819, "ru_MD"}
602 };
603
604 ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
605 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
606 ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
607
ILCID_POSIX_SUBTABLE(sd)608 ILCID_POSIX_SUBTABLE(sd) {
609 {0x59, "sd"},
610 {0x0459, "sd_IN"},
611 {0x0459, "sd_Deva_IN"},
612 {0x0859, "sd_PK"}
613 };
614
ILCID_POSIX_SUBTABLE(se)615 ILCID_POSIX_SUBTABLE(se) {
616 {0x3b, "se"},
617 {0x0c3b, "se_FI"},
618 {0x043b, "se_NO"},
619 {0x083b, "se_SE"},
620 {0x783b, "sma"},
621 {0x183b, "sma_NO"},
622 {0x1c3b, "sma_SE"},
623 {0x7c3b, "smj"},
624 {0x703b, "smn"},
625 {0x743b, "sms"},
626 {0x103b, "smj_NO"},
627 {0x143b, "smj_SE"},
628 {0x243b, "smn_FI"},
629 {0x203b, "sms_FI"},
630 };
631
632 ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
633 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
634 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
635
ILCID_POSIX_SUBTABLE(so)636 ILCID_POSIX_SUBTABLE(so) { /* TODO: Verify the country */
637 {0x77, "so"},
638 {0x0477, "so_ET"},
639 {0x0477, "so_SO"}
640 };
641
642 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
643 ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
644
ILCID_POSIX_SUBTABLE(sv)645 ILCID_POSIX_SUBTABLE(sv) {
646 {0x1d, "sv"},
647 {0x081d, "sv_FI"},
648 {0x041d, "sv_SE"}
649 };
650
651 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
652 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
653
ILCID_POSIX_SUBTABLE(ta)654 ILCID_POSIX_SUBTABLE(ta) {
655 {0x49, "ta"},
656 {0x0449, "ta_IN"},
657 {0x0849, "ta_LK"}
658 };
659
660 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
661
662 /* Cyrillic based by default */
ILCID_POSIX_SUBTABLE(tg)663 ILCID_POSIX_SUBTABLE(tg) {
664 {0x28, "tg"},
665 {0x7c28, "tg_Cyrl"},
666 {0x0428, "tg_Cyrl_TJ"}
667 };
668
669 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
670
ILCID_POSIX_SUBTABLE(ti)671 ILCID_POSIX_SUBTABLE(ti) {
672 {0x73, "ti"},
673 {0x0873, "ti_ER"},
674 {0x0473, "ti_ET"}
675 };
676
677 ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
678
ILCID_POSIX_SUBTABLE(tn)679 ILCID_POSIX_SUBTABLE(tn) {
680 {0x32, "tn"},
681 {0x0832, "tn_BW"},
682 {0x0432, "tn_ZA"}
683 };
684
685 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
686 ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
687 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
688
ILCID_POSIX_SUBTABLE(tzm)689 ILCID_POSIX_SUBTABLE(tzm) {
690 {0x5f, "tzm"},
691 {0x7c5f, "tzm_Latn"},
692 {0x085f, "tzm_Latn_DZ"},
693 {0x105f, "tzm_Tfng_MA"},
694 {0x045f, "tzm_Arab_MA"},
695 {0x045f, "tmz"}
696 };
697
ILCID_POSIX_SUBTABLE(ug)698 ILCID_POSIX_SUBTABLE(ug) {
699 {0x80, "ug"},
700 {0x0480, "ug_CN"},
701 {0x0480, "ug_Arab_CN"}
702 };
703
704 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
705
ILCID_POSIX_SUBTABLE(ur)706 ILCID_POSIX_SUBTABLE(ur) {
707 {0x20, "ur"},
708 {0x0820, "ur_IN"},
709 {0x0420, "ur_PK"}
710 };
711
ILCID_POSIX_SUBTABLE(uz)712 ILCID_POSIX_SUBTABLE(uz) {
713 {0x43, "uz"},
714 {0x0843, "uz_Cyrl_UZ"}, /* Cyrillic based */
715 {0x7843, "uz_Cyrl"}, /* Cyrillic based */
716 {0x0843, "uz_UZ"}, /* Cyrillic based */
717 {0x0443, "uz_Latn_UZ"}, /* Latin based */
718 {0x7c43, "uz_Latn"} /* Latin based */
719 };
720
ILCID_POSIX_SUBTABLE(ve)721 ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
722 {0x33, "ve"},
723 {0x0433, "ve_ZA"},
724 {0x0433, "ven_ZA"}
725 };
726
727 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
728 ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
729 ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
730 ILCID_POSIX_ELEMENT_ARRAY(0x043d, yi, yi)
731 ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
732
ILCID_POSIX_SUBTABLE(zh)733 ILCID_POSIX_SUBTABLE(zh) {
734 {0x0004, "zh_Hans"},
735 {0x7804, "zh"},
736 {0x0804, "zh_CN"},
737 {0x0804, "zh_Hans_CN"},
738 {0x0c04, "zh_Hant_HK"},
739 {0x0c04, "zh_HK"},
740 {0x1404, "zh_Hant_MO"},
741 {0x1404, "zh_MO"},
742 {0x1004, "zh_Hans_SG"},
743 {0x1004, "zh_SG"},
744 {0x0404, "zh_Hant_TW"},
745 {0x7c04, "zh_Hant"},
746 {0x0404, "zh_TW"},
747 {0x30404,"zh_Hant_TW"}, /* Bopomofo order */
748 {0x30404,"zh_TW"}, /* Bopomofo order */
749 {0x20004,"zh@collation=stroke"},
750 {0x20404,"zh_Hant@collation=stroke"},
751 {0x20404,"zh_Hant_TW@collation=stroke"},
752 {0x20404,"zh_TW@collation=stroke"},
753 {0x20804,"zh_Hans@collation=stroke"},
754 {0x20804,"zh_Hans_CN@collation=stroke"},
755 {0x20804,"zh_CN@collation=stroke"}
756 };
757
758 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
759
/*
 * Master list of subtables. This must be static and grouped by LCID.
 * Each entry is produced by ILCID_POSIX_MAP and therefore holds the element
 * count of one locmap_<id> array plus a pointer to it. The trailing comment
 * on each entry gives the language and the low byte of its LCIDs.
 * Entries that are commented out are kept for reference only.
 */
static const ILcidPosixMap gPosixIDmap[] = {
    ILCID_POSIX_MAP(af), /* af Afrikaans 0x36 */
    ILCID_POSIX_MAP(am), /* am Amharic 0x5e */
    ILCID_POSIX_MAP(ar), /* ar Arabic 0x01 */
    ILCID_POSIX_MAP(arn), /* arn Araucanian/Mapudungun 0x7a */
    ILCID_POSIX_MAP(as), /* as Assamese 0x4d */
    ILCID_POSIX_MAP(az), /* az Azerbaijani 0x2c */
    ILCID_POSIX_MAP(ba), /* ba Bashkir 0x6d */
    ILCID_POSIX_MAP(be), /* be Belarusian 0x23 */
    /* ILCID_POSIX_MAP(ber), ber Berber/Tamazight 0x5f */
    ILCID_POSIX_MAP(bg), /* bg Bulgarian 0x02 */
    ILCID_POSIX_MAP(bin), /* bin Edo 0x66 */
    ILCID_POSIX_MAP(bn), /* bn Bengali; Bangla 0x45 */
    ILCID_POSIX_MAP(bo), /* bo Tibetan 0x51 */
    ILCID_POSIX_MAP(br), /* br Breton 0x7e */
    ILCID_POSIX_MAP(ca), /* ca Catalan 0x03 */
    ILCID_POSIX_MAP(chr), /* chr Cherokee 0x5c */
    ILCID_POSIX_MAP(ckb), /* ckb Sorani (Central Kurdish) 0x92 */
    ILCID_POSIX_MAP(co), /* co Corsican 0x83 */
    ILCID_POSIX_MAP(cs), /* cs Czech 0x05 */
    ILCID_POSIX_MAP(cy), /* cy Welsh 0x52 */
    ILCID_POSIX_MAP(da), /* da Danish 0x06 */
    ILCID_POSIX_MAP(de), /* de German 0x07 */
    ILCID_POSIX_MAP(dv), /* dv Divehi 0x65 */
    ILCID_POSIX_MAP(el), /* el Greek 0x08 */
    ILCID_POSIX_MAP(en), /* en English 0x09 */
    ILCID_POSIX_MAP(en_US_POSIX), /* invariant 0x7f */
    ILCID_POSIX_MAP(es), /* es Spanish 0x0a */
    ILCID_POSIX_MAP(et), /* et Estonian 0x25 */
    ILCID_POSIX_MAP(eu), /* eu Basque 0x2d */
    ILCID_POSIX_MAP(fa), /* fa Persian/Farsi 0x29 */
    ILCID_POSIX_MAP(fa_AF), /* fa Persian/Dari 0x8c */
    ILCID_POSIX_MAP(ff), /* ff Fula 0x67 */
    ILCID_POSIX_MAP(fi), /* fi Finnish 0x0b */
    ILCID_POSIX_MAP(fil), /* fil Filipino 0x64 */
    ILCID_POSIX_MAP(fo), /* fo Faroese 0x38 */
    ILCID_POSIX_MAP(fr), /* fr French 0x0c */
    ILCID_POSIX_MAP(fuv), /* fuv Fulfulde - Nigeria 0x67 */
    ILCID_POSIX_MAP(fy), /* fy Frisian 0x62 */
    ILCID_POSIX_MAP(ga), /* * Gaelic (Ireland,Scotland) 0x3c */
    ILCID_POSIX_MAP(gd), /* gd Gaelic (United Kingdom) 0x91 */
    ILCID_POSIX_MAP(gl), /* gl Galician 0x56 */
    ILCID_POSIX_MAP(gn), /* gn Guarani 0x74 */
    ILCID_POSIX_MAP(gsw), /* gsw Alemanic/Alsatian/Swiss German 0x84 */
    ILCID_POSIX_MAP(gu), /* gu Gujarati 0x47 */
    ILCID_POSIX_MAP(ha), /* ha Hausa 0x68 */
    ILCID_POSIX_MAP(haw), /* haw Hawaiian 0x75 */
    ILCID_POSIX_MAP(he), /* he Hebrew (formerly iw) 0x0d */
    ILCID_POSIX_MAP(hi), /* hi Hindi 0x39 */
    ILCID_POSIX_MAP(hr), /* * Croatian and others 0x1a */
    ILCID_POSIX_MAP(hsb), /* hsb Upper Sorbian 0x2e */
    ILCID_POSIX_MAP(hu), /* hu Hungarian 0x0e */
    ILCID_POSIX_MAP(hy), /* hy Armenian 0x2b */
    ILCID_POSIX_MAP(ibb), /* ibb Ibibio - Nigeria 0x69 */
    ILCID_POSIX_MAP(id), /* id Indonesian (formerly in) 0x21 */
    ILCID_POSIX_MAP(ig), /* ig Igbo 0x70 */
    ILCID_POSIX_MAP(ii), /* ii Sichuan Yi 0x78 */
    ILCID_POSIX_MAP(is), /* is Icelandic 0x0f */
    ILCID_POSIX_MAP(it), /* it Italian 0x10 */
    ILCID_POSIX_MAP(iu), /* iu Inuktitut 0x5d */
    ILCID_POSIX_MAP(iw), /* iw Hebrew 0x0d */
    ILCID_POSIX_MAP(ja), /* ja Japanese 0x11 */
    ILCID_POSIX_MAP(ka), /* ka Georgian 0x37 */
    ILCID_POSIX_MAP(kk), /* kk Kazakh 0x3f */
    ILCID_POSIX_MAP(kl), /* kl Kalaallisut 0x6f */
    ILCID_POSIX_MAP(km), /* km Khmer 0x53 */
    ILCID_POSIX_MAP(kn), /* kn Kannada 0x4b */
    ILCID_POSIX_MAP(ko), /* ko Korean 0x12 */
    ILCID_POSIX_MAP(kok), /* kok Konkani 0x57 */
    ILCID_POSIX_MAP(kr), /* kr Kanuri 0x71 */
    ILCID_POSIX_MAP(ks), /* ks Kashmiri 0x60 */
    ILCID_POSIX_MAP(ky), /* ky Kyrgyz 0x40 */
    ILCID_POSIX_MAP(lb), /* lb Luxembourgish 0x6e */
    ILCID_POSIX_MAP(la), /* la Latin 0x76 */
    ILCID_POSIX_MAP(lo), /* lo Lao 0x54 */
    ILCID_POSIX_MAP(lt), /* lt Lithuanian 0x27 */
    ILCID_POSIX_MAP(lv), /* lv Latvian, Lettish 0x26 */
    ILCID_POSIX_MAP(mi), /* mi Maori 0x81 */
    ILCID_POSIX_MAP(mk), /* mk Macedonian 0x2f */
    ILCID_POSIX_MAP(ml), /* ml Malayalam 0x4c */
    ILCID_POSIX_MAP(mn), /* mn Mongolian 0x50 */
    ILCID_POSIX_MAP(mni), /* mni Manipuri 0x58 */
    ILCID_POSIX_MAP(moh), /* moh Mohawk 0x7c */
    ILCID_POSIX_MAP(mr), /* mr Marathi 0x4e */
    ILCID_POSIX_MAP(ms), /* ms Malay 0x3e */
    ILCID_POSIX_MAP(mt), /* mt Maltese 0x3a */
    ILCID_POSIX_MAP(my), /* my Burmese 0x55 */
    /* ILCID_POSIX_MAP(nb), // no Norwegian 0x14 */
    ILCID_POSIX_MAP(ne), /* ne Nepali 0x61 */
    ILCID_POSIX_MAP(nl), /* nl Dutch 0x13 */
    /* ILCID_POSIX_MAP(nn), // no Norwegian 0x14 */
    ILCID_POSIX_MAP(no), /* * Norwegian 0x14 */
    ILCID_POSIX_MAP(nso), /* nso Sotho, Northern (Sepedi dialect) 0x6c */
    ILCID_POSIX_MAP(oc), /* oc Occitan 0x82 */
    ILCID_POSIX_MAP(om), /* om Oromo 0x72 */
    ILCID_POSIX_MAP(or_IN), /* or Oriya 0x48 */
    ILCID_POSIX_MAP(pa), /* pa Punjabi 0x46 */
    ILCID_POSIX_MAP(pap), /* pap Papiamentu 0x79 */
    ILCID_POSIX_MAP(pl), /* pl Polish 0x15 */
    ILCID_POSIX_MAP(ps), /* ps Pashto 0x63 */
    ILCID_POSIX_MAP(pt), /* pt Portuguese 0x16 */
    ILCID_POSIX_MAP(qu), /* qu Quechua 0x6B */
    ILCID_POSIX_MAP(quc), /* quc K'iche 0x93 */
    ILCID_POSIX_MAP(qut), /* qut K'iche 0x86 */
    ILCID_POSIX_MAP(rm), /* rm Raeto-Romance/Romansh 0x17 */
    ILCID_POSIX_MAP(ro), /* ro Romanian 0x18 */
    ILCID_POSIX_MAP(root), /* root 0x00 */
    ILCID_POSIX_MAP(ru), /* ru Russian 0x19 */
    ILCID_POSIX_MAP(rw), /* rw Kinyarwanda 0x87 */
    ILCID_POSIX_MAP(sa), /* sa Sanskrit 0x4f */
    ILCID_POSIX_MAP(sah), /* sah Yakut 0x85 */
    ILCID_POSIX_MAP(sd), /* sd Sindhi 0x59 */
    ILCID_POSIX_MAP(se), /* se Sami 0x3b */
    /* ILCID_POSIX_MAP(sh), // sh Serbo-Croatian 0x1a */
    ILCID_POSIX_MAP(si), /* si Sinhalese 0x5b */
    ILCID_POSIX_MAP(sk), /* sk Slovak 0x1b */
    ILCID_POSIX_MAP(sl), /* sl Slovenian 0x24 */
    ILCID_POSIX_MAP(so), /* so Somali 0x77 */
    ILCID_POSIX_MAP(sq), /* sq Albanian 0x1c */
    /* ILCID_POSIX_MAP(sr), // sr Serbian 0x1a */
    ILCID_POSIX_MAP(st), /* st Sutu 0x30 */
    ILCID_POSIX_MAP(sv), /* sv Swedish 0x1d */
    ILCID_POSIX_MAP(sw), /* sw Swahili 0x41 */
    ILCID_POSIX_MAP(syr), /* syr Syriac 0x5A */
    ILCID_POSIX_MAP(ta), /* ta Tamil 0x49 */
    ILCID_POSIX_MAP(te), /* te Telugu 0x4a */
    ILCID_POSIX_MAP(tg), /* tg Tajik 0x28 */
    ILCID_POSIX_MAP(th), /* th Thai 0x1e */
    ILCID_POSIX_MAP(ti), /* ti Tigrigna 0x73 */
    ILCID_POSIX_MAP(tk), /* tk Turkmen 0x42 */
    ILCID_POSIX_MAP(tn), /* tn Tswana 0x32 */
    ILCID_POSIX_MAP(tr), /* tr Turkish 0x1f */
    ILCID_POSIX_MAP(ts), /* ts Tsonga 0x31 */
    ILCID_POSIX_MAP(tt), /* tt Tatar 0x44 */
    ILCID_POSIX_MAP(tzm), /* tzm Tamazight 0x5f */
    ILCID_POSIX_MAP(ug), /* ug Uighur 0x80 */
    ILCID_POSIX_MAP(uk), /* uk Ukrainian 0x22 */
    ILCID_POSIX_MAP(ur), /* ur Urdu 0x20 */
    ILCID_POSIX_MAP(uz), /* uz Uzbek 0x43 */
    ILCID_POSIX_MAP(ve), /* ve Venda 0x33 */
    ILCID_POSIX_MAP(vi), /* vi Vietnamese 0x2a */
    ILCID_POSIX_MAP(wo), /* wo Wolof 0x88 */
    ILCID_POSIX_MAP(xh), /* xh Xhosa 0x34 */
    ILCID_POSIX_MAP(yi), /* yi Yiddish 0x3d */
    ILCID_POSIX_MAP(yo), /* yo Yoruba 0x6a */
    ILCID_POSIX_MAP(zh), /* zh Chinese 0x04 */
    ILCID_POSIX_MAP(zu), /* zu Zulu 0x35 */
};
909
/* Number of subtables in gPosixIDmap, computed from the array itself so it cannot drift. */
static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
911
/**
 * Measures the common prefix of two NUL-terminated IDs.
 * File-local helper used by getHostID().
 *
 * @param id1 first ID string.
 * @param id2 second ID string.
 * @return the number of leading characters that are identical in both
 *         strings; counting stops at the first difference or at the end
 *         of either string.
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t matched;

    /* id1[matched] != 0 also guards id2: a terminator in id2 alone fails the equality test. */
    for (matched = 0; id1[matched] != 0 && id1[matched] == id2[matched]; ++matched) {
        /* nothing to do; the loop header does the counting */
    }
    return matched;
}
928
929 /**
930 * Searches for a Windows LCID
931 *
932 * @param posixid the Posix style locale id.
933 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
934 * no equivalent Windows LCID.
935 * @return the LCID
936 */
937 static uint32_t
getHostID(const ILcidPosixMap * this_0,const char * posixID,UErrorCode * status)938 getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
939 {
940 int32_t bestIdx = 0;
941 int32_t bestIdxDiff = 0;
942 int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
943 uint32_t idx;
944
945 for (idx = 0; idx < this_0->numRegions; idx++ ) {
946 int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
947 if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
948 if (posixIDlen == sameChars) {
949 /* Exact match */
950 return this_0->regionMaps[idx].hostID;
951 }
952 bestIdxDiff = sameChars;
953 bestIdx = idx;
954 }
955 }
956 /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
957 /* We also have to make sure that sid and si and similar string subsets don't match. */
958 if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
959 && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
960 {
961 *status = U_USING_FALLBACK_WARNING;
962 return this_0->regionMaps[bestIdx].hostID;
963 }
964
965 /*no match found */
966 *status = U_ILLEGAL_ARGUMENT_ERROR;
967 return this_0->regionMaps->hostID;
968 }
969
970 static const char*
getPosixID(const ILcidPosixMap * this_0,uint32_t hostID)971 getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
972 {
973 uint32_t i;
974 for (i = 0; i <= this_0->numRegions; i++)
975 {
976 if (this_0->regionMaps[i].hostID == hostID)
977 {
978 return this_0->regionMaps[i].posixID;
979 }
980 }
981
982 /* If you get here, then no matching region was found,
983 so return the language id with the wild card region. */
984 return this_0->regionMaps[0].posixID;
985 }
986
987 /*
988 //////////////////////////////////////
989 //
990 // LCID --> POSIX
991 //
992 /////////////////////////////////////
993 */
994 #ifdef USE_WINDOWS_LOCALE_API
/*
 * Various language tags need to be changed:
 * quz -> qu
 * prs -> fa
 *
 * buffer must be a writable NUL-terminated string and len its length.
 * Everything after the 3-letter tag is shifted left by one character.
 * Source and destination overlap, so uprv_memmove() is used; strcat() on
 * overlapping strings is undefined behavior.
 * Wrapped in do { } while (0) so the macro acts as a single statement.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    do { \
        if ((len) >= 3) { \
            if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                (buffer)[0] = 'f'; (buffer)[1] = 'a'; \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } \
        } \
    } while (0)
1010
1011 #endif
1012 U_CAPI int32_t
uprv_convertToPosix(uint32_t hostid,char * posixID,int32_t posixIDCapacity,UErrorCode * status)1013 uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
1014 {
1015 uint16_t langID;
1016 uint32_t localeIndex;
1017 UBool bLookup = TRUE;
1018 const char *pPosixID = NULL;
1019
1020 #ifdef USE_WINDOWS_LOCALE_API
1021 // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
1022 // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
1023 // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
1024 // use the Windows API to resolve locale ID for this specific case.
1025 if ((hostid & 0x3FF) != 0x92) {
1026 int32_t tmpLen = 0;
1027 char locName[157]; /* ULOC_FULLNAME_CAPACITY */
1028
1029 tmpLen = GetLocaleInfoA(hostid, LOCALE_SNAME, (LPSTR)locName, UPRV_LENGTHOF(locName));
1030 if (tmpLen > 1) {
1031 /* Windows locale name may contain sorting variant, such as "es-ES_tradnl".
1032 In such case, we need special mapping data found in the hardcoded table
1033 in this source file. */
1034 char *p = uprv_strchr(locName, '_');
1035 if (p) {
1036 /* Keep the base locale, without variant */
1037 *p = 0;
1038 tmpLen = uprv_strlen(locName);
1039 }
1040 else {
1041 /* No hardcoded table lookup necessary */
1042 bLookup = FALSE;
1043 }
1044 /* Change the tag separator from '-' to '_' */
1045 p = locName;
1046 while (*p) {
1047 if (*p == '-') {
1048 *p = '_';
1049 }
1050 p++;
1051 }
1052 FIX_LANGUAGE_ID_TAG(locName, tmpLen);
1053 pPosixID = locName;
1054 }
1055 }
1056 #endif
1057 if (bLookup) {
1058 const char *pCandidate = NULL;
1059 langID = LANGUAGE_LCID(hostid);
1060
1061 for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
1062 if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
1063 pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
1064 break;
1065 }
1066 }
1067
1068 /* On Windows, when locale name has a variant, we still look up the hardcoded table.
1069 If a match in the hardcoded table is longer than the Windows locale name without
1070 variant, we use the one as the result */
1071 if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
1072 pPosixID = pCandidate;
1073 }
1074 }
1075
1076 if (pPosixID) {
1077 int32_t resLen = uprv_strlen(pPosixID);
1078 int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
1079 uprv_memcpy(posixID, pPosixID, copyLen);
1080 if (resLen < posixIDCapacity) {
1081 posixID[resLen] = 0;
1082 if (*status == U_STRING_NOT_TERMINATED_WARNING) {
1083 *status = U_ZERO_ERROR;
1084 }
1085 } else if (resLen == posixIDCapacity) {
1086 *status = U_STRING_NOT_TERMINATED_WARNING;
1087 } else {
1088 *status = U_BUFFER_OVERFLOW_ERROR;
1089 }
1090 return resLen;
1091 }
1092
1093 /* no match found */
1094 *status = U_ILLEGAL_ARGUMENT_ERROR;
1095 return -1;
1096 }
1097
1098 /*
1099 //////////////////////////////////////
1100 //
1101 // POSIX --> LCID
1102 // This should only be called from uloc_getLCID.
1103 // The locale ID must be in canonical form.
1104 // langID is separate so that this file doesn't depend on the uloc_* API.
1105 //
1106 /////////////////////////////////////
1107 */
1108
1109 U_CAPI uint32_t
uprv_convertToLCID(const char * langID,const char * posixID,UErrorCode * status)1110 uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
1111 {
1112
1113 uint32_t low = 0;
1114 uint32_t high = gLocaleCount;
1115 uint32_t mid;
1116 uint32_t oldmid = 0;
1117 int32_t compVal;
1118
1119 uint32_t value = 0;
1120 uint32_t fallbackValue = (uint32_t)-1;
1121 UErrorCode myStatus;
1122 uint32_t idx;
1123
1124 /* Check for incomplete id. */
1125 if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
1126 return 0;
1127 }
1128
1129 /*Binary search for the map entry for normal cases */
1130
1131 while (high > low) /*binary search*/{
1132
1133 mid = (high+low) >> 1; /*Finds median*/
1134
1135 if (mid == oldmid)
1136 break;
1137
1138 compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
1139 if (compVal < 0){
1140 high = mid;
1141 }
1142 else if (compVal > 0){
1143 low = mid;
1144 }
1145 else /*we found it*/{
1146 return getHostID(&gPosixIDmap[mid], posixID, status);
1147 }
1148 oldmid = mid;
1149 }
1150
1151 /*
1152 * Sometimes we can't do a binary search on posixID because some LCIDs
1153 * go to different locales. We hit one of those special cases.
1154 */
1155 for (idx = 0; idx < gLocaleCount; idx++ ) {
1156 myStatus = U_ZERO_ERROR;
1157 value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
1158 if (myStatus == U_ZERO_ERROR) {
1159 return value;
1160 }
1161 else if (myStatus == U_USING_FALLBACK_WARNING) {
1162 fallbackValue = value;
1163 }
1164 }
1165
1166 if (fallbackValue != (uint32_t)-1) {
1167 *status = U_USING_FALLBACK_WARNING;
1168 return fallbackValue;
1169 }
1170
1171 /* no match found */
1172 *status = U_ILLEGAL_ARGUMENT_ERROR;
1173 return 0; /* return international (root) */
1174 }
1175
1176