// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 **********************************************************************
 *   Copyright (C) 1996-2016, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *
 * Provides functionality for mapping between
 * LCID and Posix IDs or ICU locale to codepage
 *
 * Note: All classes and code in this file are
 *       intended for internal use only.
 *
 * Methods of interest:
 *   unsigned long convertToLCID(const char*);
 *   const char* convertToPosix(unsigned long);
 *
 * Kathleen Wilson, 4/30/96
 *
 *  Date        Name        Description
 *  3/11/97     aliu        Fixed off-by-one bug in assignment operator. Added
 *                          setId() method and safety check against
 *                          MAX_ID_LENGTH.
 * 04/23/99     stephen     Added C wrapper for convertToPosix.
 * 09/18/00     george      Removed the memory leaks.
 * 08/23/01     george      Convert to C
 */
29 
30 #include "locmap.h"
31 #include "cstring.h"
32 #include "cmemory.h"
33 
/*
 * USE_WINDOWS_LOCALE_API is defined only when building for Windows with a
 * Microsoft compiler whose _MSC_VER is at least 1500.
 */
#if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
/*
 * TODO: It seems like we should widen this to
 * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
 * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
 * but those use gcc and won't have defined(_MSC_VER).
 * We might need to #include some Windows header and test for some version macro from there.
 * Or call some Windows function and see what it returns.
 */
#define USE_WINDOWS_LOCALE_API
#endif

/* The Windows headers are pulled in only when the locale API is enabled. */
#ifdef USE_WINDOWS_LOCALE_API
#include <windows.h>
#include <winnls.h>
#endif
50 
/*
 * Note:
 * The mapping from Win32 locale ID numbers to POSIX locale strings should
 * be the faster one.
 *
 * Many LCID values come from winnt.h
 * Some also come from http://www.microsoft.com/globaldev/reference/lcid-all.mspx
 */
59 
/*
////////////////////////////////////////////////
//
// Internal Classes for LCID <--> POSIX Mapping
//
/////////////////////////////////////////////////
*/

/**
 * One row of a mapping table: a host LCID paired with a POSIX locale ID.
 */
typedef struct ILcidPosixElement
{
    const uint32_t hostID;       /* LCID in host format, such as 0x044d */
    const char * const posixID;  /* POSIX ID string, such as "de" or "de_CH" */
} ILcidPosixElement;
73 
/**
 * A per-language table: a pointer to an array of ILcidPosixElement rows
 * together with the number of rows in that array.
 */
typedef struct ILcidPosixMap
{
    const uint32_t numRegions;                          /* number of rows in regionMaps */
    const struct ILcidPosixElement* const regionMaps;   /* the rows for this language */
} ILcidPosixMap;
79 
80 
/*
/////////////////////////////////////////////////
//
// Easy macros to make the LCID <--> POSIX Mapping
//
/////////////////////////////////////////////////
*/

/**
 * The standard one language/one country mapping for LCID.
 * The first element must be the language, and the following
 * elements are the language with the country.
 *
 * Expands to a complete definition of the static array locmap_<languageID>
 * (including the terminating semicolon), so invocations need no trailing ';'.
 * Both ID arguments are stringized, so they must be passed as bare tokens.
 *
 * @param hostID LCID in host format such as 0x044d
 * @param languageID posix ID of just the language such as 'de'
 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
static const ILcidPosixElement locmap_ ## languageID [] = { \
    {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
    {hostID, #posixID}, \
};
102 
/**
 * Define a subtable by ID.
 *
 * Expands only to the declarator "static const ILcidPosixElement locmap_<id> [] =";
 * the caller supplies the brace-enclosed initializer and the closing ';'.
 *
 * @param id the POSIX ID, either a language or language_TERRITORY
 */
#define ILCID_POSIX_SUBTABLE(id) \
static const ILcidPosixElement locmap_ ## id [] =
109 
110 
/**
 * Create the map for the posixID. This macro supposes that the language string
 * name is the same as the global variable name, and that the first element
 * in the ILcidPosixElement is just the language.
 *
 * Expands to an ILcidPosixMap initializer: the element count of
 * locmap_<_posixID> followed by the array itself.
 *
 * @param _posixID the full POSIX ID for this entry.
 */
#define ILCID_POSIX_MAP(_posixID) \
    {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}
119 
/*
////////////////////////////////////////////
//
// Create the table of LCID to POSIX Mapping
// None of it should be dynamically created.
//
// Keep static locale variables inside the function so that
// it can be created properly during static init.
//
// Note: This table should be updated periodically. Check the National Language Support API Reference Website.
//       Microsoft is moving away from LCID in favor of locale name as of Vista.  This table needs to be
//       maintained for support of older Windows versions.
//       Update: Windows 7 (091130)
//
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
//       @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
//       called from uloc_getLCID(), keywords other than collation are already removed. If we really need
//       to support other keywords in this mapping data, we must update the implementation.
////////////////////////////////////////////
*/
140 
141 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
142 
ILCID_POSIX_SUBTABLE(ar)143 ILCID_POSIX_SUBTABLE(ar) {
144     {0x01,   "ar"},
145     {0x3801, "ar_AE"},
146     {0x3c01, "ar_BH"},
147     {0x1401, "ar_DZ"},
148     {0x0c01, "ar_EG"},
149     {0x0801, "ar_IQ"},
150     {0x2c01, "ar_JO"},
151     {0x3401, "ar_KW"},
152     {0x3001, "ar_LB"},
153     {0x1001, "ar_LY"},
154     {0x1801, "ar_MA"},
155     {0x1801, "ar_MO"},
156     {0x2001, "ar_OM"},
157     {0x4001, "ar_QA"},
158     {0x0401, "ar_SA"},
159     {0x2801, "ar_SY"},
160     {0x1c01, "ar_TN"},
161     {0x2401, "ar_YE"}
162 };
163 
164 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
165 ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
166 ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
167 
ILCID_POSIX_SUBTABLE(az)168 ILCID_POSIX_SUBTABLE(az) {
169     {0x2c,   "az"},
170     {0x082c, "az_Cyrl_AZ"},  /* Cyrillic based */
171     {0x742c, "az_Cyrl"},  /* Cyrillic based */
172     {0x042c, "az_Latn_AZ"}, /* Latin based */
173     {0x782c, "az_Latn"}, /* Latin based */
174     {0x042c, "az_AZ"} /* Latin based */
175 };
176 
177 ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
178 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
179 
180 /*ILCID_POSIX_SUBTABLE(ber) {
181     {0x5f,   "ber"},
182     {0x045f, "ber_Arab_DZ"},
183     {0x045f, "ber_Arab"},
184     {0x085f, "ber_Latn_DZ"},
185     {0x085f, "ber_Latn"}
186 };*/
187 
188 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
189 
190 ILCID_POSIX_ELEMENT_ARRAY(0x0466, bin, bin_NG)
191 
ILCID_POSIX_SUBTABLE(bn)192 ILCID_POSIX_SUBTABLE(bn) {
193     {0x45,   "bn"},
194     {0x0845, "bn_BD"},
195     {0x0445, "bn_IN"}
196 };
197 
ILCID_POSIX_SUBTABLE(bo)198 ILCID_POSIX_SUBTABLE(bo) {
199     {0x51,   "bo"},
200     {0x0851, "bo_BT"},
201     {0x0451, "bo_CN"},
202     {0x0c51, "dz_BT"}
203 };
204 
205 ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
206 
ILCID_POSIX_SUBTABLE(ca)207 ILCID_POSIX_SUBTABLE(ca) {
208     {0x03,   "ca"},
209     {0x0403, "ca_ES"},
210     {0x0803, "ca_ES_VALENCIA"}
211 };
212 
213 ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
214 ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)
215 
ILCID_POSIX_SUBTABLE(ckb)216 ILCID_POSIX_SUBTABLE(ckb) {
217     {0x92,   "ckb"},
218     {0x7c92, "ckb_Arab"},
219     {0x0492, "ckb_Arab_IQ"}
220 };
221 
222 /* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
223 ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
224 
225 ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
226 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
227 
ILCID_POSIX_SUBTABLE(de)228 ILCID_POSIX_SUBTABLE(de) {
229     {0x07,   "de"},
230     {0x0c07, "de_AT"},
231     {0x0807, "de_CH"},
232     {0x0407, "de_DE"},
233     {0x1407, "de_LI"},
234     {0x1007, "de_LU"},
235     {0x10407,"de_DE@collation=phonebook"},  /*This is really de_DE_PHONEBOOK on Windows*/
236     {0x10407,"de@collation=phonebook"}  /*This is really de_DE_PHONEBOOK on Windows*/
237 };
238 
239 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
240 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
241 
ILCID_POSIX_SUBTABLE(en)242 ILCID_POSIX_SUBTABLE(en) {
243     {0x09,   "en"},
244     {0x0c09, "en_AU"},
245     {0x2809, "en_BZ"},
246     {0x1009, "en_CA"},
247     {0x0809, "en_GB"},
248     {0x3c09, "en_HK"},
249     {0x3809, "en_ID"},
250     {0x1809, "en_IE"},
251     {0x4009, "en_IN"},
252     {0x2009, "en_JM"},
253     {0x4409, "en_MY"},
254     {0x1409, "en_NZ"},
255     {0x3409, "en_PH"},
256     {0x4809, "en_SG"},
257     {0x2C09, "en_TT"},
258     {0x0409, "en_US"},
259     {0x007f, "en_US_POSIX"}, /* duplicate for roundtripping */
260     {0x2409, "en_VI"},  /* Virgin Islands AKA Caribbean Islands (en_CB). */
261     {0x1c09, "en_ZA"},
262     {0x3009, "en_ZW"},
263     {0x2409, "en_029"},
264     {0x0409, "en_AS"},  /* Alias for en_US. Leave last. */
265     {0x0409, "en_GU"},  /* Alias for en_US. Leave last. */
266     {0x0409, "en_MH"},  /* Alias for en_US. Leave last. */
267     {0x0409, "en_MP"},  /* Alias for en_US. Leave last. */
268     {0x0409, "en_UM"}   /* Alias for en_US. Leave last. */
269 };
270 
ILCID_POSIX_SUBTABLE(en_US_POSIX)271 ILCID_POSIX_SUBTABLE(en_US_POSIX) {
272     {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
273 };
274 
ILCID_POSIX_SUBTABLE(es)275 ILCID_POSIX_SUBTABLE(es) {
276     {0x0a,   "es"},
277     {0x2c0a, "es_AR"},
278     {0x400a, "es_BO"},
279     {0x340a, "es_CL"},
280     {0x240a, "es_CO"},
281     {0x140a, "es_CR"},
282     {0x5c0a, "es_CU"},
283     {0x1c0a, "es_DO"},
284     {0x300a, "es_EC"},
285     {0x0c0a, "es_ES"},      /*Modern sort.*/
286     {0x100a, "es_GT"},
287     {0x480a, "es_HN"},
288     {0x080a, "es_MX"},
289     {0x4c0a, "es_NI"},
290     {0x180a, "es_PA"},
291     {0x280a, "es_PE"},
292     {0x500a, "es_PR"},
293     {0x3c0a, "es_PY"},
294     {0x440a, "es_SV"},
295     {0x540a, "es_US"},
296     {0x380a, "es_UY"},
297     {0x200a, "es_VE"},
298     {0x580a, "es_419"},
299     {0x040a, "es_ES@collation=traditional"},
300     {0x040a, "es@collation=traditional"}
301 };
302 
303 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
304 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
305 
306 /* ISO-639 doesn't distinguish between Persian and Dari.*/
ILCID_POSIX_SUBTABLE(fa)307 ILCID_POSIX_SUBTABLE(fa) {
308     {0x29,   "fa"},
309     {0x0429, "fa_IR"},  /* Persian/Farsi (Iran) */
310     {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
311 };
312 
313 /* duplicate for roundtripping */
ILCID_POSIX_SUBTABLE(fa_AF)314 ILCID_POSIX_SUBTABLE(fa_AF) {
315     {0x8c,   "fa_AF"},  /* Persian/Dari (Afghanistan) */
316     {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
317 };
318 
ILCID_POSIX_SUBTABLE(ff)319 ILCID_POSIX_SUBTABLE(ff) {
320     {0x67,   "ff"},
321     {0x7c67, "ff_Latn"},
322     {0x0867, "ff_Latn_SN"},
323     {0x0467, "ff_NG"}
324 };
325 
326 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
327 ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
328 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
329 
ILCID_POSIX_SUBTABLE(fr)330 ILCID_POSIX_SUBTABLE(fr) {
331     {0x0c,   "fr"},
332     {0x080c, "fr_BE"},
333     {0x0c0c, "fr_CA"},
334     {0x240c, "fr_CD"},
335     {0x240c, "fr_CG"},
336     {0x100c, "fr_CH"},
337     {0x300c, "fr_CI"},
338     {0x2c0c, "fr_CM"},
339     {0x040c, "fr_FR"},
340     {0x3c0c, "fr_HT"},
341     {0x140c, "fr_LU"},
342     {0x380c, "fr_MA"},
343     {0x180c, "fr_MC"},
344     {0x340c, "fr_ML"},
345     {0x200c, "fr_RE"},
346     {0x280c, "fr_SN"},
347     {0xe40c, "fr_015"},
348     {0x1c0c, "fr_029"}
349 };
350 
351 ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
352 
353 ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
354 
ILCID_POSIX_SUBTABLE(ga)355 ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
356     {0x3c,   "ga"},
357     {0x083c, "ga_IE"},
358     {0x043c, "gd_GB"}
359 };
360 
ILCID_POSIX_SUBTABLE(gd)361 ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
362     {0x91,   "gd"},
363     {0x0491, "gd_GB"}
364 };
365 
366 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
367 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
368 ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
369 ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
370 
ILCID_POSIX_SUBTABLE(ha)371 ILCID_POSIX_SUBTABLE(ha) {
372     {0x68,   "ha"},
373     {0x7c68, "ha_Latn"},
374     {0x0468, "ha_Latn_NG"},
375 };
376 
377 ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
378 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
379 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
380 
381 /* This LCID is really four different locales.*/
ILCID_POSIX_SUBTABLE(hr)382 ILCID_POSIX_SUBTABLE(hr) {
383     {0x1a,   "hr"},
384     {0x141a, "bs_Latn_BA"},  /* Bosnian, Bosnia and Herzegovina */
385     {0x681a, "bs_Latn"},  /* Bosnian, Bosnia and Herzegovina */
386     {0x141a, "bs_BA"},  /* Bosnian, Bosnia and Herzegovina */
387     {0x781a, "bs"},     /* Bosnian */
388     {0x201a, "bs_Cyrl_BA"},  /* Bosnian, Bosnia and Herzegovina */
389     {0x641a, "bs_Cyrl"},  /* Bosnian, Bosnia and Herzegovina */
390     {0x101a, "hr_BA"},  /* Croatian in Bosnia */
391     {0x041a, "hr_HR"},  /* Croatian*/
392     {0x2c1a, "sr_Latn_ME"},
393     {0x241a, "sr_Latn_RS"},
394     {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
395     {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
396     {0x701a, "sr_Latn"},    /* It's 0x1a or 0x081a, pick one to make the test program happy. */
397     {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
398     {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
399     {0x301a, "sr_Cyrl_ME"},
400     {0x281a, "sr_Cyrl_RS"},
401     {0x6c1a, "sr_Cyrl"},    /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
402     {0x7c1a, "sr"}          /* In CLDR sr is sr_Cyrl. */
403 };
404 
ILCID_POSIX_SUBTABLE(hsb)405 ILCID_POSIX_SUBTABLE(hsb) {
406     {0x2E,   "hsb"},
407     {0x042E, "hsb_DE"},
408     {0x082E, "dsb_DE"},
409     {0x7C2E, "dsb"},
410 };
411 
412 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
413 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
414 ILCID_POSIX_ELEMENT_ARRAY(0x0469, ibb, ibb_NG)
415 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
416 ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
417 ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
418 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
419 
ILCID_POSIX_SUBTABLE(it)420 ILCID_POSIX_SUBTABLE(it) {
421     {0x10,   "it"},
422     {0x0810, "it_CH"},
423     {0x0410, "it_IT"}
424 };
425 
ILCID_POSIX_SUBTABLE(iu)426 ILCID_POSIX_SUBTABLE(iu) {
427     {0x5d,   "iu"},
428     {0x045d, "iu_Cans_CA"},
429     {0x785d, "iu_Cans"},
430     {0x085d, "iu_Latn_CA"},
431     {0x7c5d, "iu_Latn"}
432 };
433 
434 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL)    /*Left in for compatibility*/
435 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
436 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
437 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
438 ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
439 ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
440 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
441 
ILCID_POSIX_SUBTABLE(ko)442 ILCID_POSIX_SUBTABLE(ko) {
443     {0x12,   "ko"},
444     {0x0812, "ko_KP"},
445     {0x0412, "ko_KR"}
446 };
447 
448 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
449 ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr,  kr_NG)
450 
ILCID_POSIX_SUBTABLE(ks)451 ILCID_POSIX_SUBTABLE(ks) {         /* We could add PK and CN too */
452     {0x60,   "ks"},
453     {0x0860, "ks_IN"},              /* Documentation doesn't mention script */
454     {0x0460, "ks_Arab_IN"},
455     {0x0860, "ks_Deva_IN"}
456 };
457 
458 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG)   /* Kyrgyz is spoken in Kyrgyzstan */
459 ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT)   /* TODO: Verify the country */
460 ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
461 ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
462 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
463 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
464 ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
465 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
466 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
467 
ILCID_POSIX_SUBTABLE(mn)468 ILCID_POSIX_SUBTABLE(mn) {
469     {0x50,   "mn"},
470     {0x0450, "mn_MN"},
471     {0x7c50, "mn_Mong"},
472     {0x0850, "mn_Mong_CN"},
473     {0x0850, "mn_CN"},
474     {0x7850, "mn_Cyrl"},
475     {0x0c50, "mn_Mong_MN"}
476 };
477 
478 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
479 ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
480 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
481 
ILCID_POSIX_SUBTABLE(ms)482 ILCID_POSIX_SUBTABLE(ms) {
483     {0x3e,   "ms"},
484     {0x083e, "ms_BN"},   /* Brunei Darussalam*/
485     {0x043e, "ms_MY"}    /* Malaysia*/
486 };
487 
488 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
489 ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
490 
ILCID_POSIX_SUBTABLE(ne)491 ILCID_POSIX_SUBTABLE(ne) {
492     {0x61,   "ne"},
493     {0x0861, "ne_IN"},   /* India*/
494     {0x0461, "ne_NP"}    /* Nepal*/
495 };
496 
ILCID_POSIX_SUBTABLE(nl)497 ILCID_POSIX_SUBTABLE(nl) {
498     {0x13,   "nl"},
499     {0x0813, "nl_BE"},
500     {0x0413, "nl_NL"}
501 };
502 
503 /* The "no" locale split into nb and nn.  By default in ICU, "no" is nb.*/
ILCID_POSIX_SUBTABLE(no)504 ILCID_POSIX_SUBTABLE(no) {
505     {0x14,   "no"},     /* really nb_NO */
506     {0x7c14, "nb"},     /* really nb */
507     {0x0414, "nb_NO"},  /* really nb_NO. Keep first in the 414 list. */
508     {0x0414, "no_NO"},  /* really nb_NO */
509     {0x0814, "nn_NO"},  /* really nn_NO. Keep first in the 814 list.  */
510     {0x7814, "nn"},     /* It's 0x14 or 0x814, pick one to make the test program happy. */
511     {0x0814, "no_NO_NY"}/* really nn_NO */
512 };
513 
514 ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA)   /* TODO: Verify the ISO-639 code */
515 ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
516 
ILCID_POSIX_SUBTABLE(om)517 ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
518     {0x72,   "om"},
519     {0x0472, "om_ET"},
520     {0x0472, "gaz_ET"}
521 };
522 
523 /* Declared as or_IN to get around compiler errors*/
ILCID_POSIX_SUBTABLE(or_IN)524 ILCID_POSIX_SUBTABLE(or_IN) {
525     {0x48,   "or"},
526     {0x0448, "or_IN"},
527 };
528 
529 
ILCID_POSIX_SUBTABLE(pa)530 ILCID_POSIX_SUBTABLE(pa) {
531     {0x46,   "pa"},
532     {0x0446, "pa_IN"},
533     {0x0846, "pa_PK"},
534     {0x0846, "pa_Arab_PK"}
535 };
536 
537 ILCID_POSIX_ELEMENT_ARRAY(0x0479, pap, pap_AN)
538 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
539 ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
540 
ILCID_POSIX_SUBTABLE(pt)541 ILCID_POSIX_SUBTABLE(pt) {
542     {0x16,   "pt"},
543     {0x0416, "pt_BR"},
544     {0x0816, "pt_PT"}
545 };
546 
ILCID_POSIX_SUBTABLE(qu)547 ILCID_POSIX_SUBTABLE(qu) {
548     {0x6b,   "qu"},
549     {0x046b, "qu_BO"},
550     {0x086b, "qu_EC"},
551     {0x0C6b, "qu_PE"},
552     {0x046b, "quz_BO"},
553     {0x086b, "quz_EC"},
554     {0x0C6b, "quz_PE"}
555 };
556 
ILCID_POSIX_SUBTABLE(quc)557 ILCID_POSIX_SUBTABLE(quc) {
558     {0x93,   "quc"},
559     {0x0493, "quc_CO"},
560     /*
561         "quc_Latn_GT" is an exceptional case. Language ID of "quc"
562         is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
563         under the group of "qut". "qut" is a retired ISO 639-3 language
564         code for West Central Quiche, and merged to "quc".
565         It looks Windows previously reserved "qut" for K'iche', but,
566         decided to use "quc" when adding a locale for K'iche' (Guatemala).
567 
568         This data structure used here assumes language ID bits in
569         LCID is unique for alphabetic language code. But this is not true
570         for "quc_Latn_GT". If we don't have the data below, LCID look up
571         by alphabetic locale ID (POSIX) will fail. The same entry is found
572         under "qut" below, which is required for reverse look up.
573     */
574     {0x0486, "quc_Latn_GT"}
575 };
576 
ILCID_POSIX_SUBTABLE(qut)577 ILCID_POSIX_SUBTABLE(qut) {
578     {0x86,   "qut"},
579     {0x0486, "qut_GT"},
580     /*
581         See the note in "quc" above.
582     */
583     {0x0486, "quc_Latn_GT"}
584 };
585 
586 ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
587 
ILCID_POSIX_SUBTABLE(ro)588 ILCID_POSIX_SUBTABLE(ro) {
589     {0x18,   "ro"},
590     {0x0418, "ro_RO"},
591     {0x0818, "ro_MD"}
592 };
593 
ILCID_POSIX_SUBTABLE(root)594 ILCID_POSIX_SUBTABLE(root) {
595     {0x00,   "root"}
596 };
597 
ILCID_POSIX_SUBTABLE(ru)598 ILCID_POSIX_SUBTABLE(ru) {
599     {0x19,   "ru"},
600     {0x0419, "ru_RU"},
601     {0x0819, "ru_MD"}
602 };
603 
604 ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
605 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
606 ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
607 
ILCID_POSIX_SUBTABLE(sd)608 ILCID_POSIX_SUBTABLE(sd) {
609     {0x59,   "sd"},
610     {0x0459, "sd_IN"},
611     {0x0459, "sd_Deva_IN"},
612     {0x0859, "sd_PK"}
613 };
614 
ILCID_POSIX_SUBTABLE(se)615 ILCID_POSIX_SUBTABLE(se) {
616     {0x3b,   "se"},
617     {0x0c3b, "se_FI"},
618     {0x043b, "se_NO"},
619     {0x083b, "se_SE"},
620     {0x783b, "sma"},
621     {0x183b, "sma_NO"},
622     {0x1c3b, "sma_SE"},
623     {0x7c3b, "smj"},
624     {0x703b, "smn"},
625     {0x743b, "sms"},
626     {0x103b, "smj_NO"},
627     {0x143b, "smj_SE"},
628     {0x243b, "smn_FI"},
629     {0x203b, "sms_FI"},
630 };
631 
632 ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
633 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
634 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
635 
ILCID_POSIX_SUBTABLE(so)636 ILCID_POSIX_SUBTABLE(so) { /* TODO: Verify the country */
637     {0x77,   "so"},
638     {0x0477, "so_ET"},
639     {0x0477, "so_SO"}
640 };
641 
642 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
643 ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
644 
ILCID_POSIX_SUBTABLE(sv)645 ILCID_POSIX_SUBTABLE(sv) {
646     {0x1d,   "sv"},
647     {0x081d, "sv_FI"},
648     {0x041d, "sv_SE"}
649 };
650 
651 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
652 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
653 
ILCID_POSIX_SUBTABLE(ta)654 ILCID_POSIX_SUBTABLE(ta) {
655     {0x49,   "ta"},
656     {0x0449, "ta_IN"},
657     {0x0849, "ta_LK"}
658 };
659 
660 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
661 
662 /* Cyrillic based by default */
ILCID_POSIX_SUBTABLE(tg)663 ILCID_POSIX_SUBTABLE(tg) {
664     {0x28,   "tg"},
665     {0x7c28, "tg_Cyrl"},
666     {0x0428, "tg_Cyrl_TJ"}
667 };
668 
669 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
670 
ILCID_POSIX_SUBTABLE(ti)671 ILCID_POSIX_SUBTABLE(ti) {
672     {0x73,   "ti"},
673     {0x0873, "ti_ER"},
674     {0x0473, "ti_ET"}
675 };
676 
677 ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
678 
ILCID_POSIX_SUBTABLE(tn)679 ILCID_POSIX_SUBTABLE(tn) {
680     {0x32,   "tn"},
681     {0x0832, "tn_BW"},
682     {0x0432, "tn_ZA"}
683 };
684 
685 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
686 ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
687 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
688 
ILCID_POSIX_SUBTABLE(tzm)689 ILCID_POSIX_SUBTABLE(tzm) {
690     {0x5f,   "tzm"},
691     {0x7c5f, "tzm_Latn"},
692     {0x085f, "tzm_Latn_DZ"},
693     {0x105f, "tzm_Tfng_MA"},
694     {0x045f, "tzm_Arab_MA"},
695     {0x045f, "tmz"}
696 };
697 
ILCID_POSIX_SUBTABLE(ug)698 ILCID_POSIX_SUBTABLE(ug) {
699     {0x80,   "ug"},
700     {0x0480, "ug_CN"},
701     {0x0480, "ug_Arab_CN"}
702 };
703 
704 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
705 
ILCID_POSIX_SUBTABLE(ur)706 ILCID_POSIX_SUBTABLE(ur) {
707     {0x20,   "ur"},
708     {0x0820, "ur_IN"},
709     {0x0420, "ur_PK"}
710 };
711 
ILCID_POSIX_SUBTABLE(uz)712 ILCID_POSIX_SUBTABLE(uz) {
713     {0x43,   "uz"},
714     {0x0843, "uz_Cyrl_UZ"},  /* Cyrillic based */
715     {0x7843, "uz_Cyrl"},  /* Cyrillic based */
716     {0x0843, "uz_UZ"},  /* Cyrillic based */
717     {0x0443, "uz_Latn_UZ"}, /* Latin based */
718     {0x7c43, "uz_Latn"} /* Latin based */
719 };
720 
ILCID_POSIX_SUBTABLE(ve)721 ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
722     {0x33,   "ve"},
723     {0x0433, "ve_ZA"},
724     {0x0433, "ven_ZA"}
725 };
726 
727 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
728 ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
729 ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
730 ILCID_POSIX_ELEMENT_ARRAY(0x043d, yi, yi)
731 ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
732 
ILCID_POSIX_SUBTABLE(zh)733 ILCID_POSIX_SUBTABLE(zh) {
734     {0x0004, "zh_Hans"},
735     {0x7804, "zh"},
736     {0x0804, "zh_CN"},
737     {0x0804, "zh_Hans_CN"},
738     {0x0c04, "zh_Hant_HK"},
739     {0x0c04, "zh_HK"},
740     {0x1404, "zh_Hant_MO"},
741     {0x1404, "zh_MO"},
742     {0x1004, "zh_Hans_SG"},
743     {0x1004, "zh_SG"},
744     {0x0404, "zh_Hant_TW"},
745     {0x7c04, "zh_Hant"},
746     {0x0404, "zh_TW"},
747     {0x30404,"zh_Hant_TW"},     /* Bopomofo order */
748     {0x30404,"zh_TW"},          /* Bopomofo order */
749     {0x20004,"zh@collation=stroke"},
750     {0x20404,"zh_Hant@collation=stroke"},
751     {0x20404,"zh_Hant_TW@collation=stroke"},
752     {0x20404,"zh_TW@collation=stroke"},
753     {0x20804,"zh_Hans@collation=stroke"},
754     {0x20804,"zh_Hans_CN@collation=stroke"},
755     {0x20804,"zh_CN@collation=stroke"}
756 };
757 
758 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
759 
760 /* This must be static and grouped by LCID. */
761 static const ILcidPosixMap gPosixIDmap[] = {
762     ILCID_POSIX_MAP(af),    /*  af  Afrikaans                 0x36 */
763     ILCID_POSIX_MAP(am),    /*  am  Amharic                   0x5e */
764     ILCID_POSIX_MAP(ar),    /*  ar  Arabic                    0x01 */
765     ILCID_POSIX_MAP(arn),   /*  arn Araucanian/Mapudungun     0x7a */
766     ILCID_POSIX_MAP(as),    /*  as  Assamese                  0x4d */
767     ILCID_POSIX_MAP(az),    /*  az  Azerbaijani               0x2c */
768     ILCID_POSIX_MAP(ba),    /*  ba  Bashkir                   0x6d */
769     ILCID_POSIX_MAP(be),    /*  be  Belarusian                0x23 */
770 /*    ILCID_POSIX_MAP(ber),     ber Berber/Tamazight          0x5f */
771     ILCID_POSIX_MAP(bg),    /*  bg  Bulgarian                 0x02 */
772     ILCID_POSIX_MAP(bin),   /*  bin Edo                       0x66 */
773     ILCID_POSIX_MAP(bn),    /*  bn  Bengali; Bangla           0x45 */
774     ILCID_POSIX_MAP(bo),    /*  bo  Tibetan                   0x51 */
775     ILCID_POSIX_MAP(br),    /*  br  Breton                    0x7e */
776     ILCID_POSIX_MAP(ca),    /*  ca  Catalan                   0x03 */
777     ILCID_POSIX_MAP(chr),   /*  chr Cherokee                  0x5c */
778     ILCID_POSIX_MAP(ckb),   /*  ckb Sorani (Central Kurdish)  0x92 */
779     ILCID_POSIX_MAP(co),    /*  co  Corsican                  0x83 */
780     ILCID_POSIX_MAP(cs),    /*  cs  Czech                     0x05 */
781     ILCID_POSIX_MAP(cy),    /*  cy  Welsh                     0x52 */
782     ILCID_POSIX_MAP(da),    /*  da  Danish                    0x06 */
783     ILCID_POSIX_MAP(de),    /*  de  German                    0x07 */
784     ILCID_POSIX_MAP(dv),    /*  dv  Divehi                    0x65 */
785     ILCID_POSIX_MAP(el),    /*  el  Greek                     0x08 */
786     ILCID_POSIX_MAP(en),    /*  en  English                   0x09 */
787     ILCID_POSIX_MAP(en_US_POSIX), /*    invariant             0x7f */
788     ILCID_POSIX_MAP(es),    /*  es  Spanish                   0x0a */
789     ILCID_POSIX_MAP(et),    /*  et  Estonian                  0x25 */
790     ILCID_POSIX_MAP(eu),    /*  eu  Basque                    0x2d */
791     ILCID_POSIX_MAP(fa),    /*  fa  Persian/Farsi             0x29 */
792     ILCID_POSIX_MAP(fa_AF), /*  fa  Persian/Dari              0x8c */
793     ILCID_POSIX_MAP(ff),    /*  ff  Fula                      0x67 */
794     ILCID_POSIX_MAP(fi),    /*  fi  Finnish                   0x0b */
795     ILCID_POSIX_MAP(fil),   /*  fil Filipino                  0x64 */
796     ILCID_POSIX_MAP(fo),    /*  fo  Faroese                   0x38 */
797     ILCID_POSIX_MAP(fr),    /*  fr  French                    0x0c */
798     ILCID_POSIX_MAP(fuv),   /*  fuv Fulfulde - Nigeria        0x67 */
799     ILCID_POSIX_MAP(fy),    /*  fy  Frisian                   0x62 */
800     ILCID_POSIX_MAP(ga),    /*  *   Gaelic (Ireland,Scotland) 0x3c */
801     ILCID_POSIX_MAP(gd),    /*  gd  Gaelic (United Kingdom)   0x91 */
802     ILCID_POSIX_MAP(gl),    /*  gl  Galician                  0x56 */
803     ILCID_POSIX_MAP(gn),    /*  gn  Guarani                   0x74 */
804     ILCID_POSIX_MAP(gsw),   /*  gsw Alemanic/Alsatian/Swiss German 0x84 */
805     ILCID_POSIX_MAP(gu),    /*  gu  Gujarati                  0x47 */
806     ILCID_POSIX_MAP(ha),    /*  ha  Hausa                     0x68 */
807     ILCID_POSIX_MAP(haw),   /*  haw Hawaiian                  0x75 */
808     ILCID_POSIX_MAP(he),    /*  he  Hebrew (formerly iw)      0x0d */
809     ILCID_POSIX_MAP(hi),    /*  hi  Hindi                     0x39 */
810     ILCID_POSIX_MAP(hr),    /*  *   Croatian and others       0x1a */
811     ILCID_POSIX_MAP(hsb),   /*  hsb Upper Sorbian             0x2e */
812     ILCID_POSIX_MAP(hu),    /*  hu  Hungarian                 0x0e */
813     ILCID_POSIX_MAP(hy),    /*  hy  Armenian                  0x2b */
814     ILCID_POSIX_MAP(ibb),   /*  ibb Ibibio - Nigeria          0x69 */
815     ILCID_POSIX_MAP(id),    /*  id  Indonesian (formerly in)  0x21 */
816     ILCID_POSIX_MAP(ig),    /*  ig  Igbo                      0x70 */
817     ILCID_POSIX_MAP(ii),    /*  ii  Sichuan Yi                0x78 */
818     ILCID_POSIX_MAP(is),    /*  is  Icelandic                 0x0f */
819     ILCID_POSIX_MAP(it),    /*  it  Italian                   0x10 */
820     ILCID_POSIX_MAP(iu),    /*  iu  Inuktitut                 0x5d */
821     ILCID_POSIX_MAP(iw),    /*  iw  Hebrew                    0x0d */
822     ILCID_POSIX_MAP(ja),    /*  ja  Japanese                  0x11 */
823     ILCID_POSIX_MAP(ka),    /*  ka  Georgian                  0x37 */
824     ILCID_POSIX_MAP(kk),    /*  kk  Kazakh                    0x3f */
825     ILCID_POSIX_MAP(kl),    /*  kl  Kalaallisut               0x6f */
826     ILCID_POSIX_MAP(km),    /*  km  Khmer                     0x53 */
827     ILCID_POSIX_MAP(kn),    /*  kn  Kannada                   0x4b */
828     ILCID_POSIX_MAP(ko),    /*  ko  Korean                    0x12 */
829     ILCID_POSIX_MAP(kok),   /*  kok Konkani                   0x57 */
830     ILCID_POSIX_MAP(kr),    /*  kr  Kanuri                    0x71 */
831     ILCID_POSIX_MAP(ks),    /*  ks  Kashmiri                  0x60 */
832     ILCID_POSIX_MAP(ky),    /*  ky  Kyrgyz                    0x40 */
833     ILCID_POSIX_MAP(lb),    /*  lb  Luxembourgish             0x6e */
834     ILCID_POSIX_MAP(la),    /*  la  Latin                     0x76 */
835     ILCID_POSIX_MAP(lo),    /*  lo  Lao                       0x54 */
836     ILCID_POSIX_MAP(lt),    /*  lt  Lithuanian                0x27 */
837     ILCID_POSIX_MAP(lv),    /*  lv  Latvian, Lettish          0x26 */
838     ILCID_POSIX_MAP(mi),    /*  mi  Maori                     0x81 */
839     ILCID_POSIX_MAP(mk),    /*  mk  Macedonian                0x2f */
840     ILCID_POSIX_MAP(ml),    /*  ml  Malayalam                 0x4c */
841     ILCID_POSIX_MAP(mn),    /*  mn  Mongolian                 0x50 */
842     ILCID_POSIX_MAP(mni),   /*  mni Manipuri                  0x58 */
843     ILCID_POSIX_MAP(moh),   /*  moh Mohawk                    0x7c */
844     ILCID_POSIX_MAP(mr),    /*  mr  Marathi                   0x4e */
845     ILCID_POSIX_MAP(ms),    /*  ms  Malay                     0x3e */
846     ILCID_POSIX_MAP(mt),    /*  mt  Maltese                   0x3a */
847     ILCID_POSIX_MAP(my),    /*  my  Burmese                   0x55 */
848 /*    ILCID_POSIX_MAP(nb),    //  no  Norwegian                 0x14 */
849     ILCID_POSIX_MAP(ne),    /*  ne  Nepali                    0x61 */
850     ILCID_POSIX_MAP(nl),    /*  nl  Dutch                     0x13 */
851 /*    ILCID_POSIX_MAP(nn),    //  no  Norwegian                 0x14 */
852     ILCID_POSIX_MAP(no),    /*  *   Norwegian                 0x14 */
853     ILCID_POSIX_MAP(nso),   /*  nso Sotho, Northern (Sepedi dialect) 0x6c */
854     ILCID_POSIX_MAP(oc),    /*  oc  Occitan                   0x82 */
855     ILCID_POSIX_MAP(om),    /*  om  Oromo                     0x72 */
856     ILCID_POSIX_MAP(or_IN), /*  or  Oriya                     0x48 */
857     ILCID_POSIX_MAP(pa),    /*  pa  Punjabi                   0x46 */
858     ILCID_POSIX_MAP(pap),   /*  pap Papiamentu                0x79 */
859     ILCID_POSIX_MAP(pl),    /*  pl  Polish                    0x15 */
860     ILCID_POSIX_MAP(ps),    /*  ps  Pashto                    0x63 */
861     ILCID_POSIX_MAP(pt),    /*  pt  Portuguese                0x16 */
862     ILCID_POSIX_MAP(qu),    /*  qu  Quechua                   0x6B */
863     ILCID_POSIX_MAP(quc),   /*  quc K'iche                    0x93 */
864     ILCID_POSIX_MAP(qut),   /*  qut K'iche                    0x86 */
865     ILCID_POSIX_MAP(rm),    /*  rm  Raeto-Romance/Romansh     0x17 */
866     ILCID_POSIX_MAP(ro),    /*  ro  Romanian                  0x18 */
867     ILCID_POSIX_MAP(root),  /*  root                          0x00 */
868     ILCID_POSIX_MAP(ru),    /*  ru  Russian                   0x19 */
869     ILCID_POSIX_MAP(rw),    /*  rw  Kinyarwanda               0x87 */
870     ILCID_POSIX_MAP(sa),    /*  sa  Sanskrit                  0x4f */
871     ILCID_POSIX_MAP(sah),   /*  sah Yakut                     0x85 */
872     ILCID_POSIX_MAP(sd),    /*  sd  Sindhi                    0x59 */
873     ILCID_POSIX_MAP(se),    /*  se  Sami                      0x3b */
874 /*    ILCID_POSIX_MAP(sh),    //  sh  Serbo-Croatian            0x1a */
875     ILCID_POSIX_MAP(si),    /*  si  Sinhalese                 0x5b */
876     ILCID_POSIX_MAP(sk),    /*  sk  Slovak                    0x1b */
877     ILCID_POSIX_MAP(sl),    /*  sl  Slovenian                 0x24 */
878     ILCID_POSIX_MAP(so),    /*  so  Somali                    0x77 */
879     ILCID_POSIX_MAP(sq),    /*  sq  Albanian                  0x1c */
880 /*    ILCID_POSIX_MAP(sr),    //  sr  Serbian                   0x1a */
881     ILCID_POSIX_MAP(st),    /*  st  Sutu                      0x30 */
882     ILCID_POSIX_MAP(sv),    /*  sv  Swedish                   0x1d */
883     ILCID_POSIX_MAP(sw),    /*  sw  Swahili                   0x41 */
884     ILCID_POSIX_MAP(syr),   /*  syr Syriac                    0x5A */
885     ILCID_POSIX_MAP(ta),    /*  ta  Tamil                     0x49 */
886     ILCID_POSIX_MAP(te),    /*  te  Telugu                    0x4a */
887     ILCID_POSIX_MAP(tg),    /*  tg  Tajik                     0x28 */
888     ILCID_POSIX_MAP(th),    /*  th  Thai                      0x1e */
889     ILCID_POSIX_MAP(ti),    /*  ti  Tigrigna                  0x73 */
890     ILCID_POSIX_MAP(tk),    /*  tk  Turkmen                   0x42 */
891     ILCID_POSIX_MAP(tn),    /*  tn  Tswana                    0x32 */
892     ILCID_POSIX_MAP(tr),    /*  tr  Turkish                   0x1f */
893     ILCID_POSIX_MAP(ts),    /*  ts  Tsonga                    0x31 */
894     ILCID_POSIX_MAP(tt),    /*  tt  Tatar                     0x44 */
895     ILCID_POSIX_MAP(tzm),   /*  tzm Tamazight                 0x5f */
896     ILCID_POSIX_MAP(ug),    /*  ug  Uighur                    0x80 */
897     ILCID_POSIX_MAP(uk),    /*  uk  Ukrainian                 0x22 */
898     ILCID_POSIX_MAP(ur),    /*  ur  Urdu                      0x20 */
899     ILCID_POSIX_MAP(uz),    /*  uz  Uzbek                     0x43 */
900     ILCID_POSIX_MAP(ve),    /*  ve  Venda                     0x33 */
901     ILCID_POSIX_MAP(vi),    /*  vi  Vietnamese                0x2a */
902     ILCID_POSIX_MAP(wo),    /*  wo  Wolof                     0x88 */
903     ILCID_POSIX_MAP(xh),    /*  xh  Xhosa                     0x34 */
904     ILCID_POSIX_MAP(yi),    /*  yi  Yiddish                   0x3d */
905     ILCID_POSIX_MAP(yo),    /*  yo  Yoruba                    0x6a */
906     ILCID_POSIX_MAP(zh),    /*  zh  Chinese                   0x04 */
907     ILCID_POSIX_MAP(zu),    /*  zu  Zulu                      0x35 */
908 };
909 
/* Number of per-language entries in gPosixIDmap; used as the upper bound
   when the table is scanned or binary-searched below. */
static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
911 
/**
 * Counts how many leading characters two IDs have in common.
 * Internal helper used by getHostID; comparison stops at the first
 * mismatch or at the end of id1, whichever comes first.
 *
 * @param id1 first NUL-terminated ID.
 * @param id2 second NUL-terminated ID.
 * @return the length of the common prefix of id1 and id2.
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t matched;

    for (matched = 0; id1[matched] != 0 && id1[matched] == id2[matched]; matched++) {
        /* nothing to do: the loop header does the counting */
    }
    return matched;
}
928 
929 /**
930  * Searches for a Windows LCID
931  *
932  * @param posixid the Posix style locale id.
933  * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
934  *               no equivalent Windows LCID.
935  * @return the LCID
936  */
937 static uint32_t
getHostID(const ILcidPosixMap * this_0,const char * posixID,UErrorCode * status)938 getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
939 {
940     int32_t bestIdx = 0;
941     int32_t bestIdxDiff = 0;
942     int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
943     uint32_t idx;
944 
945     for (idx = 0; idx < this_0->numRegions; idx++ ) {
946         int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
947         if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
948             if (posixIDlen == sameChars) {
949                 /* Exact match */
950                 return this_0->regionMaps[idx].hostID;
951             }
952             bestIdxDiff = sameChars;
953             bestIdx = idx;
954         }
955     }
956     /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
957     /* We also have to make sure that sid and si and similar string subsets don't match. */
958     if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
959         && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
960     {
961         *status = U_USING_FALLBACK_WARNING;
962         return this_0->regionMaps[bestIdx].hostID;
963     }
964 
965     /*no match found */
966     *status = U_ILLEGAL_ARGUMENT_ERROR;
967     return this_0->regionMaps->hostID;
968 }
969 
970 static const char*
getPosixID(const ILcidPosixMap * this_0,uint32_t hostID)971 getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
972 {
973     uint32_t i;
974     for (i = 0; i <= this_0->numRegions; i++)
975     {
976         if (this_0->regionMaps[i].hostID == hostID)
977         {
978             return this_0->regionMaps[i].posixID;
979         }
980     }
981 
982     /* If you get here, then no matching region was found,
983        so return the language id with the wild card region. */
984     return this_0->regionMaps[0].posixID;
985 }
986 
987 /*
988 //////////////////////////////////////
989 //
990 // LCID --> POSIX
991 //
992 /////////////////////////////////////
993 */
#ifdef USE_WINDOWS_LOCALE_API
/*
 * Various language tags need to be changed:
 * quz -> qu
 * prs -> fa
 *
 * The rewrite happens in place. buffer must be a modifiable, NUL-terminated
 * char array and len its length. Only the first three characters are
 * inspected; everything after them (for example "_PE") is kept and shifted
 * left by one character.
 *
 * The tail is shifted with uprv_memmove because source and destination
 * overlap, and concatenating overlapping strings is undefined behavior.
 * The do { } while (0) wrapper makes the macro usable as a single statement.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    do { \
        if ((len) >= 3) { \
            if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                (buffer)[0] = 'f'; (buffer)[1] = 'a'; \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } \
        } \
    } while (0)

#endif
1012 U_CAPI int32_t
uprv_convertToPosix(uint32_t hostid,char * posixID,int32_t posixIDCapacity,UErrorCode * status)1013 uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
1014 {
1015     uint16_t langID;
1016     uint32_t localeIndex;
1017     UBool bLookup = TRUE;
1018     const char *pPosixID = NULL;
1019 
1020 #ifdef USE_WINDOWS_LOCALE_API
1021     // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
1022     // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
1023     // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
1024     // use the Windows API to resolve locale ID for this specific case.
1025     if ((hostid & 0x3FF) != 0x92) {
1026         int32_t tmpLen = 0;
1027         char locName[157];  /* ULOC_FULLNAME_CAPACITY */
1028 
1029         tmpLen = GetLocaleInfoA(hostid, LOCALE_SNAME, (LPSTR)locName, UPRV_LENGTHOF(locName));
1030         if (tmpLen > 1) {
1031             /* Windows locale name may contain sorting variant, such as "es-ES_tradnl".
1032             In such case, we need special mapping data found in the hardcoded table
1033             in this source file. */
1034             char *p = uprv_strchr(locName, '_');
1035             if (p) {
1036                 /* Keep the base locale, without variant */
1037                 *p = 0;
1038                 tmpLen = uprv_strlen(locName);
1039             }
1040             else {
1041                 /* No hardcoded table lookup necessary */
1042                 bLookup = FALSE;
1043             }
1044             /* Change the tag separator from '-' to '_' */
1045             p = locName;
1046             while (*p) {
1047                 if (*p == '-') {
1048                     *p = '_';
1049                 }
1050                 p++;
1051             }
1052             FIX_LANGUAGE_ID_TAG(locName, tmpLen);
1053             pPosixID = locName;
1054         }
1055     }
1056 #endif
1057     if (bLookup) {
1058         const char *pCandidate = NULL;
1059         langID = LANGUAGE_LCID(hostid);
1060 
1061         for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
1062             if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
1063                 pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
1064                 break;
1065             }
1066         }
1067 
1068         /* On Windows, when locale name has a variant, we still look up the hardcoded table.
1069            If a match in the hardcoded table is longer than the Windows locale name without
1070            variant, we use the one as the result */
1071         if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
1072             pPosixID = pCandidate;
1073         }
1074     }
1075 
1076     if (pPosixID) {
1077         int32_t resLen = uprv_strlen(pPosixID);
1078         int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
1079         uprv_memcpy(posixID, pPosixID, copyLen);
1080         if (resLen < posixIDCapacity) {
1081             posixID[resLen] = 0;
1082             if (*status == U_STRING_NOT_TERMINATED_WARNING) {
1083                 *status = U_ZERO_ERROR;
1084             }
1085         } else if (resLen == posixIDCapacity) {
1086             *status = U_STRING_NOT_TERMINATED_WARNING;
1087         } else {
1088             *status = U_BUFFER_OVERFLOW_ERROR;
1089         }
1090         return resLen;
1091     }
1092 
1093     /* no match found */
1094     *status = U_ILLEGAL_ARGUMENT_ERROR;
1095     return -1;
1096 }
1097 
1098 /*
1099 //////////////////////////////////////
1100 //
1101 // POSIX --> LCID
1102 // This should only be called from uloc_getLCID.
1103 // The locale ID must be in canonical form.
1104 // langID is separate so that this file doesn't depend on the uloc_* API.
1105 //
1106 /////////////////////////////////////
1107 */
1108 
1109 U_CAPI uint32_t
uprv_convertToLCID(const char * langID,const char * posixID,UErrorCode * status)1110 uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
1111 {
1112 
1113     uint32_t   low    = 0;
1114     uint32_t   high   = gLocaleCount;
1115     uint32_t   mid;
1116     uint32_t   oldmid = 0;
1117     int32_t    compVal;
1118 
1119     uint32_t   value         = 0;
1120     uint32_t   fallbackValue = (uint32_t)-1;
1121     UErrorCode myStatus;
1122     uint32_t   idx;
1123 
1124     /* Check for incomplete id. */
1125     if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
1126         return 0;
1127     }
1128 
1129     /*Binary search for the map entry for normal cases */
1130 
1131     while (high > low)  /*binary search*/{
1132 
1133         mid = (high+low) >> 1; /*Finds median*/
1134 
1135         if (mid == oldmid)
1136             break;
1137 
1138         compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
1139         if (compVal < 0){
1140             high = mid;
1141         }
1142         else if (compVal > 0){
1143             low = mid;
1144         }
1145         else /*we found it*/{
1146             return getHostID(&gPosixIDmap[mid], posixID, status);
1147         }
1148         oldmid = mid;
1149     }
1150 
1151     /*
1152      * Sometimes we can't do a binary search on posixID because some LCIDs
1153      * go to different locales.  We hit one of those special cases.
1154      */
1155     for (idx = 0; idx < gLocaleCount; idx++ ) {
1156         myStatus = U_ZERO_ERROR;
1157         value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
1158         if (myStatus == U_ZERO_ERROR) {
1159             return value;
1160         }
1161         else if (myStatus == U_USING_FALLBACK_WARNING) {
1162             fallbackValue = value;
1163         }
1164     }
1165 
1166     if (fallbackValue != (uint32_t)-1) {
1167         *status = U_USING_FALLBACK_WARNING;
1168         return fallbackValue;
1169     }
1170 
1171     /* no match found */
1172     *status = U_ILLEGAL_ARGUMENT_ERROR;
1173     return 0;   /* return international (root) */
1174 }
1175 
1176