// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 **********************************************************************
 *   Copyright (C) 1996-2016, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *
 * Provides functionality for mapping between
 * LCID and Posix IDs or ICU locale to codepage
 *
 * Note: All classes and code in this file are
 *       intended for internal use only.
 *
 * Methods of interest:
 *   unsigned long convertToLCID(const char*);
 *   const char* convertToPosix(unsigned long);
 *
 * Kathleen Wilson, 4/30/96
 *
 *  Date        Name        Description
 *  3/11/97     aliu        Fixed off-by-one bug in assignment operator. Added
 *                          setId() method and safety check against
 *                          MAX_ID_LENGTH.
 * 04/23/99     stephen     Added C wrapper for convertToPosix.
 * 09/18/00     george      Removed the memory leaks.
 * 08/23/01     george      Convert to C
 */
29 
#include "locmap.h"
#include "cstring.h"
#include "cmemory.h"
#include "unicode/uloc.h"
34 
/*
 * Disabled (#if 0) configuration that would define
 * USE_WINDOWS_LCID_MAPPING_API and pull in the Windows NLS headers when
 * building with MSVC 2008 (_MSC_VER >= 1500) or later on Windows.
 */
#if 0
#if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
/*
 * TODO: It seems like we should widen this to
 * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
 * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
 * but those use gcc and won't have defined(_MSC_VER).
 * We might need to #include some Windows header and test for some version macro from there.
 * Or call some Windows function and see what it returns.
 */
#define USE_WINDOWS_LCID_MAPPING_API
#include <windows.h>
#include <winnls.h>
#endif
#endif
50 
/*
 * Note:
 * The mapping from Win32 locale ID numbers to POSIX locale strings should
 * be the faster one.
 *
 * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
 * [MS-LCID] Windows Language Code Identifier (LCID) Reference
 */
59 
/*
////////////////////////////////////////////////
//
// Internal Classes for LCID <--> POSIX Mapping
//
/////////////////////////////////////////////////
*/
67 
/**
 * A single LCID <--> POSIX pairing.
 */
typedef struct ILcidPosixElement
{
    const uint32_t hostID;          /* LCID in host format, such as 0x044d */
    const char * const posixID;     /* POSIX locale ID; may carry an @collation=XXX keyword */
} ILcidPosixElement;
73 
/**
 * One entry of the top-level map: a pointer to a locmap_* subtable together
 * with the number of elements that subtable holds.
 */
typedef struct ILcidPosixMap
{
    const uint32_t numRegions;                          /* element count of regionMaps */
    const struct ILcidPosixElement* const regionMaps;   /* the subtable itself */
} ILcidPosixMap;
79 
80 
/*
/////////////////////////////////////////////////
//
// Easy macros to make the LCID <--> POSIX Mapping
//
/////////////////////////////////////////////////
*/
88 
/**
 * The standard one language/one country mapping for LCID.
 * Expands to a two-element table named locmap_<languageID>: the first
 * element is the language alone, keyed by LANGUAGE_LCID(hostID), and the
 * second is the language with the country, keyed by the full hostID.
 * The expansion ends with its own semicolon, so uses take no trailing ';'.
 * @param hostID LCID in host format such as 0x044d
 * @param languageID posix ID of just the language such as 'de'
 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
static const ILcidPosixElement locmap_ ## languageID [] = { \
    {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
    {hostID, #posixID}, \
};
102 
/**
 * Define a subtable by ID.
 * Expands to the declaration head of a table named locmap_<id>, up to and
 * including the '='; the caller supplies the brace-enclosed element list
 * and the terminating semicolon.
 * @param id the POSIX ID, either a language or language_TERRITORY
 */
#define ILCID_POSIX_SUBTABLE(id) \
static const ILcidPosixElement locmap_ ## id [] =
109 
110 
/**
 * Create the map for the posixID. This macro supposes that the language string
 * name is the same as the global variable name, and that the first element
 * in the ILcidPosixElement is just the language.
 * Expands to an ILcidPosixMap initializer: the element count of
 * locmap_<_posixID> followed by the table itself.
 * @param _posixID the full POSIX ID for this entry.
 */
#define ILCID_POSIX_MAP(_posixID) \
    {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}
119 
/*
////////////////////////////////////////////
//
// Create the table of LCID to POSIX Mapping
// None of it should be dynamically created.
//
// Keep static locale variables inside the function so that
// it can be created properly during static init.
//
// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier
//       (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
//
//       Microsoft is moving away from LCID in favor of locale name as of Vista.  This table needs to be
//       maintained for support of older Windows versions.
//       Update: Windows 7 (091130)
//
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
//       @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
//       called from uloc_getLCID(), keywords other than collation are already removed. If we really need
//       to support other keywords in this mapping data, we must update the implementation.
////////////////////////////////////////////
*/
142 
// TODO: For Windows, ideally this table would be a list of exceptions rather than a complete list, as
// LocaleNameToLCID and LCIDToLocaleName provide 90% of these.
145 
146 ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
147 
ILCID_POSIX_SUBTABLE(ar)148 ILCID_POSIX_SUBTABLE(ar) {
149     {0x01,   "ar"},
150     {0x3801, "ar_AE"},
151     {0x3c01, "ar_BH"},
152     {0x1401, "ar_DZ"},
153     {0x0c01, "ar_EG"},
154     {0x0801, "ar_IQ"},
155     {0x2c01, "ar_JO"},
156     {0x3401, "ar_KW"},
157     {0x3001, "ar_LB"},
158     {0x1001, "ar_LY"},
159     {0x1801, "ar_MA"},
160     {0x1801, "ar_MO"},
161     {0x2001, "ar_OM"},
162     {0x4001, "ar_QA"},
163     {0x0401, "ar_SA"},
164     {0x2801, "ar_SY"},
165     {0x1c01, "ar_TN"},
166     {0x2401, "ar_YE"}
167 };
168 
169 ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
170 ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
171 ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
172 
ILCID_POSIX_SUBTABLE(az)173 ILCID_POSIX_SUBTABLE(az) {
174     {0x2c,   "az"},
175     {0x082c, "az_Cyrl_AZ"},  /* Cyrillic based */
176     {0x742c, "az_Cyrl"},  /* Cyrillic based */
177     {0x042c, "az_Latn_AZ"}, /* Latin based */
178     {0x782c, "az_Latn"}, /* Latin based */
179     {0x042c, "az_AZ"} /* Latin based */
180 };
181 
182 ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
183 ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
184 
/*ILCID_POSIX_SUBTABLE(ber) {
    {0x5f,   "ber"},
    {0x045f, "ber_Arab_DZ"},
    {0x045f, "ber_Arab"},
    {0x085f, "ber_Latn_DZ"},
    {0x085f, "ber_Latn"}
};*/
192 
193 ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
194 
ILCID_POSIX_SUBTABLE(bin)195 ILCID_POSIX_SUBTABLE(bin) {
196     {0x66, "bin"},
197     {0x0466, "bin_NG"}
198 };
199 
ILCID_POSIX_SUBTABLE(bn)200 ILCID_POSIX_SUBTABLE(bn) {
201     {0x45,   "bn"},
202     {0x0845, "bn_BD"},
203     {0x0445, "bn_IN"}
204 };
205 
ILCID_POSIX_SUBTABLE(bo)206 ILCID_POSIX_SUBTABLE(bo) {
207     {0x51,   "bo"},
208     {0x0851, "bo_BT"},
209     {0x0451, "bo_CN"},
210     {0x0c51, "dz_BT"}
211 };
212 
213 ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
214 
ILCID_POSIX_SUBTABLE(ca)215 ILCID_POSIX_SUBTABLE(ca) {
216     {0x03,   "ca"},
217     {0x0403, "ca_ES"},
218     {0x0803, "ca_ES_VALENCIA"}
219 };
220 
221 ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
222 
ILCID_POSIX_SUBTABLE(chr)223 ILCID_POSIX_SUBTABLE(chr) {
224     {0x05c,  "chr"},
225     {0x7c5c, "chr_Cher"},
226     {0x045c, "chr_Cher_US"},
227     {0x045c, "chr_US"}
228 };
229 
230 // ICU has chosen different names for these.
ILCID_POSIX_SUBTABLE(ckb)231 ILCID_POSIX_SUBTABLE(ckb) {
232     {0x92,   "ckb"},
233     {0x7c92, "ckb_Arab"},
234     {0x0492, "ckb_Arab_IQ"}
235 };
236 
237 /* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
238 ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
239 
240 ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
241 ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
242 
243 // Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
ILCID_POSIX_SUBTABLE(de)244 ILCID_POSIX_SUBTABLE(de) {
245     {0x07,   "de"},
246     {0x0c07, "de_AT"},
247     {0x0807, "de_CH"},
248     {0x0407, "de_DE"},
249     {0x1407, "de_LI"},
250     {0x1007, "de_LU"},
251     {0x10407,"de_DE@collation=phonebook"},  /*This is really de_DE_PHONEBOOK on Windows*/
252     {0x10407,"de@collation=phonebook"}  /*This is really de_DE_PHONEBOOK on Windows*/
253 };
254 
255 ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
256 ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
257 
258 // Windows uses an empty string for 'invariant'
ILCID_POSIX_SUBTABLE(en)259 ILCID_POSIX_SUBTABLE(en) {
260     {0x09,   "en"},
261     {0x0c09, "en_AU"},
262     {0x2809, "en_BZ"},
263     {0x1009, "en_CA"},
264     {0x0809, "en_GB"},
265     {0x3c09, "en_HK"},
266     {0x3809, "en_ID"},
267     {0x1809, "en_IE"},
268     {0x4009, "en_IN"},
269     {0x2009, "en_JM"},
270     {0x4409, "en_MY"},
271     {0x1409, "en_NZ"},
272     {0x3409, "en_PH"},
273     {0x4809, "en_SG"},
274     {0x2C09, "en_TT"},
275     {0x0409, "en_US"},
276     {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
277     {0x2409, "en_029"},
278     {0x1c09, "en_ZA"},
279     {0x3009, "en_ZW"},
280     {0x2409, "en_VI"},  /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
281     {0x0409, "en_AS"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
282     {0x0409, "en_GU"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
283     {0x0409, "en_MH"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
284     {0x0409, "en_MP"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
285     {0x0409, "en_UM"}   /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
286 };
287 
ILCID_POSIX_SUBTABLE(en_US_POSIX)288 ILCID_POSIX_SUBTABLE(en_US_POSIX) {
289     {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
290 };
291 
292 // Windows doesn't know POSIX or BCP47 Unicode traditional sort names
ILCID_POSIX_SUBTABLE(es)293 ILCID_POSIX_SUBTABLE(es) {
294     {0x0a,   "es"},
295     {0x2c0a, "es_AR"},
296     {0x400a, "es_BO"},
297     {0x340a, "es_CL"},
298     {0x240a, "es_CO"},
299     {0x140a, "es_CR"},
300     {0x5c0a, "es_CU"},
301     {0x1c0a, "es_DO"},
302     {0x300a, "es_EC"},
303     {0x0c0a, "es_ES"},      /*Modern sort.*/
304     {0x100a, "es_GT"},
305     {0x480a, "es_HN"},
306     {0x080a, "es_MX"},
307     {0x4c0a, "es_NI"},
308     {0x180a, "es_PA"},
309     {0x280a, "es_PE"},
310     {0x500a, "es_PR"},
311     {0x3c0a, "es_PY"},
312     {0x440a, "es_SV"},
313     {0x540a, "es_US"},
314     {0x380a, "es_UY"},
315     {0x200a, "es_VE"},
316     {0x580a, "es_419"},
317     {0x040a, "es_ES@collation=traditional"},
318     {0x040a, "es@collation=traditional"}        // Windows will treat this as es-ES@collation=traditional
319 };
320 
321 ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
322 ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
323 
324 /* ISO-639 doesn't distinguish between Persian and Dari.*/
ILCID_POSIX_SUBTABLE(fa)325 ILCID_POSIX_SUBTABLE(fa) {
326     {0x29,   "fa"},
327     {0x0429, "fa_IR"},  /* Persian/Farsi (Iran) */
328     {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
329 };
330 
331 
332 /* duplicate for roundtripping */
ILCID_POSIX_SUBTABLE(fa_AF)333 ILCID_POSIX_SUBTABLE(fa_AF) {
334     {0x8c,   "fa_AF"},  /* Persian/Dari (Afghanistan) */
335     {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
336 };
337 
ILCID_POSIX_SUBTABLE(ff)338 ILCID_POSIX_SUBTABLE(ff) {
339     {0x67,   "ff"},
340     {0x7c67, "ff_Latn"},
341     {0x0867, "ff_Latn_SN"},
342     {0x0467, "ff_NG"}
343 };
344 
345 ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
346 ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
347 ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
348 
ILCID_POSIX_SUBTABLE(fr)349 ILCID_POSIX_SUBTABLE(fr) {
350     {0x0c,   "fr"},
351     {0x080c, "fr_BE"},
352     {0x0c0c, "fr_CA"},
353     {0x240c, "fr_CD"},
354     {0x240c, "fr_CG"},
355     {0x100c, "fr_CH"},
356     {0x300c, "fr_CI"},
357     {0x2c0c, "fr_CM"},
358     {0x040c, "fr_FR"},
359     {0x3c0c, "fr_HT"},
360     {0x140c, "fr_LU"},
361     {0x380c, "fr_MA"},
362     {0x180c, "fr_MC"},
363     {0x340c, "fr_ML"},
364     {0x200c, "fr_RE"},
365     {0x280c, "fr_SN"},
366     {0xe40c, "fr_015"},
367     {0x1c0c, "fr_029"}
368 };
369 
370 ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
371 
372 ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
373 
ILCID_POSIX_SUBTABLE(ga)374 ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
375     {0x3c,   "ga"},
376     {0x083c, "ga_IE"},
377     {0x043c, "gd_GB"}
378 };
379 
ILCID_POSIX_SUBTABLE(gd)380 ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
381     {0x91,   "gd"},
382     {0x0491, "gd_GB"}
383 };
384 
385 ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
386 ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
387 ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
388 ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
389 
ILCID_POSIX_SUBTABLE(ha)390 ILCID_POSIX_SUBTABLE(ha) {
391     {0x68,   "ha"},
392     {0x7c68, "ha_Latn"},
393     {0x0468, "ha_Latn_NG"},
394 };
395 
396 ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
397 ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
398 ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
399 
400 /* This LCID is really four different locales.*/
ILCID_POSIX_SUBTABLE(hr)401 ILCID_POSIX_SUBTABLE(hr) {
402     {0x1a,   "hr"},
403     {0x141a, "bs_Latn_BA"},  /* Bosnian, Bosnia and Herzegovina */
404     {0x681a, "bs_Latn"},  /* Bosnian, Bosnia and Herzegovina */
405     {0x141a, "bs_BA"},  /* Bosnian, Bosnia and Herzegovina */
406     {0x781a, "bs"},     /* Bosnian */
407     {0x201a, "bs_Cyrl_BA"},  /* Bosnian, Bosnia and Herzegovina */
408     {0x641a, "bs_Cyrl"},  /* Bosnian, Bosnia and Herzegovina */
409     {0x101a, "hr_BA"},  /* Croatian in Bosnia */
410     {0x041a, "hr_HR"},  /* Croatian*/
411     {0x2c1a, "sr_Latn_ME"},
412     {0x241a, "sr_Latn_RS"},
413     {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
414     {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
415     {0x701a, "sr_Latn"},    /* It's 0x1a or 0x081a, pick one to make the test program happy. */
416     {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
417     {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
418     {0x301a, "sr_Cyrl_ME"},
419     {0x281a, "sr_Cyrl_RS"},
420     {0x6c1a, "sr_Cyrl"},    /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
421     {0x7c1a, "sr"}          /* In CLDR sr is sr_Cyrl. */
422 };
423 
ILCID_POSIX_SUBTABLE(hsb)424 ILCID_POSIX_SUBTABLE(hsb) {
425     {0x2E,   "hsb"},
426     {0x042E, "hsb_DE"},
427     {0x082E, "dsb_DE"},
428     {0x7C2E, "dsb"},
429 };
430 
431 ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
432 ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
433 
ILCID_POSIX_SUBTABLE(ibb)434 ILCID_POSIX_SUBTABLE(ibb) {
435     {0x69, "ibb"},
436     {0x0469, "ibb_NG"}
437 };
438 
439 ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
440 ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
441 ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
442 ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
443 
ILCID_POSIX_SUBTABLE(it)444 ILCID_POSIX_SUBTABLE(it) {
445     {0x10,   "it"},
446     {0x0810, "it_CH"},
447     {0x0410, "it_IT"}
448 };
449 
ILCID_POSIX_SUBTABLE(iu)450 ILCID_POSIX_SUBTABLE(iu) {
451     {0x5d,   "iu"},
452     {0x045d, "iu_Cans_CA"},
453     {0x785d, "iu_Cans"},
454     {0x085d, "iu_Latn_CA"},
455     {0x7c5d, "iu_Latn"}
456 };
457 
458 ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL)    /*Left in for compatibility*/
459 ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
460 ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
461 ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
462 ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
463 ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
464 ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
465 
ILCID_POSIX_SUBTABLE(ko)466 ILCID_POSIX_SUBTABLE(ko) {
467     {0x12,   "ko"},
468     {0x0812, "ko_KP"},
469     {0x0412, "ko_KR"}
470 };
471 
472 ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
473 ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr,  kr_NG)
474 
ILCID_POSIX_SUBTABLE(ks)475 ILCID_POSIX_SUBTABLE(ks) {         /* We could add PK and CN too */
476     {0x60,   "ks"},
477     {0x0460, "ks_Arab_IN"},
478     {0x0860, "ks_Deva_IN"}
479 };
480 
481 ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG)   /* Kyrgyz is spoken in Kyrgyzstan */
482 
ILCID_POSIX_SUBTABLE(la)483 ILCID_POSIX_SUBTABLE(la) {
484     {0x76,   "la"},
485     {0x0476, "la_001"},
486     {0x0476, "la_IT"}       /*Left in for compatibility*/
487 };
488 
489 ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
490 ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
491 ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
492 ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
493 ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
494 ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
495 ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
496 
ILCID_POSIX_SUBTABLE(mn)497 ILCID_POSIX_SUBTABLE(mn) {
498     {0x50,   "mn"},
499     {0x0450, "mn_MN"},
500     {0x7c50, "mn_Mong"},
501     {0x0850, "mn_Mong_CN"},
502     {0x0850, "mn_CN"},
503     {0x7850, "mn_Cyrl"},
504     {0x0c50, "mn_Mong_MN"}
505 };
506 
507 ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
508 ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
509 ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
510 
ILCID_POSIX_SUBTABLE(ms)511 ILCID_POSIX_SUBTABLE(ms) {
512     {0x3e,   "ms"},
513     {0x083e, "ms_BN"},   /* Brunei Darussalam*/
514     {0x043e, "ms_MY"}    /* Malaysia*/
515 };
516 
517 ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
518 ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
519 
ILCID_POSIX_SUBTABLE(ne)520 ILCID_POSIX_SUBTABLE(ne) {
521     {0x61,   "ne"},
522     {0x0861, "ne_IN"},   /* India*/
523     {0x0461, "ne_NP"}    /* Nepal*/
524 };
525 
ILCID_POSIX_SUBTABLE(nl)526 ILCID_POSIX_SUBTABLE(nl) {
527     {0x13,   "nl"},
528     {0x0813, "nl_BE"},
529     {0x0413, "nl_NL"}
530 };
531 
532 /* The "no" locale split into nb and nn.  By default in ICU, "no" is nb.*/
533 // TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
ILCID_POSIX_SUBTABLE(no)534 ILCID_POSIX_SUBTABLE(no) {
535     {0x14,   "no"},     /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */
536     {0x7c14, "nb"},     /* really nb */
537     {0x0414, "nb_NO"},  /* really nb_NO. Keep first in the 414 list. */
538     {0x0414, "no_NO"},  /* really nb_NO */
539     {0x0814, "nn_NO"},  /* really nn_NO. Keep first in the 814 list.  */
540     {0x7814, "nn"},     /* It's 0x14 or 0x814, pick one to make the test program happy. */
541     {0x0814, "no_NO_NY"}/* really nn_NO */
542 };
543 
544 ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA)   /* TODO: Verify the ISO-639 code */
545 ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
546 
ILCID_POSIX_SUBTABLE(om)547 ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
548     {0x72,   "om"},
549     {0x0472, "om_ET"},
550     {0x0472, "gaz_ET"}
551 };
552 
553 /* Declared as or_IN to get around compiler errors*/
ILCID_POSIX_SUBTABLE(or_IN)554 ILCID_POSIX_SUBTABLE(or_IN) {
555     {0x48,   "or"},
556     {0x0448, "or_IN"},
557 };
558 
ILCID_POSIX_SUBTABLE(pa)559 ILCID_POSIX_SUBTABLE(pa) {
560     {0x46,   "pa"},
561     {0x0446, "pa_IN"},
562     {0x0846, "pa_Arab_PK"},
563     {0x0846, "pa_PK"}
564 };
565 
ILCID_POSIX_SUBTABLE(pap)566 ILCID_POSIX_SUBTABLE(pap) {
567     {0x79, "pap"},
568     {0x0479, "pap_029"},
569     {0x0479, "pap_AN"}     /*Left in for compatibility*/
570 };
571 
572 ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
573 ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
574 
ILCID_POSIX_SUBTABLE(pt)575 ILCID_POSIX_SUBTABLE(pt) {
576     {0x16,   "pt"},
577     {0x0416, "pt_BR"},
578     {0x0816, "pt_PT"}
579 };
580 
ILCID_POSIX_SUBTABLE(qu)581 ILCID_POSIX_SUBTABLE(qu) {
582     {0x6b,   "qu"},
583     {0x046b, "qu_BO"},
584     {0x086b, "qu_EC"},
585     {0x0C6b, "qu_PE"},
586     {0x046b, "quz_BO"},
587     {0x086b, "quz_EC"},
588     {0x0C6b, "quz_PE"}
589 };
590 
ILCID_POSIX_SUBTABLE(quc)591 ILCID_POSIX_SUBTABLE(quc) {
592     {0x93,   "quc"},
593     {0x0493, "quc_CO"},
594     /*
595         "quc_Latn_GT" is an exceptional case. Language ID of "quc"
596         is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
597         under the group of "qut". "qut" is a retired ISO 639-3 language
598         code for West Central Quiche, and merged to "quc".
599         It looks Windows previously reserved "qut" for K'iche', but,
600         decided to use "quc" when adding a locale for K'iche' (Guatemala).
601 
602         This data structure used here assumes language ID bits in
603         LCID is unique for alphabetic language code. But this is not true
604         for "quc_Latn_GT". If we don't have the data below, LCID look up
605         by alphabetic locale ID (POSIX) will fail. The same entry is found
606         under "qut" below, which is required for reverse look up.
607     */
608     {0x0486, "quc_Latn_GT"}
609 };
610 
ILCID_POSIX_SUBTABLE(qut)611 ILCID_POSIX_SUBTABLE(qut) {
612     {0x86,   "qut"},
613     {0x0486, "qut_GT"},
614     /*
615         See the note in "quc" above.
616     */
617     {0x0486, "quc_Latn_GT"}
618 };
619 
620 ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
621 
ILCID_POSIX_SUBTABLE(ro)622 ILCID_POSIX_SUBTABLE(ro) {
623     {0x18,   "ro"},
624     {0x0418, "ro_RO"},
625     {0x0818, "ro_MD"}
626 };
627 
628 // TODO: This is almost certainly 'wrong'.  0 in Windows is a synonym for LOCALE_USER_DEFAULT.
629 // More likely this is a similar concept to the Windows 0x7f Invariant locale ""
630 // (Except that it's not invariant in ICU)
ILCID_POSIX_SUBTABLE(root)631 ILCID_POSIX_SUBTABLE(root) {
632     {0x00,   "root"}
633 };
634 
ILCID_POSIX_SUBTABLE(ru)635 ILCID_POSIX_SUBTABLE(ru) {
636     {0x19,   "ru"},
637     {0x0419, "ru_RU"},
638     {0x0819, "ru_MD"}
639 };
640 
641 ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
642 ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
643 ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
644 
ILCID_POSIX_SUBTABLE(sd)645 ILCID_POSIX_SUBTABLE(sd) {
646     {0x59,   "sd"},
647     {0x0459, "sd_Deva_IN"},
648     {0x0459, "sd_IN"},
649     {0x0859, "sd_Arab_PK"},
650     {0x0859, "sd_PK"},
651     {0x7c59, "sd_Arab"}
652 };
653 
ILCID_POSIX_SUBTABLE(se)654 ILCID_POSIX_SUBTABLE(se) {
655     {0x3b,   "se"},
656     {0x0c3b, "se_FI"},
657     {0x043b, "se_NO"},
658     {0x083b, "se_SE"},
659     {0x783b, "sma"},
660     {0x183b, "sma_NO"},
661     {0x1c3b, "sma_SE"},
662     {0x7c3b, "smj"},
663     {0x703b, "smn"},
664     {0x743b, "sms"},
665     {0x103b, "smj_NO"},
666     {0x143b, "smj_SE"},
667     {0x243b, "smn_FI"},
668     {0x203b, "sms_FI"},
669 };
670 
671 ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
672 ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
673 ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
674 
ILCID_POSIX_SUBTABLE(so)675 ILCID_POSIX_SUBTABLE(so) {
676     {0x77,   "so"},
677     {0x0477, "so_SO"}
678 };
679 
680 ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
681 ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
682 
ILCID_POSIX_SUBTABLE(sv)683 ILCID_POSIX_SUBTABLE(sv) {
684     {0x1d,   "sv"},
685     {0x081d, "sv_FI"},
686     {0x041d, "sv_SE"}
687 };
688 
689 ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
690 ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
691 
ILCID_POSIX_SUBTABLE(ta)692 ILCID_POSIX_SUBTABLE(ta) {
693     {0x49,   "ta"},
694     {0x0449, "ta_IN"},
695     {0x0849, "ta_LK"}
696 };
697 
698 ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
699 
700 /* Cyrillic based by default */
ILCID_POSIX_SUBTABLE(tg)701 ILCID_POSIX_SUBTABLE(tg) {
702     {0x28,   "tg"},
703     {0x7c28, "tg_Cyrl"},
704     {0x0428, "tg_Cyrl_TJ"}
705 };
706 
707 ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
708 
ILCID_POSIX_SUBTABLE(ti)709 ILCID_POSIX_SUBTABLE(ti) {
710     {0x73,   "ti"},
711     {0x0873, "ti_ER"},
712     {0x0473, "ti_ET"}
713 };
714 
715 ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
716 
ILCID_POSIX_SUBTABLE(tn)717 ILCID_POSIX_SUBTABLE(tn) {
718     {0x32,   "tn"},
719     {0x0832, "tn_BW"},
720     {0x0432, "tn_ZA"}
721 };
722 
723 ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
724 ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
725 ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
726 
ILCID_POSIX_SUBTABLE(tzm)727 ILCID_POSIX_SUBTABLE(tzm) {
728     {0x5f,   "tzm"},
729     {0x7c5f, "tzm_Latn"},
730     {0x085f, "tzm_Latn_DZ"},
731     {0x105f, "tzm_Tfng_MA"},
732     {0x045f, "tzm_Arab_MA"},
733     {0x045f, "tmz"}
734 };
735 
ILCID_POSIX_SUBTABLE(ug)736 ILCID_POSIX_SUBTABLE(ug) {
737     {0x80,   "ug"},
738     {0x0480, "ug_CN"},
739     {0x0480, "ug_Arab_CN"}
740 };
741 
742 ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
743 
ILCID_POSIX_SUBTABLE(ur)744 ILCID_POSIX_SUBTABLE(ur) {
745     {0x20,   "ur"},
746     {0x0820, "ur_IN"},
747     {0x0420, "ur_PK"}
748 };
749 
ILCID_POSIX_SUBTABLE(uz)750 ILCID_POSIX_SUBTABLE(uz) {
751     {0x43,   "uz"},
752     {0x0843, "uz_Cyrl_UZ"},  /* Cyrillic based */
753     {0x7843, "uz_Cyrl"},  /* Cyrillic based */
754     {0x0843, "uz_UZ"},  /* Cyrillic based */
755     {0x0443, "uz_Latn_UZ"}, /* Latin based */
756     {0x7c43, "uz_Latn"} /* Latin based */
757 };
758 
ILCID_POSIX_SUBTABLE(ve)759 ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
760     {0x33,   "ve"},
761     {0x0433, "ve_ZA"},
762     {0x0433, "ven_ZA"}
763 };
764 
765 ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
766 ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
767 ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
768 
ILCID_POSIX_SUBTABLE(yi)769 ILCID_POSIX_SUBTABLE(yi) {
770     {0x003d, "yi"},
771     {0x043d, "yi_001"}
772 };
773 
774 ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
775 
776 // Windows & ICU tend to different names for some of these
777 // TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
ILCID_POSIX_SUBTABLE(zh)778 ILCID_POSIX_SUBTABLE(zh) {
779     {0x0004, "zh_Hans"},
780     {0x7804, "zh"},
781     {0x0804, "zh_CN"},
782     {0x0804, "zh_Hans_CN"},
783     {0x0c04, "zh_Hant_HK"},
784     {0x0c04, "zh_HK"},
785     {0x1404, "zh_Hant_MO"},
786     {0x1404, "zh_MO"},
787     {0x1004, "zh_Hans_SG"},
788     {0x1004, "zh_SG"},
789     {0x0404, "zh_Hant_TW"},
790     {0x7c04, "zh_Hant"},
791     {0x0404, "zh_TW"},
792     {0x30404,"zh_Hant_TW"},     /* Bopomofo order */
793     {0x30404,"zh_TW"},          /* Bopomofo order */
794     {0x20004,"zh@collation=stroke"},
795     {0x20404,"zh_Hant@collation=stroke"},
796     {0x20404,"zh_Hant_TW@collation=stroke"},
797     {0x20404,"zh_TW@collation=stroke"},
798     {0x20804,"zh_Hans@collation=stroke"},
799     {0x20804,"zh_Hans_CN@collation=stroke"},
800     {0x20804,"zh_CN@collation=stroke"}
801     // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
802 };
803 
804 ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
805 
806 /* This must be static and grouped by LCID. */
807 static const ILcidPosixMap gPosixIDmap[] = {
808     ILCID_POSIX_MAP(af),    /*  af  Afrikaans                 0x36 */
809     ILCID_POSIX_MAP(am),    /*  am  Amharic                   0x5e */
810     ILCID_POSIX_MAP(ar),    /*  ar  Arabic                    0x01 */
811     ILCID_POSIX_MAP(arn),   /*  arn Araucanian/Mapudungun     0x7a */
812     ILCID_POSIX_MAP(as),    /*  as  Assamese                  0x4d */
813     ILCID_POSIX_MAP(az),    /*  az  Azerbaijani               0x2c */
814     ILCID_POSIX_MAP(ba),    /*  ba  Bashkir                   0x6d */
815     ILCID_POSIX_MAP(be),    /*  be  Belarusian                0x23 */
816 /*    ILCID_POSIX_MAP(ber),     ber Berber/Tamazight          0x5f */
817     ILCID_POSIX_MAP(bg),    /*  bg  Bulgarian                 0x02 */
818     ILCID_POSIX_MAP(bin),   /*  bin Edo                       0x66 */
819     ILCID_POSIX_MAP(bn),    /*  bn  Bengali; Bangla           0x45 */
820     ILCID_POSIX_MAP(bo),    /*  bo  Tibetan                   0x51 */
821     ILCID_POSIX_MAP(br),    /*  br  Breton                    0x7e */
822     ILCID_POSIX_MAP(ca),    /*  ca  Catalan                   0x03 */
823     ILCID_POSIX_MAP(chr),   /*  chr Cherokee                  0x5c */
824     ILCID_POSIX_MAP(ckb),   /*  ckb Sorani (Central Kurdish)  0x92 */
825     ILCID_POSIX_MAP(co),    /*  co  Corsican                  0x83 */
826     ILCID_POSIX_MAP(cs),    /*  cs  Czech                     0x05 */
827     ILCID_POSIX_MAP(cy),    /*  cy  Welsh                     0x52 */
828     ILCID_POSIX_MAP(da),    /*  da  Danish                    0x06 */
829     ILCID_POSIX_MAP(de),    /*  de  German                    0x07 */
830     ILCID_POSIX_MAP(dv),    /*  dv  Divehi                    0x65 */
831     ILCID_POSIX_MAP(el),    /*  el  Greek                     0x08 */
832     ILCID_POSIX_MAP(en),    /*  en  English                   0x09 */
833     ILCID_POSIX_MAP(en_US_POSIX), /*    invariant             0x7f */
834     ILCID_POSIX_MAP(es),    /*  es  Spanish                   0x0a */
835     ILCID_POSIX_MAP(et),    /*  et  Estonian                  0x25 */
836     ILCID_POSIX_MAP(eu),    /*  eu  Basque                    0x2d */
837     ILCID_POSIX_MAP(fa),    /*  fa  Persian/Farsi             0x29 */
838     ILCID_POSIX_MAP(fa_AF), /*  fa  Persian/Dari              0x8c */
839     ILCID_POSIX_MAP(ff),    /*  ff  Fula                      0x67 */
840     ILCID_POSIX_MAP(fi),    /*  fi  Finnish                   0x0b */
841     ILCID_POSIX_MAP(fil),   /*  fil Filipino                  0x64 */
842     ILCID_POSIX_MAP(fo),    /*  fo  Faroese                   0x38 */
843     ILCID_POSIX_MAP(fr),    /*  fr  French                    0x0c */
844     ILCID_POSIX_MAP(fuv),   /*  fuv Fulfulde - Nigeria        0x67 */
845     ILCID_POSIX_MAP(fy),    /*  fy  Frisian                   0x62 */
846     ILCID_POSIX_MAP(ga),    /*  *   Gaelic (Ireland,Scotland) 0x3c */
847     ILCID_POSIX_MAP(gd),    /*  gd  Gaelic (United Kingdom)   0x91 */
848     ILCID_POSIX_MAP(gl),    /*  gl  Galician                  0x56 */
849     ILCID_POSIX_MAP(gn),    /*  gn  Guarani                   0x74 */
850     ILCID_POSIX_MAP(gsw),   /*  gsw Alemanic/Alsatian/Swiss German 0x84 */
851     ILCID_POSIX_MAP(gu),    /*  gu  Gujarati                  0x47 */
852     ILCID_POSIX_MAP(ha),    /*  ha  Hausa                     0x68 */
853     ILCID_POSIX_MAP(haw),   /*  haw Hawaiian                  0x75 */
854     ILCID_POSIX_MAP(he),    /*  he  Hebrew (formerly iw)      0x0d */
855     ILCID_POSIX_MAP(hi),    /*  hi  Hindi                     0x39 */
856     ILCID_POSIX_MAP(hr),    /*  *   Croatian and others       0x1a */
857     ILCID_POSIX_MAP(hsb),   /*  hsb Upper Sorbian             0x2e */
858     ILCID_POSIX_MAP(hu),    /*  hu  Hungarian                 0x0e */
859     ILCID_POSIX_MAP(hy),    /*  hy  Armenian                  0x2b */
860     ILCID_POSIX_MAP(ibb),   /*  ibb Ibibio - Nigeria          0x69 */
861     ILCID_POSIX_MAP(id),    /*  id  Indonesian (formerly in)  0x21 */
862     ILCID_POSIX_MAP(ig),    /*  ig  Igbo                      0x70 */
863     ILCID_POSIX_MAP(ii),    /*  ii  Sichuan Yi                0x78 */
864     ILCID_POSIX_MAP(is),    /*  is  Icelandic                 0x0f */
865     ILCID_POSIX_MAP(it),    /*  it  Italian                   0x10 */
866     ILCID_POSIX_MAP(iu),    /*  iu  Inuktitut                 0x5d */
867     ILCID_POSIX_MAP(iw),    /*  iw  Hebrew                    0x0d */
868     ILCID_POSIX_MAP(ja),    /*  ja  Japanese                  0x11 */
869     ILCID_POSIX_MAP(ka),    /*  ka  Georgian                  0x37 */
870     ILCID_POSIX_MAP(kk),    /*  kk  Kazakh                    0x3f */
871     ILCID_POSIX_MAP(kl),    /*  kl  Kalaallisut               0x6f */
872     ILCID_POSIX_MAP(km),    /*  km  Khmer                     0x53 */
873     ILCID_POSIX_MAP(kn),    /*  kn  Kannada                   0x4b */
874     ILCID_POSIX_MAP(ko),    /*  ko  Korean                    0x12 */
875     ILCID_POSIX_MAP(kok),   /*  kok Konkani                   0x57 */
876     ILCID_POSIX_MAP(kr),    /*  kr  Kanuri                    0x71 */
877     ILCID_POSIX_MAP(ks),    /*  ks  Kashmiri                  0x60 */
878     ILCID_POSIX_MAP(ky),    /*  ky  Kyrgyz                    0x40 */
879     ILCID_POSIX_MAP(lb),    /*  lb  Luxembourgish             0x6e */
880     ILCID_POSIX_MAP(la),    /*  la  Latin                     0x76 */
881     ILCID_POSIX_MAP(lo),    /*  lo  Lao                       0x54 */
882     ILCID_POSIX_MAP(lt),    /*  lt  Lithuanian                0x27 */
883     ILCID_POSIX_MAP(lv),    /*  lv  Latvian, Lettish          0x26 */
884     ILCID_POSIX_MAP(mi),    /*  mi  Maori                     0x81 */
885     ILCID_POSIX_MAP(mk),    /*  mk  Macedonian                0x2f */
886     ILCID_POSIX_MAP(ml),    /*  ml  Malayalam                 0x4c */
887     ILCID_POSIX_MAP(mn),    /*  mn  Mongolian                 0x50 */
888     ILCID_POSIX_MAP(mni),   /*  mni Manipuri                  0x58 */
889     ILCID_POSIX_MAP(moh),   /*  moh Mohawk                    0x7c */
890     ILCID_POSIX_MAP(mr),    /*  mr  Marathi                   0x4e */
891     ILCID_POSIX_MAP(ms),    /*  ms  Malay                     0x3e */
892     ILCID_POSIX_MAP(mt),    /*  mt  Maltese                   0x3a */
893     ILCID_POSIX_MAP(my),    /*  my  Burmese                   0x55 */
894 /*    ILCID_POSIX_MAP(nb),    //  no  Norwegian                 0x14 */
895     ILCID_POSIX_MAP(ne),    /*  ne  Nepali                    0x61 */
896     ILCID_POSIX_MAP(nl),    /*  nl  Dutch                     0x13 */
897 /*    ILCID_POSIX_MAP(nn),    //  no  Norwegian                 0x14 */
898     ILCID_POSIX_MAP(no),    /*  *   Norwegian                 0x14 */
899     ILCID_POSIX_MAP(nso),   /*  nso Sotho, Northern (Sepedi dialect) 0x6c */
900     ILCID_POSIX_MAP(oc),    /*  oc  Occitan                   0x82 */
901     ILCID_POSIX_MAP(om),    /*  om  Oromo                     0x72 */
902     ILCID_POSIX_MAP(or_IN), /*  or  Oriya                     0x48 */
903     ILCID_POSIX_MAP(pa),    /*  pa  Punjabi                   0x46 */
904     ILCID_POSIX_MAP(pap),   /*  pap Papiamentu                0x79 */
905     ILCID_POSIX_MAP(pl),    /*  pl  Polish                    0x15 */
906     ILCID_POSIX_MAP(ps),    /*  ps  Pashto                    0x63 */
907     ILCID_POSIX_MAP(pt),    /*  pt  Portuguese                0x16 */
908     ILCID_POSIX_MAP(qu),    /*  qu  Quechua                   0x6B */
909     ILCID_POSIX_MAP(quc),   /*  quc K'iche                    0x93 */
910     ILCID_POSIX_MAP(qut),   /*  qut K'iche                    0x86 */
911     ILCID_POSIX_MAP(rm),    /*  rm  Raeto-Romance/Romansh     0x17 */
912     ILCID_POSIX_MAP(ro),    /*  ro  Romanian                  0x18 */
913     ILCID_POSIX_MAP(root),  /*  root                          0x00 */
914     ILCID_POSIX_MAP(ru),    /*  ru  Russian                   0x19 */
915     ILCID_POSIX_MAP(rw),    /*  rw  Kinyarwanda               0x87 */
916     ILCID_POSIX_MAP(sa),    /*  sa  Sanskrit                  0x4f */
917     ILCID_POSIX_MAP(sah),   /*  sah Yakut                     0x85 */
918     ILCID_POSIX_MAP(sd),    /*  sd  Sindhi                    0x59 */
919     ILCID_POSIX_MAP(se),    /*  se  Sami                      0x3b */
920 /*    ILCID_POSIX_MAP(sh),    //  sh  Serbo-Croatian            0x1a */
921     ILCID_POSIX_MAP(si),    /*  si  Sinhalese                 0x5b */
922     ILCID_POSIX_MAP(sk),    /*  sk  Slovak                    0x1b */
923     ILCID_POSIX_MAP(sl),    /*  sl  Slovenian                 0x24 */
924     ILCID_POSIX_MAP(so),    /*  so  Somali                    0x77 */
925     ILCID_POSIX_MAP(sq),    /*  sq  Albanian                  0x1c */
926 /*    ILCID_POSIX_MAP(sr),    //  sr  Serbian                   0x1a */
927     ILCID_POSIX_MAP(st),    /*  st  Sutu                      0x30 */
928     ILCID_POSIX_MAP(sv),    /*  sv  Swedish                   0x1d */
929     ILCID_POSIX_MAP(sw),    /*  sw  Swahili                   0x41 */
930     ILCID_POSIX_MAP(syr),   /*  syr Syriac                    0x5A */
931     ILCID_POSIX_MAP(ta),    /*  ta  Tamil                     0x49 */
932     ILCID_POSIX_MAP(te),    /*  te  Telugu                    0x4a */
933     ILCID_POSIX_MAP(tg),    /*  tg  Tajik                     0x28 */
934     ILCID_POSIX_MAP(th),    /*  th  Thai                      0x1e */
935     ILCID_POSIX_MAP(ti),    /*  ti  Tigrigna                  0x73 */
936     ILCID_POSIX_MAP(tk),    /*  tk  Turkmen                   0x42 */
937     ILCID_POSIX_MAP(tn),    /*  tn  Tswana                    0x32 */
938     ILCID_POSIX_MAP(tr),    /*  tr  Turkish                   0x1f */
939     ILCID_POSIX_MAP(ts),    /*  ts  Tsonga                    0x31 */
940     ILCID_POSIX_MAP(tt),    /*  tt  Tatar                     0x44 */
941     ILCID_POSIX_MAP(tzm),   /*  tzm Tamazight                 0x5f */
942     ILCID_POSIX_MAP(ug),    /*  ug  Uighur                    0x80 */
943     ILCID_POSIX_MAP(uk),    /*  uk  Ukrainian                 0x22 */
944     ILCID_POSIX_MAP(ur),    /*  ur  Urdu                      0x20 */
945     ILCID_POSIX_MAP(uz),    /*  uz  Uzbek                     0x43 */
946     ILCID_POSIX_MAP(ve),    /*  ve  Venda                     0x33 */
947     ILCID_POSIX_MAP(vi),    /*  vi  Vietnamese                0x2a */
948     ILCID_POSIX_MAP(wo),    /*  wo  Wolof                     0x88 */
949     ILCID_POSIX_MAP(xh),    /*  xh  Xhosa                     0x34 */
950     ILCID_POSIX_MAP(yi),    /*  yi  Yiddish                   0x3d */
951     ILCID_POSIX_MAP(yo),    /*  yo  Yoruba                    0x6a */
952     ILCID_POSIX_MAP(zh),    /*  zh  Chinese                   0x04 */
953     ILCID_POSIX_MAP(zu),    /*  zu  Zulu                      0x35 */
954 };
955 
/* Number of entries in gPosixIDmap; used as the bound when scanning or searching that table. */
static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
957 
/**
 * Measures how many leading characters two NUL-terminated IDs have in common.
 *
 * Internal helper used by getHostID(); it is not meant to be called from
 * anywhere else.
 *
 * @param id1 first NUL-terminated ID.
 * @param id2 second NUL-terminated ID.
 * @return the index of the first position at which the two strings differ,
 *         i.e. the length of their common prefix. For equal strings this is
 *         the string length.
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t prefixLen;

    /* Stop at the end of id1 or at the first mismatch, whichever comes first. */
    for (prefixLen = 0; id1[prefixLen] != 0; prefixLen++) {
        if (id1[prefixLen] != id2[prefixLen]) {
            break;
        }
    }
    return prefixLen;
}
974 
975 /**
976  * Searches for a Windows LCID
977  *
978  * @param posixid the Posix style locale id.
979  * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
980  *               no equivalent Windows LCID.
981  * @return the LCID
982  */
983 static uint32_t
getHostID(const ILcidPosixMap * this_0,const char * posixID,UErrorCode * status)984 getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
985 {
986     int32_t bestIdx = 0;
987     int32_t bestIdxDiff = 0;
988     int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
989     uint32_t idx;
990 
991     for (idx = 0; idx < this_0->numRegions; idx++ ) {
992         int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
993         if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
994             if (posixIDlen == sameChars) {
995                 /* Exact match */
996                 return this_0->regionMaps[idx].hostID;
997             }
998             bestIdxDiff = sameChars;
999             bestIdx = idx;
1000         }
1001     }
1002     /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
1003     /* We also have to make sure that sid and si and similar string subsets don't match. */
1004     if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
1005         && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
1006     {
1007         *status = U_USING_FALLBACK_WARNING;
1008         return this_0->regionMaps[bestIdx].hostID;
1009     }
1010 
1011     /*no match found */
1012     *status = U_ILLEGAL_ARGUMENT_ERROR;
1013     return this_0->regionMaps->hostID;
1014 }
1015 
1016 static const char*
getPosixID(const ILcidPosixMap * this_0,uint32_t hostID)1017 getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
1018 {
1019     uint32_t i;
1020     for (i = 0; i < this_0->numRegions; i++)
1021     {
1022         if (this_0->regionMaps[i].hostID == hostID)
1023         {
1024             return this_0->regionMaps[i].posixID;
1025         }
1026     }
1027 
1028     /* If you get here, then no matching region was found,
1029        so return the language id with the wild card region. */
1030     return this_0->regionMaps[0].posixID;
1031 }
1032 
1033 /*
1034 //////////////////////////////////////
1035 //
1036 // LCID --> POSIX
1037 //
1038 /////////////////////////////////////
1039 */
#ifdef USE_WINDOWS_LCID_MAPPING_API
/*
 * Various language tags needs to be changed:
 * quz -> qu
 * prs -> fa
 *
 * FIX_LANGUAGE_ID_TAG(buffer, len) rewrites the three-letter language code at
 * the start of the NUL-terminated string `buffer` in place: the first two
 * chars become the two-letter code and the rest of the string, including its
 * terminating NUL, is shifted left by one char. Nothing happens when `len`
 * is less than 3 or the string starts with a different code.
 *
 * The shift copies between overlapping parts of the same buffer, so it uses
 * uprv_memmove; appending a string onto itself with strcat is undefined
 * behavior. The body is wrapped in do { } while (0) so that the macro behaves
 * like a single statement.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    do { \
        if ((len) >= 3) { \
            if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                (buffer)[0] = 'f'; (buffer)[1] = 'a'; \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } \
        } \
    } while (0)

#endif
1058 U_CAPI int32_t
uprv_convertToPosix(uint32_t hostid,char * posixID,int32_t posixIDCapacity,UErrorCode * status)1059 uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
1060 {
1061     uint16_t langID;
1062     uint32_t localeIndex;
1063     UBool bLookup = TRUE;
1064     const char *pPosixID = NULL;
1065 
1066 #ifdef USE_WINDOWS_LCID_MAPPING_API
1067     char locName[LOCALE_NAME_MAX_LENGTH] = {};      // ICU name can't be longer than Windows name
1068 
1069     // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
1070     // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
1071     // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
1072     // use the Windows API to resolve locale ID for this specific case.
1073     if ((hostid & 0x3FF) != 0x92) {
1074         int32_t tmpLen = 0;
1075         UChar windowsLocaleName[LOCALE_NAME_MAX_LENGTH];  // ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH
1076 
1077         // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
1078         tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
1079         if (tmpLen > 1) {
1080             int32_t i = 0;
1081             // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
1082             bLookup = FALSE;
1083             for (i = 0; i < UPRV_LENGTHOF(locName); i++)
1084             {
1085                 locName[i] = (char)(windowsLocaleName[i]);
1086 
1087                 // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
1088                 // In such cases, we need special mapping data found in the hardcoded table
1089                 // in this source file.
1090                 if (windowsLocaleName[i] == L'_')
1091                 {
1092                     // Keep the base locale, without variant
1093                     // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
1094                     locName[i] = '\0';
1095                     tmpLen = i;
1096                     bLookup = TRUE;
1097                     break;
1098                 }
1099                 else if (windowsLocaleName[i] == L'-')
1100                 {
1101                     // Windows names use -, ICU uses _
1102                     locName[i] = '_';
1103                 }
1104                 else if (windowsLocaleName[i] == L'\0')
1105                 {
1106                     // No point in doing more work than necessary
1107                     break;
1108                 }
1109             }
1110             // TODO: Need to understand this better, why isn't it an alias?
1111             FIX_LANGUAGE_ID_TAG(locName, tmpLen);
1112             pPosixID = locName;
1113         }
1114     }
1115 #endif // USE_WINDOWS_LCID_MAPPING_API
1116 
1117     if (bLookup) {
1118         const char *pCandidate = NULL;
1119         langID = LANGUAGE_LCID(hostid);
1120 
1121         for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
1122             if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
1123                 pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
1124                 break;
1125             }
1126         }
1127 
1128         /* On Windows, when locale name has a variant, we still look up the hardcoded table.
1129            If a match in the hardcoded table is longer than the Windows locale name without
1130            variant, we use the one as the result */
1131         if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
1132             pPosixID = pCandidate;
1133         }
1134     }
1135 
1136     if (pPosixID) {
1137         int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
1138         int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
1139         uprv_memcpy(posixID, pPosixID, copyLen);
1140         if (resLen < posixIDCapacity) {
1141             posixID[resLen] = 0;
1142             if (*status == U_STRING_NOT_TERMINATED_WARNING) {
1143                 *status = U_ZERO_ERROR;
1144             }
1145         } else if (resLen == posixIDCapacity) {
1146             *status = U_STRING_NOT_TERMINATED_WARNING;
1147         } else {
1148             *status = U_BUFFER_OVERFLOW_ERROR;
1149         }
1150         return resLen;
1151     }
1152 
1153     /* no match found */
1154     *status = U_ILLEGAL_ARGUMENT_ERROR;
1155     return -1;
1156 }
1157 
1158 /*
1159 //////////////////////////////////////
1160 //
1161 // POSIX --> LCID
1162 // This should only be called from uloc_getLCID.
1163 // The locale ID must be in canonical form.
1164 //
1165 /////////////////////////////////////
1166 */
1167 U_CAPI uint32_t
uprv_convertToLCIDPlatform(const char * localeID)1168 uprv_convertToLCIDPlatform(const char* localeID)
1169 {
1170     // The purpose of this function is to leverage native platform name->lcid
1171     // conversion functionality when available.
1172 #ifdef USE_WINDOWS_LCID_MAPPING_API
1173     DWORD nameLCIDFlags = 0;
1174     UErrorCode myStatus = U_ZERO_ERROR;
1175 
1176     // First check for a Windows name->LCID match, fall through to catch
1177     // ICU special cases, but Windows may know it already.
1178 #if LOCALE_ALLOW_NEUTRAL_NAMES
1179     nameLCIDFlags = LOCALE_ALLOW_NEUTRAL_NAMES;
1180 #endif /* LOCALE_ALLOW_NEUTRAL_NAMES */
1181 
1182     int32_t len;
1183     char collVal[ULOC_KEYWORDS_CAPACITY] = {};
1184     char baseName[ULOC_FULLNAME_CAPACITY] = {};
1185     const char * mylocaleID = localeID;
1186 
1187     // Check any for keywords.
1188     if (uprv_strchr(localeID, '@'))
1189     {
1190         len = uloc_getKeywordValue(localeID, "collation", collVal, UPRV_LENGTHOF(collVal) - 1, &myStatus);
1191         if (U_SUCCESS(myStatus) && len > 0)
1192         {
1193             // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
1194             return 0;
1195         }
1196         else
1197         {
1198             // If the locale ID contains keywords other than collation, just use the base name.
1199             len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, &myStatus);
1200 
1201             if (U_SUCCESS(myStatus) && len > 0)
1202             {
1203                 baseName[len] = 0;
1204                 mylocaleID = baseName;
1205             }
1206         }
1207     }
1208 
1209     char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1210     // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
1211     (void)uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus);
1212 
1213     if (U_SUCCESS(myStatus))
1214     {
1215         // Need it to be UTF-16, not 8-bit
1216         wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
1217         int32_t i;
1218         for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)
1219         {
1220             if (asciiBCP47Tag[i] == '\0')
1221             {
1222                 break;
1223             }
1224             else
1225             {
1226                 // Copy the character
1227                 bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);
1228             }
1229         }
1230 
1231         if (i < (UPRV_LENGTHOF(bcp47Tag) - 1))
1232         {
1233             // Ensure it's null terminated
1234             bcp47Tag[i] = L'\0';
1235             LCID lcid = LocaleNameToLCID(bcp47Tag, nameLCIDFlags);
1236             if (lcid > 0)
1237             {
1238                 // Found LCID from windows, return that one, unless its completely ambiguous
1239                 // LOCALE_USER_DEFAULT and transients are OK because they will round trip
1240                 // for this process.
1241                 if (lcid != LOCALE_CUSTOM_UNSPECIFIED)
1242                 {
1243                     return lcid;
1244                 }
1245             }
1246         }
1247     }
1248 #else
1249     (void)localeID; // Suppress unused variable warning.
1250 #endif /* USE_WINDOWS_LCID_MAPPING_API */
1251 
1252     // No found, or not implemented on platforms without native name->lcid conversion
1253     return 0;
1254 }
1255 
1256 U_CAPI uint32_t
uprv_convertToLCID(const char * langID,const char * posixID,UErrorCode * status)1257 uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
1258 {
1259     // This function does the table lookup when native platform name->lcid conversion isn't available,
1260     // or for locales that don't follow patterns the platform expects.
1261     uint32_t   low    = 0;
1262     uint32_t   high   = gLocaleCount;
1263     uint32_t   mid;
1264     uint32_t   oldmid = 0;
1265     int32_t    compVal;
1266 
1267     uint32_t   value         = 0;
1268     uint32_t   fallbackValue = (uint32_t)-1;
1269     UErrorCode myStatus;
1270     uint32_t   idx;
1271 
1272     /* Check for incomplete id. */
1273     if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
1274         return 0;
1275     }
1276 
1277     /*Binary search for the map entry for normal cases */
1278 
1279     while (high > low)  /*binary search*/{
1280 
1281         mid = (high+low) >> 1; /*Finds median*/
1282 
1283         if (mid == oldmid)
1284             break;
1285 
1286         compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
1287         if (compVal < 0){
1288             high = mid;
1289         }
1290         else if (compVal > 0){
1291             low = mid;
1292         }
1293         else /*we found it*/{
1294             return getHostID(&gPosixIDmap[mid], posixID, status);
1295         }
1296         oldmid = mid;
1297     }
1298 
1299     /*
1300      * Sometimes we can't do a binary search on posixID because some LCIDs
1301      * go to different locales.  We hit one of those special cases.
1302      */
1303     for (idx = 0; idx < gLocaleCount; idx++ ) {
1304         myStatus = U_ZERO_ERROR;
1305         value = getHostID(&gPosixIDmap[idx], posixID, &myStatus);
1306         if (myStatus == U_ZERO_ERROR) {
1307             return value;
1308         }
1309         else if (myStatus == U_USING_FALLBACK_WARNING) {
1310             fallbackValue = value;
1311         }
1312     }
1313 
1314     if (fallbackValue != (uint32_t)-1) {
1315         *status = U_USING_FALLBACK_WARNING;
1316         return fallbackValue;
1317     }
1318 
1319     /* no match found */
1320     *status = U_ILLEGAL_ARGUMENT_ERROR;
1321     return 0;   /* return international (root) */
1322 }
1323