1 /* setlocale() function that respects the locale chosen by the user.
2 Copyright (C) 2009, 2011, 2013, 2018-2019 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2009.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License
16 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18 #ifdef HAVE_CONFIG_H
19 # include <config.h>
20 #endif
21
22 /* Override setlocale() and newlocale() so that when the default locale is
23 requested (locale = "") and no relevant environment variable is set, the
24 locale chosen by the user is used.
25 This matters on MacOS X 10 and Windows.
26 See the comments in localename.c, function gl_locale_name_default. */
27
28 #include <locale.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32
/* When building a DLL, we must export some functions.  Note that because
   the functions are only defined for binary backward compatibility, we
   don't need to use __declspec(dllimport) in any case.
   DLL_EXPORTED expands to the compiler-specific "export this symbol"
   annotation when BUILDING_DLL is set, and to nothing otherwise.  */
#if HAVE_VISIBILITY && BUILDING_DLL
# define DLL_EXPORTED __attribute__((__visibility__("default")))
#elif defined _MSC_VER && BUILDING_DLL
# define DLL_EXPORTED __declspec(dllexport)
#else
# define DLL_EXPORTED
#endif
43
44 #include "gettextP.h"
45
46 #if HAVE_CFLOCALECOPYPREFERREDLANGUAGES || HAVE_CFPREFERENCESCOPYAPPVALUE
47 # if HAVE_CFLOCALECOPYPREFERREDLANGUAGES
48 # include <CoreFoundation/CFLocale.h>
49 # elif HAVE_CFPREFERENCESCOPYAPPVALUE
50 # include <CoreFoundation/CFPreferences.h>
51 # endif
52 # include <CoreFoundation/CFPropertyList.h>
53 # include <CoreFoundation/CFArray.h>
54 # include <CoreFoundation/CFString.h>
55 #endif
56
57 #if (defined __APPLE__ && defined __MACH__) || defined _WIN32 || defined __CYGWIN__
58
59 # undef setlocale
60 # undef newlocale
61
/* Return the string representation of the locale category CATEGORY,
   e.g. "LC_CTYPE" for LC_CTYPE.
   Only the six categories listed in the switch are known; any other
   value (LC_ALL included) yields the placeholder "LC_XXX".
   The returned pointer refers to a string literal and must not be
   modified or freed.  */
static const char *
category_to_name (int category)
{
  const char *retval;

  switch (category)
    {
    case LC_COLLATE:
      retval = "LC_COLLATE";
      break;
    case LC_CTYPE:
      retval = "LC_CTYPE";
      break;
    case LC_MONETARY:
      retval = "LC_MONETARY";
      break;
    case LC_NUMERIC:
      retval = "LC_NUMERIC";
      break;
    case LC_TIME:
      retval = "LC_TIME";
      break;
    case LC_MESSAGES:
      retval = "LC_MESSAGES";
      break;
    default:
      /* If you have a better idea for a default value let me know.  */
      retval = "LC_XXX";
      break;
    }

  return retval;
}
95
96 # if defined _WIN32 && ! defined __CYGWIN__
97
98 /* The native Windows setlocale() function expects locale names of the form
99 "German" or "German_Germany" or "DEU", but not "de" or "de_DE". We need
100 to convert the names from the form with ISO 639 language code and ISO 3166
101 country code to the form with English names or with three-letter identifier.
102 The three-letter identifiers known by a Windows XP SP2 or SP3 are:
103 AFK Afrikaans_South Africa.1252
104 ARA Arabic_Saudi Arabia.1256
105 ARB Arabic_Lebanon.1256
106 ARE Arabic_Egypt.1256
107 ARG Arabic_Algeria.1256
108 ARH Arabic_Bahrain.1256
109 ARI Arabic_Iraq.1256
110 ARJ Arabic_Jordan.1256
111 ARK Arabic_Kuwait.1256
112 ARL Arabic_Libya.1256
113 ARM Arabic_Morocco.1256
114 ARO Arabic_Oman.1256
115 ARQ Arabic_Qatar.1256
116 ARS Arabic_Syria.1256
117 ART Arabic_Tunisia.1256
118 ARU Arabic_U.A.E..1256
119 ARY Arabic_Yemen.1256
120 AZE Azeri (Latin)_Azerbaijan.1254
121 BEL Belarusian_Belarus.1251
122 BGR Bulgarian_Bulgaria.1251
123 BSB Bosnian_Bosnia and Herzegovina.1250
124 BSC Bosnian (Cyrillic)_Bosnia and Herzegovina.1250 (wrong encoding!)
125 CAT Catalan_Spain.1252
126 CHH Chinese_Hong Kong S.A.R..950
127 CHI Chinese_Singapore.936
128 CHS Chinese_People's Republic of China.936
129 CHT Chinese_Taiwan.950
130 CSY Czech_Czech Republic.1250
131 CYM Welsh_United Kingdom.1252
132 DAN Danish_Denmark.1252
133 DEA German_Austria.1252
134 DEC German_Liechtenstein.1252
135 DEL German_Luxembourg.1252
136 DES German_Switzerland.1252
137 DEU German_Germany.1252
138 ELL Greek_Greece.1253
139 ENA English_Australia.1252
140 ENB English_Caribbean.1252
141 ENC English_Canada.1252
142 ENG English_United Kingdom.1252
143 ENI English_Ireland.1252
144 ENJ English_Jamaica.1252
145 ENL English_Belize.1252
146 ENP English_Republic of the Philippines.1252
147 ENS English_South Africa.1252
148 ENT English_Trinidad and Tobago.1252
149 ENU English_United States.1252
150 ENW English_Zimbabwe.1252
151 ENZ English_New Zealand.1252
152 ESA Spanish_Panama.1252
153 ESB Spanish_Bolivia.1252
154 ESC Spanish_Costa Rica.1252
155 ESD Spanish_Dominican Republic.1252
156 ESE Spanish_El Salvador.1252
157 ESF Spanish_Ecuador.1252
158 ESG Spanish_Guatemala.1252
159 ESH Spanish_Honduras.1252
160 ESI Spanish_Nicaragua.1252
161 ESL Spanish_Chile.1252
162 ESM Spanish_Mexico.1252
163 ESN Spanish_Spain.1252
164 ESO Spanish_Colombia.1252
165 ESP Spanish_Spain.1252
166 ESR Spanish_Peru.1252
167 ESS Spanish_Argentina.1252
168 ESU Spanish_Puerto Rico.1252
169 ESV Spanish_Venezuela.1252
170 ESY Spanish_Uruguay.1252
171 ESZ Spanish_Paraguay.1252
172 ETI Estonian_Estonia.1257
173 EUQ Basque_Spain.1252
174 FAR Farsi_Iran.1256
175 FIN Finnish_Finland.1252
176 FOS Faroese_Faroe Islands.1252
177 FPO Filipino_Philippines.1252
178 FRA French_France.1252
179 FRB French_Belgium.1252
180 FRC French_Canada.1252
181 FRL French_Luxembourg.1252
182 FRM French_Principality of Monaco.1252
183 FRS French_Switzerland.1252
184 FYN Frisian_Netherlands.1252
185 GLC Galician_Spain.1252
186 HEB Hebrew_Israel.1255
187 HRB Croatian_Bosnia and Herzegovina.1250
188 HRV Croatian_Croatia.1250
189 HUN Hungarian_Hungary.1250
190 IND Indonesian_Indonesia.1252
191 IRE Irish_Ireland.1252
192 ISL Icelandic_Iceland.1252
193 ITA Italian_Italy.1252
194 ITS Italian_Switzerland.1252
195 IUK Inuktitut (Latin)_Canada.1252
196 JPN Japanese_Japan.932
197 KKZ Kazakh_Kazakhstan.1251
198 KOR Korean_Korea.949
199 KYR Kyrgyz_Kyrgyzstan.1251
200 LBX Luxembourgish_Luxembourg.1252
201 LTH Lithuanian_Lithuania.1257
202 LVI Latvian_Latvia.1257
203 MKI FYRO Macedonian_Former Yugoslav Republic of Macedonia.1251
204 MON Mongolian_Mongolia.1251
205 MPD Mapudungun_Chile.1252
206 MSB Malay_Brunei Darussalam.1252
207 MSL Malay_Malaysia.1252
208 MWK Mohawk_Canada.1252
209 NLB Dutch_Belgium.1252
210 NLD Dutch_Netherlands.1252
211 NON Norwegian-Nynorsk_Norway.1252
212 NOR Norwegian (Bokmål)_Norway.1252
213 NSO Northern Sotho_South Africa.1252
214 PLK Polish_Poland.1250
215 PTB Portuguese_Brazil.1252
216 PTG Portuguese_Portugal.1252
217 QUB Quechua_Bolivia.1252
218 QUE Quechua_Ecuador.1252
219 QUP Quechua_Peru.1252
220 RMC Romansh_Switzerland.1252
221 ROM Romanian_Romania.1250
222 RUS Russian_Russia.1251
223 SKY Slovak_Slovakia.1250
224 SLV Slovenian_Slovenia.1250
225 SMA Sami (Southern)_Norway.1252
226 SMB Sami (Southern)_Sweden.1252
227 SME Sami (Northern)_Norway.1252
228 SMF Sami (Northern)_Sweden.1252
229 SMG Sami (Northern)_Finland.1252
230 SMJ Sami (Lule)_Norway.1252
231 SMK Sami (Lule)_Sweden.1252
232 SMN Sami (Inari)_Finland.1252
233 SMS Sami (Skolt)_Finland.1252
234 SQI Albanian_Albania.1250
235 SRB Serbian (Cyrillic)_Serbia and Montenegro.1251
236 SRL Serbian (Latin)_Serbia and Montenegro.1250
237 SRN Serbian (Cyrillic)_Bosnia and Herzegovina.1251
238 SRS Serbian (Latin)_Bosnia and Herzegovina.1250
239 SVE Swedish_Sweden.1252
240 SVF Swedish_Finland.1252
241 SWK Swahili_Kenya.1252
242 THA Thai_Thailand.874
243 TRK Turkish_Turkey.1254
244 TSN Tswana_South Africa.1252
245 TTT Tatar_Russia.1251
246 UKR Ukrainian_Ukraine.1251
247 URD Urdu_Islamic Republic of Pakistan.1256
248 USA English_United States.1252
249 UZB Uzbek (Latin)_Uzbekistan.1254
250 VIT Vietnamese_Viet Nam.1258
251 XHO Xhosa_South Africa.1252
252 ZHH Chinese_Hong Kong S.A.R..950
253 ZHI Chinese_Singapore.936
254 ZHM Chinese_Macau S.A.R..950
255 ZUL Zulu_South Africa.1252
256 */
257
/* Table from ISO 639 language code, optionally with country or script suffix,
   to English name.
   The .english field is the name handed to the native setlocale(); a few
   entries use a three-letter identifier (e.g. "BSB", "IRE") instead of an
   English name.
   The entries must stay sorted by .code in strcmp() order, because the
   table is looked up with a binary search.  Several consecutive entries may
   share the same code; they are alternatives that are tried in table order.
   Keep in sync with the gl_locale_name_from_win32_LANGID function in
   localename.c!  */
struct table_entry
{
  const char *code;     /* lookup key: ll, lll, ll_CC or ll@script */
  const char *english;  /* replacement name to try */
};
static const struct table_entry language_table[] =
  {
    { "af", "Afrikaans" },
    { "am", "Amharic" },
    { "ar", "Arabic" },
    { "arn", "Mapudungun" },
    { "as", "Assamese" },
    { "az@cyrillic", "Azeri (Cyrillic)" },
    { "az@latin", "Azeri (Latin)" },
    { "ba", "Bashkir" },
    { "be", "Belarusian" },
    { "ber", "Tamazight" },
    { "ber@arabic", "Tamazight (Arabic)" },
    { "ber@latin", "Tamazight (Latin)" },
    { "bg", "Bulgarian" },
    { "bin", "Edo" },
    { "bn", "Bengali" },
    { "bn_BD", "Bengali (Bangladesh)" },
    { "bn_IN", "Bengali (India)" },
    { "bnt", "Sutu" },
    { "bo", "Tibetan" },
    { "br", "Breton" },
    { "bs", "BSB" }, /* "Bosnian (Latin)" */
    { "bs@cyrillic", "BSC" }, /* Bosnian (Cyrillic) */
    { "ca", "Catalan" },
    { "chr", "Cherokee" },
    { "co", "Corsican" },
    { "cpe", "Hawaiian" },
    { "cs", "Czech" },
    { "cy", "Welsh" },
    { "da", "Danish" },
    { "de", "German" },
    { "dsb", "Lower Sorbian" },
    { "dv", "Divehi" },
    { "el", "Greek" },
    { "en", "English" },
    { "es", "Spanish" },
    { "et", "Estonian" },
    { "eu", "Basque" },
    { "fa", "Farsi" },
    { "ff", "Fulfulde" },
    { "fi", "Finnish" },
    { "fo", "Faroese" }, /* "Faeroese" does not work */
    { "fr", "French" },
    { "fy", "Frisian" },
    { "ga", "IRE" }, /* Gaelic (Ireland) */
    { "gd", "Gaelic (Scotland)" },
    { "gd", "Scottish Gaelic" },
    { "gl", "Galician" },
    { "gn", "Guarani" },
    { "gsw", "Alsatian" },
    { "gu", "Gujarati" },
    { "ha", "Hausa" },
    { "he", "Hebrew" },
    { "hi", "Hindi" },
    { "hr", "Croatian" },
    { "hsb", "Upper Sorbian" },
    { "hu", "Hungarian" },
    { "hy", "Armenian" },
    { "id", "Indonesian" },
    { "ig", "Igbo" },
    { "ii", "Yi" },
    { "is", "Icelandic" },
    { "it", "Italian" },
    { "iu", "IUK" }, /* Inuktitut */
    { "ja", "Japanese" },
    { "ka", "Georgian" },
    { "kk", "Kazakh" },
    { "kl", "Greenlandic" },
    { "km", "Cambodian" },
    { "km", "Khmer" },
    { "kn", "Kannada" },
    { "ko", "Korean" },
    { "kok", "Konkani" },
    { "kr", "Kanuri" },
    { "ks", "Kashmiri" },
    { "ks_IN", "Kashmiri_India" },
    { "ks_PK", "Kashmiri (Arabic)_Pakistan" },
    { "ky", "Kyrgyz" },
    { "la", "Latin" },
    { "lb", "Luxembourgish" },
    { "lo", "Lao" },
    { "lt", "Lithuanian" },
    { "lv", "Latvian" },
    { "mi", "Maori" },
    { "mk", "FYRO Macedonian" },
    { "mk", "Macedonian" },
    { "ml", "Malayalam" },
    { "mn", "Mongolian" },
    { "mni", "Manipuri" },
    { "moh", "Mohawk" },
    { "mr", "Marathi" },
    { "ms", "Malay" },
    { "mt", "Maltese" },
    { "my", "Burmese" },
    { "nb", "NOR" }, /* Norwegian Bokmål */
    { "ne", "Nepali" },
    { "nic", "Ibibio" },
    { "nl", "Dutch" },
    { "nn", "NON" }, /* Norwegian Nynorsk */
    { "no", "Norwegian" },
    { "nso", "Northern Sotho" },
    { "nso", "Sepedi" },
    { "oc", "Occitan" },
    { "om", "Oromo" },
    { "or", "Oriya" },
    { "pa", "Punjabi" },
    { "pap", "Papiamentu" },
    { "pl", "Polish" },
    { "prs", "Dari" },
    { "ps", "Pashto" },
    { "pt", "Portuguese" },
    { "qu", "Quechua" },
    { "qut", "K'iche'" },
    { "rm", "Romansh" },
    { "ro", "Romanian" },
    { "ru", "Russian" },
    { "rw", "Kinyarwanda" },
    { "sa", "Sanskrit" },
    { "sah", "Yakut" },
    { "sd", "Sindhi" },
    { "se", "Sami (Northern)" },
    { "se", "Northern Sami" },
    { "si", "Sinhalese" },
    { "sk", "Slovak" },
    { "sl", "Slovenian" },
    { "sma", "Sami (Southern)" },
    { "sma", "Southern Sami" },
    { "smj", "Sami (Lule)" },
    { "smj", "Lule Sami" },
    { "smn", "Sami (Inari)" },
    { "smn", "Inari Sami" },
    { "sms", "Sami (Skolt)" },
    { "sms", "Skolt Sami" },
    { "so", "Somali" },
    { "sq", "Albanian" },
    { "sr", "Serbian (Latin)" },
    { "sr@cyrillic", "SRB" }, /* Serbian (Cyrillic) */
    { "sv", "Swedish" },
    { "sw", "Swahili" },
    { "syr", "Syriac" },
    { "ta", "Tamil" },
    { "te", "Telugu" },
    { "tg", "Tajik" },
    { "th", "Thai" },
    { "ti", "Tigrinya" },
    { "tk", "Turkmen" },
    { "tl", "Filipino" },
    { "tn", "Tswana" },
    { "tr", "Turkish" },
    { "ts", "Tsonga" },
    { "tt", "Tatar" },
    { "ug", "Uighur" },
    { "uk", "Ukrainian" },
    { "ur", "Urdu" },
    { "uz", "Uzbek" },
    { "uz", "Uzbek (Latin)" },
    { "uz@cyrillic", "Uzbek (Cyrillic)" },
    { "ve", "Venda" },
    { "vi", "Vietnamese" },
    { "wen", "Sorbian" },
    { "wo", "Wolof" },
    { "xh", "Xhosa" },
    { "yi", "Yiddish" },
    { "yo", "Yoruba" },
    { "zh", "Chinese" },
    { "zu", "Zulu" }
  };
435
436 /* Table from ISO 3166 country code to English name.
437 Keep in sync with the gl_locale_name_from_win32_LANGID function in
438 localename.c! */
439 static const struct table_entry country_table[] =
440 {
441 { "AE", "U.A.E." },
442 { "AF", "Afghanistan" },
443 { "AL", "Albania" },
444 { "AM", "Armenia" },
445 { "AN", "Netherlands Antilles" },
446 { "AR", "Argentina" },
447 { "AT", "Austria" },
448 { "AU", "Australia" },
449 { "AZ", "Azerbaijan" },
450 { "BA", "Bosnia and Herzegovina" },
451 { "BD", "Bangladesh" },
452 { "BE", "Belgium" },
453 { "BG", "Bulgaria" },
454 { "BH", "Bahrain" },
455 { "BN", "Brunei Darussalam" },
456 { "BO", "Bolivia" },
457 { "BR", "Brazil" },
458 { "BT", "Bhutan" },
459 { "BY", "Belarus" },
460 { "BZ", "Belize" },
461 { "CA", "Canada" },
462 { "CG", "Congo" },
463 { "CH", "Switzerland" },
464 { "CI", "Cote d'Ivoire" },
465 { "CL", "Chile" },
466 { "CM", "Cameroon" },
467 { "CN", "People's Republic of China" },
468 { "CO", "Colombia" },
469 { "CR", "Costa Rica" },
470 { "CS", "Serbia and Montenegro" },
471 { "CZ", "Czech Republic" },
472 { "DE", "Germany" },
473 { "DK", "Denmark" },
474 { "DO", "Dominican Republic" },
475 { "DZ", "Algeria" },
476 { "EC", "Ecuador" },
477 { "EE", "Estonia" },
478 { "EG", "Egypt" },
479 { "ER", "Eritrea" },
480 { "ES", "Spain" },
481 { "ET", "Ethiopia" },
482 { "FI", "Finland" },
483 { "FO", "Faroe Islands" },
484 { "FR", "France" },
485 { "GB", "United Kingdom" },
486 { "GD", "Caribbean" },
487 { "GE", "Georgia" },
488 { "GL", "Greenland" },
489 { "GR", "Greece" },
490 { "GT", "Guatemala" },
491 { "HK", "Hong Kong" },
492 { "HK", "Hong Kong S.A.R." },
493 { "HN", "Honduras" },
494 { "HR", "Croatia" },
495 { "HT", "Haiti" },
496 { "HU", "Hungary" },
497 { "ID", "Indonesia" },
498 { "IE", "Ireland" },
499 { "IL", "Israel" },
500 { "IN", "India" },
501 { "IQ", "Iraq" },
502 { "IR", "Iran" },
503 { "IS", "Iceland" },
504 { "IT", "Italy" },
505 { "JM", "Jamaica" },
506 { "JO", "Jordan" },
507 { "JP", "Japan" },
508 { "KE", "Kenya" },
509 { "KG", "Kyrgyzstan" },
510 { "KH", "Cambodia" },
511 { "KR", "South Korea" },
512 { "KW", "Kuwait" },
513 { "KZ", "Kazakhstan" },
514 { "LA", "Laos" },
515 { "LB", "Lebanon" },
516 { "LI", "Liechtenstein" },
517 { "LK", "Sri Lanka" },
518 { "LT", "Lithuania" },
519 { "LU", "Luxembourg" },
520 { "LV", "Latvia" },
521 { "LY", "Libya" },
522 { "MA", "Morocco" },
523 { "MC", "Principality of Monaco" },
524 { "MD", "Moldava" },
525 { "MD", "Moldova" },
526 { "ME", "Montenegro" },
527 { "MK", "Former Yugoslav Republic of Macedonia" },
528 { "ML", "Mali" },
529 { "MM", "Myanmar" },
530 { "MN", "Mongolia" },
531 { "MO", "Macau S.A.R." },
532 { "MT", "Malta" },
533 { "MV", "Maldives" },
534 { "MX", "Mexico" },
535 { "MY", "Malaysia" },
536 { "NG", "Nigeria" },
537 { "NI", "Nicaragua" },
538 { "NL", "Netherlands" },
539 { "NO", "Norway" },
540 { "NP", "Nepal" },
541 { "NZ", "New Zealand" },
542 { "OM", "Oman" },
543 { "PA", "Panama" },
544 { "PE", "Peru" },
545 { "PH", "Philippines" },
546 { "PK", "Islamic Republic of Pakistan" },
547 { "PL", "Poland" },
548 { "PR", "Puerto Rico" },
549 { "PT", "Portugal" },
550 { "PY", "Paraguay" },
551 { "QA", "Qatar" },
552 { "RE", "Reunion" },
553 { "RO", "Romania" },
554 { "RS", "Serbia" },
555 { "RU", "Russia" },
556 { "RW", "Rwanda" },
557 { "SA", "Saudi Arabia" },
558 { "SE", "Sweden" },
559 { "SG", "Singapore" },
560 { "SI", "Slovenia" },
561 { "SK", "Slovak" },
562 { "SN", "Senegal" },
563 { "SO", "Somalia" },
564 { "SR", "Suriname" },
565 { "SV", "El Salvador" },
566 { "SY", "Syria" },
567 { "TH", "Thailand" },
568 { "TJ", "Tajikistan" },
569 { "TM", "Turkmenistan" },
570 { "TN", "Tunisia" },
571 { "TR", "Turkey" },
572 { "TT", "Trinidad and Tobago" },
573 { "TW", "Taiwan" },
574 { "TZ", "Tanzania" },
575 { "UA", "Ukraine" },
576 { "US", "United States" },
577 { "UY", "Uruguay" },
578 { "VA", "Vatican" },
579 { "VE", "Venezuela" },
580 { "VN", "Viet Nam" },
581 { "YE", "Yemen" },
582 { "ZA", "South Africa" },
583 { "ZW", "Zimbabwe" }
584 };
585
586 /* Given a string STRING, find the set of indices i such that TABLE[i].code is
587 the given STRING. It is a range [lo,hi-1]. */
588 typedef struct { size_t lo; size_t hi; } range_t;
589 static void
search(const struct table_entry * table,size_t table_size,const char * string,range_t * result)590 search (const struct table_entry *table, size_t table_size, const char *string,
591 range_t *result)
592 {
593 /* The table is sorted. Perform a binary search. */
594 size_t hi = table_size;
595 size_t lo = 0;
596 while (lo < hi)
597 {
598 /* Invariant:
599 for i < lo, strcmp (table[i].code, string) < 0,
600 for i >= hi, strcmp (table[i].code, string) > 0. */
601 size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
602 int cmp = strcmp (table[mid].code, string);
603 if (cmp < 0)
604 lo = mid + 1;
605 else if (cmp > 0)
606 hi = mid;
607 else
608 {
609 /* Found an i with
610 strcmp (language_table[i].code, string) == 0.
611 Find the entire interval of such i. */
612 {
613 size_t i;
614
615 for (i = mid; i > lo; )
616 {
617 i--;
618 if (strcmp (table[i].code, string) < 0)
619 {
620 lo = i + 1;
621 break;
622 }
623 }
624 }
625 {
626 size_t i;
627
628 for (i = mid + 1; i < hi; i++)
629 {
630 if (strcmp (table[i].code, string) > 0)
631 {
632 hi = i;
633 break;
634 }
635 }
636 }
637 /* The set of i with
638 strcmp (language_table[i].code, string) == 0
639 is the interval [lo, hi-1]. */
640 break;
641 }
642 }
643 result->lo = lo;
644 result->hi = hi;
645 }
646
647 /* Like setlocale, but accept also locale names in the form ll or ll_CC,
648 where ll is an ISO 639 language code and CC is an ISO 3166 country code. */
649 static char *
setlocale_unixlike(int category,const char * locale)650 setlocale_unixlike (int category, const char *locale)
651 {
652 char *result;
653 char llCC_buf[64];
654 char ll_buf[64];
655 char CC_buf[64];
656
657 /* The native Windows implementation of setlocale understands the special
658 locale name "C", but not "POSIX". Therefore map "POSIX" to "C". */
659 if (locale != NULL && strcmp (locale, "POSIX") == 0)
660 locale = "C";
661
662 /* First, try setlocale with the original argument unchanged. */
663 result = setlocale (category, locale);
664 if (result != NULL)
665 return result;
666
667 /* Otherwise, assume the argument is in the form
668 language[_territory][.codeset][@modifier]
669 and try to map it using the tables. */
670 if (strlen (locale) < sizeof (llCC_buf))
671 {
672 /* Second try: Remove the codeset part. */
673 {
674 const char *p = locale;
675 char *q = llCC_buf;
676
677 /* Copy the part before the dot. */
678 for (; *p != '\0' && *p != '.'; p++, q++)
679 *q = *p;
680 if (*p == '.')
681 /* Skip the part up to the '@', if any. */
682 for (; *p != '\0' && *p != '@'; p++)
683 ;
684 /* Copy the part starting with '@', if any. */
685 for (; *p != '\0'; p++, q++)
686 *q = *p;
687 *q = '\0';
688 }
689 /* llCC_buf now contains
690 language[_territory][@modifier]
691 */
692 if (strcmp (llCC_buf, locale) != 0)
693 {
694 result = setlocale (category, llCC_buf);
695 if (result != NULL)
696 return result;
697 }
698 /* Look it up in language_table. */
699 {
700 range_t range;
701 size_t i;
702
703 search (language_table,
704 sizeof (language_table) / sizeof (language_table[0]),
705 llCC_buf,
706 &range);
707
708 for (i = range.lo; i < range.hi; i++)
709 {
710 /* Try the replacement in language_table[i]. */
711 result = setlocale (category, language_table[i].english);
712 if (result != NULL)
713 return result;
714 }
715 }
716 /* Split language[_territory][@modifier]
717 into ll_buf = language[@modifier]
718 and CC_buf = territory
719 */
720 {
721 const char *underscore = strchr (llCC_buf, '_');
722 if (underscore != NULL)
723 {
724 const char *territory_start = underscore + 1;
725 const char *territory_end = strchr (territory_start, '@');
726 if (territory_end == NULL)
727 territory_end = territory_start + strlen (territory_start);
728
729 memcpy (ll_buf, llCC_buf, underscore - llCC_buf);
730 strcpy (ll_buf + (underscore - llCC_buf), territory_end);
731
732 memcpy (CC_buf, territory_start, territory_end - territory_start);
733 CC_buf[territory_end - territory_start] = '\0';
734
735 {
736 /* Look up ll_buf in language_table
737 and CC_buf in country_table. */
738 range_t language_range;
739
740 search (language_table,
741 sizeof (language_table) / sizeof (language_table[0]),
742 ll_buf,
743 &language_range);
744 if (language_range.lo < language_range.hi)
745 {
746 range_t country_range;
747
748 search (country_table,
749 sizeof (country_table) / sizeof (country_table[0]),
750 CC_buf,
751 &country_range);
752 if (country_range.lo < country_range.hi)
753 {
754 size_t i;
755 size_t j;
756
757 for (i = language_range.lo; i < language_range.hi; i++)
758 for (j = country_range.lo; j < country_range.hi; j++)
759 {
760 /* Concatenate the replacements. */
761 const char *part1 = language_table[i].english;
762 size_t part1_len = strlen (part1);
763 const char *part2 = country_table[j].english;
764 size_t part2_len = strlen (part2) + 1;
765 char buf[64+64];
766
767 if (!(part1_len + 1 + part2_len <= sizeof (buf)))
768 abort ();
769 memcpy (buf, part1, part1_len);
770 buf[part1_len] = '_';
771 memcpy (buf + part1_len + 1, part2, part2_len);
772
773 /* Try the concatenated replacements. */
774 result = setlocale (category, buf);
775 if (result != NULL)
776 return result;
777 }
778 }
779
780 /* Try omitting the country entirely. This may set a locale
781 corresponding to the wrong country, but is better than
782 failing entirely. */
783 {
784 size_t i;
785
786 for (i = language_range.lo; i < language_range.hi; i++)
787 {
788 /* Try only the language replacement. */
789 result =
790 setlocale (category, language_table[i].english);
791 if (result != NULL)
792 return result;
793 }
794 }
795 }
796 }
797 }
798 }
799 }
800
801 /* Failed. */
802 return NULL;
803 }
804
805 # elif defined __ANDROID__
806
/* Like setlocale, but accept also the locale names "C" and "POSIX".
   When the system setlocale fails for one of the categories listed below
   and LOCALE is NULL, "C" or "POSIX", report "C" instead of failing.
   For any other category or locale name the system's NULL result is
   passed through.  */
static char *
setlocale_unixlike (int category, const char *locale)
{
  char *result = setlocale (category, locale);
  if (result == NULL)
    switch (category)
      {
      case LC_CTYPE:
      case LC_NUMERIC:
      case LC_TIME:
      case LC_COLLATE:
      case LC_MONETARY:
      case LC_MESSAGES:
      case LC_ALL:
      case LC_PAPER:
      case LC_NAME:
      case LC_ADDRESS:
      case LC_TELEPHONE:
      case LC_MEASUREMENT:
        if (locale == NULL
            || strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
          /* The cast drops const from the literal; callers must treat the
             returned string as read-only.  */
          result = (char *) "C";
        break;
      default:
        break;
      }
  return result;
}
/* From here on, plain setlocale calls go through the wrapper above.  */
# define setlocale setlocale_unixlike
837
838 # else
839 # define setlocale_unixlike setlocale
840 # endif
841
842 # if LC_MESSAGES == 1729
843
/* The system does not store an LC_MESSAGES locale category.  Do it here.
   This buffer holds the most recently requested LC_MESSAGES locale name,
   truncated to fit; it starts out as "C".  */
static char lc_messages_name[64] = "C";

/* Like setlocale, but support also LC_MESSAGES.
   For LC_MESSAGES, a non-NULL LOCALE is stored in lc_messages_name (at most
   sizeof (lc_messages_name) - 1 bytes of it) and a pointer to that buffer
   is returned; a NULL LOCALE just returns the stored name.
   Every other category is forwarded to setlocale_unixlike.  */
static char *
setlocale_single (int category, const char *locale)
{
  if (category == LC_MESSAGES)
    {
      if (locale != NULL)
        {
          size_t len = strlen (locale);

          if (len > sizeof (lc_messages_name) - 1)
            len = sizeof (lc_messages_name) - 1;
          /* LOCALE may be the very pointer returned by an earlier call
             (save/restore idiom), so the copy must tolerate overlap.  */
          memmove (lc_messages_name, locale, len);
          lc_messages_name[len] = '\0';
        }
      return lc_messages_name;
    }
  else
    return setlocale_unixlike (category, locale);
}
863
864 # else
865 # define setlocale_single setlocale_unixlike
866 # endif
867
868 # if defined __APPLE__ && defined __MACH__
869
/* Mapping from language to main territory where that language is spoken.
   Each entry is a locale name of the form ll_CC or lll_CC (at most 6
   characters plus the terminating NUL).
   Keep the entries in ascending order of their language part (the text
   before the '_'), with a shorter code sorting before a longer one that it
   is a prefix of ("km" before "kmb").
   NOTE(review): "kmb_AO" used to sit between "kam_KE" and "kcg_NG", the
   only entry out of order.  The lookup code follows this table; if it
   searches by bisection the misplaced entry would have been unreachable -
   confirm against get_main_locale_with_same_language.  */
static char const locales_with_principal_territory[][6 + 1] =
  {
                /* Language     Main territory */
    "ace_ID",   /* Achinese     Indonesia */
    "af_ZA",    /* Afrikaans    South Africa */
    "ak_GH",    /* Akan         Ghana */
    "am_ET",    /* Amharic      Ethiopia */
    "an_ES",    /* Aragonese    Spain */
    "ang_GB",   /* Old English  Britain */
    "arn_CL",   /* Mapudungun   Chile */
    "as_IN",    /* Assamese     India */
    "ast_ES",   /* Asturian     Spain */
    "av_RU",    /* Avaric       Russia */
    "awa_IN",   /* Awadhi       India */
    "az_AZ",    /* Azerbaijani  Azerbaijan */
    "ban_ID",   /* Balinese     Indonesia */
    "be_BY",    /* Belarusian   Belarus */
    "bej_SD",   /* Beja         Sudan */
    "bem_ZM",   /* Bemba        Zambia */
    "bg_BG",    /* Bulgarian    Bulgaria */
    "bho_IN",   /* Bhojpuri     India */
    "bi_VU",    /* Bislama      Vanuatu */
    "bik_PH",   /* Bikol        Philippines */
    "bin_NG",   /* Bini         Nigeria */
    "bm_ML",    /* Bambara      Mali */
    "bn_IN",    /* Bengali      India */
    "bo_CN",    /* Tibetan      China */
    "br_FR",    /* Breton       France */
    "bs_BA",    /* Bosnian      Bosnia */
    "bug_ID",   /* Buginese     Indonesia */
    "ca_ES",    /* Catalan      Spain */
    "ce_RU",    /* Chechen      Russia */
    "ceb_PH",   /* Cebuano      Philippines */
    "co_FR",    /* Corsican     France */
    "cr_CA",    /* Cree         Canada */
    /* Don't put "crh_UZ" or "crh_UA" here.  That would be asking for fruitless
       political discussion.  */
    "cs_CZ",    /* Czech        Czech Republic */
    "csb_PL",   /* Kashubian    Poland */
    "cy_GB",    /* Welsh        Britain */
    "da_DK",    /* Danish       Denmark */
    "de_DE",    /* German       Germany */
    "din_SD",   /* Dinka        Sudan */
    "doi_IN",   /* Dogri        India */
    "dsb_DE",   /* Lower Sorbian        Germany */
    "dv_MV",    /* Divehi       Maldives */
    "dz_BT",    /* Dzongkha     Bhutan */
    "ee_GH",    /* Éwé          Ghana */
    "el_GR",    /* Greek        Greece */
    /* Don't put "en_GB" or "en_US" here.  That would be asking for fruitless
       political discussion.  */
    "es_ES",    /* Spanish      Spain */
    "et_EE",    /* Estonian     Estonia */
    "fa_IR",    /* Persian      Iran */
    "fi_FI",    /* Finnish      Finland */
    "fil_PH",   /* Filipino     Philippines */
    "fj_FJ",    /* Fijian       Fiji */
    "fo_FO",    /* Faroese      Faeroe Islands */
    "fon_BJ",   /* Fon          Benin */
    "fr_FR",    /* French       France */
    "fur_IT",   /* Friulian     Italy */
    "fy_NL",    /* Western Frisian      Netherlands */
    "ga_IE",    /* Irish        Ireland */
    "gd_GB",    /* Scottish Gaelic      Britain */
    "gon_IN",   /* Gondi        India */
    "gsw_CH",   /* Swiss German Switzerland */
    "gu_IN",    /* Gujarati     India */
    "he_IL",    /* Hebrew       Israel */
    "hi_IN",    /* Hindi        India */
    "hil_PH",   /* Hiligaynon   Philippines */
    "hr_HR",    /* Croatian     Croatia */
    "hsb_DE",   /* Upper Sorbian        Germany */
    "ht_HT",    /* Haitian      Haiti */
    "hu_HU",    /* Hungarian    Hungary */
    "hy_AM",    /* Armenian     Armenia */
    "id_ID",    /* Indonesian   Indonesia */
    "ig_NG",    /* Igbo         Nigeria */
    "ii_CN",    /* Sichuan Yi   China */
    "ilo_PH",   /* Iloko        Philippines */
    "is_IS",    /* Icelandic    Iceland */
    "it_IT",    /* Italian      Italy */
    "ja_JP",    /* Japanese     Japan */
    "jab_NG",   /* Hyam         Nigeria */
    "jv_ID",    /* Javanese     Indonesia */
    "ka_GE",    /* Georgian     Georgia */
    "kab_DZ",   /* Kabyle       Algeria */
    "kaj_NG",   /* Jju          Nigeria */
    "kam_KE",   /* Kamba        Kenya */
    "kcg_NG",   /* Tyap         Nigeria */
    "kdm_NG",   /* Kagoma       Nigeria */
    "kg_CD",    /* Kongo        Democratic Republic of Congo */
    "kk_KZ",    /* Kazakh       Kazakhstan */
    "kl_GL",    /* Kalaallisut  Greenland */
    "km_KH",    /* Central Khmer        Cambodia */
    "kmb_AO",   /* Kimbundu     Angola */
    "kn_IN",    /* Kannada      India */
    "ko_KR",    /* Korean       Korea (South) */
    "kok_IN",   /* Konkani      India */
    "kr_NG",    /* Kanuri       Nigeria */
    "kru_IN",   /* Kurukh       India */
    "ky_KG",    /* Kyrgyz       Kyrgyzstan */
    "lg_UG",    /* Ganda        Uganda */
    "li_BE",    /* Limburgish   Belgium */
    "lo_LA",    /* Laotian      Laos */
    "lt_LT",    /* Lithuanian   Lithuania */
    "lu_CD",    /* Luba-Katanga Democratic Republic of Congo */
    "lua_CD",   /* Luba-Lulua   Democratic Republic of Congo */
    "luo_KE",   /* Luo          Kenya */
    "lv_LV",    /* Latvian      Latvia */
    "mad_ID",   /* Madurese     Indonesia */
    "mag_IN",   /* Magahi       India */
    "mai_IN",   /* Maithili     India */
    "mak_ID",   /* Makasar      Indonesia */
    "man_ML",   /* Mandingo     Mali */
    "men_SL",   /* Mende        Sierra Leone */
    "mfe_MU",   /* Mauritian Creole     Mauritius */
    "mg_MG",    /* Malagasy     Madagascar */
    "mi_NZ",    /* Maori        New Zealand */
    "min_ID",   /* Minangkabau  Indonesia */
    "mk_MK",    /* Macedonian   North Macedonia */
    "ml_IN",    /* Malayalam    India */
    "mn_MN",    /* Mongolian    Mongolia */
    "mni_IN",   /* Manipuri     India */
    "mos_BF",   /* Mossi        Burkina Faso */
    "mr_IN",    /* Marathi      India */
    "ms_MY",    /* Malay        Malaysia */
    "mt_MT",    /* Maltese      Malta */
    "mwr_IN",   /* Marwari      India */
    "my_MM",    /* Burmese      Myanmar */
    "na_NR",    /* Nauru        Nauru */
    "nah_MX",   /* Nahuatl      Mexico */
    "nap_IT",   /* Neapolitan   Italy */
    "nb_NO",    /* Norwegian Bokmål     Norway */
    "nds_DE",   /* Low Saxon    Germany */
    "ne_NP",    /* Nepali       Nepal */
    "nl_NL",    /* Dutch        Netherlands */
    "nn_NO",    /* Norwegian Nynorsk    Norway */
    "no_NO",    /* Norwegian    Norway */
    "nr_ZA",    /* South Ndebele        South Africa */
    "nso_ZA",   /* Northern Sotho       South Africa */
    "ny_MW",    /* Chichewa     Malawi */
    "nym_TZ",   /* Nyamwezi     Tanzania */
    "nyn_UG",   /* Nyankole     Uganda */
    "oc_FR",    /* Occitan      France */
    "oj_CA",    /* Ojibwa       Canada */
    "or_IN",    /* Oriya        India */
    "pa_IN",    /* Punjabi      India */
    "pag_PH",   /* Pangasinan   Philippines */
    "pam_PH",   /* Pampanga     Philippines */
    "pap_AN",   /* Papiamento   Netherlands Antilles - this line can be removed in 2018 */
    "pbb_CO",   /* Páez         Colombia */
    "pl_PL",    /* Polish       Poland */
    "ps_AF",    /* Pashto       Afghanistan */
    "pt_PT",    /* Portuguese   Portugal */
    "raj_IN",   /* Rajasthani   India */
    "rm_CH",    /* Romansh      Switzerland */
    "rn_BI",    /* Kirundi      Burundi */
    "ro_RO",    /* Romanian     Romania */
    "ru_RU",    /* Russian      Russia */
    "rw_RW",    /* Kinyarwanda  Rwanda */
    "sa_IN",    /* Sanskrit     India */
    "sah_RU",   /* Yakut        Russia */
    "sas_ID",   /* Sasak        Indonesia */
    "sat_IN",   /* Santali      India */
    "sc_IT",    /* Sardinian    Italy */
    "scn_IT",   /* Sicilian     Italy */
    "sg_CF",    /* Sango        Central African Republic */
    "shn_MM",   /* Shan         Myanmar */
    "si_LK",    /* Sinhala      Sri Lanka */
    "sid_ET",   /* Sidamo       Ethiopia */
    "sk_SK",    /* Slovak       Slovakia */
    "sl_SI",    /* Slovenian    Slovenia */
    "sm_WS",    /* Samoan       Samoa */
    "smn_FI",   /* Inari Sami   Finland */
    "sms_FI",   /* Skolt Sami   Finland */
    "so_SO",    /* Somali       Somalia */
    "sq_AL",    /* Albanian     Albania */
    "sr_RS",    /* Serbian      Serbia */
    "srr_SN",   /* Serer        Senegal */
    "suk_TZ",   /* Sukuma       Tanzania */
    "sus_GN",   /* Susu         Guinea */
    "sv_SE",    /* Swedish      Sweden */
    "te_IN",    /* Telugu       India */
    "tem_SL",   /* Timne        Sierra Leone */
    "tet_ID",   /* Tetum        Indonesia */
    "tg_TJ",    /* Tajik        Tajikistan */
    "th_TH",    /* Thai         Thailand */
    "ti_ER",    /* Tigrinya     Eritrea */
    "tiv_NG",   /* Tiv          Nigeria */
    "tk_TM",    /* Turkmen      Turkmenistan */
    "tl_PH",    /* Tagalog      Philippines */
    "to_TO",    /* Tonga        Tonga */
    "tpi_PG",   /* Tok Pisin    Papua New Guinea */
    "tr_TR",    /* Turkish      Turkey */
    "tum_MW",   /* Tumbuka      Malawi */
    "ug_CN",    /* Uighur       China */
    "uk_UA",    /* Ukrainian    Ukraine */
    "umb_AO",   /* Umbundu      Angola */
    "ur_PK",    /* Urdu         Pakistan */
    "uz_UZ",    /* Uzbek        Uzbekistan */
    "ve_ZA",    /* Venda        South Africa */
    "vi_VN",    /* Vietnamese   Vietnam */
    "wa_BE",    /* Walloon      Belgium */
    "wal_ET",   /* Walamo       Ethiopia */
    "war_PH",   /* Waray        Philippines */
    "wen_DE",   /* Sorbian      Germany */
    "yao_MW",   /* Yao          Malawi */
    "zap_MX"    /* Zapotec      Mexico */
  };
1080
/* Order two locale names by their language part only, that is, by the text
   that precedes the first '_' (or by the whole name if it contains no '_').
   The result is negative, zero or positive, in the manner of memcmp.  When
   one language part is a proper prefix of the other, the shorter one sorts
   first (-1 or 1 is returned).  */
static int
langcmp (const char *locale1, const char *locale2)
{
  /* strcspn gives the offset of the first '_', or the length of the whole
     string when there is none.  */
  size_t const lang1_len = strcspn (locale1, "_");
  size_t const lang2_len = strcspn (locale2, "_");
  size_t const common_len = (lang1_len < lang2_len ? lang1_len : lang2_len);
  int const order = memcmp (locale1, locale2, common_len);

  /* A difference within the common prefix decides immediately.  */
  if (order != 0)
    return order;

  /* Same prefix: the names are equal only if the lengths agree as well.  */
  if (lang1_len == lang2_len)
    return 0;
  return (lang1_len < lang2_len ? -1 : 1);
}
1119
1120 /* Given a locale name, return the main locale with the same language,
1121 or NULL if not found.
1122 For example: "fr_DE" -> "fr_FR". */
1123 static const char *
get_main_locale_with_same_language(const char * locale)1124 get_main_locale_with_same_language (const char *locale)
1125 {
1126 # define table locales_with_principal_territory
1127 /* The table is sorted. Perform a binary search. */
1128 size_t hi = sizeof (table) / sizeof (table[0]);
1129 size_t lo = 0;
1130 while (lo < hi)
1131 {
1132 /* Invariant:
1133 for i < lo, langcmp (table[i], locale) < 0,
1134 for i >= hi, langcmp (table[i], locale) > 0. */
1135 size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1136 int cmp = langcmp (table[mid], locale);
1137 if (cmp < 0)
1138 lo = mid + 1;
1139 else if (cmp > 0)
1140 hi = mid;
1141 else
1142 {
1143 /* Found an i with
1144 langcmp (language_table[i], locale) == 0.
1145 Verify that it is the only such i. */
1146 if (mid > lo && langcmp (table[mid - 1], locale) >= 0)
1147 abort ();
1148 if (mid + 1 < hi && langcmp (table[mid + 1], locale) <= 0)
1149 abort ();
1150 return table[mid];
1151 }
1152 }
1153 # undef table
1154 return NULL;
1155 }
1156
/* Mapping from territory to main language that is spoken in that territory.
   Each entry is a locale name of the form ll_CC or lll_CC, i.e. at most
   6 characters plus the terminating NUL.
   The entries must be kept sorted by their territory part (the text after
   the '_'), with at most one entry per territory: the lookup in
   get_main_locale_with_same_territory is a binary search that compares
   entries with terrcmp and aborts when it finds adjacent entries with the
   same territory.  */
static char const locales_with_principal_language[][6 + 1] =
  {
    /* This is based on the set of existing locales in glibc, with duplicates
       removed, and on the Wikipedia pages named "Languages of <territory>".
       If in doubt, use the locale that exists in macOS.  For example, the only
       "*_IN" locale in macOS 10.13 is "hi_IN", so use that.  */
    /* A useful shell function for producing a line of this table is:
         func_line ()
         {
           # Usage: func_line ll_CC
           ll=`echo "$1" | sed -e 's|_.*||'`
           cc=`echo "$1" | sed -e 's|^.*_||'`
           llx=`sed -n -e "s|^${ll} ||p" < gettext-tools/doc/ISO_639`
           ccx=`expand gettext-tools/doc/ISO_3166 | sed -n -e "s|^${cc} *||p"`
           echo " \"$1\", /$X* ${llx} ${ccx} *$X/"
         }
     */
    /* Main language Territory */
    "ca_AD",    /* Catalan Andorra */
    "ar_AE",    /* Arabic United Arab Emirates */
    "ps_AF",    /* Pashto Afghanistan */
    "en_AG",    /* English Antigua and Barbuda */
    "sq_AL",    /* Albanian Albania */
    "hy_AM",    /* Armenian Armenia */
    "pap_AN",   /* Papiamento Netherlands Antilles - this line can be removed in 2018 */
    "pt_AO",    /* Portuguese Angola */
    "es_AR",    /* Spanish Argentina */
    "de_AT",    /* German Austria */
    "en_AU",    /* English Australia */
    /* Aruba has two official languages: "nl_AW", "pap_AW". */
    "az_AZ",    /* Azerbaijani Azerbaijan */
    "bs_BA",    /* Bosnian Bosnia */
    "bn_BD",    /* Bengali Bangladesh */
    "nl_BE",    /* Dutch Belgium */
    "fr_BF",    /* French Burkina Faso */
    "bg_BG",    /* Bulgarian Bulgaria */
    "ar_BH",    /* Arabic Bahrain */
    "rn_BI",    /* Kirundi Burundi */
    "fr_BJ",    /* French Benin */
    "es_BO",    /* Spanish Bolivia */
    "pt_BR",    /* Portuguese Brazil */
    "dz_BT",    /* Dzongkha Bhutan */
    "en_BW",    /* English Botswana */
    "be_BY",    /* Belarusian Belarus */
    "en_CA",    /* English Canada */
    "fr_CD",    /* French Democratic Republic of Congo */
    "sg_CF",    /* Sango Central African Republic */
    "de_CH",    /* German Switzerland */
    "es_CL",    /* Spanish Chile */
    "zh_CN",    /* Chinese China */
    "es_CO",    /* Spanish Colombia */
    "es_CR",    /* Spanish Costa Rica */
    "es_CU",    /* Spanish Cuba */
    /* Curaçao has three official languages: "nl_CW", "pap_CW", "en_CW". */
    "el_CY",    /* Greek Cyprus */
    "cs_CZ",    /* Czech Czech Republic */
    "de_DE",    /* German Germany */
    /* Djibouti has two official languages: "ar_DJ" and "fr_DJ". */
    "da_DK",    /* Danish Denmark */
    "es_DO",    /* Spanish Dominican Republic */
    "ar_DZ",    /* Arabic Algeria */
    "es_EC",    /* Spanish Ecuador */
    "et_EE",    /* Estonian Estonia */
    "ar_EG",    /* Arabic Egypt */
    "ti_ER",    /* Tigrinya Eritrea */
    "es_ES",    /* Spanish Spain */
    "am_ET",    /* Amharic Ethiopia */
    "fi_FI",    /* Finnish Finland */
    /* Fiji has three official languages: "en_FJ", "fj_FJ", "hif_FJ". */
    "fo_FO",    /* Faroese Faeroe Islands */
    "fr_FR",    /* French France */
    "en_GB",    /* English Britain */
    "ka_GE",    /* Georgian Georgia */
    "en_GH",    /* English Ghana */
    "kl_GL",    /* Kalaallisut Greenland */
    "fr_GN",    /* French Guinea */
    "el_GR",    /* Greek Greece */
    "es_GT",    /* Spanish Guatemala */
    "zh_HK",    /* Chinese Hong Kong */
    "es_HN",    /* Spanish Honduras */
    "hr_HR",    /* Croatian Croatia */
    "ht_HT",    /* Haitian Haiti */
    "hu_HU",    /* Hungarian Hungary */
    "id_ID",    /* Indonesian Indonesia */
    "en_IE",    /* English Ireland */
    "he_IL",    /* Hebrew Israel */
    "hi_IN",    /* Hindi India */
    "ar_IQ",    /* Arabic Iraq */
    "fa_IR",    /* Persian Iran */
    "is_IS",    /* Icelandic Iceland */
    "it_IT",    /* Italian Italy */
    "ar_JO",    /* Arabic Jordan */
    "ja_JP",    /* Japanese Japan */
    "sw_KE",    /* Swahili Kenya */
    "ky_KG",    /* Kyrgyz Kyrgyzstan */
    "km_KH",    /* Central Khmer Cambodia */
    "ko_KR",    /* Korean Korea (South) */
    "ar_KW",    /* Arabic Kuwait */
    "kk_KZ",    /* Kazakh Kazakhstan */
    "lo_LA",    /* Laotian Laos */
    "ar_LB",    /* Arabic Lebanon */
    "de_LI",    /* German Liechtenstein */
    "si_LK",    /* Sinhala Sri Lanka */
    "lt_LT",    /* Lithuanian Lithuania */
    /* Luxembourg has three official languages: "lb_LU", "fr_LU", "de_LU". */
    "lv_LV",    /* Latvian Latvia */
    "ar_LY",    /* Arabic Libya */
    "ar_MA",    /* Arabic Morocco */
    "sr_ME",    /* Serbian Montenegro */
    "mg_MG",    /* Malagasy Madagascar */
    "mk_MK",    /* Macedonian North Macedonia */
    "fr_ML",    /* French Mali */
    "my_MM",    /* Burmese Myanmar */
    "mn_MN",    /* Mongolian Mongolia */
    "mt_MT",    /* Maltese Malta */
    "mfe_MU",   /* Mauritian Creole Mauritius */
    "dv_MV",    /* Divehi Maldives */
    "ny_MW",    /* Chichewa Malawi */
    "es_MX",    /* Spanish Mexico */
    "ms_MY",    /* Malay Malaysia */
    "en_NG",    /* English Nigeria */
    "es_NI",    /* Spanish Nicaragua */
    "nl_NL",    /* Dutch Netherlands */
    "no_NO",    /* Norwegian Norway */
    "ne_NP",    /* Nepali Nepal */
    "na_NR",    /* Nauru Nauru */
    "niu_NU",   /* Niuean Niue */
    "en_NZ",    /* English New Zealand */
    "ar_OM",    /* Arabic Oman */
    "es_PA",    /* Spanish Panama */
    "es_PE",    /* Spanish Peru */
    "tpi_PG",   /* Tok Pisin Papua New Guinea */
    "fil_PH",   /* Filipino Philippines */
    "pa_PK",    /* Punjabi Pakistan */
    "pl_PL",    /* Polish Poland */
    "es_PR",    /* Spanish Puerto Rico */
    "pt_PT",    /* Portuguese Portugal */
    "es_PY",    /* Spanish Paraguay */
    "ar_QA",    /* Arabic Qatar */
    "ro_RO",    /* Romanian Romania */
    "sr_RS",    /* Serbian Serbia */
    "ru_RU",    /* Russian Russia */
    "rw_RW",    /* Kinyarwanda Rwanda */
    "ar_SA",    /* Arabic Saudi Arabia */
    "en_SC",    /* English Seychelles */
    "ar_SD",    /* Arabic Sudan */
    "sv_SE",    /* Swedish Sweden */
    "en_SG",    /* English Singapore */
    "sl_SI",    /* Slovenian Slovenia */
    "sk_SK",    /* Slovak Slovakia */
    "en_SL",    /* English Sierra Leone */
    "fr_SN",    /* French Senegal */
    "so_SO",    /* Somali Somalia */
    "ar_SS",    /* Arabic South Sudan */
    "es_SV",    /* Spanish El Salvador */
    "ar_SY",    /* Arabic Syria */
    "th_TH",    /* Thai Thailand */
    "tg_TJ",    /* Tajik Tajikistan */
    "tk_TM",    /* Turkmen Turkmenistan */
    "ar_TN",    /* Arabic Tunisia */
    "to_TO",    /* Tonga Tonga */
    "tr_TR",    /* Turkish Turkey */
    "zh_TW",    /* Chinese Taiwan */
    "sw_TZ",    /* Swahili Tanzania */
    "uk_UA",    /* Ukrainian Ukraine */
    "lg_UG",    /* Ganda Uganda */
    "en_US",    /* English United States of America */
    "es_UY",    /* Spanish Uruguay */
    "uz_UZ",    /* Uzbek Uzbekistan */
    "es_VE",    /* Spanish Venezuela */
    "vi_VN",    /* Vietnamese Vietnam */
    "bi_VU",    /* Bislama Vanuatu */
    "sm_WS",    /* Samoan Samoa */
    "ar_YE",    /* Arabic Yemen */
    "en_ZA",    /* English South Africa */
    "en_ZM",    /* English Zambia */
    "en_ZW"     /* English Zimbabwe */
  };
1336
/* Compare just the territory part of two locale names.
   The territory part is the text that follows the last '_' in the name.
   A name that contains no '_' is treated as having an empty territory; it
   therefore compares equal to any other name without a territory and sorts
   before every name with a non-empty one.
   Returns a negative, zero or positive value, like strcmp.  */
static int
terrcmp (const char *locale1, const char *locale2)
{
  const char *sep1 = strrchr (locale1, '_');
  const char *sep2 = strrchr (locale2, '_');
  /* strrchr returns NULL when there is no '_'.  Computing NULL + 1 and
     passing it to strcmp would be undefined behaviour, so substitute the
     empty string in that case.  */
  const char *territory1 = (sep1 != NULL ? sep1 + 1 : "");
  const char *territory2 = (sep2 != NULL ? sep2 + 1 : "");

  return strcmp (territory1, territory2);
}
1346
1347 /* Given a locale name, return the locale corresponding to the main language
1348 with the same territory, or NULL if not found.
1349 For example: "fr_DE" -> "de_DE". */
1350 static const char *
get_main_locale_with_same_territory(const char * locale)1351 get_main_locale_with_same_territory (const char *locale)
1352 {
1353 if (strrchr (locale, '_') != NULL)
1354 {
1355 # define table locales_with_principal_language
1356 /* The table is sorted. Perform a binary search. */
1357 size_t hi = sizeof (table) / sizeof (table[0]);
1358 size_t lo = 0;
1359 while (lo < hi)
1360 {
1361 /* Invariant:
1362 for i < lo, terrcmp (table[i], locale) < 0,
1363 for i >= hi, terrcmp (table[i], locale) > 0. */
1364 size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1365 int cmp = terrcmp (table[mid], locale);
1366 if (cmp < 0)
1367 lo = mid + 1;
1368 else if (cmp > 0)
1369 hi = mid;
1370 else
1371 {
1372 /* Found an i with
1373 terrcmp (language_table[i], locale) == 0.
1374 Verify that it is the only such i. */
1375 if (mid > lo && terrcmp (table[mid - 1], locale) >= 0)
1376 abort ();
1377 if (mid + 1 < hi && terrcmp (table[mid + 1], locale) <= 0)
1378 abort ();
1379 return table[mid];
1380 }
1381 }
1382 # undef table
1383 }
1384 return NULL;
1385 }
1386
1387 # endif
1388
1389 DLL_EXPORTED
1390 char *
libintl_setlocale(int category,const char * locale)1391 libintl_setlocale (int category, const char *locale)
1392 {
1393 if (locale != NULL && locale[0] == '\0')
1394 {
1395 /* A request to the set the current locale to the default locale. */
1396 if (category == LC_ALL)
1397 {
1398 /* Set LC_CTYPE first. Then the other categories. */
1399 static int const categories[] =
1400 {
1401 LC_CTYPE,
1402 LC_NUMERIC,
1403 LC_TIME,
1404 LC_COLLATE,
1405 LC_MONETARY,
1406 LC_MESSAGES
1407 };
1408 char *saved_locale;
1409 const char *base_name;
1410 unsigned int i;
1411
1412 /* Back up the old locale, in case one of the steps fails. */
1413 saved_locale = setlocale (LC_ALL, NULL);
1414 if (saved_locale == NULL)
1415 return NULL;
1416 saved_locale = strdup (saved_locale);
1417 if (saved_locale == NULL)
1418 return NULL;
1419
1420 /* Set LC_CTYPE category. Set all other categories (except possibly
1421 LC_MESSAGES) to the same value in the same call; this is likely to
1422 save calls. */
1423 base_name =
1424 gl_locale_name_environ (LC_CTYPE, category_to_name (LC_CTYPE));
1425 if (base_name == NULL)
1426 base_name = gl_locale_name_default ();
1427
1428 if (setlocale_unixlike (LC_ALL, base_name) != NULL)
1429 {
1430 /* LC_CTYPE category already set. */
1431 i = 1;
1432 }
1433 else
1434 {
1435 /* On Mac OS X, "UTF-8" is a valid locale name for LC_CTYPE but
1436 not for LC_ALL. Therefore this call may fail. So, try
1437 another base_name. */
1438 base_name = "C";
1439 if (setlocale_unixlike (LC_ALL, base_name) == NULL)
1440 goto fail;
1441 i = 0;
1442 }
1443 # if defined _WIN32 && ! defined __CYGWIN__
1444 /* On native Windows, setlocale(LC_ALL,...) may succeed but set the
1445 LC_CTYPE category to an invalid value ("C") when it does not
1446 support the specified encoding. Report a failure instead. */
1447 if (strchr (base_name, '.') != NULL
1448 && strcmp (setlocale (LC_CTYPE, NULL), "C") == 0)
1449 goto fail;
1450 # endif
1451
1452 for (; i < sizeof (categories) / sizeof (categories[0]); i++)
1453 {
1454 int cat = categories[i];
1455 const char *name;
1456
1457 name = gl_locale_name_environ (cat, category_to_name (cat));
1458 if (name == NULL)
1459 name = gl_locale_name_default ();
1460
1461 /* If name is the same as base_name, it has already been set
1462 through the setlocale call before the loop. */
1463 if (strcmp (name, base_name) != 0
1464 # if LC_MESSAGES == 1729
1465 || cat == LC_MESSAGES
1466 # endif
1467 )
1468 if (setlocale_single (cat, name) == NULL)
1469 # if defined __APPLE__ && defined __MACH__
1470 {
1471 /* On Mac OS X 10.13, some locales can be set through
1472 System Preferences > Language & Region, that are not
1473 supported by libc. The system's setlocale() falls
1474 back to "C" for these locale categories. We can do
1475 better, by trying an existing locale with the same
1476 language or an existing locale with the same territory.
1477 If we can't, print a warning, to limit user
1478 expectations. */
1479 int warn = 0;
1480
1481 if (cat == LC_CTYPE)
1482 warn = (setlocale_single (cat, "UTF-8") == NULL);
1483 else if (cat == LC_MESSAGES)
1484 {
1485 # if HAVE_CFLOCALECOPYPREFERREDLANGUAGES || HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */
1486 /* Take the primary language preference. */
1487 # if HAVE_CFLOCALECOPYPREFERREDLANGUAGES /* MacOS X 10.5 or newer */
1488 CFArrayRef prefArray = CFLocaleCopyPreferredLanguages ();
1489 # elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */
1490 CFTypeRef preferences =
1491 CFPreferencesCopyAppValue (CFSTR ("AppleLanguages"),
1492 kCFPreferencesCurrentApplication);
1493 if (preferences != NULL
1494 && CFGetTypeID (preferences) == CFArrayGetTypeID ())
1495 {
1496 CFArrayRef prefArray = (CFArrayRef)preferences;
1497 # endif
1498 int n = CFArrayGetCount (prefArray);
1499 if (n > 0)
1500 {
1501 char buf[256];
1502 CFTypeRef element = CFArrayGetValueAtIndex (prefArray, 0);
1503 if (element != NULL
1504 && CFGetTypeID (element) == CFStringGetTypeID ()
1505 && CFStringGetCString ((CFStringRef)element,
1506 buf, sizeof (buf),
1507 kCFStringEncodingASCII))
1508 {
1509 /* Remove the country.
1510 E.g. "zh-Hans-DE" -> "zh-Hans". */
1511 char *last_minus = strrchr (buf, '-');
1512 if (last_minus != NULL)
1513 *last_minus = '\0';
1514
1515 /* Convert to Unix locale name.
1516 E.g. "zh-Hans" -> "zh_CN". */
1517 gl_locale_name_canonicalize (buf);
1518
1519 /* Try setlocale with this value. */
1520 if (setlocale_single (cat, buf) == NULL)
1521 {
1522 const char *last_try =
1523 get_main_locale_with_same_language (buf);
1524
1525 if (last_try == NULL
1526 || setlocale_single (cat, last_try) == NULL)
1527 warn = 1;
1528 }
1529 }
1530 }
1531 # if HAVE_CFLOCALECOPYPREFERREDLANGUAGES /* MacOS X 10.5 or newer */
1532 CFRelease (prefArray);
1533 # elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */
1534 }
1535 # endif
1536 # else
1537 const char *last_try =
1538 get_main_locale_with_same_language (name);
1539
1540 if (last_try == NULL
1541 || setlocale_single (cat, last_try) == NULL)
1542 warn = 1;
1543 # endif
1544 }
1545 else
1546 {
1547 /* For LC_NUMERIC, the application should use the locale
1548 properties kCFLocaleDecimalSeparator,
1549 kCFLocaleGroupingSeparator.
1550 For LC_TIME, the application should use the locale
1551 property kCFLocaleCalendarIdentifier.
1552 For LC_COLLATE, the application should use the locale
1553 properties kCFLocaleCollationIdentifier,
1554 kCFLocaleCollatorIdentifier.
1555 For LC_MONETARY, the applicationshould use the locale
1556 properties kCFLocaleCurrencySymbol,
1557 kCFLocaleCurrencyCode.
1558 But since most applications don't have macOS specific
1559 code like this, try an existing locale with the same
1560 territory. */
1561 const char *last_try =
1562 get_main_locale_with_same_territory (name);
1563
1564 if (last_try == NULL
1565 || setlocale_single (cat, last_try) == NULL)
1566 warn = 1;
1567 }
1568
1569 if (warn)
1570 {
1571 /* Warn only if the environment variable
1572 SETLOCALE_VERBOSE is set. Otherwise these warnings
1573 are just annoyances, since normal users won't invoke
1574 'localedef'. */
1575 const char *verbose = getenv ("SETLOCALE_VERBOSE");
1576 if (verbose != NULL && verbose[0] != '\0')
1577 fprintf (stderr,
1578 "Warning: Failed to set locale category %s to %s.\n",
1579 category_to_name (cat), name);
1580 }
1581 }
1582 # else
1583 goto fail;
1584 # endif
1585 }
1586
1587 /* All steps were successful. */
1588 ++_nl_msg_cat_cntr;
1589 free (saved_locale);
1590 return setlocale (LC_ALL, NULL);
1591
1592 fail:
1593 if (saved_locale[0] != '\0') /* don't risk an endless recursion */
1594 setlocale (LC_ALL, saved_locale);
1595 free (saved_locale);
1596 return NULL;
1597 }
1598 else
1599 {
1600 char *result;
1601 const char *name =
1602 gl_locale_name_environ (category, category_to_name (category));
1603 if (name == NULL)
1604 name = gl_locale_name_default ();
1605
1606 result = setlocale_single (category, name);
1607 if (result != NULL)
1608 ++_nl_msg_cat_cntr;
1609 return result;
1610 }
1611 }
1612 else
1613 {
1614 # if defined _WIN32 && ! defined __CYGWIN__
1615 if (category == LC_ALL && locale != NULL && strchr (locale, '.') != NULL)
1616 {
1617 char *saved_locale;
1618
1619 /* Back up the old locale. */
1620 saved_locale = setlocale (LC_ALL, NULL);
1621 if (saved_locale == NULL)
1622 return NULL;
1623 saved_locale = strdup (saved_locale);
1624 if (saved_locale == NULL)
1625 return NULL;
1626
1627 if (setlocale_unixlike (LC_ALL, locale) == NULL)
1628 {
1629 free (saved_locale);
1630 return NULL;
1631 }
1632
1633 /* On native Windows, setlocale(LC_ALL,...) may succeed but set the
1634 LC_CTYPE category to an invalid value ("C") when it does not
1635 support the specified encoding. Report a failure instead. */
1636 if (strcmp (setlocale (LC_CTYPE, NULL), "C") == 0)
1637 {
1638 if (saved_locale[0] != '\0') /* don't risk an endless recursion */
1639 setlocale (LC_ALL, saved_locale);
1640 free (saved_locale);
1641 return NULL;
1642 }
1643
1644 /* It was really successful. */
1645 ++_nl_msg_cat_cntr;
1646 free (saved_locale);
1647 return setlocale (LC_ALL, NULL);
1648 }
1649 else
1650 # endif
1651 {
1652 char *result = setlocale_single (category, locale);
1653 if (result != NULL)
1654 ++_nl_msg_cat_cntr;
1655 return result;
1656 }
1657 }
1658 }
1659
1660 # if HAVE_NEWLOCALE
1661
1662 DLL_EXPORTED
1663 locale_t
1664 libintl_newlocale (int category_mask, const char *locale, locale_t base)
1665 {
1666 if (category_mask != 0 && locale != NULL && locale[0] == '\0')
1667 {
1668 /* A request to construct a locale_t object that refers to the default
1669 locale. */
1670
1671 /* Set LC_CTYPE first. Then the other categories. */
1672 static struct { int cat; int mask; } const categories[] =
1673 {
1674 { LC_CTYPE, LC_CTYPE_MASK },
1675 { LC_NUMERIC, LC_NUMERIC_MASK },
1676 { LC_TIME, LC_TIME_MASK },
1677 { LC_COLLATE, LC_COLLATE_MASK },
1678 { LC_MONETARY, LC_MONETARY_MASK },
1679 { LC_MESSAGES, LC_MESSAGES_MASK }
1680 };
1681
1682 locale_t orig_base = base;
1683
1684 if ((LC_ALL_MASK & ~category_mask) == 0)
1685 {
1686 const char *base_name;
1687 unsigned int i;
1688
1689 /* Set LC_CTYPE category. Set all other categories (except possibly
1690 LC_MESSAGES) to the same value in the same call; this is likely to
1691 save calls. */
1692 base_name =
1693 gl_locale_name_environ (LC_CTYPE, category_to_name (LC_CTYPE));
1694 if (base_name == NULL)
1695 base_name = gl_locale_name_default ();
1696
1697 base = newlocale (LC_ALL_MASK, base_name, base);
1698 if (base == NULL)
1699 return NULL;
1700
1701 for (i = 1; i < sizeof (categories) / sizeof (categories[0]); i++)
1702 {
1703 int category = categories[i].cat;
1704 int category_mask = categories[i].mask;
1705 const char *name;
1706
1707 name =
1708 gl_locale_name_environ (category, category_to_name (category));
1709 if (name == NULL)
1710 name = gl_locale_name_default ();
1711
1712 /* If name is the same as base_name, it has already been set
1713 through the setlocale call before the loop. */
1714 if (strcmp (name, base_name) != 0)
1715 {
1716 locale_t copy = newlocale (category_mask, name, base);
1717 if (copy == NULL)
1718 goto fail;
1719 /* No need to call freelocale (base) if copy != base; the
1720 newlocale function already takes care of doing it. */
1721 base = copy;
1722 }
1723 }
1724 }
1725 else
1726 {
1727 unsigned int i;
1728
1729 for (i = 0; i < sizeof (categories) / sizeof (categories[0]); i++)
1730 {
1731 int cat_mask = categories[i].mask;
1732
1733 if ((category_mask & cat_mask) != 0)
1734 {
1735 int cat = categories[i].cat;
1736 const char *name;
1737 locale_t copy;
1738
1739 name = gl_locale_name_environ (cat, category_to_name (cat));
1740 if (name == NULL)
1741 name = gl_locale_name_default ();
1742
1743 copy = newlocale (cat_mask, name, base);
1744 if (copy == NULL)
1745 goto fail;
1746 /* No need to call freelocale (base) if copy != base; the
1747 newlocale function already takes care of doing it. */
1748 base = copy;
1749 }
1750 }
1751 }
1752
1753 /* All steps were successful. */
1754 return base;
1755
1756 fail:
1757 if (base != NULL && orig_base == NULL)
1758 {
1759 int saved_errno = errno;
1760 freelocale (base);
1761 errno = saved_errno;
1762 }
1763 return NULL;
1764 }
1765 else
1766 return newlocale (category_mask, locale, base);
1767 }
1768
1769 # endif
1770
1771 #endif
1772