• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*****************************************************************************
9 *
10 * File CLOCTST.C
11 *
12 * Modification History:
13 *        Name                     Description
14 *     Madhu Katragadda            Ported for C API
15 ******************************************************************************
16 */
17 #include "cloctst.h"
18 #include <stdbool.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include "cintltst.h"
23 #include "cmemory.h"
24 #include "cstring.h"
25 #include "uparse.h"
26 #include "uresimp.h"
27 #include "uassert.h"
28 
29 #include "unicode/putil.h"
30 #include "unicode/ubrk.h"
31 #include "unicode/uchar.h"
32 #include "unicode/ucol.h"
33 #include "unicode/udat.h"
34 #include "unicode/uloc.h"
35 #include "unicode/umsg.h"
36 #include "unicode/ures.h"
37 #include "unicode/uset.h"
38 #include "unicode/ustring.h"
39 #include "unicode/utypes.h"
40 #include "unicode/ulocdata.h"
41 #include "unicode/uldnames.h"
42 #include "unicode/parseerr.h" /* may not be included with some uconfig switches */
43 #include "udbgutil.h"
44 
/*
 * Forward declarations of tests that are defined later in this file.
 */

/* Default locale, locale data and display name tests. */
static void TestNullDefault(void);
static void TestNonexistentLanguageExemplars(void);
static void TestLocDataErrorCodeChaining(void);
static void TestLocDataWithRgTag(void);
static void TestLanguageExemplarsFallbacks(void);
static void TestDisplayNameBrackets(void);
static void TestIllegalArgumentWhenNoDataWithNoSubstitute(void);
static void Test21157CorrectTerminating(void);

static void TestUnicodeDefines(void);

/* Locale ID handling and bug regression tests. */
static void TestIsRightToLeft(void);
static void TestBadLocaleIDs(void);
static void TestBug20370(void);
static void TestBug20321UnicodeLocaleKey(void);

static void TestUsingDefaultWarning(void);
static void TestExcessivelyLongIDs(void);
#if !UCONFIG_NO_FORMATTING
static void TestUldnNameVariants(void);
#endif

/* Not static: declared here with external linkage. */
void PrintDataTable(void);
68 
/*---------------------------------------------------
  Table of expected values.

  Each row holds one kind of information (see the row
  index enum: LANG, SCRIPT, ..., DNAME_EL); each column
  belongs to one of the LOCALE_SIZE test locales.
  Non-ASCII text is stored as "\\uXXXX" escape text.
 --------------------------------------------------- */
#define LOCALE_SIZE 9
#define LOCALE_INFO_SIZE 28

static const char* const rawData2[LOCALE_INFO_SIZE][LOCALE_SIZE] = {
    /* language code */
    { "en", "fr", "ca", "el", "no", "zh", "de", "es", "ja" },
    /* script code */
    { "", "", "", "", "", "", "", "", "" },
    /* country code */
    { "US", "FR", "ES", "GR", "NO", "CN", "DE", "", "JP" },
    /* variant code */
    { "", "", "", "", "NY", "", "", "", "" },
    /* full name */
    {
        "en_US", "fr_FR", "ca_ES",
        "el_GR", "no_NO_NY", "zh_Hans_CN",
        "de_DE@collation=phonebook", "es@collation=traditional", "ja_JP@calendar=japanese"
    },
    /* ISO-3 language */
    { "eng", "fra", "cat", "ell", "nor", "zho", "deu", "spa", "jpn" },
    /* ISO-3 country */
    { "USA", "FRA", "ESP", "GRC", "NOR", "CHN", "DEU", "", "JPN" },
    /* LCID */
    { "409", "40c", "403", "408", "814", "804", "10407", "40a", "411" },

    /* display language (English) */
    { "English", "French", "Catalan", "Greek", "Norwegian", "Chinese", "German", "Spanish", "Japanese" },
    /* display script code (English) */
    { "", "", "", "", "", "Simplified Han", "", "", "" },
    /* display country (English) */
    { "United States", "France", "Spain", "Greece", "Norway", "China", "Germany", "", "Japan" },
    /* display variant (English) */
    { "", "", "", "", "NY", "", "", "", "" },
    /* display name (English) */
    {
        "English (United States)", "French (France)", "Catalan (Spain)",
        "Greek (Greece)", "Norwegian (Norway, NY)", "Chinese (Simplified, China)",
        "German (Germany, Sort Order=Phonebook Sort Order)", "Spanish (Sort Order=Traditional Sort Order)", "Japanese (Japan, Calendar=Japanese Calendar)"
    },

    /* display language (French) */
    { "anglais", "fran\\u00E7ais", "catalan", "grec", "norv\\u00E9gien", "chinois", "allemand", "espagnol", "japonais" },
    /* display script code (French) */
    { "", "", "", "", "", "sinogrammes simplifi\\u00e9s", "", "", "" },
    /* display country (French) */
    { "\\u00C9tats-Unis", "France", "Espagne", "Gr\\u00E8ce", "Norv\\u00E8ge", "Chine", "Allemagne", "", "Japon" },
    /* display variant (French) */
    { "", "", "", "", "NY", "", "", "", "" },
    /* display name (French) */
    {
        "anglais (\\u00C9tats-Unis)", "fran\\u00E7ais (France)", "catalan (Espagne)",
        "grec (Gr\\u00E8ce)", "norv\\u00E9gien (Norv\\u00E8ge, NY)", "chinois (simplifi\\u00e9, Chine)",
        "allemand (Allemagne, ordre de tri=ordre de l\\u2019annuaire)", "espagnol (ordre de tri=ordre traditionnel)", "japonais (Japon, calendrier=calendrier japonais)"
    },

    /* display language (Catalan) */
    { "angl\\u00E8s", "franc\\u00E8s", "catal\\u00E0", "grec", "noruec", "xin\\u00E8s", "alemany", "espanyol", "japon\\u00E8s" },
    /* display script code (Catalan) */
    { "", "", "", "", "", "han simplificat", "", "", "" },
    /* display country (Catalan) */
    { "Estats Units", "Fran\\u00E7a", "Espanya", "Gr\\u00E8cia", "Noruega", "Xina", "Alemanya", "", "Jap\\u00F3" },
    /* display variant (Catalan) */
    { "", "", "", "", "NY", "", "", "", "" },
    /* display name (Catalan) */
    {
        "angl\\u00E8s (Estats Units)", "franc\\u00E8s (Fran\\u00E7a)", "catal\\u00E0 (Espanya)",
        "grec (Gr\\u00E8cia)", "noruec (Noruega, NY)", "xin\\u00E8s (simplificat, Xina)",
        "alemany (Alemanya, ordre=ordre de la guia telef\\u00F2nica)", "espanyol (ordre=ordre tradicional)", "japon\\u00E8s (Jap\\u00F3, calendari=calendari japon\\u00e8s)"
    },

    /* display language (Greek) */
    {
        "\\u0391\\u03b3\\u03b3\\u03bb\\u03b9\\u03ba\\u03ac",
        "\\u0393\\u03b1\\u03bb\\u03bb\\u03b9\\u03ba\\u03ac",
        "\\u039a\\u03b1\\u03c4\\u03b1\\u03bb\\u03b1\\u03bd\\u03b9\\u03ba\\u03ac",
        "\\u0395\\u03bb\\u03bb\\u03b7\\u03bd\\u03b9\\u03ba\\u03ac",
        "\\u039d\\u03bf\\u03c1\\u03b2\\u03b7\\u03b3\\u03b9\\u03ba\\u03ac",
        "\\u039A\\u03B9\\u03BD\\u03B5\\u03B6\\u03B9\\u03BA\\u03AC",
        "\\u0393\\u03B5\\u03C1\\u03BC\\u03B1\\u03BD\\u03B9\\u03BA\\u03AC",
        "\\u0399\\u03C3\\u03C0\\u03B1\\u03BD\\u03B9\\u03BA\\u03AC",
        "\\u0399\\u03B1\\u03C0\\u03C9\\u03BD\\u03B9\\u03BA\\u03AC"
    },
    /* display script code (Greek) */
    { "", "", "", "", "", "\\u0391\\u03c0\\u03bb\\u03bf\\u03c0\\u03bf\\u03b9\\u03b7\\u03bc\\u03ad\\u03bd\\u03bf \\u03a7\\u03b1\\u03bd", "", "", "" },
    /* display country (Greek) */
    {
        "\\u0397\\u03BD\\u03C9\\u03BC\\u03AD\\u03BD\\u03B5\\u03C2 \\u03A0\\u03BF\\u03BB\\u03B9\\u03C4\\u03B5\\u03AF\\u03B5\\u03C2",
        "\\u0393\\u03b1\\u03bb\\u03bb\\u03af\\u03b1",
        "\\u0399\\u03c3\\u03c0\\u03b1\\u03bd\\u03af\\u03b1",
        "\\u0395\\u03bb\\u03bb\\u03ac\\u03b4\\u03b1",
        "\\u039d\\u03bf\\u03c1\\u03b2\\u03b7\\u03b3\\u03af\\u03b1",
        "\\u039A\\u03AF\\u03BD\\u03B1",
        "\\u0393\\u03B5\\u03C1\\u03BC\\u03B1\\u03BD\\u03AF\\u03B1",
        "",
        "\\u0399\\u03B1\\u03C0\\u03C9\\u03BD\\u03AF\\u03B1"
    },
    /* display variant (Greek) */
    { "", "", "", "", "NY", "", "", "", "" }, /* TODO: currently there is no translation for NY in Greek fix this test when we have it */
    /* display name (Greek) */
    {
        "\\u0391\\u03b3\\u03b3\\u03bb\\u03b9\\u03ba\\u03ac (\\u0397\\u03BD\\u03C9\\u03BC\\u03AD\\u03BD\\u03B5\\u03C2 \\u03A0\\u03BF\\u03BB\\u03B9\\u03C4\\u03B5\\u03AF\\u03B5\\u03C2)",
        "\\u0393\\u03b1\\u03bb\\u03bb\\u03b9\\u03ba\\u03ac (\\u0393\\u03b1\\u03bb\\u03bb\\u03af\\u03b1)",
        "\\u039a\\u03b1\\u03c4\\u03b1\\u03bb\\u03b1\\u03bd\\u03b9\\u03ba\\u03ac (\\u0399\\u03c3\\u03c0\\u03b1\\u03bd\\u03af\\u03b1)",
        "\\u0395\\u03bb\\u03bb\\u03b7\\u03bd\\u03b9\\u03ba\\u03ac (\\u0395\\u03bb\\u03bb\\u03ac\\u03b4\\u03b1)",
        "\\u039d\\u03bf\\u03c1\\u03b2\\u03b7\\u03b3\\u03b9\\u03ba\\u03ac (\\u039d\\u03bf\\u03c1\\u03b2\\u03b7\\u03b3\\u03af\\u03b1, NY)",
        "\\u039A\\u03B9\\u03BD\\u03B5\\u03B6\\u03B9\\u03BA\\u03AC (\\u0391\\u03c0\\u03bb\\u03bf\\u03c0\\u03bf\\u03b9\\u03b7\\u03bc\\u03ad\\u03bd\\u03bf, \\u039A\\u03AF\\u03BD\\u03B1)",
        "\\u0393\\u03b5\\u03c1\\u03bc\\u03b1\\u03bd\\u03b9\\u03ba\\u03ac (\\u0393\\u03b5\\u03c1\\u03bc\\u03b1\\u03bd\\u03af\\u03b1, \\u03a3\\u03b5\\u03b9\\u03c1\\u03ac \\u03c4\\u03b1\\u03be\\u03b9\\u03bd\\u03cc\\u03bc\\u03b7\\u03c3\\u03b7\\u03c2=\\u03a3\\u03b5\\u03b9\\u03c1\\u03ac \\u03c4\\u03b1\\u03be\\u03b9\\u03bd\\u03cc\\u03bc\\u03b7\\u03c3\\u03b7\\u03c2 \\u03c4\\u03b7\\u03bb\\u03b5\\u03c6\\u03c9\\u03bd\\u03b9\\u03ba\\u03bf\\u03cd \\u03ba\\u03b1\\u03c4\\u03b1\\u03bb\\u03cc\\u03b3\\u03bf\\u03c5)",
        "\\u0399\\u03c3\\u03c0\\u03b1\\u03bd\\u03b9\\u03ba\\u03ac (\\u03a3\\u03b5\\u03b9\\u03c1\\u03ac \\u03c4\\u03b1\\u03be\\u03b9\\u03bd\\u03cc\\u03bc\\u03b7\\u03c3\\u03b7\\u03c2=\\u03a0\\u03b1\\u03c1\\u03b1\\u03b4\\u03bf\\u03c3\\u03b9\\u03b1\\u03ba\\u03ae \\u03c3\\u03b5\\u03b9\\u03c1\\u03ac \\u03c4\\u03b1\\u03be\\u03b9\\u03bd\\u03cc\\u03bc\\u03b7\\u03c3\\u03b7\\u03c2)",
        "\\u0399\\u03b1\\u03c0\\u03c9\\u03bd\\u03b9\\u03ba\\u03ac (\\u0399\\u03b1\\u03c0\\u03c9\\u03bd\\u03af\\u03b1, \\u0397\\u03bc\\u03b5\\u03c1\\u03bf\\u03bb\\u03cc\\u03b3\\u03b9\\u03bf=\\u0399\\u03b1\\u03c0\\u03c9\\u03bd\\u03b9\\u03ba\\u03cc \\u03b7\\u03bc\\u03b5\\u03c1\\u03bf\\u03bb\\u03cc\\u03b3\\u03b9\\u03bf)"
    }
};
176 
177 static UChar*** dataTable=0;
/* Column indices; they follow the order of the first five locales in rawData2. */
enum {
    ENGLISH = 0,
    FRENCH,
    CATALAN,
    GREEK,
    NORWEGIAN
};

/*
 * Row indices into rawData2 / dataTable. Each display-name group (English,
 * French, Catalan, Greek) has five consecutive rows: language, script,
 * country, variant, full name. The first row of each group is given an
 * explicit value so the layout stays easy to verify against the table.
 */
enum {
    /* locale ID components and codes */
    LANG = 0,
    SCRIPT,
    CTRY,
    VAR,
    NAME,
    LANG3,
    CTRY3,
    LCID,

    /* display names in English */
    DLANG_EN = 8,
    DSCRIPT_EN,
    DCTRY_EN,
    DVAR_EN,
    DNAME_EN,

    /* display names in French */
    DLANG_FR = 13,
    DSCRIPT_FR,
    DCTRY_FR,
    DVAR_FR,
    DNAME_FR,

    /* display names in Catalan */
    DLANG_CA = 18,
    DSCRIPT_CA,
    DCTRY_CA,
    DVAR_CA,
    DNAME_CA,

    /* display names in Greek */
    DLANG_EL = 23,
    DSCRIPT_EL,
    DCTRY_EL,
    DVAR_EL,
    DNAME_EL
};
216 
217 #define TESTCASE(name) addTest(root, &name, "tsutil/cloctst/" #name)
218 
219 void addLocaleTest(TestNode** root);
220 
addLocaleTest(TestNode ** root)221 void addLocaleTest(TestNode** root)
222 {
223     TESTCASE(TestObsoleteNames); /* srl- move */
224     TESTCASE(TestBasicGetters);
225     TESTCASE(TestNullDefault);
226     TESTCASE(TestPrefixes);
227     TESTCASE(TestVariantLengthLimit);
228     TESTCASE(TestSimpleResourceInfo);
229     TESTCASE(TestDisplayNames);
230     TESTCASE(TestGetDisplayScriptPreFlighting21160);
231     TESTCASE(TestGetAvailableLocales);
232     TESTCASE(TestGetAvailableLocalesByType);
233     TESTCASE(TestDataDirectory);
234 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
235     TESTCASE(TestISOFunctions);
236 #endif
237     TESTCASE(TestISO3Fallback);
238     TESTCASE(TestUninstalledISO3Names);
239     TESTCASE(TestSimpleDisplayNames);
240     TESTCASE(TestVariantParsing);
241     TESTCASE(TestKeywordVariants);
242     TESTCASE(TestKeywordVariantParsing);
243     TESTCASE(TestCanonicalization);
244     TESTCASE(TestCanonicalizationBuffer);
245     TESTCASE(TestKeywordSet);
246     TESTCASE(TestKeywordSetError);
247     TESTCASE(TestDisplayKeywords);
248     TESTCASE(TestCanonicalization21749StackUseAfterScope);
249     TESTCASE(TestDisplayKeywordValues);
250     TESTCASE(TestGetBaseName);
251 #if !UCONFIG_NO_FILE_IO
252     TESTCASE(TestGetLocale);
253 #endif
254     TESTCASE(TestDisplayNameWarning);
255     TESTCASE(Test21157CorrectTerminating);
256     TESTCASE(TestNonexistentLanguageExemplars);
257     TESTCASE(TestLocDataErrorCodeChaining);
258     TESTCASE(TestLocDataWithRgTag);
259     TESTCASE(TestLanguageExemplarsFallbacks);
260     TESTCASE(TestCalendar);
261     TESTCASE(TestDateFormat);
262     TESTCASE(TestCollation);
263     TESTCASE(TestULocale);
264     TESTCASE(TestUResourceBundle);
265     TESTCASE(TestDisplayName);
266     TESTCASE(TestAcceptLanguage);
267     TESTCASE(TestGetLocaleForLCID);
268     TESTCASE(TestOrientation);
269     TESTCASE(TestLikelySubtags);
270     TESTCASE(TestToLanguageTag);
271     TESTCASE(TestBug20132);
272     TESTCASE(TestBug20149);
273     TESTCASE(TestCDefaultLocale);
274     TESTCASE(TestForLanguageTag);
275     TESTCASE(TestLangAndRegionCanonicalize);
276     TESTCASE(TestTrailingNull);
277     TESTCASE(TestUnicodeDefines);
278     TESTCASE(TestEnglishExemplarCharacters);
279     TESTCASE(TestDisplayNameBrackets);
280     TESTCASE(TestIllegalArgumentWhenNoDataWithNoSubstitute);
281     TESTCASE(TestIsRightToLeft);
282     TESTCASE(TestToUnicodeLocaleKey);
283     TESTCASE(TestToLegacyKey);
284     TESTCASE(TestToUnicodeLocaleType);
285     TESTCASE(TestToLegacyType);
286     TESTCASE(TestBadLocaleIDs);
287     TESTCASE(TestBug20370);
288     TESTCASE(TestBug20321UnicodeLocaleKey);
289     TESTCASE(TestUsingDefaultWarning);
290     TESTCASE(TestBug21449InfiniteLoop);
291     TESTCASE(TestExcessivelyLongIDs);
292 #if !UCONFIG_NO_FORMATTING
293     TESTCASE(TestUldnNameVariants);
294 #endif
295 }
296 
297 
298 /* testing uloc(), uloc_getName(), uloc_getLanguage(), uloc_getVariant(), uloc_getCountry() */
TestBasicGetters(void)299 static void TestBasicGetters(void) {
300     int32_t i;
301     int32_t cap;
302     UErrorCode status = U_ZERO_ERROR;
303     char *testLocale = 0;
304     char *temp = 0, *name = 0;
305     log_verbose("Testing Basic Getters\n");
306     for (i = 0; i < LOCALE_SIZE; i++) {
307         testLocale=(char*)malloc(sizeof(char) * (strlen(rawData2[NAME][i])+1));
308         strcpy(testLocale,rawData2[NAME][i]);
309 
310         log_verbose("Testing   %s  .....\n", testLocale);
311         cap=uloc_getLanguage(testLocale, NULL, 0, &status);
312         if(status==U_BUFFER_OVERFLOW_ERROR){
313             status=U_ZERO_ERROR;
314             temp=(char*)malloc(sizeof(char) * (cap+1));
315             uloc_getLanguage(testLocale, temp, cap+1, &status);
316         }
317         if(U_FAILURE(status)){
318             log_err("ERROR: in uloc_getLanguage  %s\n", myErrorName(status));
319         }
320         if (0 !=strcmp(temp,rawData2[LANG][i]))    {
321             log_err("  Language code mismatch: %s versus  %s\n", temp, rawData2[LANG][i]);
322         }
323 
324 
325         cap=uloc_getCountry(testLocale, temp, cap, &status);
326         if(status==U_BUFFER_OVERFLOW_ERROR){
327             status=U_ZERO_ERROR;
328             temp=(char*)realloc(temp, sizeof(char) * (cap+1));
329             uloc_getCountry(testLocale, temp, cap+1, &status);
330         }
331         if(U_FAILURE(status)){
332             log_err("ERROR: in uloc_getCountry  %s\n", myErrorName(status));
333         }
334         if (0 != strcmp(temp, rawData2[CTRY][i])) {
335             log_err(" Country code mismatch:  %s  versus   %s\n", temp, rawData2[CTRY][i]);
336 
337           }
338 
339         cap=uloc_getVariant(testLocale, temp, cap, &status);
340         if(status==U_BUFFER_OVERFLOW_ERROR){
341             status=U_ZERO_ERROR;
342             temp=(char*)realloc(temp, sizeof(char) * (cap+1));
343             uloc_getVariant(testLocale, temp, cap+1, &status);
344         }
345         if(U_FAILURE(status)){
346             log_err("ERROR: in uloc_getVariant  %s\n", myErrorName(status));
347         }
348         if (0 != strcmp(temp, rawData2[VAR][i])) {
349             log_err("Variant code mismatch:  %s  versus   %s\n", temp, rawData2[VAR][i]);
350         }
351 
352         cap=uloc_getName(testLocale, NULL, 0, &status);
353         if(status==U_BUFFER_OVERFLOW_ERROR){
354             status=U_ZERO_ERROR;
355             name=(char*)malloc(sizeof(char) * (cap+1));
356             uloc_getName(testLocale, name, cap+1, &status);
357         } else if(status==U_ZERO_ERROR) {
358           log_err("ERROR: in uloc_getName(%s,NULL,0,..), expected U_BUFFER_OVERFLOW_ERROR!\n", testLocale);
359         }
360         if(U_FAILURE(status)){
361             log_err("ERROR: in uloc_getName   %s\n", myErrorName(status));
362         }
363         if (0 != strcmp(name, rawData2[NAME][i])){
364             log_err(" Mismatch in getName:  %s  versus   %s\n", name, rawData2[NAME][i]);
365         }
366 
367         free(temp);
368         free(name);
369 
370         free(testLocale);
371     }
372 }
373 
TestNullDefault(void)374 static void TestNullDefault(void) {
375     UErrorCode status = U_ZERO_ERROR;
376     char original[ULOC_FULLNAME_CAPACITY];
377 
378     uprv_strcpy(original, uloc_getDefault());
379     uloc_setDefault("qq_BLA", &status);
380     if (uprv_strcmp(uloc_getDefault(), "qq_BLA") != 0) {
381         log_err(" Mismatch in uloc_setDefault:  qq_BLA  versus   %s\n", uloc_getDefault());
382     }
383     uloc_setDefault(NULL, &status);
384     if (uprv_strcmp(uloc_getDefault(), original) != 0) {
385         log_err(" uloc_setDefault(NULL, &status) didn't get the default locale back!\n");
386     }
387 
388     {
389     /* Test that set & get of default locale work, and that
390      * default locales are cached and reused, and not overwritten.
391      */
392         const char *n_en_US;
393         const char *n_fr_FR;
394         const char *n2_en_US;
395 
396         status = U_ZERO_ERROR;
397         uloc_setDefault("en_US", &status);
398         n_en_US = uloc_getDefault();
399         if (strcmp(n_en_US, "en_US") != 0) {
400             log_err("Wrong result from uloc_getDefault().  Expected \"en_US\", got \"%s\"\n", n_en_US);
401         }
402 
403         uloc_setDefault("fr_FR", &status);
404         n_fr_FR = uloc_getDefault();
405         if (strcmp(n_en_US, "en_US") != 0) {
406             log_err("uloc_setDefault altered previously default string."
407                 "Expected \"en_US\", got \"%s\"\n",  n_en_US);
408         }
409         if (strcmp(n_fr_FR, "fr_FR") != 0) {
410             log_err("Wrong result from uloc_getDefault().  Expected \"fr_FR\", got %s\n",  n_fr_FR);
411         }
412 
413         uloc_setDefault("en_US", &status);
414         n2_en_US = uloc_getDefault();
415         if (strcmp(n2_en_US, "en_US") != 0) {
416             log_err("Wrong result from uloc_getDefault().  Expected \"en_US\", got \"%s\"\n", n_en_US);
417         }
418         if (n2_en_US != n_en_US) {
419             log_err("Default locale cache failed to reuse en_US locale.\n");
420         }
421 
422         if (U_FAILURE(status)) {
423             log_err("Failure returned from uloc_setDefault - \"%s\"\n", u_errorName(status));
424         }
425 
426     }
427     uloc_setDefault(original, &status);
428     if (U_FAILURE(status)) {
429         log_err("Failed to change the default locale back to %s\n", original);
430     }
431 
432 }
433 /* Test the i- and x- and @ and . functionality
434 */
435 
436 #define PREFIXBUFSIZ 128
437 
TestPrefixes(void)438 static void TestPrefixes(void) {
439     int row = 0;
440     int n;
441     const char *loc, *expected;
442 
443     static const char * const testData[][7] =
444     {
445         /* NULL canonicalize() column means "expect same as getName()" */
446         {"sv", "", "FI", "AL", "sv-fi-al", "sv_FI_AL", NULL},
447         {"en", "", "GB", "", "en-gb", "en_GB", NULL},
448         {"i-hakka", "", "MT", "XEMXIJA", "i-hakka_MT_XEMXIJA", "i-hakka_MT_XEMXIJA", NULL},
449         {"i-hakka", "", "CN", "", "i-hakka_CN", "i-hakka_CN", NULL},
450         {"i-hakka", "", "MX", "", "I-hakka_MX", "i-hakka_MX", NULL},
451         {"x-klingon", "", "US", "SANJOSE", "X-KLINGON_us_SANJOSE", "x-klingon_US_SANJOSE", NULL},
452         {"hy", "", "", "AREVMDA", "hy_AREVMDA", "hy__AREVMDA", "hyw"},
453         {"de", "", "", "1901", "de-1901", "de__1901", NULL},
454         {"mr", "", "", "", "mr.utf8", "mr.utf8", "mr"},
455         {"de", "", "TV", "", "de-tv.koi8r", "de_TV.koi8r", "de_TV"},
456         {"x-piglatin", "", "ML", "", "x-piglatin_ML.MBE", "x-piglatin_ML.MBE", "x-piglatin_ML"},  /* Multibyte English */
457         {"i-cherokee", "","US", "", "i-Cherokee_US.utf7", "i-cherokee_US.utf7", "i-cherokee_US"},
458         {"x-filfli", "", "MT", "FILFLA", "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA"},
459         {"no", "", "NO", "NY", "no-no-ny.utf32@B", "no_NO_NY.utf32@B", "no_NO_NY_B"},
460         {"no", "", "NO", "",  "no-no.utf32@B", "no_NO.utf32@B", "no_NO_B"},
461         {"no", "", "",   "NY", "no__ny", "no__NY", NULL},
462         {"no", "", "",   "", "no@ny", "no@ny", "no__NY"},
463         {"el", "Latn", "", "", "el-latn", "el_Latn", NULL},
464         {"en", "Cyrl", "RU", "", "en-cyrl-ru", "en_Cyrl_RU", NULL},
465         {"qq", "Qqqq", "QQ", "QQ", "qq_Qqqq_QQ_QQ", "qq_Qqqq_QQ_QQ", NULL},
466         {"qq", "Qqqq", "", "QQ", "qq_Qqqq__QQ", "qq_Qqqq__QQ", NULL},
467         {"ab", "Cdef", "GH", "IJ", "ab_cdef_gh_ij", "ab_Cdef_GH_IJ", NULL}, /* total garbage */
468 
469         // Before ICU 64, ICU locale canonicalization had some additional mappings.
470         // They were removed for ICU-20187 "drop support for long-obsolete locale ID variants".
471         // The following now use standard canonicalization.
472         {"zh", "Hans", "", "PINYIN", "zh-Hans-pinyin", "zh_Hans__PINYIN", "zh_Hans__PINYIN"},
473         {"zh", "Hant", "TW", "STROKE", "zh-hant_TW_STROKE", "zh_Hant_TW_STROKE", "zh_Hant_TW_STROKE"},
474 
475         {NULL,NULL,NULL,NULL,NULL,NULL,NULL}
476     };
477 
478     static const char * const testTitles[] = {
479         "uloc_getLanguage()",
480         "uloc_getScript()",
481         "uloc_getCountry()",
482         "uloc_getVariant()",
483         "name",
484         "uloc_getName()",
485         "uloc_canonicalize()"
486     };
487 
488     char buf[PREFIXBUFSIZ];
489     int32_t len;
490     UErrorCode err;
491 
492 
493     for(row=0;testData[row][0] != NULL;row++) {
494         loc = testData[row][NAME];
495         log_verbose("Test #%d: %s\n", row, loc);
496 
497         err = U_ZERO_ERROR;
498         len=0;
499         buf[0]=0;
500         for(n=0;n<=(NAME+2);n++) {
501             if(n==NAME) continue;
502 
503             for(len=0;len<PREFIXBUFSIZ;len++) {
504                 buf[len] = '%'; /* Set a tripwire.. */
505             }
506             len = 0;
507 
508             switch(n) {
509             case LANG:
510                 len = uloc_getLanguage(loc, buf, PREFIXBUFSIZ, &err);
511                 break;
512 
513             case SCRIPT:
514                 len = uloc_getScript(loc, buf, PREFIXBUFSIZ, &err);
515                 break;
516 
517             case CTRY:
518                 len = uloc_getCountry(loc, buf, PREFIXBUFSIZ, &err);
519                 break;
520 
521             case VAR:
522                 len = uloc_getVariant(loc, buf, PREFIXBUFSIZ, &err);
523                 break;
524 
525             case NAME+1:
526                 len = uloc_getName(loc, buf, PREFIXBUFSIZ, &err);
527                 break;
528 
529             case NAME+2:
530                 len = uloc_canonicalize(loc, buf, PREFIXBUFSIZ, &err);
531                 break;
532 
533             default:
534                 strcpy(buf, "**??");
535                 len=4;
536             }
537 
538             if(U_FAILURE(err)) {
539                 log_err("#%d: %s on %s: err %s\n",
540                     row, testTitles[n], loc, u_errorName(err));
541             } else {
542                 log_verbose("#%d: %s on %s: -> [%s] (length %d)\n",
543                     row, testTitles[n], loc, buf, len);
544 
545                 if(len != (int32_t)strlen(buf)) {
546                     log_err("#%d: %s on %s: -> [%s] (length returned %d, actual %d!)\n",
547                         row, testTitles[n], loc, buf, len, strlen(buf)+1);
548 
549                 }
550 
551                 /* see if they smashed something */
552                 if(buf[len+1] != '%') {
553                     log_err("#%d: %s on %s: -> [%s] - wrote [%X] out ofbounds!\n",
554                         row, testTitles[n], loc, buf, buf[len+1]);
555                 }
556 
557                 expected = testData[row][n];
558                 if (expected == NULL && n == (NAME+2)) {
559                     /* NULL expected canonicalize() means "expect same as getName()" */
560                     expected = testData[row][NAME+1];
561                 }
562                 if(strcmp(buf, expected)) {
563                     log_err("#%d: %s on %s: -> [%s] (expected '%s'!)\n",
564                         row, testTitles[n], loc, buf, expected);
565 
566                 }
567             }
568         }
569     }
570 }
571 
TestVariantLengthLimit(void)572 static void TestVariantLengthLimit(void) {
573     static const char valid[] =
574         "_"
575         "_12345678"
576         "_12345678"
577         "_12345678"
578         "_12345678"
579         "_12345678"
580         "_12345678"
581         "_12345678"
582         "_12345678"
583         "_12345678"
584         "_12345678"
585         "_12345678"
586         "_12345678"
587         "_12345678"
588         "_12345678"
589         "_12345678"
590         "_12345678"
591         "_12345678"
592         "_12345678"
593         "_12345678"
594         "_12345678";
595 
596     static const char invalid[] =
597         "_"
598         "_12345678"
599         "_12345678"
600         "_12345678"
601         "_12345678"
602         "_12345678"
603         "_12345678"
604         "_12345678"
605         "_12345678"
606         "_12345678"
607         "_12345678"
608         "_12345678"
609         "_12345678"
610         "_12345678"
611         "_12345678"
612         "_12345678"
613         "_12345678"
614         "_12345678"
615         "_12345678"
616         "_12345678"
617         "_12345678X";  // One character too long.
618 
619     const char* const variantsExpected = valid + 2;  // Skip initial "__".
620     const int32_t reslenExpected = uprv_strlen(variantsExpected);
621 
622     char buffer[UPRV_LENGTHOF(invalid)];
623     UErrorCode status;
624 
625     status = U_ZERO_ERROR;
626     int32_t reslen =
627         uloc_getVariant(valid, buffer, UPRV_LENGTHOF(buffer), &status);
628     if (U_FAILURE(status)) {
629         log_err("Unexpected error in uloc_getVariant(): %s\n",
630                 myErrorName(status));
631     } else if (reslenExpected != reslen) {
632         log_err("Expected length %d but got length %d.\n",
633                 reslenExpected, reslen);
634     } else if (uprv_strcmp(variantsExpected, buffer) != 0) {
635         log_err("Expected variants \"%s\" but got variants \"%s\"\n",
636                 variantsExpected, buffer);
637     }
638 
639     status = U_ZERO_ERROR;
640     uloc_getVariant(invalid, buffer, UPRV_LENGTHOF(buffer), &status);
641     if (status != U_ILLEGAL_ARGUMENT_ERROR) {
642         // The variants are known to be too long, parsing must fail.
643         log_err("Unexpected error in uloc_getVariant(), expected "
644                 "U_ILLEGAL_ARGUMENT_ERROR but got %s.\n",
645                 myErrorName(status));
646     }
647 }
648 
649 /* testing uloc_getISO3Language(), uloc_getISO3Country(),  */
TestSimpleResourceInfo(void)650 static void TestSimpleResourceInfo(void) {
651     int32_t i;
652     char* testLocale = 0;
653     UChar* expected = 0;
654 
655     const char* temp;
656     char            temp2[20];
657     testLocale=(char*)malloc(sizeof(char) * 1);
658     expected=(UChar*)malloc(sizeof(UChar) * 1);
659 
660     setUpDataTable();
661     log_verbose("Testing getISO3Language and getISO3Country\n");
662     for (i = 0; i < LOCALE_SIZE; i++) {
663 
664         testLocale=(char*)realloc(testLocale, sizeof(char) * (u_strlen(dataTable[NAME][i])+1));
665         u_austrcpy(testLocale, dataTable[NAME][i]);
666 
667         log_verbose("Testing   %s ......\n", testLocale);
668 
669         temp=uloc_getISO3Language(testLocale);
670         expected=(UChar*)realloc(expected, sizeof(UChar) * (strlen(temp) + 1));
671         u_uastrcpy(expected,temp);
672         if (0 != u_strcmp(expected, dataTable[LANG3][i])) {
673             log_err("  ISO-3 language code mismatch:  %s versus  %s\n",  austrdup(expected),
674                 austrdup(dataTable[LANG3][i]));
675         }
676 
677         temp=uloc_getISO3Country(testLocale);
678         expected=(UChar*)realloc(expected, sizeof(UChar) * (strlen(temp) + 1));
679         u_uastrcpy(expected,temp);
680         if (0 != u_strcmp(expected, dataTable[CTRY3][i])) {
681             log_err("  ISO-3 Country code mismatch:  %s versus  %s\n",  austrdup(expected),
682                 austrdup(dataTable[CTRY3][i]));
683         }
684         snprintf(temp2, sizeof(temp2), "%x", (int)uloc_getLCID(testLocale));
685         if (strcmp(temp2, rawData2[LCID][i]) != 0) {
686             log_err("LCID mismatch: %s versus %s\n", temp2 , rawData2[LCID][i]);
687         }
688     }
689 
690     free(expected);
691     free(testLocale);
692     cleanUpDataTable();
693 }
694 
695 /* if len < 0, we convert until we hit UChar 0x0000, which is not output. will add trailing null
696  * if there's room but won't be included in result.  result < 0 indicates an error.
697  * Returns the number of chars written (not those that would be written if there's enough room.*/
UCharsToEscapedAscii(const UChar * utext,int32_t len,char * resultChars,int32_t buflen)698 static int32_t UCharsToEscapedAscii(const UChar* utext, int32_t len, char* resultChars, int32_t buflen) {
699     static const struct {
700         char escapedChar;
701         UChar sourceVal;
702     } ESCAPE_MAP[] = {
703         /*a*/ {'a', 0x07},
704         /*b*/ {'b', 0x08},
705         /*e*/ {'e', 0x1b},
706         /*f*/ {'f', 0x0c},
707         /*n*/ {'n', 0x0a},
708         /*r*/ {'r', 0x0d},
709         /*t*/ {'t', 0x09},
710         /*v*/ {'v', 0x0b}
711     };
712     static const int32_t ESCAPE_MAP_LENGTH = UPRV_LENGTHOF(ESCAPE_MAP);
713     static const char HEX_DIGITS[] = {
714         '0', '1', '2', '3', '4', '5', '6', '7',
715         '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
716     };
717     int32_t i, j;
718     int32_t resultLen = 0;
719     const int32_t limit = len<0 ? buflen : len; /* buflen is long enough to hit the buffer limit */
720     const int32_t escapeLimit1 = buflen-2;
721     const int32_t escapeLimit2 = buflen-6;
722     UChar uc;
723 
724     if(utext==NULL || resultChars==NULL || buflen<0) {
725         return -1;
726     }
727 
728     for(i=0;i<limit && resultLen<buflen;++i) {
729         uc=utext[i];
730         if(len<0 && uc==0) {
731             break;
732         }
733         if(uc<0x20) {
734             for(j=0;j<ESCAPE_MAP_LENGTH && uc!=ESCAPE_MAP[j].sourceVal;j++) {
735             }
736             if(j<ESCAPE_MAP_LENGTH) {
737                 if(resultLen>escapeLimit1) {
738                     break;
739                 }
740                 resultChars[resultLen++]='\\';
741                 resultChars[resultLen++]=ESCAPE_MAP[j].escapedChar;
742                 continue;
743             }
744         } else if(uc<0x7f) {
745             u_austrncpy(resultChars + resultLen, &uc, 1);
746             resultLen++;
747             continue;
748         }
749 
750         if(resultLen>escapeLimit2) {
751             break;
752         }
753 
754         /* have to escape the uchar */
755         resultChars[resultLen++]='\\';
756         resultChars[resultLen++]='u';
757         resultChars[resultLen++]=HEX_DIGITS[(uc>>12)&0xff];
758         resultChars[resultLen++]=HEX_DIGITS[(uc>>8)&0xff];
759         resultChars[resultLen++]=HEX_DIGITS[(uc>>4)&0xff];
760         resultChars[resultLen++]=HEX_DIGITS[uc&0xff];
761     }
762 
763     if(resultLen<buflen) {
764         resultChars[resultLen] = 0;
765     }
766 
767     return resultLen;
768 }
769 
770 /*
771  * Jitterbug 2439 -- markus 20030425
772  *
773  * The lookup of display names must not fall back through the default
774  * locale because that yields useless results.
775  */
TestDisplayNames(void)776 static void TestDisplayNames(void)
777 {
778     UChar buffer[100];
779     UErrorCode errorCode=U_ZERO_ERROR;
780     int32_t length;
781     log_verbose("Testing getDisplayName for different locales\n");
782 
783     log_verbose("  In locale = en_US...\n");
784     doTestDisplayNames("en_US", DLANG_EN);
785     log_verbose("  In locale = fr_FR....\n");
786     doTestDisplayNames("fr_FR", DLANG_FR);
787     log_verbose("  In locale = ca_ES...\n");
788     doTestDisplayNames("ca_ES", DLANG_CA);
789     log_verbose("  In locale = gr_EL..\n");
790     doTestDisplayNames("el_GR", DLANG_EL);
791 
792     /* test that the default locale has a display name for its own language */
793     errorCode=U_ZERO_ERROR;
794     length=uloc_getDisplayLanguage(NULL, NULL, buffer, UPRV_LENGTHOF(buffer), &errorCode);
795     /* check <=3 to reject getting the language code as a display name */
796     if(U_FAILURE(errorCode) || (length<=3 && buffer[0]<=0x7f)) {
797         const char* defaultLocale = uloc_getDefault();
798         for (int32_t i = 0, count = uloc_countAvailable(); i < count; i++) {
799             /* Only report error if the default locale is in the available list */
800             if (uprv_strcmp(defaultLocale, uloc_getAvailable(i)) == 0) {
801                 log_data_err(
802                     "unable to get a display string for the language of the "
803                     "default locale - %s (Are you missing data?)\n",
804                     u_errorName(errorCode));
805                 break;
806             }
807         }
808     }
809 
810     /* test that we get the language code itself for an unknown language, and a default warning */
811     errorCode=U_ZERO_ERROR;
812     length=uloc_getDisplayLanguage("qq", "rr", buffer, UPRV_LENGTHOF(buffer), &errorCode);
813     if(errorCode!=U_USING_DEFAULT_WARNING || length!=2 || buffer[0]!=0x71 || buffer[1]!=0x71) {
814         log_err("error getting the display string for an unknown language - %s\n", u_errorName(errorCode));
815     }
816 
817     /* test that we get a default warning for a display name where one component is unknown (4255) */
818     errorCode=U_ZERO_ERROR;
819     length=uloc_getDisplayName("qq_US_POSIX", "en_US", buffer, UPRV_LENGTHOF(buffer), &errorCode);
820     if(errorCode!=U_USING_DEFAULT_WARNING) {
821         log_err("error getting the display name for a locale with an unknown language - %s\n", u_errorName(errorCode));
822     }
823 
824     {
825         int32_t i;
826         static const char *aLocale = "es@collation=traditional;calendar=japanese";
827         static const char *testL[] = { "en_US",
828             "fr_FR",
829             "ca_ES",
830             "el_GR" };
831         static const char *expect[] = { "Spanish (Calendar=Japanese Calendar, Sort Order=Traditional Sort Order)", /* note sorted order of keywords */
832             "espagnol (calendrier=calendrier japonais, ordre de tri=ordre traditionnel)",
833             "espanyol (calendari=calendari japon\\u00e8s, ordre=ordre tradicional)",
834             "\\u0399\\u03c3\\u03c0\\u03b1\\u03bd\\u03b9\\u03ba\\u03ac (\\u0397\\u03bc\\u03b5\\u03c1\\u03bf\\u03bb\\u03cc\\u03b3\\u03b9\\u03bf=\\u0399\\u03b1\\u03c0\\u03c9\\u03bd\\u03b9\\u03ba\\u03cc \\u03b7\\u03bc\\u03b5\\u03c1\\u03bf\\u03bb\\u03cc\\u03b3\\u03b9\\u03bf, \\u03a3\\u03b5\\u03b9\\u03c1\\u03ac \\u03c4\\u03b1\\u03be\\u03b9\\u03bd\\u03cc\\u03bc\\u03b7\\u03c3\\u03b7\\u03c2=\\u03a0\\u03b1\\u03c1\\u03b1\\u03b4\\u03bf\\u03c3\\u03b9\\u03b1\\u03ba\\u03ae \\u03c3\\u03b5\\u03b9\\u03c1\\u03ac \\u03c4\\u03b1\\u03be\\u03b9\\u03bd\\u03cc\\u03bc\\u03b7\\u03c3\\u03b7\\u03c2)" };
835         UChar *expectBuffer;
836 
837         for(i=0;i<UPRV_LENGTHOF(testL);i++) {
838             errorCode = U_ZERO_ERROR;
839             uloc_getDisplayName(aLocale, testL[i], buffer, UPRV_LENGTHOF(buffer), &errorCode);
840             if(U_FAILURE(errorCode)) {
841                 log_err("FAIL in uloc_getDisplayName(%s,%s,..) -> %s\n", aLocale, testL[i], u_errorName(errorCode));
842             } else {
843                 expectBuffer = CharsToUChars(expect[i]);
844                 if(u_strcmp(buffer,expectBuffer)) {
845                     log_data_err("FAIL in uloc_getDisplayName(%s,%s,..) expected '%s' got '%s' (Are you missing data?)\n", aLocale, testL[i], expect[i], austrdup(buffer));
846                 } else {
847                     log_verbose("pass in uloc_getDisplayName(%s,%s,..) got '%s'\n", aLocale, testL[i], expect[i]);
848                 }
849                 free(expectBuffer);
850             }
851         }
852     }
853 
854     /* test that we properly preflight and return data when there's a non-default pattern,
855        see ticket #8262. */
856     {
857         int32_t i;
858         static const char *locale="az_Cyrl";
859         static const char *displayLocale="ja";
860         static const char *expectedChars =
861                 "\\u30a2\\u30bc\\u30eb\\u30d0\\u30a4\\u30b8\\u30e3\\u30f3\\u8a9e "
862                 "(\\u30ad\\u30ea\\u30eb\\u6587\\u5b57)";
863         UErrorCode ec=U_ZERO_ERROR;
864         UChar result[256];
865         int32_t len;
866         int32_t preflightLen=uloc_getDisplayName(locale, displayLocale, NULL, 0, &ec);
867         /* inconvenient semantics when preflighting, this condition is expected... */
868         if(ec==U_BUFFER_OVERFLOW_ERROR) {
869             ec=U_ZERO_ERROR;
870         }
871         len=uloc_getDisplayName(locale, displayLocale, result, UPRV_LENGTHOF(result), &ec);
872         if(U_FAILURE(ec)) {
873             log_err("uloc_getDisplayName(%s, %s...) returned error: %s",
874                     locale, displayLocale, u_errorName(ec));
875         } else {
876             UChar *expected=CharsToUChars(expectedChars);
877             int32_t expectedLen=u_strlen(expected);
878 
879             if(len!=expectedLen) {
880                 log_data_err("uloc_getDisplayName(%s, %s...) returned string of length %d, expected length %d",
881                         locale, displayLocale, len, expectedLen);
882             } else if(preflightLen!=expectedLen) {
883                 log_err("uloc_getDisplayName(%s, %s...) returned preflight length %d, expected length %d",
884                         locale, displayLocale, preflightLen, expectedLen);
885             } else if(u_strncmp(result, expected, len)) {
886                 int32_t cap=len*6+1;  /* worst case + space for trailing null */
887                 char* resultChars=(char*)malloc(cap);
888                 int32_t resultCharsLen=UCharsToEscapedAscii(result, len, resultChars, cap);
889                 if(resultCharsLen<0 || resultCharsLen<cap-1) {
890                     log_err("uloc_getDisplayName(%s, %s...) mismatch", locale, displayLocale);
891                 } else {
892                     log_err("uloc_getDisplayName(%s, %s...) returned '%s' but expected '%s'",
893                             locale, displayLocale, resultChars, expectedChars);
894                 }
895                 free(resultChars);
896                 resultChars=NULL;
897             } else {
898                 /* test all buffer sizes */
899                 for(i=len+1;i>=0;--i) {
900                     len=uloc_getDisplayName(locale, displayLocale, result, i, &ec);
901                     if(ec==U_BUFFER_OVERFLOW_ERROR) {
902                         ec=U_ZERO_ERROR;
903                     }
904                     if(U_FAILURE(ec)) {
905                         log_err("using buffer of length %d returned error %s", i, u_errorName(ec));
906                         break;
907                     }
908                     if(len!=expectedLen) {
909                         log_err("with buffer of length %d, expected length %d but got %d", i, expectedLen, len);
910                         break;
911                     }
912                     /* There's no guarantee about what's in the buffer if we've overflowed, in particular,
913                      * we don't know that it's been filled, so no point in checking. */
914                 }
915             }
916 
917             free(expected);
918         }
919     }
920 }
921 
922 /**
923  * ICU-21160 test the pre-flighting call to uloc_getDisplayScript returns the actual length needed
924  * for the result buffer.
925  */
TestGetDisplayScriptPreFlighting21160(void)926 static void TestGetDisplayScriptPreFlighting21160(void)
927 {
928     const char* locale = "und-Latn";
929     const char* inlocale = "de";
930 
931     UErrorCode ec = U_ZERO_ERROR;
932     UChar* result = NULL;
933     int32_t length = uloc_getDisplayScript(locale, inlocale, NULL, 0, &ec) + 1;
934     ec = U_ZERO_ERROR;
935     result=(UChar*)malloc(sizeof(UChar) * length);
936     length = uloc_getDisplayScript(locale, inlocale, result, length, &ec);
937     if (U_FAILURE(ec)) {
938         log_err("uloc_getDisplayScript length %d returned error %s", length, u_errorName(ec));
939     }
940     free(result);
941 }
942 
/*
 * Test for uloc_getAvailable() and uloc_countAvailable(): the count must be positive,
 * and every index below the count is fetched and logged verbosely.
 */
static void TestGetAvailableLocales(void)
{
    log_verbose("Testing the no of available locales\n");
    const int32_t total = uloc_countAvailable();

    /* use something sensible w/o hardcoding the count */
    if (total > 0) {
        log_info("Number of locales returned = %d\n", total);
    } else if (total == 0) {
        log_data_err("countAvailable() returned an empty list!\n");
    } else {
        log_data_err("countAvailable() returned a wrong value!= %d\n", total);
    }

    for (int32_t idx = 0; idx < total; ++idx) {
        const char *localeId = uloc_getAvailable(idx);

        log_verbose(" %s\n", localeId);
    }
}
968 
TestGetAvailableLocalesByType(void)969 static void TestGetAvailableLocalesByType(void) {
970     UErrorCode status = U_ZERO_ERROR;
971 
972     UEnumeration* uenum = uloc_openAvailableByType(ULOC_AVAILABLE_DEFAULT, &status);
973     assertSuccess("Constructing the UEnumeration", &status);
974 
975     assertIntEquals("countAvailable() should be same in old and new methods",
976         uloc_countAvailable(),
977         uenum_count(uenum, &status));
978 
979     for (int32_t i = 0; i < uloc_countAvailable(); i++) {
980         const char* old = uloc_getAvailable(i);
981         int32_t len = 0;
982         const char* new = uenum_next(uenum, &len, &status);
983         assertEquals("Old and new strings should equal", old, new);
984         assertIntEquals("String length should be correct", uprv_strlen(old), len);
985     }
986     assertPtrEquals("Should get nullptr on the last string",
987         NULL, uenum_next(uenum, NULL, &status));
988 
989     uenum_close(uenum);
990 
991     uenum = uloc_openAvailableByType(ULOC_AVAILABLE_ONLY_LEGACY_ALIASES, &status);
992     UBool found_he = false;
993     UBool found_iw = false;
994     const char* loc;
995     while ((loc = uenum_next(uenum, NULL, &status))) {
996         if (uprv_strcmp("he", loc) == 0) {
997             found_he = true;
998         }
999         if (uprv_strcmp("iw", loc) == 0) {
1000             found_iw = true;
1001         }
1002     }
1003     assertTrue("Should NOT have found he amongst the legacy/alias locales", !found_he);
1004     assertTrue("Should have found iw amongst the legacy/alias locales", found_iw);
1005     uenum_close(uenum);
1006 
1007     uenum = uloc_openAvailableByType(ULOC_AVAILABLE_WITH_LEGACY_ALIASES, &status);
1008     found_he = false;
1009     found_iw = false;
1010     const UChar* uloc; // test the UChar conversion
1011     int32_t count = 0;
1012     while ((uloc = uenum_unext(uenum, NULL, &status))) {
1013         if (u_strcmp(u"iw", uloc) == 0) {
1014             found_iw = true;
1015         }
1016         if (u_strcmp(u"he", uloc) == 0) {
1017             found_he = true;
1018         }
1019         count++;
1020     }
1021     assertTrue("Should have found he amongst all locales", found_he);
1022     assertTrue("Should have found iw amongst all locales", found_iw);
1023     assertIntEquals("Should return as many strings as claimed",
1024         count, uenum_count(uenum, &status));
1025 
1026     // Reset the enumeration and it should still work
1027     uenum_reset(uenum, &status);
1028     count = 0;
1029     while ((loc = uenum_next(uenum, NULL, &status))) {
1030         count++;
1031     }
1032     assertIntEquals("After reset, should return as many strings as claimed",
1033         count, uenum_count(uenum, &status));
1034 
1035     uenum_close(uenum);
1036 
1037     assertSuccess("No errors should have occurred", &status);
1038 }
1039 
1040 /* test for u_getDataDirectory, u_setDataDirectory, uloc_getISO3Language */
TestDataDirectory(void)1041 static void TestDataDirectory(void)
1042 {
1043 
1044     char            oldDirectory[512];
1045     const char     *temp,*testValue1,*testValue2,*testValue3;
1046     const char path[40] ="d:\\icu\\source\\test\\intltest" U_FILE_SEP_STRING; /*give the required path */
1047 
1048     log_verbose("Testing getDataDirectory()\n");
1049     temp = u_getDataDirectory();
1050     strcpy(oldDirectory, temp);
1051 
1052     testValue1=uloc_getISO3Language("en_US");
1053     log_verbose("first fetch of language retrieved  %s\n", testValue1);
1054 
1055     if (0 != strcmp(testValue1,"eng")){
1056         log_err("Initial check of ISO3 language failed: expected \"eng\", got  %s \n", testValue1);
1057     }
1058 
1059     /*defining the path for DataDirectory */
1060     log_verbose("Testing setDataDirectory\n");
1061     u_setDataDirectory( path );
1062     if(strcmp(path, u_getDataDirectory())==0)
1063         log_verbose("setDataDirectory working fine\n");
1064     else
1065         log_err("Error in setDataDirectory. Directory not set correctly - came back as [%s], expected [%s]\n", u_getDataDirectory(), path);
1066 
1067     testValue2=uloc_getISO3Language("en_US");
1068     log_verbose("second fetch of language retrieved  %s \n", testValue2);
1069 
1070     u_setDataDirectory(oldDirectory);
1071     testValue3=uloc_getISO3Language("en_US");
1072     log_verbose("third fetch of language retrieved  %s \n", testValue3);
1073 
1074     if (0 != strcmp(testValue3,"eng")) {
1075        log_err("get/setDataDirectory() failed: expected \"eng\", got \" %s  \" \n", testValue3);
1076     }
1077 }
1078 
1079 
1080 
1081 /*=========================================================== */
1082 
1083 static UChar _NUL=0;
1084 
doTestDisplayNames(const char * displayLocale,int32_t compareIndex)1085 static void doTestDisplayNames(const char* displayLocale, int32_t compareIndex)
1086 {
1087     UErrorCode status = U_ZERO_ERROR;
1088     int32_t i;
1089     int32_t maxresultsize;
1090 
1091     const char *testLocale;
1092 
1093 
1094     UChar  *testLang  = 0;
1095     UChar  *testScript  = 0;
1096     UChar  *testCtry = 0;
1097     UChar  *testVar = 0;
1098     UChar  *testName = 0;
1099 
1100 
1101     UChar*  expectedLang = 0;
1102     UChar*  expectedScript = 0;
1103     UChar*  expectedCtry = 0;
1104     UChar*  expectedVar = 0;
1105     UChar*  expectedName = 0;
1106 
1107 setUpDataTable();
1108 
1109     for(i=0;i<LOCALE_SIZE; ++i)
1110     {
1111         testLocale=rawData2[NAME][i];
1112 
1113         log_verbose("Testing.....  %s\n", testLocale);
1114 
1115         maxresultsize=0;
1116         maxresultsize=uloc_getDisplayLanguage(testLocale, displayLocale, NULL, maxresultsize, &status);
1117         if(status==U_BUFFER_OVERFLOW_ERROR)
1118         {
1119             status=U_ZERO_ERROR;
1120             testLang=(UChar*)malloc(sizeof(UChar) * (maxresultsize+1));
1121             uloc_getDisplayLanguage(testLocale, displayLocale, testLang, maxresultsize + 1, &status);
1122         }
1123         else
1124         {
1125             testLang=&_NUL;
1126         }
1127         if(U_FAILURE(status)){
1128             log_err("Error in getDisplayLanguage()  %s\n", myErrorName(status));
1129         }
1130 
1131         maxresultsize=0;
1132         maxresultsize=uloc_getDisplayScript(testLocale, displayLocale, NULL, maxresultsize, &status);
1133         if(status==U_BUFFER_OVERFLOW_ERROR)
1134         {
1135             status=U_ZERO_ERROR;
1136             testScript=(UChar*)malloc(sizeof(UChar) * (maxresultsize+1));
1137             uloc_getDisplayScript(testLocale, displayLocale, testScript, maxresultsize + 1, &status);
1138         }
1139         else
1140         {
1141             testScript=&_NUL;
1142         }
1143         if(U_FAILURE(status)){
1144             log_err("Error in getDisplayScript()  %s\n", myErrorName(status));
1145         }
1146 
1147         maxresultsize=0;
1148         maxresultsize=uloc_getDisplayCountry(testLocale, displayLocale, NULL, maxresultsize, &status);
1149         if(status==U_BUFFER_OVERFLOW_ERROR)
1150         {
1151             status=U_ZERO_ERROR;
1152             testCtry=(UChar*)malloc(sizeof(UChar) * (maxresultsize+1));
1153             uloc_getDisplayCountry(testLocale, displayLocale, testCtry, maxresultsize + 1, &status);
1154         }
1155         else
1156         {
1157             testCtry=&_NUL;
1158         }
1159         if(U_FAILURE(status)){
1160             log_err("Error in getDisplayCountry()  %s\n", myErrorName(status));
1161         }
1162 
1163         maxresultsize=0;
1164         maxresultsize=uloc_getDisplayVariant(testLocale, displayLocale, NULL, maxresultsize, &status);
1165         if(status==U_BUFFER_OVERFLOW_ERROR)
1166         {
1167             status=U_ZERO_ERROR;
1168             testVar=(UChar*)malloc(sizeof(UChar) * (maxresultsize+1));
1169             uloc_getDisplayVariant(testLocale, displayLocale, testVar, maxresultsize + 1, &status);
1170         }
1171         else
1172         {
1173             testVar=&_NUL;
1174         }
1175         if(U_FAILURE(status)){
1176                 log_err("Error in getDisplayVariant()  %s\n", myErrorName(status));
1177         }
1178 
1179         maxresultsize=0;
1180         maxresultsize=uloc_getDisplayName(testLocale, displayLocale, NULL, maxresultsize, &status);
1181         if(status==U_BUFFER_OVERFLOW_ERROR)
1182         {
1183             status=U_ZERO_ERROR;
1184             testName=(UChar*)malloc(sizeof(UChar) * (maxresultsize+1));
1185             uloc_getDisplayName(testLocale, displayLocale, testName, maxresultsize + 1, &status);
1186         }
1187         else
1188         {
1189             testName=&_NUL;
1190         }
1191         if(U_FAILURE(status)){
1192             log_err("Error in getDisplayName()  %s\n", myErrorName(status));
1193         }
1194 
1195         expectedLang=dataTable[compareIndex][i];
1196         if(u_strlen(expectedLang)== 0)
1197             expectedLang=dataTable[DLANG_EN][i];
1198 
1199         expectedScript=dataTable[compareIndex + 1][i];
1200         if(u_strlen(expectedScript)== 0)
1201             expectedScript=dataTable[DSCRIPT_EN][i];
1202 
1203         expectedCtry=dataTable[compareIndex + 2][i];
1204         if(u_strlen(expectedCtry)== 0)
1205             expectedCtry=dataTable[DCTRY_EN][i];
1206 
1207         expectedVar=dataTable[compareIndex + 3][i];
1208         if(u_strlen(expectedVar)== 0)
1209             expectedVar=dataTable[DVAR_EN][i];
1210 
1211         expectedName=dataTable[compareIndex + 4][i];
1212         if(u_strlen(expectedName) == 0)
1213             expectedName=dataTable[DNAME_EN][i];
1214 
1215         if (0 !=u_strcmp(testLang,expectedLang))  {
1216             log_data_err(" Display Language mismatch: got %s expected %s displayLocale=%s (Are you missing data?)\n", austrdup(testLang), austrdup(expectedLang), displayLocale);
1217         }
1218 
1219         if (0 != u_strcmp(testScript,expectedScript))   {
1220             log_data_err(" Display Script mismatch: got %s expected %s displayLocale=%s (Are you missing data?)\n", austrdup(testScript), austrdup(expectedScript), displayLocale);
1221         }
1222 
1223         if (0 != u_strcmp(testCtry,expectedCtry))   {
1224             log_data_err(" Display Country mismatch: got %s expected %s displayLocale=%s (Are you missing data?)\n", austrdup(testCtry), austrdup(expectedCtry), displayLocale);
1225         }
1226 
1227         if (0 != u_strcmp(testVar,expectedVar))    {
1228             log_data_err(" Display Variant mismatch: got %s expected %s displayLocale=%s (Are you missing data?)\n", austrdup(testVar), austrdup(expectedVar), displayLocale);
1229         }
1230 
1231         if(0 != u_strcmp(testName, expectedName))    {
1232             log_data_err(" Display Name mismatch: got %s expected %s displayLocale=%s (Are you missing data?)\n", austrdup(testName), austrdup(expectedName), displayLocale);
1233         }
1234 
1235         if(testName!=&_NUL) {
1236             free(testName);
1237         }
1238         if(testLang!=&_NUL) {
1239             free(testLang);
1240         }
1241         if(testScript!=&_NUL) {
1242             free(testScript);
1243         }
1244         if(testCtry!=&_NUL) {
1245             free(testCtry);
1246         }
1247         if(testVar!=&_NUL) {
1248             free(testVar);
1249         }
1250     }
1251 cleanUpDataTable();
1252 }
1253 
1254 /*------------------------------
1255  * TestDisplayNameBrackets
1256  */
1257 
1258 typedef struct {
1259     const char * displayLocale;
1260     const char * namedRegion;
1261     const char * namedLocale;
1262     const char * regionName;
1263     const char * localeName;
1264 } DisplayNameBracketsItem;
1265 
1266 static const DisplayNameBracketsItem displayNameBracketsItems[] = {
1267     { "en", "CC", "en_CC",      "Cocos (Keeling) Islands",  "English (Cocos [Keeling] Islands)"  },
1268     { "en", "MM", "my_MM",      "Myanmar (Burma)",          "Burmese (Myanmar [Burma])"          },
1269     { "en", "MM", "my_Mymr_MM", "Myanmar (Burma)",          "Burmese (Myanmar, Myanmar [Burma])" },
1270     { "zh", "CC", "en_CC",      "\\u79D1\\u79D1\\u65AF\\uFF08\\u57FA\\u6797\\uFF09\\u7FA4\\u5C9B", "\\u82F1\\u8BED\\uFF08\\u79D1\\u79D1\\u65AF\\uFF3B\\u57FA\\u6797\\uFF3D\\u7FA4\\u5C9B\\uFF09" },
1271     { "zh", "CG", "fr_CG",      "\\u521A\\u679C\\uFF08\\u5E03\\uFF09",                             "\\u6CD5\\u8BED\\uFF08\\u521A\\u679C\\uFF3B\\u5E03\\uFF3D\\uFF09" },
1272     { NULL, NULL, NULL,         NULL,                       NULL                                 }
1273 };
1274 
1275 enum { kDisplayNameBracketsMax = 128 };
1276 
TestDisplayNameBrackets(void)1277 static void TestDisplayNameBrackets(void)
1278 {
1279     const DisplayNameBracketsItem * itemPtr = displayNameBracketsItems;
1280     for (; itemPtr->displayLocale != NULL; itemPtr++) {
1281         ULocaleDisplayNames * uldn;
1282         UErrorCode status;
1283         UChar expectRegionName[kDisplayNameBracketsMax];
1284         UChar expectLocaleName[kDisplayNameBracketsMax];
1285         UChar getName[kDisplayNameBracketsMax];
1286         int32_t ulen;
1287 
1288         (void) u_unescape(itemPtr->regionName, expectRegionName, kDisplayNameBracketsMax);
1289         (void) u_unescape(itemPtr->localeName, expectLocaleName, kDisplayNameBracketsMax);
1290 
1291         status = U_ZERO_ERROR;
1292         ulen = uloc_getDisplayCountry(itemPtr->namedLocale, itemPtr->displayLocale, getName, kDisplayNameBracketsMax, &status);
1293         if ( U_FAILURE(status) || u_strcmp(getName, expectRegionName) != 0 ) {
1294             log_data_err("uloc_getDisplayCountry for displayLocale %s and namedLocale %s returns unexpected name or status %s\n", itemPtr->displayLocale, itemPtr->namedLocale, myErrorName(status));
1295         }
1296 
1297         status = U_ZERO_ERROR;
1298         ulen = uloc_getDisplayName(itemPtr->namedLocale, itemPtr->displayLocale, getName, kDisplayNameBracketsMax, &status);
1299         if ( U_FAILURE(status) || u_strcmp(getName, expectLocaleName) != 0 ) {
1300             log_data_err("uloc_getDisplayName for displayLocale %s and namedLocale %s returns unexpected name or status %s\n", itemPtr->displayLocale, itemPtr->namedLocale, myErrorName(status));
1301         }
1302 
1303 #if !UCONFIG_NO_FORMATTING
1304         status = U_ZERO_ERROR;
1305         uldn = uldn_open(itemPtr->displayLocale, ULDN_STANDARD_NAMES, &status);
1306         if (U_SUCCESS(status)) {
1307             status = U_ZERO_ERROR;
1308             ulen = uldn_regionDisplayName(uldn, itemPtr->namedRegion, getName, kDisplayNameBracketsMax, &status);
1309             if ( U_FAILURE(status) || u_strcmp(getName, expectRegionName) != 0 ) {
1310                 log_data_err("uldn_regionDisplayName for displayLocale %s and namedRegion %s returns unexpected name or status %s\n", itemPtr->displayLocale, itemPtr->namedRegion, myErrorName(status));
1311             }
1312 
1313             status = U_ZERO_ERROR;
1314             ulen = uldn_localeDisplayName(uldn, itemPtr->namedLocale, getName, kDisplayNameBracketsMax, &status);
1315             if ( U_FAILURE(status) || u_strcmp(getName, expectLocaleName) != 0 ) {
1316                 log_data_err("uldn_localeDisplayName for displayLocale %s and namedLocale %s returns unexpected name or status %s\n", itemPtr->displayLocale, itemPtr->namedLocale, myErrorName(status));
1317             }
1318 
1319             uldn_close(uldn);
1320         } else {
1321             log_data_err("uldn_open fails for displayLocale %s, status=%s\n", itemPtr->displayLocale, u_errorName(status));
1322         }
1323 #endif
1324     (void)ulen;   /* Suppress variable not used warning */
1325     }
1326 }
1327 
1328 /*------------------------------
1329  * TestIllegalArgumentWhenNoDataWithNoSubstitute
1330  */
1331 
TestIllegalArgumentWhenNoDataWithNoSubstitute(void)1332 static void TestIllegalArgumentWhenNoDataWithNoSubstitute(void)
1333 {
1334 #if !UCONFIG_NO_FORMATTING
1335     UErrorCode status = U_ZERO_ERROR;
1336     UChar getName[kDisplayNameBracketsMax];
1337     UDisplayContext contexts[] = {
1338         UDISPCTX_NO_SUBSTITUTE,
1339     };
1340     ULocaleDisplayNames* ldn = uldn_openForContext("en", contexts, 1, &status);
1341 
1342     uldn_localeDisplayName(ldn, "efg", getName, kDisplayNameBracketsMax, &status);
1343     if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1344         log_err("FAIL uldn_localeDisplayName should return U_ILLEGAL_ARGUMENT_ERROR "
1345                 "while no resource under UDISPCTX_NO_SUBSTITUTE");
1346     }
1347 
1348     status = U_ZERO_ERROR;
1349     uldn_languageDisplayName(ldn, "zz", getName, kDisplayNameBracketsMax, &status);
1350     if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1351         log_err("FAIL uldn_languageDisplayName should return U_ILLEGAL_ARGUMENT_ERROR "
1352                 "while no resource under UDISPCTX_NO_SUBSTITUTE");
1353     }
1354 
1355     status = U_ZERO_ERROR;
1356     uldn_scriptDisplayName(ldn, "Aaaa", getName, kDisplayNameBracketsMax, &status);
1357     if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1358         log_err("FAIL uldn_scriptDisplayName should return U_ILLEGAL_ARGUMENT_ERROR "
1359                 "while no resource under UDISPCTX_NO_SUBSTITUTE");
1360     }
1361 
1362     status = U_ZERO_ERROR;
1363     uldn_regionDisplayName(ldn, "KK", getName, kDisplayNameBracketsMax, &status);
1364     if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1365         log_err("FAIL uldn_regionDisplayName should return U_ILLEGAL_ARGUMENT_ERROR "
1366                 "while no resource under UDISPCTX_NO_SUBSTITUTE");
1367     }
1368 
1369     status = U_ZERO_ERROR;
1370     uldn_variantDisplayName(ldn, "ZZ", getName, kDisplayNameBracketsMax, &status);
1371     if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1372         log_err("FAIL uldn_variantDisplayName should return U_ILLEGAL_ARGUMENT_ERROR "
1373                 "while no resource under UDISPCTX_NO_SUBSTITUTE");
1374     }
1375 
1376     status = U_ZERO_ERROR;
1377     uldn_keyDisplayName(ldn, "zz", getName, kDisplayNameBracketsMax, &status);
1378     if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1379         log_err("FAIL uldn_keyDisplayName should return U_ILLEGAL_ARGUMENT_ERROR "
1380                 "while no resource under UDISPCTX_NO_SUBSTITUTE");
1381     }
1382 
1383     status = U_ZERO_ERROR;
1384     uldn_keyValueDisplayName(ldn, "ca", "zz", getName, kDisplayNameBracketsMax, &status);
1385     if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1386         log_err("FAIL uldn_keyValueDisplayName should return U_ILLEGAL_ARGUMENT_ERROR "
1387                 "while no resource under UDISPCTX_NO_SUBSTITUTE");
1388     }
1389 
1390     uldn_close(ldn);
1391 #endif
1392 }
1393 
1394 /*------------------------------
1395  * TestISOFunctions
1396  */
1397 
1398 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
1399 /* test for uloc_getISOLanguages, uloc_getISOCountries */
TestISOFunctions(void)1400 static void TestISOFunctions(void)
1401 {
1402     // Android-changed: Skip this test on Android because we allow extra languages added on devices.
1403     if (true) {
1404         return;
1405     }
1406 
1407     const char* const* str=uloc_getISOLanguages();
1408     const char* const* str1=uloc_getISOCountries();
1409     const char* test;
1410     const char *key = NULL;
1411     int32_t count = 0, skipped = 0;
1412     int32_t expect;
1413     UResourceBundle *res;
1414     UResourceBundle *subRes;
1415     UErrorCode status = U_ZERO_ERROR;
1416 
1417     /*  test getISOLanguages*/
1418     /*str=uloc_getISOLanguages(); */
1419     log_verbose("Testing ISO Languages: \n");
1420 
1421     /* use structLocale - this data is no longer in root */
1422     res = ures_openDirect(loadTestData(&status), "structLocale", &status);
1423     subRes = ures_getByKey(res, "Languages", NULL, &status);
1424     if (U_FAILURE(status)) {
1425         log_data_err("There is an error in structLocale's ures_getByKey(\"Languages\"), status=%s\n", u_errorName(status));
1426         return;
1427     }
1428 
1429     expect = ures_getSize(subRes);
1430     for(count = 0; *(str+count) != 0; count++)
1431     {
1432         key = NULL;
1433         test = *(str+count);
1434         status = U_ZERO_ERROR;
1435 
1436         do {
1437             /* Skip over language tags. This API only returns language codes. */
1438             skipped += (key != NULL);
1439             ures_getNextString(subRes, NULL, &key, &status);
1440         }
1441         while (key != NULL && strchr(key, '_'));
1442 
1443         if(key == NULL)
1444             break;
1445         /* TODO: Consider removing sh, which is deprecated */
1446         if(strcmp(key,"root") == 0 || strcmp(key,"Fallback") == 0 || strcmp(key,"sh") == 0) {
1447             ures_getNextString(subRes, NULL, &key, &status);
1448             skipped++;
1449         }
1450 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
1451         /* This code only works on ASCII machines where the keys are stored in ASCII order */
1452         if(strcmp(test,key)) {
1453             /* The first difference usually implies the place where things get out of sync */
1454             log_err("FAIL Language diff at offset %d, \"%s\" != \"%s\"\n", count, test, key);
1455         }
1456 #endif
1457 
1458         if(!strcmp(test,"in"))
1459             log_err("FAIL getISOLanguages() has obsolete language code %s\n", test);
1460         if(!strcmp(test,"iw"))
1461             log_err("FAIL getISOLanguages() has obsolete language code %s\n", test);
1462         if(!strcmp(test,"ji"))
1463             log_err("FAIL getISOLanguages() has obsolete language code %s\n", test);
1464         if(!strcmp(test,"jw"))
1465             log_err("FAIL getISOLanguages() has obsolete language code %s\n", test);
1466         if(!strcmp(test,"sh"))
1467             log_err("FAIL getISOLanguages() has obsolete language code %s\n", test);
1468     }
1469 
1470     expect -= skipped; /* Ignore the skipped resources from structLocale */
1471 
1472     if(count!=expect) {
1473         log_err("There is an error in getISOLanguages, got %d, expected %d (as per structLocale)\n", count, expect);
1474     }
1475 
1476     subRes = ures_getByKey(res, "Countries", subRes, &status);
1477     log_verbose("Testing ISO Countries");
1478     skipped = 0;
1479     expect = ures_getSize(subRes) - 1; /* Skip ZZ */
1480     for(count = 0; *(str1+count) != 0; count++)
1481     {
1482         key = NULL;
1483         test = *(str1+count);
1484         do {
1485             /* Skip over numeric UN tags. This API only returns ISO-3166 codes. */
1486             skipped += (key != NULL);
1487             ures_getNextString(subRes, NULL, &key, &status);
1488         }
1489         while (key != NULL && strlen(key) != 2);
1490 
1491         if(key == NULL)
1492             break;
1493         /* TODO: Consider removing CS, which is deprecated */
1494         while(strcmp(key,"QO") == 0 || strcmp(key,"QU") == 0 || strcmp(key,"CS") == 0) {
1495             ures_getNextString(subRes, NULL, &key, &status);
1496             skipped++;
1497         }
1498 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
1499         /* This code only works on ASCII machines where the keys are stored in ASCII order */
1500         if(strcmp(test,key)) {
1501             /* The first difference usually implies the place where things get out of sync */
1502             log_err("FAIL Country diff at offset %d, \"%s\" != \"%s\"\n", count, test, key);
1503         }
1504 #endif
1505         if(!strcmp(test,"FX"))
1506             log_err("FAIL getISOCountries() has obsolete country code %s\n", test);
1507         if(!strcmp(test,"YU"))
1508             log_err("FAIL getISOCountries() has obsolete country code %s\n", test);
1509         if(!strcmp(test,"ZR"))
1510             log_err("FAIL getISOCountries() has obsolete country code %s\n", test);
1511     }
1512 
1513     ures_getNextString(subRes, NULL, &key, &status);
1514     if (strcmp(key, "ZZ") != 0) {
1515         log_err("ZZ was expected to be the last entry in structLocale, but got %s\n", key);
1516     }
1517 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
1518     /* On EBCDIC machines, the numbers are sorted last. Account for those in the skipped value too. */
1519     key = NULL;
1520     do {
1521         /* Skip over numeric UN tags. uloc_getISOCountries only returns ISO-3166 codes. */
1522         skipped += (key != NULL);
1523         ures_getNextString(subRes, NULL, &key, &status);
1524     }
1525     while (U_SUCCESS(status) && key != NULL && strlen(key) != 2);
1526 #endif
1527     expect -= skipped; /* Ignore the skipped resources from structLocale */
1528     if(count!=expect)
1529     {
1530         log_err("There is an error in getISOCountries, got %d, expected %d \n", count, expect);
1531     }
1532     ures_close(subRes);
1533     ures_close(res);
1534 }
1535 #endif
1536 
setUpDataTable(void)1537 static void setUpDataTable(void)
1538 {
1539     int32_t i,j;
1540     dataTable = (UChar***)(calloc(sizeof(UChar**),LOCALE_INFO_SIZE));
1541 
1542     for (i = 0; i < LOCALE_INFO_SIZE; i++) {
1543         dataTable[i] = (UChar**)(calloc(sizeof(UChar*),LOCALE_SIZE));
1544         for (j = 0; j < LOCALE_SIZE; j++){
1545             dataTable[i][j] = CharsToUChars(rawData2[i][j]);
1546         }
1547     }
1548 }
1549 
cleanUpDataTable(void)1550 static void cleanUpDataTable(void)
1551 {
1552     int32_t i,j;
1553     if(dataTable != NULL) {
1554         for (i=0; i<LOCALE_INFO_SIZE; i++) {
1555             for(j = 0; j < LOCALE_SIZE; j++) {
1556                 free(dataTable[i][j]);
1557             }
1558             free(dataTable[i]);
1559         }
1560         free(dataTable);
1561     }
1562     dataTable = NULL;
1563 }
1564 
1565 /**
1566  * @bug 4011756 4011380
1567  */
/* Checks that uloc_getISO3Language() and uloc_getISO3Country() both return an
   empty (non-NULL) string for the made-up locale "xx_YY". */
static void TestISO3Fallback(void)
{
    const char* test="xx_YY";

    const char * result;

    result = uloc_getISO3Language(test);

    /* Conform to C API usage  */

    if (!result || (result[0] != 0)) {
        /* The format contains %s, so the offending value must actually be passed. */
        log_err("getISO3Language() on xx_YY returned %s instead of \"\"\n",
                result ? result : "NULL");
    }

    result = uloc_getISO3Country(test);

    if (!result || (result[0] != 0)) {
        log_err("getISO3Country() on xx_YY returned %s instead of \"\"\n",
                result ? result : "NULL");
    }
}
1586 
1587 /**
1588  * @bug 4118587
1589  */
TestSimpleDisplayNames(void)1590 static void TestSimpleDisplayNames(void)
1591 {
1592   /*
1593      This test is different from TestDisplayNames because TestDisplayNames checks
1594      fallback behavior, combination of language and country names to form locale
1595      names, and other stuff like that.  This test just checks specific language
1596      and country codes to make sure we have the correct names for them.
1597   */
1598     char languageCodes[] [4] = { "he", "id", "iu", "ug", "yi", "za", "419" };
1599     const char* languageNames [] = { "Hebrew", "Indonesian", "Inuktitut", "Uyghur", "Yiddish",
1600                                "Zhuang", "419" };
1601     const char* inLocale [] = { "en_US", "zh_Hant"};
1602     UErrorCode status=U_ZERO_ERROR;
1603 
1604     int32_t i;
1605     int32_t localeIndex = 0;
1606     for (i = 0; i < 7; i++) {
1607         UChar *testLang=0;
1608         UChar *expectedLang=0;
1609         int size=0;
1610 
1611         if (i == 6) {
1612             localeIndex = 1; /* Use the second locale for the rest of the test. */
1613         }
1614 
1615         size=uloc_getDisplayLanguage(languageCodes[i], inLocale[localeIndex], NULL, size, &status);
1616         if(status==U_BUFFER_OVERFLOW_ERROR) {
1617             status=U_ZERO_ERROR;
1618             testLang=(UChar*)malloc(sizeof(UChar) * (size + 1));
1619             uloc_getDisplayLanguage(languageCodes[i], inLocale[localeIndex], testLang, size + 1, &status);
1620         }
1621         expectedLang=(UChar*)malloc(sizeof(UChar) * (strlen(languageNames[i])+1));
1622         u_uastrcpy(expectedLang, languageNames[i]);
1623         if (u_strcmp(testLang, expectedLang) != 0)
1624             log_data_err("Got wrong display name for %s : Expected \"%s\", got \"%s\".\n",
1625                     languageCodes[i], languageNames[i], austrdup(testLang));
1626         free(testLang);
1627         free(expectedLang);
1628     }
1629 
1630 }
1631 
1632 /**
1633  * @bug 4118595
1634  */
TestUninstalledISO3Names(void)1635 static void TestUninstalledISO3Names(void)
1636 {
1637   /* This test checks to make sure getISO3Language and getISO3Country work right
1638      even for locales that are not installed (and some installed ones). */
1639     static const char iso2Languages [][4] = {     "am", "ba", "fy", "mr", "rn",
1640                                         "ss", "tw", "zu", "sr" };
1641     static const char iso3Languages [][5] = {     "amh", "bak", "fry", "mar", "run",
1642                                         "ssw", "twi", "zul", "srp" };
1643     static const char iso2Countries [][6] = {     "am_AF", "ba_BW", "fy_KZ", "mr_MO", "rn_MN",
1644                                         "ss_SB", "tw_TC", "zu_ZW", "sr_XK" };
1645     static const char iso3Countries [][4] = {     "AFG", "BWA", "KAZ", "MAC", "MNG",
1646                                         "SLB", "TCA", "ZWE", "XKK" };
1647     int32_t i;
1648 
1649     for (i = 0; i < 9; i++) {
1650       UErrorCode err = U_ZERO_ERROR;
1651       const char *test;
1652       test = uloc_getISO3Language(iso2Languages[i]);
1653       if(strcmp(test, iso3Languages[i]) !=0 || U_FAILURE(err))
1654          log_err("Got wrong ISO3 code for %s : Expected \"%s\", got \"%s\". %s\n",
1655                      iso2Languages[i], iso3Languages[i], test, myErrorName(err));
1656     }
1657     for (i = 0; i < 9; i++) {
1658       UErrorCode err = U_ZERO_ERROR;
1659       const char *test;
1660       test = uloc_getISO3Country(iso2Countries[i]);
1661       if(strcmp(test, iso3Countries[i]) !=0 || U_FAILURE(err))
1662          log_err("Got wrong ISO3 code for %s : Expected \"%s\", got \"%s\". %s\n",
1663                      iso2Countries[i], iso3Countries[i], test, myErrorName(err));
1664     }
1665 }
1666 
1667 
TestVariantParsing(void)1668 static void TestVariantParsing(void)
1669 {
1670     static const char* en_US_custom="en_US_De Anza_Cupertino_California_United States_Earth";
1671     static const char* dispName="English (United States, DE ANZA_CUPERTINO_CALIFORNIA_UNITED STATES_EARTH)";
1672     static const char* dispVar="DE ANZA_CUPERTINO_CALIFORNIA_UNITED STATES_EARTH";
1673     static const char* shortVariant="fr_FR_foo";
1674     static const char* bogusVariant="fr_FR__foo";
1675     static const char* bogusVariant2="fr_FR_foo_";
1676     static const char* bogusVariant3="fr_FR__foo_";
1677 
1678 
1679     UChar displayVar[100];
1680     UChar displayName[100];
1681     UErrorCode status=U_ZERO_ERROR;
1682     UChar* got=0;
1683     int32_t size=0;
1684     size=uloc_getDisplayVariant(en_US_custom, "en_US", NULL, size, &status);
1685     if(status==U_BUFFER_OVERFLOW_ERROR) {
1686         status=U_ZERO_ERROR;
1687         got=(UChar*)realloc(got, sizeof(UChar) * (size+1));
1688         uloc_getDisplayVariant(en_US_custom, "en_US", got, size + 1, &status);
1689     }
1690     else {
1691         log_err("FAIL: Didn't get U_BUFFER_OVERFLOW_ERROR\n");
1692     }
1693     u_uastrcpy(displayVar, dispVar);
1694     if(u_strcmp(got,displayVar)!=0) {
1695         log_err("FAIL: getDisplayVariant() Wanted %s, got %s\n", dispVar, austrdup(got));
1696     }
1697     size=0;
1698     size=uloc_getDisplayName(en_US_custom, "en_US", NULL, size, &status);
1699     if(status==U_BUFFER_OVERFLOW_ERROR) {
1700         status=U_ZERO_ERROR;
1701         got=(UChar*)realloc(got, sizeof(UChar) * (size+1));
1702         uloc_getDisplayName(en_US_custom, "en_US", got, size + 1, &status);
1703     }
1704     else {
1705         log_err("FAIL: Didn't get U_BUFFER_OVERFLOW_ERROR\n");
1706     }
1707     u_uastrcpy(displayName, dispName);
1708     if(u_strcmp(got,displayName)!=0) {
1709         if (status == U_USING_DEFAULT_WARNING) {
1710             log_data_err("FAIL: getDisplayName() got %s. Perhaps you are missing data?\n", u_errorName(status));
1711         } else {
1712             log_err("FAIL: getDisplayName() Wanted %s, got %s\n", dispName, austrdup(got));
1713         }
1714     }
1715 
1716     size=0;
1717     status=U_ZERO_ERROR;
1718     size=uloc_getDisplayVariant(shortVariant, NULL, NULL, size, &status);
1719     if(status==U_BUFFER_OVERFLOW_ERROR) {
1720         status=U_ZERO_ERROR;
1721         got=(UChar*)realloc(got, sizeof(UChar) * (size+1));
1722         uloc_getDisplayVariant(shortVariant, NULL, got, size + 1, &status);
1723     }
1724     else {
1725         log_err("FAIL: Didn't get U_BUFFER_OVERFLOW_ERROR\n");
1726     }
1727     if(strcmp(austrdup(got),"FOO")!=0) {
1728         log_err("FAIL: getDisplayVariant()  Wanted: foo  Got: %s\n", austrdup(got));
1729     }
1730     size=0;
1731     status=U_ZERO_ERROR;
1732     size=uloc_getDisplayVariant(bogusVariant, NULL, NULL, size, &status);
1733     if(status==U_BUFFER_OVERFLOW_ERROR) {
1734         status=U_ZERO_ERROR;
1735         got=(UChar*)realloc(got, sizeof(UChar) * (size+1));
1736         uloc_getDisplayVariant(bogusVariant, NULL, got, size + 1, &status);
1737     }
1738     else {
1739         log_err("FAIL: Didn't get U_BUFFER_OVERFLOW_ERROR\n");
1740     }
1741     if(strcmp(austrdup(got),"_FOO")!=0) {
1742         log_err("FAIL: getDisplayVariant()  Wanted: _FOO  Got: %s\n", austrdup(got));
1743     }
1744     size=0;
1745     status=U_ZERO_ERROR;
1746     size=uloc_getDisplayVariant(bogusVariant2, NULL, NULL, size, &status);
1747     if(status==U_BUFFER_OVERFLOW_ERROR) {
1748         status=U_ZERO_ERROR;
1749         got=(UChar*)realloc(got, sizeof(UChar) * (size+1));
1750         uloc_getDisplayVariant(bogusVariant2, NULL, got, size + 1, &status);
1751     }
1752     else {
1753         log_err("FAIL: Didn't get U_BUFFER_OVERFLOW_ERROR\n");
1754     }
1755     if(strcmp(austrdup(got),"FOO_")!=0) {
1756         log_err("FAIL: getDisplayVariant()  Wanted: FOO_  Got: %s\n", austrdup(got));
1757     }
1758     size=0;
1759     status=U_ZERO_ERROR;
1760     size=uloc_getDisplayVariant(bogusVariant3, NULL, NULL, size, &status);
1761     if(status==U_BUFFER_OVERFLOW_ERROR) {
1762         status=U_ZERO_ERROR;
1763         got=(UChar*)realloc(got, sizeof(UChar) * (size+1));
1764         uloc_getDisplayVariant(bogusVariant3, NULL, got, size + 1, &status);
1765     }
1766     else {
1767         log_err("FAIL: Didn't get U_BUFFER_OVERFLOW_ERROR\n");
1768     }
1769     if(strcmp(austrdup(got),"_FOO_")!=0) {
1770         log_err("FAIL: getDisplayVariant()  Wanted: _FOO_  Got: %s\n", austrdup(got));
1771     }
1772     free(got);
1773 }
1774 
1775 
TestObsoleteNames(void)1776 static void TestObsoleteNames(void)
1777 {
1778     int32_t i;
1779     UErrorCode status = U_ZERO_ERROR;
1780     char buff[256];
1781 
1782     static const struct
1783     {
1784         char locale[9];
1785         char lang3[4];
1786         char lang[4];
1787         char ctry3[4];
1788         char ctry[4];
1789     } tests[] =
1790     {
1791         { "eng_USA", "eng", "en", "USA", "US" },
1792         { "kok",  "kok", "kok", "", "" },
1793         { "in",  "ind", "in", "", "" },
1794         { "id",  "ind", "id", "", "" }, /* NO aliasing */
1795         { "sh",  "srp", "sh", "", "" },
1796         { "zz_CS",  "", "zz", "SCG", "CS" },
1797         { "zz_FX",  "", "zz", "FXX", "FX" },
1798         { "zz_RO",  "", "zz", "ROU", "RO" },
1799         { "zz_TP",  "", "zz", "TMP", "TP" },
1800         { "zz_TL",  "", "zz", "TLS", "TL" },
1801         { "zz_ZR",  "", "zz", "ZAR", "ZR" },
1802         { "zz_FXX",  "", "zz", "FXX", "FX" }, /* no aliasing. Doesn't go to PS(PSE). */
1803         { "zz_ROM",  "", "zz", "ROU", "RO" },
1804         { "zz_ROU",  "", "zz", "ROU", "RO" },
1805         { "zz_ZAR",  "", "zz", "ZAR", "ZR" },
1806         { "zz_TMP",  "", "zz", "TMP", "TP" },
1807         { "zz_TLS",  "", "zz", "TLS", "TL" },
1808         { "zz_YUG",  "", "zz", "YUG", "YU" },
1809         { "mlt_PSE", "mlt", "mt", "PSE", "PS" },
1810         { "iw", "heb", "iw", "", "" },
1811         { "ji", "yid", "ji", "", "" },
1812         { "jw", "jaw", "jw", "", "" },
1813         { "sh", "srp", "sh", "", "" },
1814         { "", "", "", "", "" }
1815     };
1816 
1817     for(i=0;tests[i].locale[0];i++)
1818     {
1819         const char *locale;
1820 
1821         locale = tests[i].locale;
1822         log_verbose("** %s:\n", locale);
1823 
1824         status = U_ZERO_ERROR;
1825         if(strcmp(tests[i].lang3,uloc_getISO3Language(locale)))
1826         {
1827             log_err("FAIL: uloc_getISO3Language(%s)==\t\"%s\",\t expected \"%s\"\n",
1828                 locale,  uloc_getISO3Language(locale), tests[i].lang3);
1829         }
1830         else
1831         {
1832             log_verbose("   uloc_getISO3Language()==\t\"%s\"\n",
1833                 uloc_getISO3Language(locale) );
1834         }
1835 
1836         status = U_ZERO_ERROR;
1837         uloc_getLanguage(locale, buff, 256, &status);
1838         if(U_FAILURE(status))
1839         {
1840             log_err("FAIL: error getting language from %s\n", locale);
1841         }
1842         else
1843         {
1844             if(strcmp(buff,tests[i].lang))
1845             {
1846                 log_err("FAIL: uloc_getLanguage(%s)==\t\"%s\"\t expected \"%s\"\n",
1847                     locale, buff, tests[i].lang);
1848             }
1849             else
1850             {
1851                 log_verbose("  uloc_getLanguage(%s)==\t%s\n", locale, buff);
1852             }
1853         }
1854         if(strcmp(tests[i].lang3,uloc_getISO3Language(locale)))
1855         {
1856             log_err("FAIL: uloc_getISO3Language(%s)==\t\"%s\",\t expected \"%s\"\n",
1857                 locale,  uloc_getISO3Language(locale), tests[i].lang3);
1858         }
1859         else
1860         {
1861             log_verbose("   uloc_getISO3Language()==\t\"%s\"\n",
1862                 uloc_getISO3Language(locale) );
1863         }
1864 
1865         if(strcmp(tests[i].ctry3,uloc_getISO3Country(locale)))
1866         {
1867             log_err("FAIL: uloc_getISO3Country(%s)==\t\"%s\",\t expected \"%s\"\n",
1868                 locale,  uloc_getISO3Country(locale), tests[i].ctry3);
1869         }
1870         else
1871         {
1872             log_verbose("   uloc_getISO3Country()==\t\"%s\"\n",
1873                 uloc_getISO3Country(locale) );
1874         }
1875 
1876         status = U_ZERO_ERROR;
1877         uloc_getCountry(locale, buff, 256, &status);
1878         if(U_FAILURE(status))
1879         {
1880             log_err("FAIL: error getting country from %s\n", locale);
1881         }
1882         else
1883         {
1884             if(strcmp(buff,tests[i].ctry))
1885             {
1886                 log_err("FAIL: uloc_getCountry(%s)==\t\"%s\"\t expected \"%s\"\n",
1887                     locale, buff, tests[i].ctry);
1888             }
1889             else
1890             {
1891                 log_verbose("  uloc_getCountry(%s)==\t%s\n", locale, buff);
1892             }
1893         }
1894     }
1895 
1896     if (uloc_getLCID("iw_IL") != uloc_getLCID("he_IL")) {
1897         log_err("he,iw LCID mismatch: %X versus %X\n", uloc_getLCID("iw_IL"), uloc_getLCID("he_IL"));
1898     }
1899 
1900     if (uloc_getLCID("iw") != uloc_getLCID("he")) {
1901         log_err("he,iw LCID mismatch: %X versus %X\n", uloc_getLCID("iw"), uloc_getLCID("he"));
1902     }
1903 
1904 #if 0
1905 
1906     i = uloc_getLanguage("kok",NULL,0,&icu_err);
1907     if(U_FAILURE(icu_err))
1908     {
1909         log_err("FAIL: Got %s trying to do uloc_getLanguage(kok)\n", u_errorName(icu_err));
1910     }
1911 
1912     icu_err = U_ZERO_ERROR;
1913     uloc_getLanguage("kok",r1_buff,12,&icu_err);
1914     if(U_FAILURE(icu_err))
1915     {
1916         log_err("FAIL: Got %s trying to do uloc_getLanguage(kok, buff)\n", u_errorName(icu_err));
1917     }
1918 
1919     r1_addr = (char *)uloc_getISO3Language("kok");
1920 
1921     icu_err = U_ZERO_ERROR;
1922     if (strcmp(r1_buff,"kok") != 0)
1923     {
1924         log_err("FAIL: uloc_getLanguage(kok)==%s not kok\n",r1_buff);
1925         line--;
1926     }
1927     r1_addr = (char *)uloc_getISO3Language("in");
1928     i = uloc_getLanguage(r1_addr,r1_buff,12,&icu_err);
1929     if (strcmp(r1_buff,"id") != 0)
1930     {
1931         printf("uloc_getLanguage error (%s)\n",r1_buff);
1932         line--;
1933     }
1934     r1_addr = (char *)uloc_getISO3Language("sh");
1935     i = uloc_getLanguage(r1_addr,r1_buff,12,&icu_err);
1936     if (strcmp(r1_buff,"sr") != 0)
1937     {
1938         printf("uloc_getLanguage error (%s)\n",r1_buff);
1939         line--;
1940     }
1941 
1942     r1_addr = (char *)uloc_getISO3Country("zz_ZR");
1943     strcpy(p1_buff,"zz_");
1944     strcat(p1_buff,r1_addr);
1945     i = uloc_getCountry(p1_buff,r1_buff,12,&icu_err);
1946     if (strcmp(r1_buff,"ZR") != 0)
1947     {
1948         printf("uloc_getCountry error (%s)\n",r1_buff);
1949         line--;
1950     }
1951     r1_addr = (char *)uloc_getISO3Country("zz_FX");
1952     strcpy(p1_buff,"zz_");
1953     strcat(p1_buff,r1_addr);
1954     i = uloc_getCountry(p1_buff,r1_buff,12,&icu_err);
1955     if (strcmp(r1_buff,"FX") != 0)
1956     {
1957         printf("uloc_getCountry error (%s)\n",r1_buff);
1958         line--;
1959     }
1960 
1961 #endif
1962 
1963 }
1964 
TestKeywordVariants(void)1965 static void TestKeywordVariants(void)
1966 {
1967     static const struct {
1968         const char *localeID;
1969         const char *expectedLocaleID;           /* uloc_getName */
1970         const char *expectedLocaleIDNoKeywords; /* uloc_getBaseName */
1971         const char *expectedCanonicalID;        /* uloc_canonicalize */
1972         const char *expectedKeywords[10];
1973         int32_t numKeywords;
1974         UErrorCode expectedStatus; /* from uloc_openKeywords */
1975     } testCases[] = {
1976         {
1977             "de_DE@  currency = euro; C o ll A t i o n   = Phonebook   ; C alen dar = buddhist   ",
1978             "de_DE@calendar=buddhist;collation=Phonebook;currency=euro",
1979             "de_DE",
1980             "de_DE@calendar=buddhist;collation=Phonebook;currency=euro",
1981             {"calendar", "collation", "currency"},
1982             3,
1983             U_ZERO_ERROR
1984         },
1985         {
1986             "de_DE@euro",
1987             "de_DE@euro",
1988             "de_DE@euro",   /* we probably should strip off the POSIX style variant @euro see #11690 */
1989             "de_DE_EURO",
1990             {"","","","","","",""},
1991             0,
1992             U_INVALID_FORMAT_ERROR /* must have '=' after '@' */
1993         },
1994         {
1995             "de_DE@euro;collation=phonebook",   /* The POSIX style variant @euro cannot be combined with key=value? */
1996             "de_DE", /* getName returns de_DE - should be INVALID_FORMAT_ERROR? */
1997             "de_DE", /* getBaseName returns de_DE - should be INVALID_FORMAT_ERROR? see #11690 */
1998             "de_DE", /* canonicalize returns de_DE - should be INVALID_FORMAT_ERROR? */
1999             {"","","","","","",""},
2000             0,
2001             U_INVALID_FORMAT_ERROR
2002         },
2003         {
2004             "de_DE@collation=",
2005             0, /* expected getName to fail */
2006             "de_DE", /* getBaseName returns de_DE - should be INVALID_FORMAT_ERROR? see #11690 */
2007             0, /* expected canonicalize to fail */
2008             {"","","","","","",""},
2009             0,
2010             U_INVALID_FORMAT_ERROR /* must have '=' after '@' */
2011         }
2012     };
2013     UErrorCode status = U_ZERO_ERROR;
2014 
2015     int32_t i = 0, j = 0;
2016     int32_t resultLen = 0;
2017     char buffer[256];
2018     UEnumeration *keywords;
2019     int32_t keyCount = 0;
2020     const char *keyword = NULL;
2021     int32_t keywordLen = 0;
2022 
2023     for(i = 0; i < UPRV_LENGTHOF(testCases); i++) {
2024         status = U_ZERO_ERROR;
2025         *buffer = 0;
2026         keywords = uloc_openKeywords(testCases[i].localeID, &status);
2027 
2028         if(status != testCases[i].expectedStatus) {
2029             log_err("Expected to uloc_openKeywords(\"%s\") => status %s. Got %s instead\n",
2030                     testCases[i].localeID,
2031                     u_errorName(testCases[i].expectedStatus), u_errorName(status));
2032         }
2033         status = U_ZERO_ERROR;
2034         if(keywords) {
2035             if((keyCount = uenum_count(keywords, &status)) != testCases[i].numKeywords) {
2036                 log_err("Expected to get %i keywords, got %i\n", testCases[i].numKeywords, keyCount);
2037             }
2038             if(keyCount) {
2039                 j = 0;
2040                 while((keyword = uenum_next(keywords, &keywordLen, &status))) {
2041                     if(strcmp(keyword, testCases[i].expectedKeywords[j]) != 0) {
2042                         log_err("Expected to get keyword value %s, got %s\n", testCases[i].expectedKeywords[j], keyword);
2043                     }
2044                     j++;
2045                 }
2046                 j = 0;
2047                 uenum_reset(keywords, &status);
2048                 while((keyword = uenum_next(keywords, &keywordLen, &status))) {
2049                     if(strcmp(keyword, testCases[i].expectedKeywords[j]) != 0) {
2050                         log_err("Expected to get keyword value %s, got %s\n", testCases[i].expectedKeywords[j], keyword);
2051                     }
2052                     j++;
2053                 }
2054             }
2055             uenum_close(keywords);
2056         }
2057 
2058         status = U_ZERO_ERROR;
2059         resultLen = uloc_getName(testCases[i].localeID, buffer, 256, &status);
2060         (void)resultLen;
2061         U_ASSERT(resultLen < 256);
2062         if (U_SUCCESS(status)) {
2063             if (testCases[i].expectedLocaleID == 0) {
2064                 log_err("Expected uloc_getName(\"%s\") to fail; got \"%s\"\n",
2065                         testCases[i].localeID, buffer);
2066             } else if (uprv_strcmp(testCases[i].expectedLocaleID, buffer) != 0) {
2067                 log_err("Expected uloc_getName(\"%s\") => \"%s\"; got \"%s\"\n",
2068                         testCases[i].localeID, testCases[i].expectedLocaleID, buffer);
2069             }
2070         } else {
2071             if (testCases[i].expectedLocaleID != 0) {
2072                 log_err("Expected uloc_getName(\"%s\") => \"%s\"; but returned error: %s\n",
2073                         testCases[i].localeID, testCases[i].expectedLocaleID, buffer, u_errorName(status));
2074             }
2075         }
2076 
2077         status = U_ZERO_ERROR;
2078         resultLen = uloc_getBaseName(testCases[i].localeID, buffer, 256, &status);
2079         U_ASSERT(resultLen < 256);
2080         if (U_SUCCESS(status)) {
2081             if (testCases[i].expectedLocaleIDNoKeywords == 0) {
2082                 log_err("Expected uloc_getBaseName(\"%s\") to fail; got \"%s\"\n",
2083                         testCases[i].localeID, buffer);
2084             } else if (uprv_strcmp(testCases[i].expectedLocaleIDNoKeywords, buffer) != 0) {
2085                 log_err("Expected uloc_getBaseName(\"%s\") => \"%s\"; got \"%s\"\n",
2086                         testCases[i].localeID, testCases[i].expectedLocaleIDNoKeywords, buffer);
2087             }
2088         } else {
2089             if (testCases[i].expectedLocaleIDNoKeywords != 0) {
2090                 log_err("Expected uloc_getBaseName(\"%s\") => \"%s\"; but returned error: %s\n",
2091                         testCases[i].localeID, testCases[i].expectedLocaleIDNoKeywords, buffer, u_errorName(status));
2092             }
2093         }
2094 
2095         status = U_ZERO_ERROR;
2096         resultLen = uloc_canonicalize(testCases[i].localeID, buffer, 256, &status);
2097         U_ASSERT(resultLen < 256);
2098         if (U_SUCCESS(status)) {
2099             if (testCases[i].expectedCanonicalID == 0) {
2100                 log_err("Expected uloc_canonicalize(\"%s\") to fail; got \"%s\"\n",
2101                         testCases[i].localeID, buffer);
2102             } else if (uprv_strcmp(testCases[i].expectedCanonicalID, buffer) != 0) {
2103                 log_err("Expected uloc_canonicalize(\"%s\") => \"%s\"; got \"%s\"\n",
2104                         testCases[i].localeID, testCases[i].expectedCanonicalID, buffer);
2105             }
2106         } else {
2107             if (testCases[i].expectedCanonicalID != 0) {
2108                 log_err("Expected uloc_canonicalize(\"%s\") => \"%s\"; but returned error: %s\n",
2109                         testCases[i].localeID, testCases[i].expectedCanonicalID, buffer, u_errorName(status));
2110             }
2111         }
2112     }
2113 }
2114 
TestKeywordVariantParsing(void)2115 static void TestKeywordVariantParsing(void)
2116 {
2117     static const struct {
2118         const char *localeID;
2119         const char *keyword;
2120         const char *expectedValue; /* NULL if failure is expected */
2121     } testCases[] = {
2122         { "de_DE@  C o ll A t i o n   = Phonebook   ", "c o ll a t i o n", NULL }, /* malformed key name */
2123         { "de_DE", "collation", ""},
2124         { "de_DE@collation=PHONEBOOK", "collation", "PHONEBOOK" },
2125         { "de_DE@currency = euro; CoLLaTion   = PHONEBOOk", "collatiON", "PHONEBOOk" },
2126     };
2127 
2128     UErrorCode status;
2129     int32_t i = 0;
2130     int32_t resultLen = 0;
2131     char buffer[256];
2132 
2133     for(i = 0; i < UPRV_LENGTHOF(testCases); i++) {
2134         *buffer = 0;
2135         status = U_ZERO_ERROR;
2136         resultLen = uloc_getKeywordValue(testCases[i].localeID, testCases[i].keyword, buffer, 256, &status);
2137         (void)resultLen;    /* Suppress set but not used warning. */
2138         if (testCases[i].expectedValue) {
2139             /* expect success */
2140             if (U_FAILURE(status)) {
2141                 log_err("Expected to extract \"%s\" from \"%s\" for keyword \"%s\". Instead got status %s\n",
2142                     testCases[i].expectedValue, testCases[i].localeID, testCases[i].keyword, u_errorName(status));
2143             } else if (uprv_strcmp(testCases[i].expectedValue, buffer) != 0) {
2144                 log_err("Expected to extract \"%s\" from \"%s\" for keyword \"%s\". Instead got \"%s\"\n",
2145                     testCases[i].expectedValue, testCases[i].localeID, testCases[i].keyword, buffer);
2146             }
2147         } else if (U_SUCCESS(status)) {
2148             /* expect failure */
2149             log_err("Expected failure but got success from \"%s\" for keyword \"%s\". Got \"%s\"\n",
2150                 testCases[i].localeID, testCases[i].keyword, buffer);
2151 
2152         }
2153     }
2154 }
2155 
/*
 * Shared data table for TestKeywordSet() and TestKeywordSetError().
 *
 * Each entry describes one uloc_setKeywordValue() scenario:
 *   l - the starting locale ID
 *   k - the keyword to set
 *   v - the value to set; "" or NULL requests removal of the keyword
 *   x - the expected resulting locale ID, or NULL when the call must fail
 *
 * The order of the entries matters: TestKeywordSetError() addresses the
 * first five entries by index, and the numbered comments further down
 * record the index of each entry.
 */
static const struct {
  const char *l; /* locale */
  const char *k; /* kw */
  const char *v; /* value */
  const char *x; /* expected */
} kwSetTestCases[] = {
  /* single keyword: add to, or replace in, a locale with at most one keyword */
  { "en_US", "calendar", "japanese", "en_US@calendar=japanese" },
  { "en_US@", "calendar", "japanese", "en_US@calendar=japanese" },
  { "en_US@calendar=islamic", "calendar", "japanese", "en_US@calendar=japanese" },
  /* "slovakian" is only a placeholder that has the same number of chars as "gregorian" */
  { "en_US@calendar=slovakian", "calendar", "gregorian", "en_US@calendar=gregorian" },
  { "en_US@calendar=gregorian", "calendar", "japanese", "en_US@calendar=japanese" },
  { "de", "Currency", "CHF", "de@currency=CHF" },
  { "de", "Currency", "CHF", "de@currency=CHF" },

  /* two keywords: results list the keywords in sorted order */
  { "en_US@collation=phonebook", "calendar", "japanese", "en_US@calendar=japanese;collation=phonebook" },
  { "en_US@calendar=japanese", "collation", "phonebook", "en_US@calendar=japanese;collation=phonebook" },
  { "de@collation=phonebook", "Currency", "CHF", "de@collation=phonebook;currency=CHF" },
  { "en_US@calendar=gregorian;collation=phonebook", "calendar", "japanese", "en_US@calendar=japanese;collation=phonebook" },
  /* placeholder value with the same number of chars as "gregorian" */
  { "en_US@calendar=slovakian;collation=phonebook", "calendar", "gregorian", "en_US@calendar=gregorian;collation=phonebook" },
  /* placeholder value "videobook" with the same number of chars as "phonebook" */
  { "en_US@calendar=slovakian;collation=videobook", "collation", "phonebook", "en_US@calendar=slovakian;collation=phonebook" },
  { "en_US@calendar=islamic;collation=phonebook", "calendar", "japanese", "en_US@calendar=japanese;collation=phonebook" },
  { "de@collation=phonebook", "Currency", "CHF", "de@collation=phonebook;currency=CHF" },

  /* several short keywords, including keywords that are prefixes of one another */
  { "mt@a=0;b=1;c=2;d=3", "c","j", "mt@a=0;b=1;c=j;d=3" },
  { "mt@a=0;b=1;c=2;d=3", "x","j", "mt@a=0;b=1;c=2;d=3;x=j" },
  { "mt@a=0;b=1;c=2;d=3", "a","f", "mt@a=f;b=1;c=2;d=3" },
  { "mt@a=0;aa=1;aaa=3", "a","x", "mt@a=x;aa=1;aaa=3" },
  { "mt@a=0;aa=1;aaa=3", "aa","x", "mt@a=0;aa=x;aaa=3" },
  { "mt@a=0;aa=1;aaa=3", "aaa","x", "mt@a=0;aa=1;aaa=x" },
  { "mt@a=0;aa=1;aaa=3", "a","yy", "mt@a=yy;aa=1;aaa=3" },
  { "mt@a=0;aa=1;aaa=3", "aa","yy", "mt@a=0;aa=yy;aaa=3" },
  { "mt@a=0;aa=1;aaa=3", "aaa","yy", "mt@a=0;aa=1;aaa=yy" },

  /* removal tests */
  /* 1. removal of item at end */
  { "de@collation=phonebook;currency=CHF", "currency",   "", "de@collation=phonebook" },
  { "de@collation=phonebook;currency=CHF", "currency", NULL, "de@collation=phonebook" },
  /* 2. removal of item at beginning */
  { "de@collation=phonebook;currency=CHF", "collation", "", "de@currency=CHF" },
  { "de@collation=phonebook;currency=CHF", "collation", NULL, "de@currency=CHF" },
  /* 3. removal of an item not there */
  { "de@collation=phonebook;currency=CHF", "calendar", NULL, "de@collation=phonebook;currency=CHF" },
  /* 4. removal of only item */
  { "de@collation=phonebook", "collation", NULL, "de" },

  { "de@collation=phonebook", "Currency", "CHF", "de@collation=phonebook;currency=CHF" },

  /* cases with legal extra spacing */
  /*31*/{ "en_US@ calendar = islamic", "calendar", "japanese", "en_US@calendar=japanese" },
  /*32*/{ "en_US@ calendar = gregorian ; collation = phonebook", "calendar", "japanese", "en_US@calendar=japanese;collation=phonebook" },
  /*33*/{ "en_US@ calendar = islamic", "currency", "CHF", "en_US@calendar=islamic;currency=CHF" },
  /*34*/{ "en_US@ currency = CHF", "calendar", "japanese", "en_US@calendar=japanese;currency=CHF" },

  /* cases in which setKeywordValue expected to fail (implied by NULL for expected); locale need not be canonical */
  /*35*/{ "en_US@calendar=gregorian;", "calendar", "japanese", NULL },
  /*36*/{ "en_US@calendar=gregorian;=", "calendar", "japanese", NULL },
  /*37*/{ "en_US@calendar=gregorian;currency=", "calendar", "japanese", NULL },
  /*38*/{ "en_US@=", "calendar", "japanese", NULL },
  /*39*/{ "en_US@=;", "calendar", "japanese", NULL },
  /*40*/{ "en_US@= ", "calendar", "japanese", NULL },
  /*41*/{ "en_US@ =", "calendar", "japanese", NULL },
  /*42*/{ "en_US@ = ", "calendar", "japanese", NULL },
  /*43*/{ "en_US@=;calendar=gregorian", "calendar", "japanese", NULL },
  /*44*/{ "en_US@= calen dar = gregorian", "calendar", "japanese", NULL },
  /*45*/{ "en_US@= calendar = greg orian", "calendar", "japanese", NULL },
  /*46*/{ "en_US@=;cal...endar=gregorian", "calendar", "japanese", NULL },
  /*47*/{ "en_US@=;calendar=greg...orian", "calendar", "japanese", NULL },
  /*48*/{ "en_US@calendar=gregorian", "cale ndar", "japanese", NULL },
  /*49*/{ "en_US@calendar=gregorian", "calendar", "japa..nese", NULL },

  /* cases in which getKeywordValue and setKeyword expected to fail (implied by NULL for value and expected) */
  /*50*/{ "en_US@=", "calendar", NULL, NULL },
  /*51*/{ "en_US@=;", "calendar", NULL, NULL },
  /*52*/{ "en_US@= ", "calendar", NULL, NULL },
  /*53*/{ "en_US@ =", "calendar", NULL, NULL },
  /*54*/{ "en_US@ = ", "calendar", NULL, NULL },
  /*55*/{ "en_US@=;calendar=gregorian", "calendar", NULL, NULL },
  /*56*/{ "en_US@= calen dar = gregorian", "calendar", NULL, NULL },
  /*57*/{ "en_US@= calendar = greg orian", "calendar", NULL, NULL },
  /*58*/{ "en_US@=;cal...endar=gregorian", "calendar", NULL, NULL },
  /*59*/{ "en_US@=;calendar=greg...orian", "calendar", NULL, NULL },
  /*60*/{ "en_US@calendar=gregorian", "cale ndar", NULL, NULL },
};
2239 
2240 
TestKeywordSet(void)2241 static void TestKeywordSet(void)
2242 {
2243     int32_t i = 0;
2244     int32_t resultLen = 0;
2245     char buffer[1024];
2246 
2247     char cbuffer[1024];
2248 
2249     for(i = 0; i < UPRV_LENGTHOF(kwSetTestCases); i++) {
2250       UErrorCode status = U_ZERO_ERROR;
2251       memset(buffer,'%',1023);
2252       strcpy(buffer, kwSetTestCases[i].l);
2253 
2254       if (kwSetTestCases[i].x != NULL) {
2255         uloc_canonicalize(kwSetTestCases[i].l, cbuffer, 1023, &status);
2256         if(strcmp(buffer,cbuffer)) {
2257           log_verbose("note: [%d] wasn't canonical, should be: '%s' not '%s'. Won't check for canonicity in output.\n", i, cbuffer, buffer);
2258         }
2259         /* sanity check test case results for canonicity */
2260         uloc_canonicalize(kwSetTestCases[i].x, cbuffer, 1023, &status);
2261         if(strcmp(kwSetTestCases[i].x,cbuffer)) {
2262           log_err("%s:%d: ERROR: kwSetTestCases[%d].x = '%s', should be %s (must be canonical)\n", __FILE__, __LINE__, i, kwSetTestCases[i].x, cbuffer);
2263         }
2264 
2265         status = U_ZERO_ERROR;
2266         resultLen = uloc_setKeywordValue(kwSetTestCases[i].k, kwSetTestCases[i].v, buffer, 1023, &status);
2267         if(U_FAILURE(status)) {
2268           log_err("Err on test case %d for setKeywordValue: got error %s\n", i, u_errorName(status));
2269         } else if(strcmp(buffer,kwSetTestCases[i].x) || ((int32_t)strlen(buffer)!=resultLen)) {
2270           log_err("FAIL: #%d setKeywordValue: %s + [%s=%s] -> %s (%d) expected %s (%d)\n", i, kwSetTestCases[i].l, kwSetTestCases[i].k,
2271                   kwSetTestCases[i].v, buffer, resultLen, kwSetTestCases[i].x, strlen(buffer));
2272         } else {
2273           log_verbose("pass: #%d: %s + [%s=%s] -> %s\n", i, kwSetTestCases[i].l, kwSetTestCases[i].k, kwSetTestCases[i].v,buffer);
2274         }
2275 
2276         if (kwSetTestCases[i].v != NULL && kwSetTestCases[i].v[0] != 0) {
2277           status = U_ZERO_ERROR;
2278           resultLen = uloc_getKeywordValue(kwSetTestCases[i].x, kwSetTestCases[i].k, buffer, 1023, &status);
2279           if(U_FAILURE(status)) {
2280             log_err("Err on test case %d for getKeywordValue: got error %s\n", i, u_errorName(status));
2281           } else if (resultLen != (int32_t)uprv_strlen(kwSetTestCases[i].v) || uprv_strcmp(buffer, kwSetTestCases[i].v) != 0) {
2282             log_err("FAIL: #%d getKeywordValue: got %s (%d) expected %s (%d)\n", i, buffer, resultLen,
2283                     kwSetTestCases[i].v, uprv_strlen(kwSetTestCases[i].v));
2284           }
2285         }
2286       } else {
2287         /* test cases expected to result in error */
2288         status = U_ZERO_ERROR;
2289         resultLen = uloc_setKeywordValue(kwSetTestCases[i].k, kwSetTestCases[i].v, buffer, 1023, &status);
2290         if(U_SUCCESS(status)) {
2291           log_err("Err on test case %d for setKeywordValue: expected to fail but succeeded, got %s (%d)\n", i, buffer, resultLen);
2292         }
2293 
2294         if (kwSetTestCases[i].v == NULL) {
2295           status = U_ZERO_ERROR;
2296           strcpy(cbuffer, kwSetTestCases[i].l);
2297           resultLen = uloc_getKeywordValue(cbuffer, kwSetTestCases[i].k, buffer, 1023, &status);
2298           if(U_SUCCESS(status)) {
2299             log_err("Err on test case %d for getKeywordValue: expected to fail but succeeded\n", i);
2300           }
2301         }
2302       }
2303     }
2304 }
2305 
TestKeywordSetError(void)2306 static void TestKeywordSetError(void)
2307 {
2308     char buffer[1024];
2309     UErrorCode status;
2310     int32_t res;
2311     int32_t i;
2312     int32_t blen;
2313 
2314     /* 0-test whether an error condition modifies the buffer at all */
2315     blen=0;
2316     i=0;
2317     memset(buffer,'%',1023);
2318     status = U_ZERO_ERROR;
2319     res = uloc_setKeywordValue(kwSetTestCases[i].k, kwSetTestCases[i].v, buffer, blen, &status);
2320     if(status != U_ILLEGAL_ARGUMENT_ERROR) {
2321         log_err("expected illegal err got %s\n", u_errorName(status));
2322         return;
2323     }
2324     /*  if(res!=strlen(kwSetTestCases[i].x)) {
2325     log_err("expected result %d got %d\n", strlen(kwSetTestCases[i].x), res);
2326     return;
2327     } */
2328     if(buffer[blen]!='%') {
2329         log_err("Buffer byte %d was modified: now %c\n", blen, buffer[blen]);
2330         return;
2331     }
2332     log_verbose("0-buffer modify OK\n");
2333 
2334     for(i=0;i<=2;i++) {
2335         /* 1- test a short buffer with growing text */
2336         blen=(int32_t)strlen(kwSetTestCases[i].l)+1;
2337         memset(buffer,'%',1023);
2338         strcpy(buffer,kwSetTestCases[i].l);
2339         status = U_ZERO_ERROR;
2340         res = uloc_setKeywordValue(kwSetTestCases[i].k, kwSetTestCases[i].v, buffer, blen, &status);
2341         if(status != U_BUFFER_OVERFLOW_ERROR) {
2342             log_err("expected buffer overflow on buffer %d got %s, len %d (%s + [%s=%s])\n", blen, u_errorName(status), res, kwSetTestCases[i].l, kwSetTestCases[i].k, kwSetTestCases[i].v);
2343             return;
2344         }
2345         if(res!=(int32_t)strlen(kwSetTestCases[i].x)) {
2346             log_err("expected result %d got %d\n", strlen(kwSetTestCases[i].x), res);
2347             return;
2348         }
2349         if(buffer[blen]!='%') {
2350             log_err("Buffer byte %d was modified: now %c\n", blen, buffer[blen]);
2351             return;
2352         }
2353         log_verbose("1/%d-buffer modify OK\n",i);
2354     }
2355 
2356     for(i=3;i<=4;i++) {
2357         /* 2- test a short buffer - text the same size or shrinking   */
2358         blen=(int32_t)strlen(kwSetTestCases[i].l)+1;
2359         memset(buffer,'%',1023);
2360         strcpy(buffer,kwSetTestCases[i].l);
2361         status = U_ZERO_ERROR;
2362         res = uloc_setKeywordValue(kwSetTestCases[i].k, kwSetTestCases[i].v, buffer, blen, &status);
2363         if(status != U_ZERO_ERROR) {
2364             log_err("expected zero error got %s\n", u_errorName(status));
2365             return;
2366         }
2367         if(buffer[blen+1]!='%') {
2368             log_err("Buffer byte %d was modified: now %c\n", blen+1, buffer[blen+1]);
2369             return;
2370         }
2371         if(res!=(int32_t)strlen(kwSetTestCases[i].x)) {
2372             log_err("expected result %d got %d\n", strlen(kwSetTestCases[i].x), res);
2373             return;
2374         }
2375         if(strcmp(buffer,kwSetTestCases[i].x) || ((int32_t)strlen(buffer)!=res)) {
2376             log_err("FAIL: #%d: %s + [%s=%s] -> %s (%d) expected %s (%d)\n", i, kwSetTestCases[i].l, kwSetTestCases[i].k,
2377                 kwSetTestCases[i].v, buffer, res, kwSetTestCases[i].x, strlen(buffer));
2378         } else {
2379             log_verbose("pass: #%d: %s + [%s=%s] -> %s\n", i, kwSetTestCases[i].l, kwSetTestCases[i].k, kwSetTestCases[i].v,
2380                 buffer);
2381         }
2382         log_verbose("2/%d-buffer modify OK\n",i);
2383     }
2384 }
2385 
/*
 * Selects which locale-ID normalization API to call, so that one test loop
 * can cover both of them:
 *   selector 0 -> uloc_getName()
 *   selector 1 -> uloc_canonicalize()
 * The remaining arguments are forwarded unchanged and the callee's return
 * value is returned.  Any other selector returns -1 without touching *ec.
 */
static int32_t _canonicalize(int32_t selector, /* 0==getName, 1==canonicalize */
                             const char* localeID,
                             char* result,
                             int32_t resultCapacity,
                             UErrorCode* ec) {
    if (selector == 0) {
        return uloc_getName(localeID, result, resultCapacity, ec);
    }
    if (selector == 1) {
        return uloc_canonicalize(localeID, result, resultCapacity, ec);
    }
    /* unknown selector */
    return -1;
}
2401 
TestCanonicalization(void)2402 static void TestCanonicalization(void)
2403 {
2404     static const struct {
2405         const char *localeID;    /* input */
2406         const char *getNameID;   /* expected getName() result */
2407         const char *canonicalID; /* expected canonicalize() result */
2408     } testCases[] = {
2409         { "ca_ES-with-extra-stuff-that really doesn't make any sense-unless-you're trying to increase code coverage",
2410           "ca_ES_WITH_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_YOU'RE TRYING TO INCREASE CODE COVERAGE",
2411           "ca_ES_WITH_EXTRA_STUFF_THAT REALLY DOESN'T MAKE ANY SENSE_UNLESS_YOU'RE TRYING TO INCREASE CODE COVERAGE"},
2412         { "zh@collation=pinyin", "zh@collation=pinyin", "zh@collation=pinyin" },
2413         { "zh_CN@collation=pinyin", "zh_CN@collation=pinyin", "zh_CN@collation=pinyin" },
2414         { "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin", "zh_CN_CA@collation=pinyin" },
2415         { "en_US_POSIX", "en_US_POSIX", "en_US_POSIX" },
2416         { "hy_AM_REVISED", "hy_AM_REVISED", "hy_AM_REVISED" },
2417         { "no_NO_NY", "no_NO_NY", "no_NO_NY" /* not: "nn_NO" [alan ICU3.0] */ },
2418         { "no@ny", "no@ny", "no__NY" /* not: "nn" [alan ICU3.0] */ }, /* POSIX ID */
2419         { "no-no.utf32@B", "no_NO.utf32@B", "no_NO_B" /* not: "nb_NO_B" [alan ICU3.0] */ }, /* POSIX ID */
2420         { "qz-qz@Euro", "qz_QZ@Euro", "qz_QZ_EURO" }, /* qz-qz uses private use iso codes */
2421         { "en-BOONT", "en__BOONT", "en__BOONT" }, /* registered name */
2422         { "de-1901", "de__1901", "de__1901" }, /* registered name */
2423         { "de-1906", "de__1906", "de__1906" }, /* registered name */
2424 
2425         /* posix behavior that used to be performed by getName */
2426         { "mr.utf8", "mr.utf8", "mr" },
2427         { "de-tv.koi8r", "de_TV.koi8r", "de_TV" },
2428         { "x-piglatin_ML.MBE", "x-piglatin_ML.MBE", "x-piglatin_ML" },
2429         { "i-cherokee_US.utf7", "i-cherokee_US.utf7", "i-cherokee_US" },
2430         { "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA.gb-18030", "x-filfli_MT_FILFLA" },
2431         { "no-no-ny.utf8@B", "no_NO_NY.utf8@B", "no_NO_NY_B" /* not: "nn_NO" [alan ICU3.0] */ }, /* @ ignored unless variant is empty */
2432 
2433         /* fleshing out canonicalization */
2434         /* trim space and sort keywords, ';' is separator so not present at end in canonical form */
2435         { "en_Hant_IL_VALLEY_GIRL@ currency = EUR; calendar = Japanese ;", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR" },
2436         /* already-canonical ids are not changed */
2437         { "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR", "en_Hant_IL_VALLEY_GIRL@calendar=Japanese;currency=EUR" },
2438         /* norwegian is just too weird, if we handle things in their full generality */
2439         { "no-Hant-GB_NY@currency=$$$", "no_Hant_GB_NY@currency=$$$", "no_Hant_GB_NY@currency=$$$" /* not: "nn_Hant_GB@currency=$$$" [alan ICU3.0] */ },
2440 
2441         /* test cases reflecting internal resource bundle usage */
2442         { "root@kw=foo", "root@kw=foo", "root@kw=foo" },
2443         { "@calendar=gregorian", "@calendar=gregorian", "@calendar=gregorian" },
2444         { "ja_JP@calendar=Japanese", "ja_JP@calendar=Japanese", "ja_JP@calendar=Japanese" },
2445         { "ja_JP", "ja_JP", "ja_JP" },
2446 
2447         /* test case for "i-default" */
2448         { "i-default", "en@x=i-default", "en@x=i-default" },
2449 
2450         // Before ICU 64, ICU locale canonicalization had some additional mappings.
2451         // They were removed for ICU-20187 "drop support for long-obsolete locale ID variants".
2452         // The following now use standard canonicalization.
2453         { "ca_ES_PREEURO", "ca_ES_PREEURO", "ca_ES_PREEURO" },
2454         { "de_AT_PREEURO", "de_AT_PREEURO", "de_AT_PREEURO" },
2455         { "de_DE_PREEURO", "de_DE_PREEURO", "de_DE_PREEURO" },
2456         { "de_LU_PREEURO", "de_LU_PREEURO", "de_LU_PREEURO" },
2457         { "el_GR_PREEURO", "el_GR_PREEURO", "el_GR_PREEURO" },
2458         { "en_BE_PREEURO", "en_BE_PREEURO", "en_BE_PREEURO" },
2459         { "en_IE_PREEURO", "en_IE_PREEURO", "en_IE_PREEURO" },
2460         { "es_ES_PREEURO", "es_ES_PREEURO", "es_ES_PREEURO" },
2461         { "eu_ES_PREEURO", "eu_ES_PREEURO", "eu_ES_PREEURO" },
2462         { "fi_FI_PREEURO", "fi_FI_PREEURO", "fi_FI_PREEURO" },
2463         { "fr_BE_PREEURO", "fr_BE_PREEURO", "fr_BE_PREEURO" },
2464         { "fr_FR_PREEURO", "fr_FR_PREEURO", "fr_FR_PREEURO" },
2465         { "fr_LU_PREEURO", "fr_LU_PREEURO", "fr_LU_PREEURO" },
2466         { "ga_IE_PREEURO", "ga_IE_PREEURO", "ga_IE_PREEURO" },
2467         { "gl_ES_PREEURO", "gl_ES_PREEURO", "gl_ES_PREEURO" },
2468         { "it_IT_PREEURO", "it_IT_PREEURO", "it_IT_PREEURO" },
2469         { "nl_BE_PREEURO", "nl_BE_PREEURO", "nl_BE_PREEURO" },
2470         { "nl_NL_PREEURO", "nl_NL_PREEURO", "nl_NL_PREEURO" },
2471         { "pt_PT_PREEURO", "pt_PT_PREEURO", "pt_PT_PREEURO" },
2472         { "de__PHONEBOOK", "de__PHONEBOOK", "de__PHONEBOOK" },
2473         { "en_GB_EURO", "en_GB_EURO", "en_GB_EURO" },
2474         { "en_GB@EURO", "en_GB@EURO", "en_GB_EURO" }, /* POSIX ID */
2475         { "es__TRADITIONAL", "es__TRADITIONAL", "es__TRADITIONAL" },
2476         { "hi__DIRECT", "hi__DIRECT", "hi__DIRECT" },
2477         { "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL", "ja_JP_TRADITIONAL" },
2478         { "th_TH_TRADITIONAL", "th_TH_TRADITIONAL", "th_TH_TRADITIONAL" },
2479         { "zh_TW_STROKE", "zh_TW_STROKE", "zh_TW_STROKE" },
2480         { "zh__PINYIN", "zh__PINYIN", "zh__PINYIN" },
2481         { "zh_CN_STROKE", "zh_CN_STROKE", "zh_CN_STROKE" },
2482         { "sr-SP-Cyrl", "sr_SP_CYRL", "sr_SP_CYRL" }, /* .NET name */
2483         { "sr-SP-Latn", "sr_SP_LATN", "sr_SP_LATN" }, /* .NET name */
2484         { "sr_YU_CYRILLIC", "sr_YU_CYRILLIC", "sr_YU_CYRILLIC" }, /* Linux name */
2485         { "uz-UZ-Cyrl", "uz_UZ_CYRL", "uz_UZ_CYRL" }, /* .NET name */
2486         { "uz-UZ-Latn", "uz_UZ_LATN", "uz_UZ_LATN" }, /* .NET name */
2487         { "zh-CHS", "zh_CHS", "zh_CHS" }, /* .NET name */
2488         { "zh-CHT", "zh_CHT", "zh_CHT" }, /* .NET name This may change back to zh_Hant */
2489         /* PRE_EURO and EURO conversions don't affect other keywords */
2490         { "es_ES_PREEURO@CALendar=Japanese", "es_ES_PREEURO@calendar=Japanese", "es_ES_PREEURO@calendar=Japanese" },
2491         { "es_ES_EURO@SHOUT=zipeedeedoodah", "es_ES_EURO@shout=zipeedeedoodah", "es_ES_EURO@shout=zipeedeedoodah" },
2492         /* currency keyword overrides PRE_EURO and EURO currency */
2493         { "es_ES_PREEURO@currency=EUR", "es_ES_PREEURO@currency=EUR", "es_ES_PREEURO@currency=EUR" },
2494         { "es_ES_EURO@currency=ESP", "es_ES_EURO@currency=ESP", "es_ES_EURO@currency=ESP" },
2495     };
2496 
2497     static const char* label[] = { "getName", "canonicalize" };
2498 
2499     UErrorCode status = U_ZERO_ERROR;
2500     int32_t i, j, resultLen = 0, origResultLen;
2501     char buffer[256];
2502 
2503     for (i=0; i < UPRV_LENGTHOF(testCases); i++) {
2504         for (j=0; j<2; ++j) {
2505             const char* expected = (j==0) ? testCases[i].getNameID : testCases[i].canonicalID;
2506             *buffer = 0;
2507             status = U_ZERO_ERROR;
2508 
2509             if (expected == NULL) {
2510                 expected = uloc_getDefault();
2511             }
2512 
2513             /* log_verbose("testing %s -> %s\n", testCases[i], testCases[i].canonicalID); */
2514             origResultLen = _canonicalize(j, testCases[i].localeID, NULL, 0, &status);
2515             if (status != U_BUFFER_OVERFLOW_ERROR) {
2516                 log_err("FAIL: uloc_%s(%s) => %s, expected U_BUFFER_OVERFLOW_ERROR\n",
2517                         label[j], testCases[i].localeID, u_errorName(status));
2518                 continue;
2519             }
2520             status = U_ZERO_ERROR;
2521             resultLen = _canonicalize(j, testCases[i].localeID, buffer, sizeof(buffer), &status);
2522             if (U_FAILURE(status)) {
2523                 log_err("FAIL: uloc_%s(%s) => %s, expected U_ZERO_ERROR\n",
2524                         label[j], testCases[i].localeID, u_errorName(status));
2525                 continue;
2526             }
2527             if(uprv_strcmp(expected, buffer) != 0) {
2528                 log_err("FAIL: uloc_%s(%s) => \"%s\", expected \"%s\"\n",
2529                         label[j], testCases[i].localeID, buffer, expected);
2530             } else {
2531                 log_verbose("Ok: uloc_%s(%s) => \"%s\"\n",
2532                             label[j], testCases[i].localeID, buffer);
2533             }
2534             if (resultLen != (int32_t)strlen(buffer)) {
2535                 log_err("FAIL: uloc_%s(%s) => len %d, expected len %d\n",
2536                         label[j], testCases[i].localeID, resultLen, strlen(buffer));
2537             }
2538             if (origResultLen != resultLen) {
2539                 log_err("FAIL: uloc_%s(%s) => preflight len %d != actual len %d\n",
2540                         label[j], testCases[i].localeID, origResultLen, resultLen);
2541             }
2542         }
2543     }
2544 }
2545 
TestCanonicalizationBuffer(void)2546 static void TestCanonicalizationBuffer(void)
2547 {
2548     UErrorCode status = U_ZERO_ERROR;
2549     char buffer[256];
2550 
2551     // ULOC_FULLNAME_CAPACITY == 157 (uloc.h)
2552     static const char name[] =
2553         "zh@x"
2554         "=foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
2555         "-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
2556         "-foo-bar-baz-foo-bar-baz-foo-bar-baz-foo-bar-baz"
2557         "-foo-barz"
2558     ;
2559     static const size_t len = sizeof(name) - 1;  // Without NUL terminator.
2560 
2561     int32_t reslen = uloc_canonicalize(name, buffer, (int32_t)len, &status);
2562 
2563     if (U_FAILURE(status)) {
2564         log_err("FAIL: uloc_canonicalize(%s) => %s, expected !U_FAILURE()\n",
2565                 name, u_errorName(status));
2566         return;
2567     }
2568 
2569     if (reslen != len) {
2570         log_err("FAIL: uloc_canonicalize(%s) => \"%i\", expected \"%u\"\n",
2571                 name, reslen, len);
2572         return;
2573     }
2574 
2575     if (uprv_strncmp(name, buffer, len) != 0) {
2576         log_err("FAIL: uloc_canonicalize(%s) => \"%.*s\", expected \"%s\"\n",
2577                 name, reslen, buffer, name);
2578         return;
2579     }
2580 }
2581 
TestCanonicalization21749StackUseAfterScope(void)2582 static void TestCanonicalization21749StackUseAfterScope(void)
2583 {
2584     UErrorCode status = U_ZERO_ERROR;
2585     char buffer[256];
2586     const char* input = "- _";
2587     uloc_canonicalize(input, buffer, -1, &status);
2588     if (U_SUCCESS(status)) {
2589         log_err("FAIL: uloc_canonicalize(%s) => %s, expected U_FAILURE()\n",
2590                 input, u_errorName(status));
2591         return;
2592     }
2593 
2594     // ICU-22475 test that we don't free an internal buffer twice.
2595     status = U_ZERO_ERROR;
2596     uloc_canonicalize("ti-defaultgR-lS-z-UK-0P", buffer, UPRV_LENGTHOF(buffer), &status);
2597 }
2598 
TestDisplayKeywords(void)2599 static void TestDisplayKeywords(void)
2600 {
2601     int32_t i;
2602 
2603     static const struct {
2604         const char *localeID;
2605         const char *displayLocale;
2606         UChar displayKeyword[200];
2607     } testCases[] = {
2608         {   "ca_ES@currency=ESP",         "de_AT",
2609             {0x0057, 0x00e4, 0x0068, 0x0072, 0x0075, 0x006e, 0x0067, 0x0000},
2610         },
2611         {   "ja_JP@calendar=japanese",         "de",
2612             { 0x004b, 0x0061, 0x006c, 0x0065, 0x006e, 0x0064, 0x0065, 0x0072, 0x0000}
2613         },
2614         {   "de_DE@collation=traditional",       "de_DE",
2615             {0x0053, 0x006f, 0x0072, 0x0074, 0x0069, 0x0065, 0x0072, 0x0075, 0x006e, 0x0067, 0x0000}
2616         },
2617     };
2618     for(i = 0; i < UPRV_LENGTHOF(testCases); i++) {
2619         UErrorCode status = U_ZERO_ERROR;
2620         const char* keyword =NULL;
2621         int32_t keywordLen = 0;
2622         int32_t keywordCount = 0;
2623         UChar *displayKeyword=NULL;
2624         int32_t displayKeywordLen = 0;
2625         UEnumeration* keywordEnum = uloc_openKeywords(testCases[i].localeID, &status);
2626         for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){
2627               if(U_FAILURE(status)){
2628                   log_err("uloc_getKeywords failed for locale id: %s with error : %s \n", testCases[i].localeID, u_errorName(status));
2629                   break;
2630               }
2631               /* the uenum_next returns NUL terminated string */
2632               keyword = uenum_next(keywordEnum, &keywordLen, &status);
2633               /* fetch the displayKeyword */
2634               displayKeywordLen = uloc_getDisplayKeyword(keyword, testCases[i].displayLocale, displayKeyword, displayKeywordLen, &status);
2635               if(status==U_BUFFER_OVERFLOW_ERROR){
2636                   status = U_ZERO_ERROR;
2637                   displayKeywordLen++; /* for null termination */
2638                   displayKeyword = (UChar*) malloc(displayKeywordLen * U_SIZEOF_UCHAR);
2639                   displayKeywordLen = uloc_getDisplayKeyword(keyword, testCases[i].displayLocale, displayKeyword, displayKeywordLen, &status);
2640                   if(U_FAILURE(status)){
2641                       log_err("uloc_getDisplayKeyword filed for keyword : %s in locale id: %s for display locale: %s \n", testCases[i].localeID, keyword, testCases[i].displayLocale, u_errorName(status));
2642                       free(displayKeyword);
2643                       break;
2644                   }
2645                   if(u_strncmp(displayKeyword, testCases[i].displayKeyword, displayKeywordLen)!=0){
2646                       if (status == U_USING_DEFAULT_WARNING) {
2647                           log_data_err("uloc_getDisplayKeyword did not get the expected value for keyword : %s in locale id: %s for display locale: %s . Got error: %s. Perhaps you are missing data?\n", testCases[i].localeID, keyword, testCases[i].displayLocale, u_errorName(status));
2648                       } else {
2649                           log_err("uloc_getDisplayKeyword did not get the expected value for keyword : %s in locale id: %s for display locale: %s \n", testCases[i].localeID, keyword, testCases[i].displayLocale);
2650                       }
2651                       free(displayKeyword);
2652                       break;
2653                   }
2654               }else{
2655                   log_err("uloc_getDisplayKeyword did not return the expected error. Error: %s\n", u_errorName(status));
2656               }
2657 
2658               free(displayKeyword);
2659 
2660         }
2661         uenum_close(keywordEnum);
2662     }
2663 }
2664 
TestDisplayKeywordValues(void)2665 static void TestDisplayKeywordValues(void){
2666     int32_t i;
2667 
2668     static const struct {
2669         const char *localeID;
2670         const char *displayLocale;
2671         UChar displayKeywordValue[500];
2672     } testCases[] = {
2673         {   "ca_ES@currency=ESP",         "de_AT",
2674             {0x0053, 0x0070, 0x0061, 0x006e, 0x0069, 0x0073, 0x0063, 0x0068, 0x0065, 0x0020, 0x0050, 0x0065, 0x0073, 0x0065, 0x0074, 0x0061, 0x0000}
2675         },
2676         {   "de_AT@currency=ATS",         "fr_FR",
2677             {0x0073, 0x0063, 0x0068, 0x0069, 0x006c, 0x006c, 0x0069, 0x006e, 0x0067, 0x0020, 0x0061, 0x0075, 0x0074, 0x0072, 0x0069, 0x0063, 0x0068, 0x0069, 0x0065, 0x006e, 0x0000}
2678         },
2679         {   "de_DE@currency=DEM",         "it",
2680             {0x006d, 0x0061, 0x0072, 0x0063, 0x006f, 0x0020, 0x0074, 0x0065, 0x0064, 0x0065, 0x0073, 0x0063, 0x006f, 0x0000}
2681         },
2682         {   "el_GR@currency=GRD",         "en",
2683             {0x0047, 0x0072, 0x0065, 0x0065, 0x006b, 0x0020, 0x0044, 0x0072, 0x0061, 0x0063, 0x0068, 0x006d, 0x0061, 0x0000}
2684         },
2685         {   "eu_ES@currency=ESP",         "it_IT",
2686             {0x0070, 0x0065, 0x0073, 0x0065, 0x0074, 0x0061, 0x0020, 0x0073, 0x0070, 0x0061, 0x0067, 0x006e, 0x006f, 0x006c, 0x0061, 0x0000}
2687         },
2688         {   "de@collation=phonebook",     "es",
2689             {0x006F, 0x0072, 0x0064, 0x0065, 0x006E, 0x0020, 0x0064, 0x0065, 0x0020, 0x006C, 0x0069, 0x0073, 0x0074, 0x00ED, 0x006E, 0x0020, 0x0074, 0x0065, 0x006C, 0x0065, 0x0066, 0x00F3, 0x006E, 0x0069, 0x0063, 0x006F, 0x0000}
2690         },
2691 
2692         { "de_DE@collation=phonebook",  "es",
2693           {0x006F, 0x0072, 0x0064, 0x0065, 0x006E, 0x0020, 0x0064, 0x0065, 0x0020, 0x006C, 0x0069, 0x0073, 0x0074, 0x00ED, 0x006E, 0x0020, 0x0074, 0x0065, 0x006C, 0x0065, 0x0066, 0x00F3, 0x006E, 0x0069, 0x0063, 0x006F, 0x0000}
2694         },
2695         { "es_ES@collation=traditional","de",
2696           {0x0054, 0x0072, 0x0061, 0x0064, 0x0069, 0x0074, 0x0069, 0x006f, 0x006e, 0x0065, 0x006c, 0x006c, 0x0065, 0x0020, 0x0053, 0x006f, 0x0072, 0x0074, 0x0069, 0x0065, 0x0072, 0x0075, 0x006E, 0x0067, 0x0000}
2697         },
2698         { "ja_JP@calendar=japanese",    "de",
2699            {0x004a, 0x0061, 0x0070, 0x0061, 0x006e, 0x0069, 0x0073, 0x0063, 0x0068, 0x0065, 0x0072, 0x0020, 0x004b, 0x0061, 0x006c, 0x0065, 0x006e, 0x0064, 0x0065, 0x0072, 0x0000}
2700         },
2701     };
2702     for(i = 0; i < UPRV_LENGTHOF(testCases); i++) {
2703         UErrorCode status = U_ZERO_ERROR;
2704         const char* keyword =NULL;
2705         int32_t keywordLen = 0;
2706         int32_t keywordCount = 0;
2707         UChar *displayKeywordValue = NULL;
2708         int32_t displayKeywordValueLen = 0;
2709         UEnumeration* keywordEnum = uloc_openKeywords(testCases[i].localeID, &status);
2710         for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){
2711               if(U_FAILURE(status)){
2712                   log_err("uloc_getKeywords failed for locale id: %s in display locale: % with error : %s \n", testCases[i].localeID, testCases[i].displayLocale, u_errorName(status));
2713                   break;
2714               }
2715               /* the uenum_next returns NUL terminated string */
2716               keyword = uenum_next(keywordEnum, &keywordLen, &status);
2717 
2718               /* fetch the displayKeywordValue */
2719               displayKeywordValueLen = uloc_getDisplayKeywordValue(testCases[i].localeID, keyword, testCases[i].displayLocale, displayKeywordValue, displayKeywordValueLen, &status);
2720               if(status==U_BUFFER_OVERFLOW_ERROR){
2721                   status = U_ZERO_ERROR;
2722                   displayKeywordValueLen++; /* for null termination */
2723                   displayKeywordValue = (UChar*)malloc(displayKeywordValueLen * U_SIZEOF_UCHAR);
2724                   displayKeywordValueLen = uloc_getDisplayKeywordValue(testCases[i].localeID, keyword, testCases[i].displayLocale, displayKeywordValue, displayKeywordValueLen, &status);
2725                   if(U_FAILURE(status)){
2726                       log_err("uloc_getDisplayKeywordValue failed for keyword : %s in locale id: %s for display locale: %s with error : %s \n", testCases[i].localeID, keyword, testCases[i].displayLocale, u_errorName(status));
2727                       free(displayKeywordValue);
2728                       break;
2729                   }
2730                   if(u_strncmp(displayKeywordValue, testCases[i].displayKeywordValue, displayKeywordValueLen)!=0){
2731                       if (status == U_USING_DEFAULT_WARNING) {
2732                           log_data_err("uloc_getDisplayKeywordValue did not return the expected value keyword : %s in locale id: %s for display locale: %s with error : %s Perhaps you are missing data\n", testCases[i].localeID, keyword, testCases[i].displayLocale, u_errorName(status));
2733                       } else {
2734                           log_err("uloc_getDisplayKeywordValue did not return the expected value keyword : %s in locale id: %s for display locale: %s with error : %s \n", testCases[i].localeID, keyword, testCases[i].displayLocale, u_errorName(status));
2735                       }
2736                       free(displayKeywordValue);
2737                       break;
2738                   }
2739               }else{
2740                   log_err("uloc_getDisplayKeywordValue did not return the expected error. Error: %s\n", u_errorName(status));
2741               }
2742               free(displayKeywordValue);
2743         }
2744         uenum_close(keywordEnum);
2745     }
2746     {
2747         /* test a multiple keywords */
2748         UErrorCode status = U_ZERO_ERROR;
2749         const char* keyword =NULL;
2750         int32_t keywordLen = 0;
2751         int32_t keywordCount = 0;
2752         const char* localeID = "es@collation=phonebook;calendar=buddhist;currency=DEM";
2753         const char* displayLocale = "de";
2754         static const UChar expected[][50] = {
2755             {0x0042, 0x0075, 0x0064, 0x0064, 0x0068, 0x0069, 0x0073, 0x0074, 0x0069, 0x0073, 0x0063, 0x0068, 0x0065, 0x0072, 0x0020, 0x004b, 0x0061, 0x006c, 0x0065, 0x006e, 0x0064, 0x0065, 0x0072, 0x0000},
2756 
2757             {0x0054, 0x0065, 0x006c, 0x0065, 0x0066, 0x006f, 0x006e, 0x0062, 0x0075, 0x0063, 0x0068, 0x002d, 0x0053, 0x006f, 0x0072, 0x0074, 0x0069, 0x0065, 0x0072, 0x0075, 0x006e, 0x0067, 0x0000},
2758             {0x0044, 0x0065, 0x0075, 0x0074, 0x0073, 0x0063, 0x0068, 0x0065, 0x0020, 0x004d, 0x0061, 0x0072, 0x006b, 0x0000},
2759         };
2760 
2761         UEnumeration* keywordEnum = uloc_openKeywords(localeID, &status);
2762 
2763         for(keywordCount = 0; keywordCount < uenum_count(keywordEnum, &status) ; keywordCount++){
2764               UChar *displayKeywordValue = NULL;
2765               int32_t displayKeywordValueLen = 0;
2766               if(U_FAILURE(status)){
2767                   log_err("uloc_getKeywords failed for locale id: %s in display locale: % with error : %s \n", localeID, displayLocale, u_errorName(status));
2768                   break;
2769               }
2770               /* the uenum_next returns NUL terminated string */
2771               keyword = uenum_next(keywordEnum, &keywordLen, &status);
2772 
2773               /* fetch the displayKeywordValue */
2774               displayKeywordValueLen = uloc_getDisplayKeywordValue(localeID, keyword, displayLocale, displayKeywordValue, displayKeywordValueLen, &status);
2775               if(status==U_BUFFER_OVERFLOW_ERROR){
2776                   status = U_ZERO_ERROR;
2777                   displayKeywordValueLen++; /* for null termination */
2778                   displayKeywordValue = (UChar*)malloc(displayKeywordValueLen * U_SIZEOF_UCHAR);
2779                   displayKeywordValueLen = uloc_getDisplayKeywordValue(localeID, keyword, displayLocale, displayKeywordValue, displayKeywordValueLen, &status);
2780                   if(U_FAILURE(status)){
2781                       log_err("uloc_getDisplayKeywordValue failed for keyword : %s in locale id: %s for display locale: %s with error : %s \n", localeID, keyword, displayLocale, u_errorName(status));
2782                       free(displayKeywordValue);
2783                       break;
2784                   }
2785                   if(u_strncmp(displayKeywordValue, expected[keywordCount], displayKeywordValueLen)!=0){
2786                       if (status == U_USING_DEFAULT_WARNING) {
2787                           log_data_err("uloc_getDisplayKeywordValue did not return the expected value keyword : %s in locale id: %s for display locale: %s  got error: %s. Perhaps you are missing data?\n", localeID, keyword, displayLocale, u_errorName(status));
2788                       } else {
2789                           log_err("uloc_getDisplayKeywordValue did not return the expected value keyword : %s in locale id: %s for display locale: %s \n", localeID, keyword, displayLocale);
2790                       }
2791                       free(displayKeywordValue);
2792                       break;
2793                   }
2794               }else{
2795                   log_err("uloc_getDisplayKeywordValue did not return the expected error. Error: %s\n", u_errorName(status));
2796               }
2797               free(displayKeywordValue);
2798         }
2799         uenum_close(keywordEnum);
2800 
2801     }
2802     {
2803         /* Test non existent keywords */
2804         UErrorCode status = U_ZERO_ERROR;
2805         const char* localeID = "es";
2806         const char* displayLocale = "de";
2807         UChar *displayKeywordValue = NULL;
2808         int32_t displayKeywordValueLen = 0;
2809 
2810         /* fetch the displayKeywordValue */
2811         displayKeywordValueLen = uloc_getDisplayKeywordValue(localeID, "calendar", displayLocale, displayKeywordValue, displayKeywordValueLen, &status);
2812         if(U_FAILURE(status)) {
2813           log_err("uloc_getDisplaykeywordValue returned error status %s\n", u_errorName(status));
2814         } else if(displayKeywordValueLen != 0) {
2815           log_err("uloc_getDisplaykeywordValue returned %d should be 0 \n", displayKeywordValueLen);
2816         }
2817     }
2818 }
2819 
2820 
TestGetBaseName(void)2821 static void TestGetBaseName(void) {
2822     static const struct {
2823         const char *localeID;
2824         const char *baseName;
2825     } testCases[] = {
2826         { "de_DE@  C o ll A t i o n   = Phonebook   ", "de_DE" },
2827         { "de@currency = euro; CoLLaTion   = PHONEBOOk", "de" },
2828         { "ja@calendar = buddhist", "ja" }
2829     };
2830 
2831     int32_t i = 0, baseNameLen = 0;
2832     char baseName[256];
2833     UErrorCode status = U_ZERO_ERROR;
2834 
2835     for(i = 0; i < UPRV_LENGTHOF(testCases); i++) {
2836         baseNameLen = uloc_getBaseName(testCases[i].localeID, baseName, 256, &status);
2837         (void)baseNameLen;    /* Suppress set but not used warning. */
2838         if(strcmp(testCases[i].baseName, baseName)) {
2839             log_err("For locale \"%s\" expected baseName \"%s\", but got \"%s\"\n",
2840                 testCases[i].localeID, testCases[i].baseName, baseName);
2841             return;
2842         }
2843     }
2844 }
2845 
TestTrailingNull(void)2846 static void TestTrailingNull(void) {
2847   const char* localeId = "zh_Hans";
2848   UChar buffer[128]; /* sufficient for this test */
2849   int32_t len;
2850   UErrorCode status = U_ZERO_ERROR;
2851   int i;
2852 
2853   len = uloc_getDisplayName(localeId, localeId, buffer, 128, &status);
2854   if (len > 128) {
2855     log_err("buffer too small");
2856     return;
2857   }
2858 
2859   for (i = 0; i < len; ++i) {
2860     if (buffer[i] == 0) {
2861       log_err("name contained null");
2862       return;
2863     }
2864   }
2865 }
2866 
2867 /* Jitterbug 4115 */
TestDisplayNameWarning(void)2868 static void TestDisplayNameWarning(void) {
2869     UChar name[256];
2870     int32_t size;
2871     UErrorCode status = U_ZERO_ERROR;
2872 
2873     size = uloc_getDisplayLanguage("qqq", "kl", name, UPRV_LENGTHOF(name), &status);
2874     (void)size;    /* Suppress set but not used warning. */
2875     if (status != U_USING_DEFAULT_WARNING) {
2876         log_err("For language \"qqq\" in locale \"kl\", expecting U_USING_DEFAULT_WARNING, but got %s\n",
2877             u_errorName(status));
2878     }
2879 }
2880 
2881 
2882 /**
2883  * Compare two locale IDs.  If they are equal, return 0.  If `string'
2884  * starts with `prefix' plus an additional element, that is, string ==
2885  * prefix + '_' + x, then return 1.  Otherwise return a value < 0.
2886  */
_loccmp(const char * string,const char * prefix)2887 static UBool _loccmp(const char* string, const char* prefix) {
2888     int32_t slen = (int32_t)uprv_strlen(string),
2889             plen = (int32_t)uprv_strlen(prefix);
2890     int32_t c = uprv_strncmp(string, prefix, plen);
2891     /* 'root' is less than everything */
2892     if (uprv_strcmp(prefix, "root") == 0) {
2893         return (uprv_strcmp(string, "root") == 0) ? 0 : 1;
2894     }
2895     if (c) return -1; /* mismatch */
2896     if (slen == plen) return 0;
2897     if (string[plen] == '_') return 1;
2898     return -2; /* false match, e.g. "en_USX" cmp "en_US" */
2899 }
2900 
/**
 * Logs whether the requested, valid and actual locales of a service relate
 * as expected: req must be a strict extension of valid (_loccmp > 0), and
 * valid must equal or extend actual (_loccmp >= 0).
 *
 * @param label  name of the service under test, used only in the log output
 * @param req    locale ID that was requested
 * @param valid  locale ID the service reports as valid
 * @param actual locale ID the service reports as actual
 */
static void _checklocs(const char* label,
                       const char* req,
                       const char* valid,
                       const char* actual) {
    /* Fail as soon as either relation is violated. */
    if (_loccmp(req, valid) <= 0 ||
        _loccmp(valid, actual) < 0) {
        log_err("FAIL: %s; req=%s, valid=%s, actual=%s\n",
                label, req, valid, actual);
        return;
    }
    log_verbose("%s; req=%s, valid=%s, actual=%s\n",
                label, req, valid, actual);
}
2916 
TestGetLocale(void)2917 static void TestGetLocale(void) {
2918     UErrorCode ec = U_ZERO_ERROR;
2919     UParseError pe;
2920     UChar EMPTY[1] = {0};
2921 
2922     /* === udat === */
2923 #if !UCONFIG_NO_FORMATTING
2924     {
2925         UDateFormat *obj;
2926         const char *req = "en_US_REDWOODSHORES", *valid, *actual;
2927         obj = udat_open(UDAT_DEFAULT, UDAT_DEFAULT,
2928                         req,
2929                         NULL, 0,
2930                         NULL, 0, &ec);
2931         if (U_FAILURE(ec)) {
2932             log_data_err("udat_open failed.Error %s\n", u_errorName(ec));
2933             return;
2934         }
2935         valid = udat_getLocaleByType(obj, ULOC_VALID_LOCALE, &ec);
2936         actual = udat_getLocaleByType(obj, ULOC_ACTUAL_LOCALE, &ec);
2937         if (U_FAILURE(ec)) {
2938             log_err("udat_getLocaleByType() failed\n");
2939             return;
2940         }
2941         _checklocs("udat", req, valid, actual);
2942         udat_close(obj);
2943     }
2944 #endif
2945 
2946     /* === ucal === */
2947 #if !UCONFIG_NO_FORMATTING
2948     {
2949         UCalendar *obj;
2950         const char *req = "fr_FR_PROVENCAL", *valid, *actual;
2951         obj = ucal_open(NULL, 0,
2952                         req,
2953                         UCAL_GREGORIAN,
2954                         &ec);
2955         if (U_FAILURE(ec)) {
2956             log_err("ucal_open failed with error: %s\n", u_errorName(ec));
2957             return;
2958         }
2959         valid = ucal_getLocaleByType(obj, ULOC_VALID_LOCALE, &ec);
2960         actual = ucal_getLocaleByType(obj, ULOC_ACTUAL_LOCALE, &ec);
2961         if (U_FAILURE(ec)) {
2962             log_err("ucal_getLocaleByType() failed\n");
2963             return;
2964         }
2965         _checklocs("ucal", req, valid, actual);
2966         ucal_close(obj);
2967     }
2968 #endif
2969 
2970     /* === unum === */
2971 #if !UCONFIG_NO_FORMATTING
2972     {
2973         UNumberFormat *obj;
2974         const char *req = "zh_Hant_TW_TAINAN", *valid, *actual;
2975         obj = unum_open(UNUM_DECIMAL,
2976                         NULL, 0,
2977                         req,
2978                         &pe, &ec);
2979         if (U_FAILURE(ec)) {
2980             log_err("unum_open failed\n");
2981             return;
2982         }
2983         valid = unum_getLocaleByType(obj, ULOC_VALID_LOCALE, &ec);
2984         actual = unum_getLocaleByType(obj, ULOC_ACTUAL_LOCALE, &ec);
2985         if (U_FAILURE(ec)) {
2986             log_err("unum_getLocaleByType() failed\n");
2987             return;
2988         }
2989         _checklocs("unum", req, valid, actual);
2990         unum_close(obj);
2991     }
2992 #endif
2993 
2994     /* === umsg === */
2995 #if 0
2996     /* commented out by weiv 01/12/2005. umsg_getLocaleByType is to be removed */
2997 #if !UCONFIG_NO_FORMATTING
2998     {
2999         UMessageFormat *obj;
3000         const char *req = "ja_JP_TAKAYAMA", *valid, *actual;
3001         UBool test;
3002         obj = umsg_open(EMPTY, 0,
3003                         req,
3004                         &pe, &ec);
3005         if (U_FAILURE(ec)) {
3006             log_err("umsg_open failed\n");
3007             return;
3008         }
3009         valid = umsg_getLocaleByType(obj, ULOC_VALID_LOCALE, &ec);
3010         actual = umsg_getLocaleByType(obj, ULOC_ACTUAL_LOCALE, &ec);
3011         if (U_FAILURE(ec)) {
3012             log_err("umsg_getLocaleByType() failed\n");
3013             return;
3014         }
3015         /* We want the valid to be strictly > the bogus requested locale,
3016            and the valid to be >= the actual. */
3017         /* TODO MessageFormat is currently just storing the locale it is given.
3018            As a result, it will return whatever it was given, even if the
3019            locale is invalid. */
3020         test = (_cmpversion("3.2") <= 0) ?
3021             /* Here is the weakened test for 3.0: */
3022             (_loccmp(req, valid) >= 0) :
3023             /* Here is what the test line SHOULD be: */
3024             (_loccmp(req, valid) > 0);
3025 
3026         if (test &&
3027             _loccmp(valid, actual) >= 0) {
3028             log_verbose("umsg; req=%s, valid=%s, actual=%s\n", req, valid, actual);
3029         } else {
3030             log_err("FAIL: umsg; req=%s, valid=%s, actual=%s\n", req, valid, actual);
3031         }
3032         umsg_close(obj);
3033     }
3034 #endif
3035 #endif
3036 
3037     /* === ubrk === */
3038 #if !UCONFIG_NO_BREAK_ITERATION
3039     {
3040         UBreakIterator *obj;
3041         const char *req = "ar_KW_ABDALI", *valid, *actual;
3042         obj = ubrk_open(UBRK_WORD,
3043                         req,
3044                         EMPTY,
3045                         0,
3046                         &ec);
3047         if (U_FAILURE(ec)) {
3048             log_err("ubrk_open failed. Error: %s \n", u_errorName(ec));
3049             return;
3050         }
3051         valid = ubrk_getLocaleByType(obj, ULOC_VALID_LOCALE, &ec);
3052         actual = ubrk_getLocaleByType(obj, ULOC_ACTUAL_LOCALE, &ec);
3053         if (U_FAILURE(ec)) {
3054             log_err("ubrk_getLocaleByType() failed\n");
3055             return;
3056         }
3057         _checklocs("ubrk", req, valid, actual);
3058         ubrk_close(obj);
3059     }
3060 #endif
3061 
3062     /* === ucol === */
3063 #if !UCONFIG_NO_COLLATION
3064     {
3065         UCollator *obj;
3066         const char *req = "es_AR_BUENOSAIRES", *valid, *actual;
3067         obj = ucol_open(req, &ec);
3068         if (U_FAILURE(ec)) {
3069             log_err("ucol_open failed - %s\n", u_errorName(ec));
3070             return;
3071         }
3072         valid = ucol_getLocaleByType(obj, ULOC_VALID_LOCALE, &ec);
3073         actual = ucol_getLocaleByType(obj, ULOC_ACTUAL_LOCALE, &ec);
3074         if (U_FAILURE(ec)) {
3075             log_err("ucol_getLocaleByType() failed\n");
3076             return;
3077         }
3078         _checklocs("ucol", req, valid, actual);
3079         ucol_close(obj);
3080     }
3081 #endif
3082 }
TestEnglishExemplarCharacters(void)3083 static void TestEnglishExemplarCharacters(void) {
3084     UErrorCode status = U_ZERO_ERROR;
3085     int i;
3086     USet *exSet = NULL;
3087     UChar testChars[] = {
3088         0x61,   /* standard */
3089         0xE1,   /* auxiliary */
3090         0x41,   /* index */
3091         0x2D    /* punctuation */
3092     };
3093     ULocaleData *uld = ulocdata_open("en", &status);
3094     if (U_FAILURE(status)) {
3095         log_data_err("ulocdata_open() failed : %s - (Are you missing data?)\n", u_errorName(status));
3096         return;
3097     }
3098 
3099     for (i = 0; i < ULOCDATA_ES_COUNT; i++) {
3100         exSet = ulocdata_getExemplarSet(uld, exSet, 0, (ULocaleDataExemplarSetType)i, &status);
3101         if (U_FAILURE(status)) {
3102             log_err_status(status, "ulocdata_getExemplarSet() for type %d failed\n", i);
3103             status = U_ZERO_ERROR;
3104             continue;
3105         }
3106         if (!uset_contains(exSet, (UChar32)testChars[i])) {
3107             log_err("Character U+%04X is not included in exemplar type %d\n", testChars[i], i);
3108         }
3109     }
3110 
3111     uset_close(exSet);
3112     ulocdata_close(uld);
3113 }
3114 
TestNonexistentLanguageExemplars(void)3115 static void TestNonexistentLanguageExemplars(void) {
3116     /* JB 4068 - Nonexistent language */
3117     UErrorCode ec = U_ZERO_ERROR;
3118     ULocaleData *uld = ulocdata_open("qqq",&ec);
3119     if (ec != U_USING_DEFAULT_WARNING) {
3120         log_err_status(ec, "Exemplar set for \"qqq\", expecting U_USING_DEFAULT_WARNING, but got %s\n",
3121             u_errorName(ec));
3122     }
3123     uset_close(ulocdata_getExemplarSet(uld, NULL, 0, ULOCDATA_ES_STANDARD, &ec));
3124     ulocdata_close(uld);
3125 }
3126 
TestLocDataErrorCodeChaining(void)3127 static void TestLocDataErrorCodeChaining(void) {
3128     UErrorCode ec = U_USELESS_COLLATOR_ERROR;
3129     ulocdata_open(NULL, &ec);
3130     ulocdata_getExemplarSet(NULL, NULL, 0, ULOCDATA_ES_STANDARD, &ec);
3131     ulocdata_getDelimiter(NULL, ULOCDATA_DELIMITER_COUNT, NULL, -1, &ec);
3132     ulocdata_getMeasurementSystem(NULL, &ec);
3133     ulocdata_getPaperSize(NULL, NULL, NULL, &ec);
3134     if (ec != U_USELESS_COLLATOR_ERROR) {
3135         log_err("ulocdata API changed the error code to %s\n", u_errorName(ec));
3136     }
3137 }
3138 
3139 typedef struct {
3140     const char*        locale;
3141     UMeasurementSystem measureSys;
3142 } LocToMeasureSys;
3143 
3144 static const LocToMeasureSys locToMeasures[] = {
3145     { "fr_FR",            UMS_SI },
3146     { "en",               UMS_US },
3147     { "en_GB",            UMS_UK },
3148     { "fr_FR@rg=GBZZZZ",  UMS_UK },
3149     { "en@rg=frzzzz",     UMS_SI },
3150     { "en_GB@rg=USZZZZ",  UMS_US },
3151     { NULL, (UMeasurementSystem)0 } /* terminator */
3152 };
3153 
TestLocDataWithRgTag(void)3154 static void TestLocDataWithRgTag(void) {
3155     const  LocToMeasureSys* locToMeasurePtr = locToMeasures;
3156     for (; locToMeasurePtr->locale != NULL; locToMeasurePtr++) {
3157         UErrorCode status = U_ZERO_ERROR;
3158         UMeasurementSystem measureSys = ulocdata_getMeasurementSystem(locToMeasurePtr->locale, &status);
3159         if (U_FAILURE(status)) {
3160             log_data_err("ulocdata_getMeasurementSystem(\"%s\", ...) failed: %s - Are you missing data?\n",
3161                         locToMeasurePtr->locale, u_errorName(status));
3162         } else if (measureSys != locToMeasurePtr->measureSys) {
3163             log_err("ulocdata_getMeasurementSystem(\"%s\", ...), expected %d, got %d\n",
3164                         locToMeasurePtr->locale, (int) locToMeasurePtr->measureSys, (int)measureSys);
3165         }
3166     }
3167 }
3168 
TestLanguageExemplarsFallbacks(void)3169 static void TestLanguageExemplarsFallbacks(void) {
3170     /* Test that en_US fallsback, but en doesn't fallback. */
3171     UErrorCode ec = U_ZERO_ERROR;
3172     ULocaleData *uld = ulocdata_open("en_US",&ec);
3173     uset_close(ulocdata_getExemplarSet(uld, NULL, 0, ULOCDATA_ES_STANDARD, &ec));
3174     if (ec != U_USING_FALLBACK_WARNING) {
3175         log_err_status(ec, "Exemplar set for \"en_US\", expecting U_USING_FALLBACK_WARNING, but got %s\n",
3176             u_errorName(ec));
3177     }
3178     ulocdata_close(uld);
3179     ec = U_ZERO_ERROR;
3180     uld = ulocdata_open("en",&ec);
3181     uset_close(ulocdata_getExemplarSet(uld, NULL, 0, ULOCDATA_ES_STANDARD, &ec));
3182     if (ec != U_ZERO_ERROR) {
3183         log_err_status(ec, "Exemplar set for \"en\", expecting U_ZERO_ERROR, but got %s\n",
3184             u_errorName(ec));
3185     }
3186     ulocdata_close(uld);
3187 }
3188 
/**
 * Returns the name that udbg_enumName() reports for a UAcceptResult value,
 * for use in log messages.
 */
static const char *acceptResult(UAcceptResult uar) {
    const char *name = udbg_enumName(UDBG_UAcceptResult, uar);
    return name;
}
3192 
TestAcceptLanguage(void)3193 static void TestAcceptLanguage(void) {
3194     UErrorCode status = U_ZERO_ERROR;
3195     UAcceptResult outResult;
3196     UEnumeration *available;
3197     char tmp[200];
3198     int i;
3199     int32_t rc = 0;
3200 
3201     struct {
3202         int32_t httpSet;       /**< Which of http[] should be used? */
3203         const char *icuSet;    /**< ? */
3204         const char *expect;    /**< The expected locale result */
3205         UAcceptResult res;     /**< The expected error code */
3206         UErrorCode expectStatus; /**< expected status */
3207     } tests[] = {
3208         /*0*/{ 0, NULL, "mt_MT", ULOC_ACCEPT_VALID, U_ZERO_ERROR},
3209         /*1*/{ 1, NULL, "en", ULOC_ACCEPT_VALID, U_ZERO_ERROR},
3210         /*2*/{ 2, NULL, "en_GB", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR},
3211         /*3*/{ 3, NULL, "", ULOC_ACCEPT_FAILED, U_ZERO_ERROR},
3212         /*4*/{ 4, NULL, "es", ULOC_ACCEPT_VALID, U_ZERO_ERROR},
3213         /*5*/{ 5, NULL, "zh", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR},  /* XF */
3214         /*6*/{ 6, NULL, "ja", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR},  /* XF */
3215         /*7*/{ 7, NULL, "zh", ULOC_ACCEPT_FALLBACK, U_ZERO_ERROR},  /* XF */
3216         /*8*/{ 8, NULL, "", ULOC_ACCEPT_FAILED, U_ILLEGAL_ARGUMENT_ERROR },  /*  */
3217         /*9*/{ 9, NULL, "", ULOC_ACCEPT_FAILED, U_ILLEGAL_ARGUMENT_ERROR },  /*  */
3218        /*10*/{10, NULL, "", ULOC_ACCEPT_FAILED, U_ILLEGAL_ARGUMENT_ERROR },  /*  */
3219        /*11*/{11, NULL, "", ULOC_ACCEPT_FAILED, U_ILLEGAL_ARGUMENT_ERROR },  /*  */
3220     };
3221     const int32_t numTests = UPRV_LENGTHOF(tests);
3222     static const char *http[] = {
3223         /*0*/ "mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, "
3224               "iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, "
3225               "es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, "
3226               "nl-nl;q=0.55, nl;q=0.53, th-th-traditional;q=0.01",
3227         /*1*/ "ja;q=0.5, en;q=0.8, tlh",
3228         /*2*/ "en-wf, de-lx;q=0.8",
3229         /*3*/ "mga-ie;q=0.9, sux",
3230         /*4*/ "xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, "
3231               "xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, "
3232               "xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, "
3233               "xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, "
3234               "xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, "
3235               "xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, "
3236               "xxx-yyy;q=0.01, xxx-yyy;q=0.01, xxx-yyy;q=0.01, xx-yy;q=0.1, "
3237               "es",
3238         /*5*/ "zh-xx;q=0.9, en;q=0.6",
3239         /*6*/ "ja-JA",
3240         /*7*/ "zh-xx;q=0.9",
3241        /*08*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
3242               "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
3243               "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
3244               "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", // 156
3245        /*09*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
3246               "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
3247               "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
3248               "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB", // 157 (this hits U_STRING_NOT_TERMINATED_WARNING )
3249        /*10*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
3250               "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
3251               "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
3252               "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABC", // 158
3253        /*11*/ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
3254               "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
3255               "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
3256               "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", // 163 bytes
3257     };
3258 
3259     for(i=0;i<numTests;i++) {
3260         outResult = -3;
3261         status=U_ZERO_ERROR;
3262         log_verbose("test #%d: http[%s], ICU[%s], expect %s, %s\n",
3263             i, http[tests[i].httpSet], tests[i].icuSet, tests[i].expect, acceptResult(tests[i].res));
3264 
3265         available = ures_openAvailableLocales(tests[i].icuSet, &status);
3266         tmp[0]=0;
3267         rc = uloc_acceptLanguageFromHTTP(tmp, 199, &outResult,
3268                                          http[tests[i].httpSet], available, &status);
3269         (void)rc;    /* Suppress set but not used warning. */
3270         uenum_close(available);
3271         log_verbose(" got %s, %s [%s]\n",
3272                     tmp[0]?tmp:"(EMPTY)", acceptResult(outResult), u_errorName(status));
3273         if(status != tests[i].expectStatus) {
3274           log_err_status(status,
3275                          "FAIL: expected status %s but got %s\n",
3276                          u_errorName(tests[i].expectStatus),
3277                          u_errorName(status));
3278         } else if(U_SUCCESS(tests[i].expectStatus)) {
3279             /* don't check content if expected failure */
3280             if(outResult != tests[i].res) {
3281             log_err_status(status, "FAIL: #%d: expected outResult of %s but got %s\n", i,
3282                 acceptResult( tests[i].res),
3283                 acceptResult( outResult));
3284             log_info("test #%d: http[%s], ICU[%s], expect %s, %s\n",
3285                      i, http[tests[i].httpSet], tests[i].icuSet,
3286                      tests[i].expect,acceptResult(tests[i].res));
3287             }
3288             if((outResult>0)&&uprv_strcmp(tmp, tests[i].expect)) {
3289               log_err_status(status,
3290                              "FAIL: #%d: expected %s but got %s\n",
3291                              i, tests[i].expect, tmp);
3292               log_info("test #%d: http[%s], ICU[%s], expect %s, %s\n",
3293                        i, http[tests[i].httpSet], tests[i].icuSet, tests[i].expect, acceptResult(tests[i].res));
3294             }
3295         }
3296     }
3297 
3298     // API coverage
3299     status = U_ZERO_ERROR;
3300     static const char *const supported[] = { "en-US", "en-GB", "de-DE", "ja-JP" };
3301     const char * desired[] = { "de-LI", "en-IN", "zu", "fr" };
3302     available = uenum_openCharStringsEnumeration(supported, UPRV_LENGTHOF(supported), &status);
3303     tmp[0]=0;
3304     rc = uloc_acceptLanguage(tmp, 199, &outResult, desired, UPRV_LENGTHOF(desired), available, &status);
3305     if (U_FAILURE(status) || rc != 5 || uprv_strcmp(tmp, "de_DE") != 0 || outResult == ULOC_ACCEPT_FAILED) {
3306         log_err("uloc_acceptLanguage() failed to do a simple match\n");
3307     }
3308     uenum_close(available);
3309 }
3310 
/**
 * Locale alias pairs, each { old locale ID, new locale ID }.
 * The table is read-only, so both the strings and the pointers are const.
 */
static const char* const LOCALE_ALIAS[][2] = {
    {"in", "id"},
    {"in_ID", "id_ID"},
    {"iw", "he"},
    {"iw_IL", "he_IL"},
    {"ji", "yi"},
    {"en_BU", "en_MM"},
    {"en_DY", "en_BJ"},
    {"en_HV", "en_BF"},
    {"en_NH", "en_VU"},
    {"en_RH", "en_ZW"},
    {"en_TP", "en_TL"},
    {"en_ZR", "en_CD"}
};
isLocaleAvailable(UResourceBundle * resIndex,const char * loc)3325 static UBool isLocaleAvailable(UResourceBundle* resIndex, const char* loc){
3326     UErrorCode status = U_ZERO_ERROR;
3327     int32_t len = 0;
3328     ures_getStringByKey(resIndex, loc,&len, &status);
3329     if(U_FAILURE(status)){
3330         return false;
3331     }
3332     return true;
3333 }
3334 
TestCalendar(void)3335 static void TestCalendar(void) {
3336 #if !UCONFIG_NO_FORMATTING
3337     int i;
3338     UErrorCode status = U_ZERO_ERROR;
3339     UResourceBundle *resIndex = ures_open(NULL,"res_index", &status);
3340     if(U_FAILURE(status)){
3341         log_err_status(status, "Could not open res_index.res. Exiting. Error: %s\n", u_errorName(status));
3342         return;
3343     }
3344     for (i=0; i<UPRV_LENGTHOF(LOCALE_ALIAS); i++) {
3345         const char* oldLoc = LOCALE_ALIAS[i][0];
3346         const char* newLoc = LOCALE_ALIAS[i][1];
3347         UCalendar* c1 = NULL;
3348         UCalendar* c2 = NULL;
3349 
3350         /*Test function "getLocale(ULocale.VALID_LOCALE)"*/
3351         const char* l1 = ucal_getLocaleByType(c1, ULOC_VALID_LOCALE, &status);
3352         const char* l2 = ucal_getLocaleByType(c2, ULOC_VALID_LOCALE, &status);
3353 
3354         if(!isLocaleAvailable(resIndex, newLoc)){
3355             continue;
3356         }
3357         c1 = ucal_open(NULL, -1, oldLoc, UCAL_GREGORIAN, &status);
3358         c2 = ucal_open(NULL, -1, newLoc, UCAL_GREGORIAN, &status);
3359 
3360         if (strcmp(newLoc,l1)!=0 || strcmp(l1,l2)!=0 || status!=U_ZERO_ERROR) {
3361             log_err("The locales are not equal!.Old: %s, New: %s \n", oldLoc, newLoc);
3362         }
3363         log_verbose("ucal_getLocaleByType old:%s   new:%s\n", l1, l2);
3364         ucal_close(c1);
3365         ucal_close(c2);
3366     }
3367     ures_close(resIndex);
3368 #endif
3369 }
3370 
TestDateFormat(void)3371 static void TestDateFormat(void) {
3372 #if !UCONFIG_NO_FORMATTING
3373     int i;
3374     UErrorCode status = U_ZERO_ERROR;
3375     UResourceBundle *resIndex = ures_open(NULL,"res_index", &status);
3376     if(U_FAILURE(status)){
3377         log_err_status(status, "Could not open res_index.res. Exiting. Error: %s\n", u_errorName(status));
3378         return;
3379     }
3380     for (i=0; i<UPRV_LENGTHOF(LOCALE_ALIAS); i++) {
3381         const char* oldLoc = LOCALE_ALIAS[i][0];
3382         const char* newLoc = LOCALE_ALIAS[i][1];
3383         UDateFormat* df1 = NULL;
3384         UDateFormat* df2 = NULL;
3385         const char* l1 = NULL;
3386         const char* l2 = NULL;
3387 
3388         if(!isLocaleAvailable(resIndex, newLoc)){
3389             continue;
3390         }
3391         df1 = udat_open(UDAT_FULL, UDAT_FULL,oldLoc, NULL, 0, NULL, -1, &status);
3392         df2 = udat_open(UDAT_FULL, UDAT_FULL,newLoc, NULL, 0, NULL, -1, &status);
3393         if(U_FAILURE(status)){
3394             log_err("Creation of date format failed  %s\n", u_errorName(status));
3395             return;
3396         }
3397         /*Test function "getLocale"*/
3398         l1 = udat_getLocaleByType(df1, ULOC_VALID_LOCALE, &status);
3399         l2 = udat_getLocaleByType(df2, ULOC_VALID_LOCALE, &status);
3400         if(U_FAILURE(status)){
3401             log_err("Fetching the locale by type failed.  %s\n", u_errorName(status));
3402         }
3403         if (strcmp(newLoc,l1)!=0 || strcmp(l1,l2)!=0) {
3404             log_err("The locales are not equal!.Old: %s, New: %s \n", oldLoc, newLoc);
3405         }
3406         log_verbose("udat_getLocaleByType old:%s   new:%s\n", l1, l2);
3407         udat_close(df1);
3408         udat_close(df2);
3409     }
3410     ures_close(resIndex);
3411 #endif
3412 }
3413 
TestCollation(void)3414 static void TestCollation(void) {
3415 #if !UCONFIG_NO_COLLATION
3416     int i;
3417     UErrorCode status = U_ZERO_ERROR;
3418     UResourceBundle *resIndex = ures_open(NULL,"res_index", &status);
3419     if(U_FAILURE(status)){
3420         log_err_status(status, "Could not open res_index.res. Exiting. Error: %s\n", u_errorName(status));
3421         return;
3422     }
3423     for (i=0; i<UPRV_LENGTHOF(LOCALE_ALIAS); i++) {
3424         const char* oldLoc = LOCALE_ALIAS[i][0];
3425         const char* newLoc = LOCALE_ALIAS[i][1];
3426         UCollator* c1 = NULL;
3427         UCollator* c2 = NULL;
3428         const char* l1 = NULL;
3429         const char* l2 = NULL;
3430 
3431         status = U_ZERO_ERROR;
3432         if(!isLocaleAvailable(resIndex, newLoc)){
3433             continue;
3434         }
3435         if(U_FAILURE(status)){
3436             log_err("Creation of collators failed  %s\n", u_errorName(status));
3437             return;
3438         }
3439         c1 = ucol_open(oldLoc, &status);
3440         c2 = ucol_open(newLoc, &status);
3441         l1 = ucol_getLocaleByType(c1, ULOC_VALID_LOCALE, &status);
3442         l2 = ucol_getLocaleByType(c2, ULOC_VALID_LOCALE, &status);
3443         if(U_FAILURE(status)){
3444             log_err("Fetching the locale names failed failed  %s\n", u_errorName(status));
3445         }
3446         if (strcmp(newLoc,l1)!=0 || strcmp(l1,l2)!=0) {
3447             log_err("The locales are not equal!.Old: %s, New: %s \n", oldLoc, newLoc);
3448         }
3449         log_verbose("ucol_getLocaleByType old:%s   new:%s\n", l1, l2);
3450         ucol_close(c1);
3451         ucol_close(c2);
3452     }
3453     ures_close(resIndex);
3454 #endif
3455 }
3456 
3457 typedef struct OrientationStructTag {
3458     const char* localeId;
3459     ULayoutType character;
3460     ULayoutType line;
3461 } OrientationStruct;
3462 
/**
 * Maps a ULayoutType value to the spelling of its enumerator, for use in
 * log messages.
 *
 * @param type  the layout value to describe
 * @return a string literal; ULOC_LAYOUT_UNKNOWN and any value not listed
 *         below yield "Unknown enum value for ULayoutType!"
 */
static const char* ULayoutTypeToString(ULayoutType type)
{
    const char *label = "Unknown enum value for ULayoutType!";

    switch (type) {
    case ULOC_LAYOUT_LTR:
        label = "ULOC_LAYOUT_LTR";
        break;
    case ULOC_LAYOUT_RTL:
        label = "ULOC_LAYOUT_RTL";
        break;
    case ULOC_LAYOUT_TTB:
        label = "ULOC_LAYOUT_TTB";
        break;
    case ULOC_LAYOUT_BTT:
        label = "ULOC_LAYOUT_BTT";
        break;
    case ULOC_LAYOUT_UNKNOWN:
        /* No dedicated name: keep the fallback text. */
        break;
    }

    return label;
}
3485 
TestOrientation(void)3486 static void  TestOrientation(void)
3487 {
3488     static const OrientationStruct toTest [] = {
3489         { "ar", ULOC_LAYOUT_RTL, ULOC_LAYOUT_TTB },
3490         { "aR", ULOC_LAYOUT_RTL, ULOC_LAYOUT_TTB },
3491         { "ar_Arab", ULOC_LAYOUT_RTL, ULOC_LAYOUT_TTB },
3492         { "fa", ULOC_LAYOUT_RTL, ULOC_LAYOUT_TTB },
3493         { "Fa", ULOC_LAYOUT_RTL, ULOC_LAYOUT_TTB },
3494         { "he", ULOC_LAYOUT_RTL, ULOC_LAYOUT_TTB },
3495         { "ps", ULOC_LAYOUT_RTL, ULOC_LAYOUT_TTB },
3496         { "ur", ULOC_LAYOUT_RTL, ULOC_LAYOUT_TTB },
3497         { "UR", ULOC_LAYOUT_RTL, ULOC_LAYOUT_TTB },
3498         { "en", ULOC_LAYOUT_LTR, ULOC_LAYOUT_TTB }
3499     };
3500 
3501     size_t i = 0;
3502     for (; i < UPRV_LENGTHOF(toTest); ++i) {
3503         UErrorCode statusCO = U_ZERO_ERROR;
3504         UErrorCode statusLO = U_ZERO_ERROR;
3505         const char* const localeId = toTest[i].localeId;
3506         const ULayoutType co = uloc_getCharacterOrientation(localeId, &statusCO);
3507         const ULayoutType expectedCO = toTest[i].character;
3508         const ULayoutType lo = uloc_getLineOrientation(localeId, &statusLO);
3509         const ULayoutType expectedLO = toTest[i].line;
3510         if (U_FAILURE(statusCO)) {
3511             log_err_status(statusCO,
3512                 "  unexpected failure for uloc_getCharacterOrientation(), with localId \"%s\" and status %s\n",
3513                 localeId,
3514                 u_errorName(statusCO));
3515         }
3516         else if (co != expectedCO) {
3517             log_err(
3518                 "  unexpected result for uloc_getCharacterOrientation(), with localeId \"%s\". Expected %s but got result %s\n",
3519                 localeId,
3520                 ULayoutTypeToString(expectedCO),
3521                 ULayoutTypeToString(co));
3522         }
3523         if (U_FAILURE(statusLO)) {
3524             log_err_status(statusLO,
3525                 "  unexpected failure for uloc_getLineOrientation(), with localId \"%s\" and status %s\n",
3526                 localeId,
3527                 u_errorName(statusLO));
3528         }
3529         else if (lo != expectedLO) {
3530             log_err(
3531                 "  unexpected result for uloc_getLineOrientation(), with localeId \"%s\". Expected %s but got result %s\n",
3532                 localeId,
3533                 ULayoutTypeToString(expectedLO),
3534                 ULayoutTypeToString(lo));
3535         }
3536     }
3537 }
3538 
TestULocale(void)3539 static void  TestULocale(void) {
3540     int i;
3541     UErrorCode status = U_ZERO_ERROR;
3542     UResourceBundle *resIndex = ures_open(NULL,"res_index", &status);
3543     if(U_FAILURE(status)){
3544         log_err_status(status, "Could not open res_index.res. Exiting. Error: %s\n", u_errorName(status));
3545         return;
3546     }
3547     for (i=0; i<UPRV_LENGTHOF(LOCALE_ALIAS); i++) {
3548         const char* oldLoc = LOCALE_ALIAS[i][0];
3549         const char* newLoc = LOCALE_ALIAS[i][1];
3550         UChar name1[256], name2[256];
3551         char names1[256], names2[256];
3552         int32_t capacity = 256;
3553 
3554         status = U_ZERO_ERROR;
3555         if(!isLocaleAvailable(resIndex, newLoc)){
3556             continue;
3557         }
3558         uloc_getDisplayName(oldLoc, ULOC_US, name1, capacity, &status);
3559         if(U_FAILURE(status)){
3560             log_err("uloc_getDisplayName(%s) failed %s\n", oldLoc, u_errorName(status));
3561         }
3562 
3563         uloc_getDisplayName(newLoc, ULOC_US, name2, capacity, &status);
3564         if(U_FAILURE(status)){
3565             log_err("uloc_getDisplayName(%s) failed %s\n", newLoc, u_errorName(status));
3566         }
3567 
3568         if (u_strcmp(name1, name2)!=0) {
3569             log_err("The locales are not equal!.Old: %s, New: %s \n", oldLoc, newLoc);
3570         }
3571         u_austrcpy(names1, name1);
3572         u_austrcpy(names2, name2);
3573         log_verbose("uloc_getDisplayName old:%s   new:%s\n", names1, names2);
3574     }
3575     ures_close(resIndex);
3576 
3577 }
3578 
TestUResourceBundle(void)3579 static void TestUResourceBundle(void) {
3580     const char* us1;
3581     const char* us2;
3582 
3583     UResourceBundle* rb1 = NULL;
3584     UResourceBundle* rb2 = NULL;
3585     UErrorCode status = U_ZERO_ERROR;
3586     int i;
3587     UResourceBundle *resIndex = NULL;
3588     if(U_FAILURE(status)){
3589         log_err("Could not open res_index.res. Exiting. Error: %s\n", u_errorName(status));
3590         return;
3591     }
3592     resIndex = ures_open(NULL,"res_index", &status);
3593     for (i=0; i<UPRV_LENGTHOF(LOCALE_ALIAS); i++) {
3594 
3595         const char* oldLoc = LOCALE_ALIAS[i][0];
3596         const char* newLoc = LOCALE_ALIAS[i][1];
3597         if(!isLocaleAvailable(resIndex, newLoc)){
3598             continue;
3599         }
3600         rb1 = ures_open(NULL, oldLoc, &status);
3601         if (U_FAILURE(status)) {
3602             log_err("ures_open(%s) failed %s\n", oldLoc, u_errorName(status));
3603         }
3604 
3605         us1 = ures_getLocaleByType(rb1, ULOC_ACTUAL_LOCALE, &status);
3606 
3607         status = U_ZERO_ERROR;
3608         rb2 = ures_open(NULL, newLoc, &status);
3609         if (U_FAILURE(status)) {
3610             log_err("ures_open(%s) failed %s\n", oldLoc, u_errorName(status));
3611         }
3612         us2 = ures_getLocaleByType(rb2, ULOC_ACTUAL_LOCALE, &status);
3613 
3614         if (strcmp(us1,newLoc)!=0 || strcmp(us1,us2)!=0 ) {
3615             log_err("The locales are not equal!.Old: %s, New: %s \n", oldLoc, newLoc);
3616         }
3617 
3618         log_verbose("ures_getStringByKey old:%s   new:%s\n", us1, us2);
3619         ures_close(rb1);
3620         rb1 = NULL;
3621         ures_close(rb2);
3622         rb2 = NULL;
3623     }
3624     ures_close(resIndex);
3625 }
3626 
TestDisplayName(void)3627 static void TestDisplayName(void) {
3628 
3629     UChar oldCountry[256] = {'\0'};
3630     UChar newCountry[256] = {'\0'};
3631     UChar oldLang[256] = {'\0'};
3632     UChar newLang[256] = {'\0'};
3633     char country[256] ={'\0'};
3634     char language[256] ={'\0'};
3635     int32_t capacity = 256;
3636     int i =0;
3637     int j=0;
3638     for (i=0; i<UPRV_LENGTHOF(LOCALE_ALIAS); i++) {
3639         const char* oldLoc = LOCALE_ALIAS[i][0];
3640         const char* newLoc = LOCALE_ALIAS[i][1];
3641         UErrorCode status = U_ZERO_ERROR;
3642         int32_t available = uloc_countAvailable();
3643 
3644         for(j=0; j<available; j++){
3645 
3646             const char* dispLoc = uloc_getAvailable(j);
3647             int32_t oldCountryLen = uloc_getDisplayCountry(oldLoc,dispLoc, oldCountry, capacity, &status);
3648             int32_t newCountryLen = uloc_getDisplayCountry(newLoc, dispLoc, newCountry, capacity, &status);
3649             int32_t oldLangLen = uloc_getDisplayLanguage(oldLoc, dispLoc, oldLang, capacity, &status);
3650             int32_t newLangLen = uloc_getDisplayLanguage(newLoc, dispLoc, newLang, capacity, &status );
3651 
3652             int32_t countryLen = uloc_getCountry(newLoc, country, capacity, &status);
3653             int32_t langLen  = uloc_getLanguage(newLoc, language, capacity, &status);
3654             /* there is a display name for the current country ID */
3655             if(countryLen != newCountryLen ){
3656                 if(u_strncmp(oldCountry,newCountry,oldCountryLen)!=0){
3657                     log_err("uloc_getDisplayCountry() failed for %s in display locale %s \n", oldLoc, dispLoc);
3658                 }
3659             }
3660             /* there is a display name for the current lang ID */
3661             if(langLen!=newLangLen){
3662                 if(u_strncmp(oldLang,newLang,oldLangLen)){
3663                     log_err("uloc_getDisplayLanguage() failed for %s in display locale %s \n", oldLoc, dispLoc);                }
3664             }
3665         }
3666     }
3667 }
3668 
TestGetLocaleForLCID(void)3669 static void TestGetLocaleForLCID(void) {
3670     int32_t i, length, lengthPre;
3671     const char* testLocale = 0;
3672     UErrorCode status = U_ZERO_ERROR;
3673     char            temp2[40], temp3[40];
3674     uint32_t lcid;
3675 
3676     lcid = uloc_getLCID("en_US");
3677     if (lcid != 0x0409) {
3678         log_err("  uloc_getLCID(\"en_US\") = %d, expected 0x0409\n", lcid);
3679     }
3680 
3681     lengthPre = uloc_getLocaleForLCID(lcid, temp2, 4, &status);
3682     if (status != U_BUFFER_OVERFLOW_ERROR) {
3683         log_err("  unexpected result from uloc_getLocaleForLCID with small buffer: %s\n", u_errorName(status));
3684     }
3685     else {
3686         status = U_ZERO_ERROR;
3687     }
3688 
3689     length = uloc_getLocaleForLCID(lcid, temp2, UPRV_LENGTHOF(temp2), &status);
3690     if (U_FAILURE(status)) {
3691         log_err("  unexpected result from uloc_getLocaleForLCID(0x0409): %s\n", u_errorName(status));
3692         status = U_ZERO_ERROR;
3693     }
3694 
3695     if (length != lengthPre) {
3696         log_err("  uloc_getLocaleForLCID(0x0409): returned length %d does not match preflight length %d\n", length, lengthPre);
3697     }
3698 
3699     length = uloc_getLocaleForLCID(0x12345, temp2, UPRV_LENGTHOF(temp2), &status);
3700     if (U_SUCCESS(status)) {
3701         log_err("  unexpected result from uloc_getLocaleForLCID(0x12345): %s, status %s\n", temp2, u_errorName(status));
3702     }
3703     status = U_ZERO_ERROR;
3704 
3705     log_verbose("Testing getLocaleForLCID vs. locale data\n");
3706     for (i = 0; i < LOCALE_SIZE; i++) {
3707 
3708         testLocale=rawData2[NAME][i];
3709 
3710         log_verbose("Testing   %s ......\n", testLocale);
3711 
3712         sscanf(rawData2[LCID][i], "%x", &lcid);
3713         length = uloc_getLocaleForLCID(lcid, temp2, UPRV_LENGTHOF(temp2), &status);
3714         if (U_FAILURE(status)) {
3715             log_err("  unexpected failure of uloc_getLocaleForLCID(%#04x), status %s\n", lcid, u_errorName(status));
3716             status = U_ZERO_ERROR;
3717             continue;
3718         }
3719 
3720         if (length != (int32_t)uprv_strlen(temp2)) {
3721             log_err("  returned length %d not correct for uloc_getLocaleForLCID(%#04x), expected %d\n", length, lcid, uprv_strlen(temp2));
3722         }
3723 
3724         /* Compare language, country, script */
3725         length = uloc_getLanguage(temp2, temp3, UPRV_LENGTHOF(temp3), &status);
3726         if (U_FAILURE(status)) {
3727             log_err("  couldn't get language in uloc_getLocaleForLCID(%#04x) = %s, status %s\n", lcid, temp2, u_errorName(status));
3728             status = U_ZERO_ERROR;
3729         }
3730         else if (uprv_strcmp(temp3, rawData2[LANG][i]) && !(uprv_strcmp(temp3, "nn") == 0 && uprv_strcmp(rawData2[VAR][i], "NY") == 0)) {
3731             log_err("  language doesn't match expected %s in in uloc_getLocaleForLCID(%#04x) = %s\n", rawData2[LANG][i], lcid, temp2);
3732         }
3733 
3734         length = uloc_getScript(temp2, temp3, UPRV_LENGTHOF(temp3), &status);
3735         if (U_FAILURE(status)) {
3736             log_err("  couldn't get script in uloc_getLocaleForLCID(%#04x) = %s, status %s\n", lcid, temp2, u_errorName(status));
3737             status = U_ZERO_ERROR;
3738         }
3739         else if (uprv_strcmp(temp3, rawData2[SCRIPT][i])) {
3740             log_err("  script doesn't match expected %s in in uloc_getLocaleForLCID(%#04x) = %s\n", rawData2[SCRIPT][i], lcid, temp2);
3741         }
3742 
3743         length = uloc_getCountry(temp2, temp3, UPRV_LENGTHOF(temp3), &status);
3744         if (U_FAILURE(status)) {
3745             log_err("  couldn't get country in uloc_getLocaleForLCID(%#04x) = %s, status %s\n", lcid, temp2, u_errorName(status));
3746             status = U_ZERO_ERROR;
3747         }
3748         else if (uprv_strlen(rawData2[CTRY][i]) && uprv_strcmp(temp3, rawData2[CTRY][i])) {
3749             log_err("  country doesn't match expected %s in in uloc_getLocaleForLCID(%#04x) = %s\n", rawData2[CTRY][i], lcid, temp2);
3750         }
3751     }
3752 
3753 }
3754 
/*
 * Rows of { input locale ID, expected locale ID }. Judging by the name and
 * the row comments this is the expectation table for "maximizing" a locale
 * (adding likely subtags); the consumer is outside this excerpt -- confirm
 * before relying on that. An empty expected string is paired with "zzz".
 */
const char* const basic_maximize_data[][2] = {
    { "zu_Zzzz_Zz",              "zu_Latn_ZA" },
    { "ZU_Zz",                   "zu_Latn_ZA" },
    { "zu_LATN",                 "zu_Latn_ZA" },
    { "en_Zz",                   "en_Latn_US" },
    { "en_us",                   "en_Latn_US" },
    { "en_Kore",                 "en_Kore_US" },
    { "en_Kore_Zz",              "en_Kore_US" },
    { "en_Kore_ZA",              "en_Kore_ZA" },
    { "en_Kore_ZA_POSIX",        "en_Kore_ZA_POSIX" },
    { "en_Gujr",                 "en_Gujr_US" },
    { "en_ZA",                   "en_Latn_ZA" },
    { "en_Gujr_Zz",              "en_Gujr_US" },
    { "en_Gujr_ZA",              "en_Gujr_ZA" },
    { "en_Gujr_ZA_POSIX",        "en_Gujr_ZA_POSIX" },
    { "en_US_POSIX_1901",        "en_Latn_US_POSIX_1901" },
    { "en_Latn__POSIX_1901",     "en_Latn_US_POSIX_1901" },
    { "en__POSIX_1901",          "en_Latn_US_POSIX_1901" },
    { "de__POSIX_1901",          "de_Latn_DE_POSIX_1901" },
    { "en_US_BOSTON",            "en_Latn_US_BOSTON" },
    { "th@calendar=buddhist",    "th_Thai_TH@calendar=buddhist" },
    { "ar_ZZ",                   "ar_Arab_EG" },
    { "zh",                      "zh_Hans_CN" },
    { "zh_TW",                   "zh_Hant_TW" },
    { "zh_HK",                   "zh_Hant_HK" },
    { "zh_Hant",                 "zh_Hant_TW" },
    { "zh_Zzzz_CN",              "zh_Hans_CN" },
    { "und_US",                  "en_Latn_US" },
    { "und_HK",                  "zh_Hant_HK" },
    { "zzz",                     "" },
    { "de_u_co_phonebk",         "de_Latn_DE@collation=phonebook" },
    { "de_Latn_u_co_phonebk",    "de_Latn_DE@collation=phonebook" },
    { "de_Latn_DE_u_co_phonebk", "de_Latn_DE@collation=phonebook" },
    { "_Arab@em=emoji",          "ar_Arab_EG@em=emoji" },
    { "_Latn@em=emoji",          "en_Latn_US@em=emoji" },
    { "_Latn_DE@em=emoji",       "de_Latn_DE@em=emoji" },
    { "_Zzzz_DE@em=emoji",       "de_Latn_DE@em=emoji" },
    { "_DE@em=emoji",            "de_Latn_DE@em=emoji" },

    // ICU-22547
    // unicode_language_id = "root" |
    //   (unicode_language_subtag (sep unicode_script_subtag)?  | unicode_script_subtag)
    //     (sep unicode_region_subtag)?  (sep unicode_variant_subtag)* ;
    // so "aaaa" is a well-formed unicode_language_id
    { "aaaa",                    "aaaa" },

    // ICU-22727
    // unicode_language_subtag = alpha{2,3} | alpha{5,8};
    // so "bbbbb", "cccccc", "ddddddd", "eeeeeeee" are
    // well-formed unicode_language_subtag and therefore
    // well-formed unicode_language_id
    // but "fffffffff" is not.
    { "bbbbb",                   "bbbbb" },
    { "cccccc",                  "cccccc" },     // ICU-22727
    { "ddddddd",                 "ddddddd" },    // ICU-22727
    { "eeeeeeee",                "eeeeeeee" },   // ICU-22727

    // ICU-22546
    // If change, please also update common/unicode/uloc.h
    { "und-Zzzz",                "en_Latn_US" },
    { "en",                      "en_Latn_US" },
    { "de",                      "de_Latn_DE" },
    { "sr",                      "sr_Cyrl_RS" },
    { "sh",                      "sh" },
    { "zh_Hani",                 "zh_Hani_CN" },

    // ICU-22545 & ICU-22742
    { "en_XA",                   "en_Latn_XA" },
    { "ar_XB",                   "ar_Arab_XB" },
    { "ru_XC",                   "ru_Cyrl_XC" }
};
3934 
/*
 * Rows of { input locale ID, expected locale ID }. Judging by the name this
 * is the expectation table for "minimizing" a locale (removing likely
 * subtags); the consumer is outside this excerpt -- confirm before relying
 * on that. An empty expected string is paired with "zzz".
 */
const char* const basic_minimize_data[][2] = {
    { "en_Latn_US",                    "en" },
    { "en_Latn_US_POSIX_1901",         "en__POSIX_1901" },
    { "EN_Latn_US_POSIX_1901",         "en__POSIX_1901" },
    { "en_Zzzz_US_POSIX_1901",         "en__POSIX_1901" },
    { "de_Latn_DE_POSIX_1901",         "de__POSIX_1901" },
    { "zzz",                           "" },
    { "en_Latn_US@calendar=gregorian", "en@calendar=gregorian" }
};
3959 
3960 const char* const full_data[][3] = {
3961   {
3962     /*   "FROM", */
3963     /*   "ADD-LIKELY", */
3964     /*   "REMOVE-LIKELY" */
3965     /* }, { */
3966     "aa",
3967     "aa_Latn_ET",
3968     "aa"
3969   }, {
3970     "af",
3971     "af_Latn_ZA",
3972     "af"
3973   }, {
3974     "ak",
3975     "ak_Latn_GH",
3976     "ak"
3977   }, {
3978     "am",
3979     "am_Ethi_ET",
3980     "am"
3981   }, {
3982     "ar",
3983     "ar_Arab_EG",
3984     "ar"
3985   }, {
3986     "as",
3987     "as_Beng_IN",
3988     "as"
3989   }, {
3990     "az",
3991     "az_Latn_AZ",
3992     "az"
3993   }, {
3994     "be",
3995     "be_Cyrl_BY",
3996     "be"
3997   }, {
3998     "bg",
3999     "bg_Cyrl_BG",
4000     "bg"
4001   }, {
4002     "bn",
4003     "bn_Beng_BD",
4004     "bn"
4005   }, {
4006     "bo",
4007     "bo_Tibt_CN",
4008     "bo"
4009   }, {
4010     "bs",
4011     "bs_Latn_BA",
4012     "bs"
4013   }, {
4014     "ca",
4015     "ca_Latn_ES",
4016     "ca"
4017   }, {
4018     "ch",
4019     "ch_Latn_GU",
4020     "ch"
4021   }, {
4022     "chk",
4023     "chk_Latn_FM",
4024     "chk"
4025   }, {
4026     "cs",
4027     "cs_Latn_CZ",
4028     "cs"
4029   }, {
4030     "cy",
4031     "cy_Latn_GB",
4032     "cy"
4033   }, {
4034     "da",
4035     "da_Latn_DK",
4036     "da"
4037   }, {
4038     "de",
4039     "de_Latn_DE",
4040     "de"
4041   }, {
4042     "dv",
4043     "dv_Thaa_MV",
4044     "dv"
4045   }, {
4046     "dz",
4047     "dz_Tibt_BT",
4048     "dz"
4049   }, {
4050     "ee",
4051     "ee_Latn_GH",
4052     "ee"
4053   }, {
4054     "el",
4055     "el_Grek_GR",
4056     "el"
4057   }, {
4058     "en",
4059     "en_Latn_US",
4060     "en"
4061   }, {
4062     "es",
4063     "es_Latn_ES",
4064     "es"
4065   }, {
4066     "et",
4067     "et_Latn_EE",
4068     "et"
4069   }, {
4070     "eu",
4071     "eu_Latn_ES",
4072     "eu"
4073   }, {
4074     "fa",
4075     "fa_Arab_IR",
4076     "fa"
4077   }, {
4078     "fi",
4079     "fi_Latn_FI",
4080     "fi"
4081   }, {
4082     "fil",
4083     "fil_Latn_PH",
4084     "fil"
4085   }, {
4086     "fo",
4087     "fo_Latn_FO",
4088     "fo"
4089   }, {
4090     "fr",
4091     "fr_Latn_FR",
4092     "fr"
4093   }, {
4094     "fur",
4095     "fur_Latn_IT",
4096     "fur"
4097   }, {
4098     "ga",
4099     "ga_Latn_IE",
4100     "ga"
4101   }, {
4102     "gaa",
4103     "gaa_Latn_GH",
4104     "gaa"
4105   }, {
4106     "gl",
4107     "gl_Latn_ES",
4108     "gl"
4109   }, {
4110     "gn",
4111     "gn_Latn_PY",
4112     "gn"
4113   }, {
4114     "gu",
4115     "gu_Gujr_IN",
4116     "gu"
4117   }, {
4118     "ha",
4119     "ha_Latn_NG",
4120     "ha"
4121   }, {
4122     "haw",
4123     "haw_Latn_US",
4124     "haw"
4125   }, {
4126     "he",
4127     "he_Hebr_IL",
4128     "he"
4129   }, {
4130     "hi",
4131     "hi_Deva_IN",
4132     "hi"
4133   }, {
4134     "hr",
4135     "hr_Latn_HR",
4136     "hr"
4137   }, {
4138     "ht",
4139     "ht_Latn_HT",
4140     "ht"
4141   }, {
4142     "hu",
4143     "hu_Latn_HU",
4144     "hu"
4145   }, {
4146     "hy",
4147     "hy_Armn_AM",
4148     "hy"
4149   }, {
4150     "id",
4151     "id_Latn_ID",
4152     "id"
4153   }, {
4154     "ig",
4155     "ig_Latn_NG",
4156     "ig"
4157   }, {
4158     "ii",
4159     "ii_Yiii_CN",
4160     "ii"
4161   }, {
4162     "is",
4163     "is_Latn_IS",
4164     "is"
4165   }, {
4166     "it",
4167     "it_Latn_IT",
4168     "it"
4169   }, {
4170     "ja",
4171     "ja_Jpan_JP",
4172     "ja"
4173   }, {
4174     "ka",
4175     "ka_Geor_GE",
4176     "ka"
4177   }, {
4178     "kaj",
4179     "kaj_Latn_NG",
4180     "kaj"
4181   }, {
4182     "kam",
4183     "kam_Latn_KE",
4184     "kam"
4185   }, {
4186     "kk",
4187     "kk_Cyrl_KZ",
4188     "kk"
4189   }, {
4190     "kl",
4191     "kl_Latn_GL",
4192     "kl"
4193   }, {
4194     "km",
4195     "km_Khmr_KH",
4196     "km"
4197   }, {
4198     "kn",
4199     "kn_Knda_IN",
4200     "kn"
4201   }, {
4202     "ko",
4203     "ko_Kore_KR",
4204     "ko"
4205   }, {
4206     "kok",
4207     "kok_Deva_IN",
4208     "kok"
4209   }, {
4210     "kpe",
4211     "kpe_Latn_LR",
4212     "kpe"
4213   }, {
4214     "ku",
4215     "ku_Latn_TR",
4216     "ku"
4217   }, {
4218     "ky",
4219     "ky_Cyrl_KG",
4220     "ky"
4221   }, {
4222     "la",
4223     "la_Latn_VA",
4224     "la"
4225   }, {
4226     "ln",
4227     "ln_Latn_CD",
4228     "ln"
4229   }, {
4230     "lo",
4231     "lo_Laoo_LA",
4232     "lo"
4233   }, {
4234     "lt",
4235     "lt_Latn_LT",
4236     "lt"
4237   }, {
4238     "lv",
4239     "lv_Latn_LV",
4240     "lv"
4241   }, {
4242     "mg",
4243     "mg_Latn_MG",
4244     "mg"
4245   }, {
4246     "mh",
4247     "mh_Latn_MH",
4248     "mh"
4249   }, {
4250     "mk",
4251     "mk_Cyrl_MK",
4252     "mk"
4253   }, {
4254     "ml",
4255     "ml_Mlym_IN",
4256     "ml"
4257   }, {
4258     "mn",
4259     "mn_Cyrl_MN",
4260     "mn"
4261   }, {
4262     "mr",
4263     "mr_Deva_IN",
4264     "mr"
4265   }, {
4266     "ms",
4267     "ms_Latn_MY",
4268     "ms"
4269   }, {
4270     "mt",
4271     "mt_Latn_MT",
4272     "mt"
4273   }, {
4274     "my",
4275     "my_Mymr_MM",
4276     "my"
4277   }, {
4278     "na",
4279     "na_Latn_NR",
4280     "na"
4281   }, {
4282     "ne",
4283     "ne_Deva_NP",
4284     "ne"
4285   }, {
4286     "niu",
4287     "niu_Latn_NU",
4288     "niu"
4289   }, {
4290     "nl",
4291     "nl_Latn_NL",
4292     "nl"
4293   }, {
4294     "nn",
4295     "nn_Latn_NO",
4296     "nn"
4297   }, {
4298     "no",
4299     "no_Latn_NO",
4300     "no"
4301   }, {
4302     "nr",
4303     "nr_Latn_ZA",
4304     "nr"
4305   }, {
4306     "nso",
4307     "nso_Latn_ZA",
4308     "nso"
4309   }, {
4310     "ny",
4311     "ny_Latn_MW",
4312     "ny"
4313   }, {
4314     "om",
4315     "om_Latn_ET",
4316     "om"
4317   }, {
4318     "or",
4319     "or_Orya_IN",
4320     "or"
4321   }, {
4322     "pa",
4323     "pa_Guru_IN",
4324     "pa"
4325   }, {
4326     "pa_Arab",
4327     "pa_Arab_PK",
4328     "pa_PK"
4329   }, {
4330     "pa_PK",
4331     "pa_Arab_PK",
4332     "pa_PK"
4333   }, {
4334     "pap",
4335     "pap_Latn_CW",
4336     "pap"
4337   }, {
4338     "pau",
4339     "pau_Latn_PW",
4340     "pau"
4341   }, {
4342     "pl",
4343     "pl_Latn_PL",
4344     "pl"
4345   }, {
4346     "ps",
4347     "ps_Arab_AF",
4348     "ps"
4349   }, {
4350     "pt",
4351     "pt_Latn_BR",
4352     "pt"
4353   }, {
4354     "rn",
4355     "rn_Latn_BI",
4356     "rn"
4357   }, {
4358     "ro",
4359     "ro_Latn_RO",
4360     "ro"
4361   }, {
4362     "ru",
4363     "ru_Cyrl_RU",
4364     "ru"
4365   }, {
4366     "rw",
4367     "rw_Latn_RW",
4368     "rw"
4369   }, {
4370     "sa",
4371     "sa_Deva_IN",
4372     "sa"
4373   }, {
4374     "se",
4375     "se_Latn_NO",
4376     "se"
4377   }, {
4378     "sg",
4379     "sg_Latn_CF",
4380     "sg"
4381   }, {
4382     "si",
4383     "si_Sinh_LK",
4384     "si"
4385   }, {
4386     "sid",
4387     "sid_Latn_ET",
4388     "sid"
4389   }, {
4390     "sk",
4391     "sk_Latn_SK",
4392     "sk"
4393   }, {
4394     "sl",
4395     "sl_Latn_SI",
4396     "sl"
4397   }, {
4398     "sm",
4399     "sm_Latn_WS",
4400     "sm"
4401   }, {
4402     "so",
4403     "so_Latn_SO",
4404     "so"
4405   }, {
4406     "sq",
4407     "sq_Latn_AL",
4408     "sq"
4409   }, {
4410     "sr",
4411     "sr_Cyrl_RS",
4412     "sr"
4413   }, {
4414     "ss",
4415     "ss_Latn_ZA",
4416     "ss"
4417   }, {
4418     "st",
4419     "st_Latn_ZA",
4420     "st"
4421   }, {
4422     "sv",
4423     "sv_Latn_SE",
4424     "sv"
4425   }, {
4426     "sw",
4427     "sw_Latn_TZ",
4428     "sw"
4429   }, {
4430     "ta",
4431     "ta_Taml_IN",
4432     "ta"
4433   }, {
4434     "te",
4435     "te_Telu_IN",
4436     "te"
4437   }, {
4438     "tet",
4439     "tet_Latn_TL",
4440     "tet"
4441   }, {
4442     "tg",
4443     "tg_Cyrl_TJ",
4444     "tg"
4445   }, {
4446     "th",
4447     "th_Thai_TH",
4448     "th"
4449   }, {
4450     "ti",
4451     "ti_Ethi_ET",
4452     "ti"
4453   }, {
4454     "tig",
4455     "tig_Ethi_ER",
4456     "tig"
4457   }, {
4458     "tk",
4459     "tk_Latn_TM",
4460     "tk"
4461   }, {
4462     "tkl",
4463     "tkl_Latn_TK",
4464     "tkl"
4465   }, {
4466     "tn",
4467     "tn_Latn_ZA",
4468     "tn"
4469   }, {
4470     "to",
4471     "to_Latn_TO",
4472     "to"
4473   }, {
4474     "tpi",
4475     "tpi_Latn_PG",
4476     "tpi"
4477   }, {
4478     "tr",
4479     "tr_Latn_TR",
4480     "tr"
4481   }, {
4482     "ts",
4483     "ts_Latn_ZA",
4484     "ts"
4485   }, {
4486     "tt",
4487     "tt_Cyrl_RU",
4488     "tt"
4489   }, {
4490     "tvl",
4491     "tvl_Latn_TV",
4492     "tvl"
4493   }, {
4494     "ty",
4495     "ty_Latn_PF",
4496     "ty"
4497   }, {
4498     "uk",
4499     "uk_Cyrl_UA",
4500     "uk"
4501   }, {
4502     "und",
4503     "en_Latn_US",
4504     "en"
4505   }, {
4506     "und_AD",
4507     "ca_Latn_AD",
4508     "ca_AD"
4509   }, {
4510     "und_AE",
4511     "ar_Arab_AE",
4512     "ar_AE"
4513   }, {
4514     "und_AF",
4515     "fa_Arab_AF",
4516     "fa_AF"
4517   }, {
4518     "und_AL",
4519     "sq_Latn_AL",
4520     "sq"
4521   }, {
4522     "und_AM",
4523     "hy_Armn_AM",
4524     "hy"
4525   }, {
4526     "und_AO",
4527     "pt_Latn_AO",
4528     "pt_AO"
4529   }, {
4530     "und_AR",
4531     "es_Latn_AR",
4532     "es_AR"
4533   }, {
4534     "und_AS",
4535     "sm_Latn_AS",
4536     "sm_AS"
4537   }, {
4538     "und_AT",
4539     "de_Latn_AT",
4540     "de_AT"
4541   }, {
4542     "und_AW",
4543     "nl_Latn_AW",
4544     "nl_AW"
4545   }, {
4546     "und_AX",
4547     "sv_Latn_AX",
4548     "sv_AX"
4549   }, {
4550     "und_AZ",
4551     "az_Latn_AZ",
4552     "az"
4553   }, {
4554     "und_Arab",
4555     "ar_Arab_EG",
4556     "ar"
4557   }, {
4558     "und_Arab_IN",
4559     "ur_Arab_IN",
4560     "ur_IN"
4561   }, {
4562     "und_Arab_PK",
4563     "ur_Arab_PK",
4564     "ur"
4565   }, {
4566     "und_Arab_SN",
4567     "ar_Arab_SN",
4568     "ar_SN"
4569   }, {
4570     "und_Armn",
4571     "hy_Armn_AM",
4572     "hy"
4573   }, {
4574     "und_BA",
4575     "bs_Latn_BA",
4576     "bs"
4577   }, {
4578     "und_BD",
4579     "bn_Beng_BD",
4580     "bn"
4581   }, {
4582     "und_BE",
4583     "nl_Latn_BE",
4584     "nl_BE"
4585   }, {
4586     "und_BF",
4587     "fr_Latn_BF",
4588     "fr_BF"
4589   }, {
4590     "und_BG",
4591     "bg_Cyrl_BG",
4592     "bg"
4593   }, {
4594     "und_BH",
4595     "ar_Arab_BH",
4596     "ar_BH"
4597   }, {
4598     "und_BI",
4599     "rn_Latn_BI",
4600     "rn"
4601   }, {
4602     "und_BJ",
4603     "fr_Latn_BJ",
4604     "fr_BJ"
4605   }, {
4606     "und_BN",
4607     "ms_Latn_BN",
4608     "ms_BN"
4609   }, {
4610     "und_BO",
4611     "es_Latn_BO",
4612     "es_BO"
4613   }, {
4614     "und_BR",
4615     "pt_Latn_BR",
4616     "pt"
4617   }, {
4618     "und_BT",
4619     "dz_Tibt_BT",
4620     "dz"
4621   }, {
4622     "und_BY",
4623     "be_Cyrl_BY",
4624     "be"
4625   }, {
4626     "und_Beng",
4627     "bn_Beng_BD",
4628     "bn"
4629   }, {
4630     "und_Beng_IN",
4631     "bn_Beng_IN",
4632     "bn_IN"
4633   }, {
4634     "und_CD",
4635     "sw_Latn_CD",
4636     "sw_CD"
4637   }, {
4638     "und_CF",
4639     "fr_Latn_CF",
4640     "fr_CF"
4641   }, {
4642     "und_CG",
4643     "fr_Latn_CG",
4644     "fr_CG"
4645   }, {
4646     "und_CH",
4647     "de_Latn_CH",
4648     "de_CH"
4649   }, {
4650     "und_CI",
4651     "fr_Latn_CI",
4652     "fr_CI"
4653   }, {
4654     "und_CL",
4655     "es_Latn_CL",
4656     "es_CL"
4657   }, {
4658     "und_CM",
4659     "fr_Latn_CM",
4660     "fr_CM"
4661   }, {
4662     "und_CN",
4663     "zh_Hans_CN",
4664     "zh"
4665   }, {
4666     "und_CO",
4667     "es_Latn_CO",
4668     "es_CO"
4669   }, {
4670     "und_CR",
4671     "es_Latn_CR",
4672     "es_CR"
4673   }, {
4674     "und_CU",
4675     "es_Latn_CU",
4676     "es_CU"
4677   }, {
4678     "und_CV",
4679     "pt_Latn_CV",
4680     "pt_CV"
4681   }, {
4682     "und_CY",
4683     "el_Grek_CY",
4684     "el_CY"
4685   }, {
4686     "und_CZ",
4687     "cs_Latn_CZ",
4688     "cs"
4689   }, {
4690     "und_Cher",
4691     "chr_Cher_US",
4692     "chr"
4693   }, {
4694     "und_Cyrl",
4695     "ru_Cyrl_RU",
4696     "ru"
4697   }, {
4698     "und_Cyrl_KZ",
4699     "ru_Cyrl_KZ",
4700     "ru_KZ"
4701   }, {
4702     "und_DE",
4703     "de_Latn_DE",
4704     "de"
4705   }, {
4706     "und_DJ",
4707     "aa_Latn_DJ",
4708     "aa_DJ"
4709   }, {
4710     "und_DK",
4711     "da_Latn_DK",
4712     "da"
4713   }, {
4714     "und_DO",
4715     "es_Latn_DO",
4716     "es_DO"
4717   }, {
4718     "und_DZ",
4719     "ar_Arab_DZ",
4720     "ar_DZ"
4721   }, {
4722     "und_Deva",
4723     "hi_Deva_IN",
4724     "hi"
4725   }, {
4726     "und_EC",
4727     "es_Latn_EC",
4728     "es_EC"
4729   }, {
4730     "und_EE",
4731     "et_Latn_EE",
4732     "et"
4733   }, {
4734     "und_EG",
4735     "ar_Arab_EG",
4736     "ar"
4737   }, {
4738     "und_EH",
4739     "ar_Arab_EH",
4740     "ar_EH"
4741   }, {
4742     "und_ER",
4743     "ti_Ethi_ER",
4744     "ti_ER"
4745   }, {
4746     "und_ES",
4747     "es_Latn_ES",
4748     "es"
4749   }, {
4750     "und_ET",
4751     "am_Ethi_ET",
4752     "am"
4753   }, {
4754     "und_Ethi",
4755     "am_Ethi_ET",
4756     "am"
4757   }, {
4758     "und_Ethi_ER",
4759     "ti_Ethi_ER",
4760     "ti_ER"
4761   }, {
4762     "und_FI",
4763     "fi_Latn_FI",
4764     "fi"
4765   }, {
4766     "und_FM",
4767     "en_Latn_FM",
4768     "en_FM"
4769   }, {
4770     "und_FO",
4771     "fo_Latn_FO",
4772     "fo"
4773   }, {
4774     "und_FR",
4775     "fr_Latn_FR",
4776     "fr"
4777   }, {
4778     "und_GA",
4779     "fr_Latn_GA",
4780     "fr_GA"
4781   }, {
4782     "und_GE",
4783     "ka_Geor_GE",
4784     "ka"
4785   }, {
4786     "und_GF",
4787     "fr_Latn_GF",
4788     "fr_GF"
4789   }, {
4790     "und_GL",
4791     "kl_Latn_GL",
4792     "kl"
4793   }, {
4794     "und_GN",
4795     "fr_Latn_GN",
4796     "fr_GN"
4797   }, {
4798     "und_GP",
4799     "fr_Latn_GP",
4800     "fr_GP"
4801   }, {
4802     "und_GQ",
4803     "es_Latn_GQ",
4804     "es_GQ"
4805   }, {
4806     "und_GR",
4807     "el_Grek_GR",
4808     "el"
4809   }, {
4810     "und_GT",
4811     "es_Latn_GT",
4812     "es_GT"
4813   }, {
4814     "und_GU",
4815     "en_Latn_GU",
4816     "en_GU"
4817   }, {
4818     "und_GW",
4819     "pt_Latn_GW",
4820     "pt_GW"
4821   }, {
4822     "und_Geor",
4823     "ka_Geor_GE",
4824     "ka"
4825   }, {
4826     "und_Grek",
4827     "el_Grek_GR",
4828     "el"
4829   }, {
4830     "und_Gujr",
4831     "gu_Gujr_IN",
4832     "gu"
4833   }, {
4834     "und_Guru",
4835     "pa_Guru_IN",
4836     "pa"
4837   }, {
4838     "und_HK",
4839     "zh_Hant_HK",
4840     "zh_HK"
4841   }, {
4842     "und_HN",
4843     "es_Latn_HN",
4844     "es_HN"
4845   }, {
4846     "und_HR",
4847     "hr_Latn_HR",
4848     "hr"
4849   }, {
4850     "und_HT",
4851     "ht_Latn_HT",
4852     "ht"
4853   }, {
4854     "und_HU",
4855     "hu_Latn_HU",
4856     "hu"
4857   }, {
4858     "und_Hani",
4859     "zh_Hani_CN",
4860     "zh_Hani"
4861   }, {
4862     "und_Hans",
4863     "zh_Hans_CN",
4864     "zh"
4865   }, {
4866     "und_Hant",
4867     "zh_Hant_TW",
4868     "zh_TW"
4869   }, {
4870     "und_Hebr",
4871     "he_Hebr_IL",
4872     "he"
4873   }, {
4874     "und_IL",
4875     "he_Hebr_IL",
4876     "he"
4877   }, {
4878     "und_IN",
4879     "hi_Deva_IN",
4880     "hi"
4881   }, {
4882     "und_IQ",
4883     "ar_Arab_IQ",
4884     "ar_IQ"
4885   }, {
4886     "und_IR",
4887     "fa_Arab_IR",
4888     "fa"
4889   }, {
4890     "und_IS",
4891     "is_Latn_IS",
4892     "is"
4893   }, {
4894     "und_IT",
4895     "it_Latn_IT",
4896     "it"
4897   }, {
4898     "und_JO",
4899     "ar_Arab_JO",
4900     "ar_JO"
4901   }, {
4902     "und_JP",
4903     "ja_Jpan_JP",
4904     "ja"
4905   }, {
4906     "und_Jpan",
4907     "ja_Jpan_JP",
4908     "ja"
4909   }, {
4910     "und_KG",
4911     "ky_Cyrl_KG",
4912     "ky"
4913   }, {
4914     "und_KH",
4915     "km_Khmr_KH",
4916     "km"
4917   }, {
4918     "und_KM",
4919     "ar_Arab_KM",
4920     "ar_KM"
4921   }, {
4922     "und_KP",
4923     "ko_Kore_KP",
4924     "ko_KP"
4925   }, {
4926     "und_KR",
4927     "ko_Kore_KR",
4928     "ko"
4929   }, {
4930     "und_KW",
4931     "ar_Arab_KW",
4932     "ar_KW"
4933   }, {
4934     "und_KZ",
4935     "ru_Cyrl_KZ",
4936     "ru_KZ"
4937   }, {
4938     "und_Khmr",
4939     "km_Khmr_KH",
4940     "km"
4941   }, {
4942     "und_Knda",
4943     "kn_Knda_IN",
4944     "kn"
4945   }, {
4946     "und_Kore",
4947     "ko_Kore_KR",
4948     "ko"
4949   }, {
4950     "und_LA",
4951     "lo_Laoo_LA",
4952     "lo"
4953   }, {
4954     "und_LB",
4955     "ar_Arab_LB",
4956     "ar_LB"
4957   }, {
4958     "und_LI",
4959     "de_Latn_LI",
4960     "de_LI"
4961   }, {
4962     "und_LK",
4963     "si_Sinh_LK",
4964     "si"
4965   }, {
4966     "und_LS",
4967     "st_Latn_LS",
4968     "st_LS"
4969   }, {
4970     "und_LT",
4971     "lt_Latn_LT",
4972     "lt"
4973   }, {
4974     "und_LU",
4975     "fr_Latn_LU",
4976     "fr_LU"
4977   }, {
4978     "und_LV",
4979     "lv_Latn_LV",
4980     "lv"
4981   }, {
4982     "und_LY",
4983     "ar_Arab_LY",
4984     "ar_LY"
4985   }, {
4986     "und_Laoo",
4987     "lo_Laoo_LA",
4988     "lo"
4989   }, {
4990     "und_Latn_ES",
4991     "es_Latn_ES",
4992     "es"
4993   }, {
4994     "und_Latn_ET",
4995     "en_Latn_ET",
4996     "en_ET"
4997   }, {
4998     "und_Latn_GB",
4999     "en_Latn_GB",
5000     "en_GB"
5001   }, {
5002     "und_Latn_GH",
5003     "ak_Latn_GH",
5004     "ak"
5005   }, {
5006     "und_Latn_ID",
5007     "id_Latn_ID",
5008     "id"
5009   }, {
5010     "und_Latn_IT",
5011     "it_Latn_IT",
5012     "it"
5013   }, {
5014     "und_Latn_NG",
5015     "en_Latn_NG",
5016     "en_NG"
5017   }, {
5018     "und_Latn_TR",
5019     "tr_Latn_TR",
5020     "tr"
5021   }, {
5022     "und_Latn_ZA",
5023     "en_Latn_ZA",
5024     "en_ZA"
5025   }, {
5026     "und_MA",
5027     "ar_Arab_MA",
5028     "ar_MA"
5029   }, {
5030     "und_MC",
5031     "fr_Latn_MC",
5032     "fr_MC"
5033   }, {
5034     "und_MD",
5035     "ro_Latn_MD",
5036     "ro_MD"
5037   }, {
5038     "und_ME",
5039     "sr_Latn_ME",
5040     "sr_ME"
5041   }, {
5042     "und_MG",
5043     "mg_Latn_MG",
5044     "mg"
5045   }, {
5046     "und_MH",
5047     "en_Latn_MH",
5048     "en_MH"
5049   }, {
5050     "und_MK",
5051     "mk_Cyrl_MK",
5052     "mk"
5053   }, {
5054     "und_ML",
5055     "bm_Latn_ML",
5056     "bm"
5057   }, {
5058     "und_MM",
5059     "my_Mymr_MM",
5060     "my"
5061   }, {
5062     "und_MN",
5063     "mn_Cyrl_MN",
5064     "mn"
5065   }, {
5066     "und_MO",
5067     "zh_Hant_MO",
5068     "zh_MO"
5069   }, {
5070     "und_MQ",
5071     "fr_Latn_MQ",
5072     "fr_MQ"
5073   }, {
5074     "und_MR",
5075     "ar_Arab_MR",
5076     "ar_MR"
5077   }, {
5078     "und_MT",
5079     "mt_Latn_MT",
5080     "mt"
5081   }, {
5082     "und_MV",
5083     "dv_Thaa_MV",
5084     "dv"
5085   }, {
5086     "und_MW",
5087     "en_Latn_MW",
5088     "en_MW"
5089   }, {
5090     "und_MX",
5091     "es_Latn_MX",
5092     "es_MX"
5093   }, {
5094     "und_MY",
5095     "ms_Latn_MY",
5096     "ms"
5097   }, {
5098     "und_MZ",
5099     "pt_Latn_MZ",
5100     "pt_MZ"
5101   }, {
5102     "und_Mlym",
5103     "ml_Mlym_IN",
5104     "ml"
5105   }, {
5106     "und_Mymr",
5107     "my_Mymr_MM",
5108     "my"
5109   }, {
5110     "und_NC",
5111     "fr_Latn_NC",
5112     "fr_NC"
5113   }, {
5114     "und_NE",
5115     "ha_Latn_NE",
5116     "ha_NE"
5117   }, {
5118     "und_NG",
5119     "en_Latn_NG",
5120     "en_NG"
5121   }, {
5122     "und_NI",
5123     "es_Latn_NI",
5124     "es_NI"
5125   }, {
5126     "und_NL",
5127     "nl_Latn_NL",
5128     "nl"
5129   }, {
5130     "und_NO",
5131     "nb_Latn_NO",
5132     "nb"
5133   }, {
5134     "und_NP",
5135     "ne_Deva_NP",
5136     "ne"
5137   }, {
5138     "und_NR",
5139     "en_Latn_NR",
5140     "en_NR"
5141   }, {
5142     "und_NU",
5143     "en_Latn_NU",
5144     "en_NU"
5145   }, {
5146     "und_OM",
5147     "ar_Arab_OM",
5148     "ar_OM"
5149   }, {
5150     "und_Orya",
5151     "or_Orya_IN",
5152     "or"
5153   }, {
5154     "und_PA",
5155     "es_Latn_PA",
5156     "es_PA"
5157   }, {
5158     "und_PE",
5159     "es_Latn_PE",
5160     "es_PE"
5161   }, {
5162     "und_PF",
5163     "fr_Latn_PF",
5164     "fr_PF"
5165   }, {
5166     "und_PG",
5167     "tpi_Latn_PG",
5168     "tpi"
5169   }, {
5170     "und_PH",
5171     "fil_Latn_PH",
5172     "fil"
5173   }, {
5174     "und_PL",
5175     "pl_Latn_PL",
5176     "pl"
5177   }, {
5178     "und_PM",
5179     "fr_Latn_PM",
5180     "fr_PM"
5181   }, {
5182     "und_PR",
5183     "es_Latn_PR",
5184     "es_PR"
5185   }, {
5186     "und_PS",
5187     "ar_Arab_PS",
5188     "ar_PS"
5189   }, {
5190     "und_PT",
5191     "pt_Latn_PT",
5192     "pt_PT"
5193   }, {
5194     "und_PW",
5195     "pau_Latn_PW",
5196     "pau"
5197   }, {
5198     "und_PY",
5199     "gn_Latn_PY",
5200     "gn"
5201   }, {
5202     "und_QA",
5203     "ar_Arab_QA",
5204     "ar_QA"
5205   }, {
5206     "und_RE",
5207     "fr_Latn_RE",
5208     "fr_RE"
5209   }, {
5210     "und_RO",
5211     "ro_Latn_RO",
5212     "ro"
5213   }, {
5214     "und_RS",
5215     "sr_Cyrl_RS",
5216     "sr"
5217   }, {
5218     "und_RU",
5219     "ru_Cyrl_RU",
5220     "ru"
5221   }, {
5222     "und_RW",
5223     "rw_Latn_RW",
5224     "rw"
5225   }, {
5226     "und_SA",
5227     "ar_Arab_SA",
5228     "ar_SA"
5229   }, {
5230     "und_SD",
5231     "ar_Arab_SD",
5232     "ar_SD"
5233   }, {
5234     "und_SE",
5235     "sv_Latn_SE",
5236     "sv"
5237   }, {
5238     "und_SG",
5239     "en_Latn_SG",
5240     "en_SG"
5241   }, {
5242     "und_SI",
5243     "sl_Latn_SI",
5244     "sl"
5245   }, {
5246     "und_SJ",
5247     "nb_Latn_SJ",
5248     "nb_SJ"
5249   }, {
5250     "und_SK",
5251     "sk_Latn_SK",
5252     "sk"
5253   }, {
5254     "und_SM",
5255     "it_Latn_SM",
5256     "it_SM"
5257   }, {
5258     "und_SN",
5259     "fr_Latn_SN",
5260     "fr_SN"
5261   }, {
5262     "und_SO",
5263     "so_Latn_SO",
5264     "so"
5265   }, {
5266     "und_SR",
5267     "nl_Latn_SR",
5268     "nl_SR"
5269   }, {
5270     "und_ST",
5271     "pt_Latn_ST",
5272     "pt_ST"
5273   }, {
5274     "und_SV",
5275     "es_Latn_SV",
5276     "es_SV"
5277   }, {
5278     "und_SY",
5279     "ar_Arab_SY",
5280     "ar_SY"
5281   }, {
5282     "und_Sinh",
5283     "si_Sinh_LK",
5284     "si"
5285   }, {
5286     "und_TD",
5287     "fr_Latn_TD",
5288     "fr_TD"
5289   }, {
5290     "und_TG",
5291     "fr_Latn_TG",
5292     "fr_TG"
5293   }, {
5294     "und_TH",
5295     "th_Thai_TH",
5296     "th"
5297   }, {
5298     "und_TJ",
5299     "tg_Cyrl_TJ",
5300     "tg"
5301   }, {
5302     "und_TK",
5303     "tkl_Latn_TK",
5304     "tkl"
5305   }, {
5306     "und_TL",
5307     "pt_Latn_TL",
5308     "pt_TL"
5309   }, {
5310     "und_TM",
5311     "tk_Latn_TM",
5312     "tk"
5313   }, {
5314     "und_TN",
5315     "ar_Arab_TN",
5316     "ar_TN"
5317   }, {
5318     "und_TO",
5319     "to_Latn_TO",
5320     "to"
5321   }, {
5322     "und_TR",
5323     "tr_Latn_TR",
5324     "tr"
5325   }, {
5326     "und_TV",
5327     "tvl_Latn_TV",
5328     "tvl"
5329   }, {
5330     "und_TW",
5331     "zh_Hant_TW",
5332     "zh_TW"
5333   }, {
5334     "und_Taml",
5335     "ta_Taml_IN",
5336     "ta"
5337   }, {
5338     "und_Telu",
5339     "te_Telu_IN",
5340     "te"
5341   }, {
5342     "und_Thaa",
5343     "dv_Thaa_MV",
5344     "dv"
5345   }, {
5346     "und_Thai",
5347     "th_Thai_TH",
5348     "th"
5349   }, {
5350     "und_Tibt",
5351     "bo_Tibt_CN",
5352     "bo"
5353   }, {
5354     "und_UA",
5355     "uk_Cyrl_UA",
5356     "uk"
5357   }, {
5358     "und_UY",
5359     "es_Latn_UY",
5360     "es_UY"
5361   }, {
5362     "und_UZ",
5363     "uz_Latn_UZ",
5364     "uz"
5365   }, {
5366     "und_VA",
5367     "it_Latn_VA",
5368     "it_VA"
5369   }, {
5370     "und_VE",
5371     "es_Latn_VE",
5372     "es_VE"
5373   }, {
5374     "und_VN",
5375     "vi_Latn_VN",
5376     "vi"
5377   }, {
5378     "und_VU",
5379     "bi_Latn_VU",
5380     "bi"
5381   }, {
5382     "und_WF",
5383     "fr_Latn_WF",
5384     "fr_WF"
5385   }, {
5386     "und_WS",
5387     "sm_Latn_WS",
5388     "sm"
5389   }, {
5390     "und_YE",
5391     "ar_Arab_YE",
5392     "ar_YE"
5393   }, {
5394     "und_YT",
5395     "fr_Latn_YT",
5396     "fr_YT"
5397   }, {
5398     "und_Yiii",
5399     "ii_Yiii_CN",
5400     "ii"
5401   }, {
5402     "ur",
5403     "ur_Arab_PK",
5404     "ur"
5405   }, {
5406     "uz",
5407     "uz_Latn_UZ",
5408     "uz"
5409   }, {
5410     "uz_AF",
5411     "uz_Arab_AF",
5412     "uz_AF"
5413   }, {
5414     "uz_Arab",
5415     "uz_Arab_AF",
5416     "uz_AF"
5417   }, {
5418     "ve",
5419     "ve_Latn_ZA",
5420     "ve"
5421   }, {
5422     "vi",
5423     "vi_Latn_VN",
5424     "vi"
5425   }, {
5426     "wal",
5427     "wal_Ethi_ET",
5428     "wal"
5429   }, {
5430     "wo",
5431     "wo_Latn_SN",
5432     "wo"
5433   }, {
5434     "xh",
5435     "xh_Latn_ZA",
5436     "xh"
5437   }, {
5438     "yo",
5439     "yo_Latn_NG",
5440     "yo"
5441   }, {
5442     "zh",
5443     "zh_Hans_CN",
5444     "zh"
5445   }, {
5446     "zh_HK",
5447     "zh_Hant_HK",
5448     "zh_HK"
5449   }, {
5450     "zh_Hani",
5451     "zh_Hani_CN", /* changed due to cldrbug 6204, may be an error */
5452     "zh_Hani", /* changed due to cldrbug 6204, may be an error */
5453   }, {
5454     "zh_Hant",
5455     "zh_Hant_TW",
5456     "zh_TW"
5457   }, {
5458     "zh_MO",
5459     "zh_Hant_MO",
5460     "zh_MO"
5461   }, {
5462     "zh_TW",
5463     "zh_Hant_TW",
5464     "zh_TW"
5465   }, {
5466     "zu",
5467     "zu_Latn_ZA",
5468     "zu"
5469   }, {
5470     "und",
5471     "en_Latn_US",
5472     "en"
5473   }, {
5474     "und_ZZ",
5475     "en_Latn_US",
5476     "en"
5477   }, {
5478     "und_CN",
5479     "zh_Hans_CN",
5480     "zh"
5481   }, {
5482     "und_TW",
5483     "zh_Hant_TW",
5484     "zh_TW"
5485   }, {
5486     "und_HK",
5487     "zh_Hant_HK",
5488     "zh_HK"
5489   }, {
5490     "und_AQ",
5491     "en_Latn_AQ",
5492     "en_AQ"
5493   }, {
5494     "und_Zzzz",
5495     "en_Latn_US",
5496     "en"
5497   }, {
5498     "und_Zzzz_ZZ",
5499     "en_Latn_US",
5500     "en"
5501   }, {
5502     "und_Zzzz_CN",
5503     "zh_Hans_CN",
5504     "zh"
5505   }, {
5506     "und_Zzzz_TW",
5507     "zh_Hant_TW",
5508     "zh_TW"
5509   }, {
5510     "und_Zzzz_HK",
5511     "zh_Hant_HK",
5512     "zh_HK"
5513   }, {
5514     "und_Zzzz_AQ",
5515     "en_Latn_AQ",
5516     "en_AQ"
5517   }, {
5518     "und_Latn",
5519     "en_Latn_US",
5520     "en"
5521   }, {
5522     "und_Latn_ZZ",
5523     "en_Latn_US",
5524     "en"
5525   }, {
5526     "und_Latn_CN",
5527     "za_Latn_CN",
5528     "za"
5529   }, {
5530     "und_Latn_TW",
5531     "trv_Latn_TW",
5532     "trv"
5533   }, {
5534     "und_Latn_HK",
5535     "en_Latn_HK",
5536     "en_HK"
5537   }, {
5538     "und_Latn_AQ",
5539     "en_Latn_AQ",
5540     "en_AQ"
5541   }, {
5542     "und_Hans",
5543     "zh_Hans_CN",
5544     "zh"
5545   }, {
5546     "und_Hans_ZZ",
5547     "zh_Hans_CN",
5548     "zh"
5549   }, {
5550     "und_Hans_CN",
5551     "zh_Hans_CN",
5552     "zh"
5553   }, {
5554     "und_Hans_TW",
5555     "zh_Hans_TW",
5556     "zh_Hans_TW"
5557   }, {
5558     "und_Hans_HK",
5559     "zh_Hans_HK",
5560     "zh_Hans_HK"
5561   }, {
5562     "und_Hans_AQ",
5563     "zh_Hans_AQ",
5564     "zh_AQ"
5565   }, {
5566     "und_Hant",
5567     "zh_Hant_TW",
5568     "zh_TW"
5569   }, {
5570     "und_Hant_ZZ",
5571     "zh_Hant_TW",
5572     "zh_TW"
5573   }, {
5574     "und_Hant_CN",
5575     "zh_Hant_CN",
5576     "zh_Hant_CN"
5577   }, {
5578     "und_Hant_TW",
5579     "zh_Hant_TW",
5580     "zh_TW"
5581   }, {
5582     "und_Hant_HK",
5583     "zh_Hant_HK",
5584     "zh_HK"
5585   }, {
5586     "und_Hant_AQ",
5587     "zh_Hant_AQ",
5588     "zh_Hant_AQ"
5589   }, {
5590     "und_Moon",
5591     "en_Moon_US",
5592     "en_Moon"
5593   }, {
5594     "und_Moon_ZZ",
5595     "en_Moon_US",
5596     "en_Moon"
5597   }, {
5598     "und_Moon_CN",
5599     "zh_Moon_CN",
5600     "zh_Moon"
5601   }, {
5602     "und_Moon_TW",
5603     "zh_Moon_TW",
5604     "zh_Moon_TW"
5605   }, {
5606     "und_Moon_HK",
5607     "zh_Moon_HK",
5608     "zh_Moon_HK"
5609   }, {
5610     "und_Moon_AQ",
5611     "en_Moon_AQ",
5612     "en_Moon_AQ"
5613   }, {
5614     "es",
5615     "es_Latn_ES",
5616     "es"
5617   }, {
5618     "es_ZZ",
5619     "es_Latn_ES",
5620     "es"
5621   }, {
5622     "es_CN",
5623     "es_Latn_CN",
5624     "es_CN"
5625   }, {
5626     "es_TW",
5627     "es_Latn_TW",
5628     "es_TW"
5629   }, {
5630     "es_HK",
5631     "es_Latn_HK",
5632     "es_HK"
5633   }, {
5634     "es_AQ",
5635     "es_Latn_AQ",
5636     "es_AQ"
5637   }, {
5638     "es_Zzzz",
5639     "es_Latn_ES",
5640     "es"
5641   }, {
5642     "es_Zzzz_ZZ",
5643     "es_Latn_ES",
5644     "es"
5645   }, {
5646     "es_Zzzz_CN",
5647     "es_Latn_CN",
5648     "es_CN"
5649   }, {
5650     "es_Zzzz_TW",
5651     "es_Latn_TW",
5652     "es_TW"
5653   }, {
5654     "es_Zzzz_HK",
5655     "es_Latn_HK",
5656     "es_HK"
5657   }, {
5658     "es_Zzzz_AQ",
5659     "es_Latn_AQ",
5660     "es_AQ"
5661   }, {
5662     "es_Latn",
5663     "es_Latn_ES",
5664     "es"
5665   }, {
5666     "es_Latn_ZZ",
5667     "es_Latn_ES",
5668     "es"
5669   }, {
5670     "es_Latn_CN",
5671     "es_Latn_CN",
5672     "es_CN"
5673   }, {
5674     "es_Latn_TW",
5675     "es_Latn_TW",
5676     "es_TW"
5677   }, {
5678     "es_Latn_HK",
5679     "es_Latn_HK",
5680     "es_HK"
5681   }, {
5682     "es_Latn_AQ",
5683     "es_Latn_AQ",
5684     "es_AQ"
5685   }, {
5686     "es_Hans",
5687     "es_Hans_ES",
5688     "es_Hans"
5689   }, {
5690     "es_Hans_ZZ",
5691     "es_Hans_ES",
5692     "es_Hans"
5693   }, {
5694     "es_Hans_CN",
5695     "es_Hans_CN",
5696     "es_Hans_CN"
5697   }, {
5698     "es_Hans_TW",
5699     "es_Hans_TW",
5700     "es_Hans_TW"
5701   }, {
5702     "es_Hans_HK",
5703     "es_Hans_HK",
5704     "es_Hans_HK"
5705   }, {
5706     "es_Hans_AQ",
5707     "es_Hans_AQ",
5708     "es_Hans_AQ"
5709   }, {
5710     "es_Hant",
5711     "es_Hant_ES",
5712     "es_Hant"
5713   }, {
5714     "es_Hant_ZZ",
5715     "es_Hant_ES",
5716     "es_Hant"
5717   }, {
5718     "es_Hant_CN",
5719     "es_Hant_CN",
5720     "es_Hant_CN"
5721   }, {
5722     "es_Hant_TW",
5723     "es_Hant_TW",
5724     "es_Hant_TW"
5725   }, {
5726     "es_Hant_HK",
5727     "es_Hant_HK",
5728     "es_Hant_HK"
5729   }, {
5730     "es_Hant_AQ",
5731     "es_Hant_AQ",
5732     "es_Hant_AQ"
5733   }, {
5734     "es_Moon",
5735     "es_Moon_ES",
5736     "es_Moon"
5737   }, {
5738     "es_Moon_ZZ",
5739     "es_Moon_ES",
5740     "es_Moon"
5741   }, {
5742     "es_Moon_CN",
5743     "es_Moon_CN",
5744     "es_Moon_CN"
5745   }, {
5746     "es_Moon_TW",
5747     "es_Moon_TW",
5748     "es_Moon_TW"
5749   }, {
5750     "es_Moon_HK",
5751     "es_Moon_HK",
5752     "es_Moon_HK"
5753   }, {
5754     "es_Moon_AQ",
5755     "es_Moon_AQ",
5756     "es_Moon_AQ"
5757   }, {
5758     "zh",
5759     "zh_Hans_CN",
5760     "zh"
5761   }, {
5762     "zh_ZZ",
5763     "zh_Hans_CN",
5764     "zh"
5765   }, {
5766     "zh_CN",
5767     "zh_Hans_CN",
5768     "zh"
5769   }, {
5770     "zh_TW",
5771     "zh_Hant_TW",
5772     "zh_TW"
5773   }, {
5774     "zh_HK",
5775     "zh_Hant_HK",
5776     "zh_HK"
5777   }, {
5778     "zh_AQ",
5779     "zh_Hans_AQ",
5780     "zh_AQ"
5781   }, {
5782     "zh_Zzzz",
5783     "zh_Hans_CN",
5784     "zh"
5785   }, {
5786     "zh_Zzzz_ZZ",
5787     "zh_Hans_CN",
5788     "zh"
5789   }, {
5790     "zh_Zzzz_CN",
5791     "zh_Hans_CN",
5792     "zh"
5793   }, {
5794     "zh_Zzzz_TW",
5795     "zh_Hant_TW",
5796     "zh_TW"
5797   }, {
5798     "zh_Zzzz_HK",
5799     "zh_Hant_HK",
5800     "zh_HK"
5801   }, {
5802     "zh_Zzzz_AQ",
5803     "zh_Hans_AQ",
5804     "zh_AQ"
5805   }, {
5806     "zh_Latn",
5807     "zh_Latn_CN",
5808     "zh_Latn"
5809   }, {
5810     "zh_Latn_ZZ",
5811     "zh_Latn_CN",
5812     "zh_Latn"
5813   }, {
5814     "zh_Latn_CN",
5815     "zh_Latn_CN",
5816     "zh_Latn"
5817   }, {
5818     "zh_Latn_TW",
5819     "zh_Latn_TW",
5820     "zh_Latn_TW"
5821   }, {
5822     "zh_Latn_HK",
5823     "zh_Latn_HK",
5824     "zh_Latn_HK"
5825   }, {
5826     "zh_Latn_AQ",
5827     "zh_Latn_AQ",
5828     "zh_Latn_AQ"
5829   }, {
5830     "zh_Hans",
5831     "zh_Hans_CN",
5832     "zh"
5833   }, {
5834     "zh_Hans_ZZ",
5835     "zh_Hans_CN",
5836     "zh"
5837   }, {
5838     "zh_Hans_TW",
5839     "zh_Hans_TW",
5840     "zh_Hans_TW"
5841   }, {
5842     "zh_Hans_HK",
5843     "zh_Hans_HK",
5844     "zh_Hans_HK"
5845   }, {
5846     "zh_Hans_AQ",
5847     "zh_Hans_AQ",
5848     "zh_AQ"
5849   }, {
5850     "zh_Hant",
5851     "zh_Hant_TW",
5852     "zh_TW"
5853   }, {
5854     "zh_Hant_ZZ",
5855     "zh_Hant_TW",
5856     "zh_TW"
5857   }, {
5858     "zh_Hant_CN",
5859     "zh_Hant_CN",
5860     "zh_Hant_CN"
5861   }, {
5862     "zh_Hant_AQ",
5863     "zh_Hant_AQ",
5864     "zh_Hant_AQ"
5865   }, {
5866     "zh_Moon",
5867     "zh_Moon_CN",
5868     "zh_Moon"
5869   }, {
5870     "zh_Moon_ZZ",
5871     "zh_Moon_CN",
5872     "zh_Moon"
5873   }, {
5874     "zh_Moon_CN",
5875     "zh_Moon_CN",
5876     "zh_Moon"
5877   }, {
5878     "zh_Moon_TW",
5879     "zh_Moon_TW",
5880     "zh_Moon_TW"
5881   }, {
5882     "zh_Moon_HK",
5883     "zh_Moon_HK",
5884     "zh_Moon_HK"
5885   }, {
5886     "zh_Moon_AQ",
5887     "zh_Moon_AQ",
5888     "zh_Moon_AQ"
5889   }, {
5890     "art",
5891     "",
5892     ""
5893   }, {
5894     "art_ZZ",
5895     "",
5896     ""
5897   }, {
5898     "art_CN",
5899     "",
5900     ""
5901   }, {
5902     "art_TW",
5903     "",
5904     ""
5905   }, {
5906     "art_HK",
5907     "",
5908     ""
5909   }, {
5910     "art_AQ",
5911     "",
5912     ""
5913   }, {
5914     "art_Zzzz",
5915     "",
5916     ""
5917   }, {
5918     "art_Zzzz_ZZ",
5919     "",
5920     ""
5921   }, {
5922     "art_Zzzz_CN",
5923     "",
5924     ""
5925   }, {
5926     "art_Zzzz_TW",
5927     "",
5928     ""
5929   }, {
5930     "art_Zzzz_HK",
5931     "",
5932     ""
5933   }, {
5934     "art_Zzzz_AQ",
5935     "",
5936     ""
5937   }, {
5938     "art_Latn",
5939     "",
5940     ""
5941   }, {
5942     "art_Latn_ZZ",
5943     "",
5944     ""
5945   }, {
5946     "art_Latn_CN",
5947     "",
5948     ""
5949   }, {
5950     "art_Latn_TW",
5951     "",
5952     ""
5953   }, {
5954     "art_Latn_HK",
5955     "",
5956     ""
5957   }, {
5958     "art_Latn_AQ",
5959     "",
5960     ""
5961   }, {
5962     "art_Hans",
5963     "",
5964     ""
5965   }, {
5966     "art_Hans_ZZ",
5967     "",
5968     ""
5969   }, {
5970     "art_Hans_CN",
5971     "",
5972     ""
5973   }, {
5974     "art_Hans_TW",
5975     "",
5976     ""
5977   }, {
5978     "art_Hans_HK",
5979     "",
5980     ""
5981   }, {
5982     "art_Hans_AQ",
5983     "",
5984     ""
5985   }, {
5986     "art_Hant",
5987     "",
5988     ""
5989   }, {
5990     "art_Hant_ZZ",
5991     "",
5992     ""
5993   }, {
5994     "art_Hant_CN",
5995     "",
5996     ""
5997   }, {
5998     "art_Hant_TW",
5999     "",
6000     ""
6001   }, {
6002     "art_Hant_HK",
6003     "",
6004     ""
6005   }, {
6006     "art_Hant_AQ",
6007     "",
6008     ""
6009   }, {
6010     "art_Moon",
6011     "",
6012     ""
6013   }, {
6014     "art_Moon_ZZ",
6015     "",
6016     ""
6017   }, {
6018     "art_Moon_CN",
6019     "",
6020     ""
6021   }, {
6022     "art_Moon_TW",
6023     "",
6024     ""
6025   }, {
6026     "art_Moon_HK",
6027     "",
6028     ""
6029   }, {
6030     "art_Moon_AQ",
6031     "",
6032     ""
6033   }, {
6034     "de@collation=phonebook",
6035     "de_Latn_DE@collation=phonebook",
6036     "de@collation=phonebook"
6037   }
6038 };
6039 
6040 typedef struct errorDataTag {
6041     const char* tag;
6042     const char* expected;
6043     UErrorCode uerror;
6044     int32_t  bufferSize;
6045 } errorData;
6046 
6047 const errorData maximizeErrors[] = {
6048     {
6049         "enfueiujhytdf",
6050         NULL,
6051         U_ILLEGAL_ARGUMENT_ERROR,
6052         0
6053     },
6054     {
6055         "en_THUJIOGIURJHGJFURYHFJGURYYYHHGJURHG",
6056         NULL,
6057         U_ILLEGAL_ARGUMENT_ERROR,
6058         0
6059     },
6060     {
6061         "en_THUJIOGIURJHGJFURYHFJGURYYYHHGJURHG",
6062         NULL,
6063         U_ILLEGAL_ARGUMENT_ERROR,
6064         0
6065     },
6066     {
6067         "en_Latn_US_POSIX@currency=EURO",
6068         "en_Latn_US_POSIX@currency=EURO",
6069         U_BUFFER_OVERFLOW_ERROR,
6070         29
6071     },
6072     {
6073         "en_Latn_US_POSIX@currency=EURO",
6074         "en_Latn_US_POSIX@currency=EURO",
6075         U_STRING_NOT_TERMINATED_WARNING,
6076         30
6077     },
6078     {
6079         // ICU-22727
6080         // unicode_language_subtag = alpha{2,3} | alpha{5,8};
6081         // so "bbbbb", "cccccc", "ddddddd", "eeeeeeee" are
6082         // well-formed unicode_language_id but "fffffffff" is not.
6083         "fffffffff",
6084         NULL,
6085         U_ILLEGAL_ARGUMENT_ERROR,
6086         0
6087     }
6088 };
6089 
6090 const errorData minimizeErrors[] = {
6091     {
6092         "enfueiujhytdf",
6093         NULL,
6094         U_ILLEGAL_ARGUMENT_ERROR,
6095         0
6096     },
6097     {
6098         "en_THUJIOGIURJHGJFURYHFJGURYYYHHGJURHG",
6099         NULL,
6100         U_ILLEGAL_ARGUMENT_ERROR,
6101         0
6102     },
6103     {
6104         "en_Latn_US_POSIX@currency=EURO",
6105         "en__POSIX@currency=EURO",
6106         U_BUFFER_OVERFLOW_ERROR,
6107         22
6108     },
6109     {
6110         "en_Latn_US_POSIX@currency=EURO",
6111         "en__POSIX@currency=EURO",
6112         U_STRING_NOT_TERMINATED_WARNING,
6113         23
6114     }
6115 };
6116 
getExpectedReturnValue(const errorData * data)6117 static int32_t getExpectedReturnValue(const errorData* data)
6118 {
6119     if (data->uerror == U_BUFFER_OVERFLOW_ERROR ||
6120         data->uerror == U_STRING_NOT_TERMINATED_WARNING)
6121     {
6122         return (int32_t)strlen(data->expected);
6123     }
6124     else
6125     {
6126         return 0;
6127     }
6128 }
6129 
getBufferSize(const errorData * data,int32_t actualSize)6130 static int32_t getBufferSize(const errorData* data, int32_t actualSize)
6131 {
6132     if (data->expected == NULL)
6133     {
6134         return actualSize;
6135     }
6136     else if (data->bufferSize < 0)
6137     {
6138         return (int32_t)strlen(data->expected) + 1;
6139     }
6140     else
6141     {
6142         return data->bufferSize;
6143     }
6144 }
6145 
TestLikelySubtags(void)6146 static void TestLikelySubtags(void)
6147 {
6148     char buffer[ULOC_FULLNAME_CAPACITY + ULOC_KEYWORD_AND_VALUES_CAPACITY + 1];
6149     int32_t i = 0;
6150 
6151     for (; i < UPRV_LENGTHOF(basic_maximize_data); ++i)
6152     {
6153         UErrorCode status = U_ZERO_ERROR;
6154         const char* const minimal = basic_maximize_data[i][0];
6155         const char* const maximal = basic_maximize_data[i][1];
6156 
6157         /* const int32_t length = */
6158             uloc_addLikelySubtags(
6159                 minimal,
6160                 buffer,
6161                 sizeof(buffer),
6162                 &status);
6163         if (U_FAILURE(status)) {
6164             log_err_status(status, "  unexpected failure of uloc_addLikelySubtags(), minimal \"%s\" status %s\n", minimal, u_errorName(status));
6165             status = U_ZERO_ERROR;
6166         }
6167         else if (uprv_strlen(maximal) == 0) {
6168             if (uprv_stricmp(minimal, buffer) != 0) {
6169                 log_err("  unexpected maximal value \"%s\" in uloc_addLikelySubtags(), minimal \"%s\" = \"%s\"\n", maximal, minimal, buffer);
6170             }
6171         }
6172         else if (uprv_stricmp(maximal, buffer) != 0) {
6173             log_err("1  maximal doesn't match expected %s in uloc_addLikelySubtags(), minimal \"%s\" = %s\n", maximal, minimal, buffer);
6174         }
6175     }
6176 
6177     for (i = 0; i < UPRV_LENGTHOF(basic_minimize_data); ++i) {
6178 
6179         UErrorCode status = U_ZERO_ERROR;
6180         const char* const maximal = basic_minimize_data[i][0];
6181         const char* const minimal = basic_minimize_data[i][1];
6182 
6183         /* const int32_t length = */
6184             uloc_minimizeSubtags(
6185                 maximal,
6186                 buffer,
6187                 sizeof(buffer),
6188                 &status);
6189 
6190         if (U_FAILURE(status)) {
6191             log_err_status(status, "  unexpected failure of uloc_MinimizeSubtags(), maximal \"%s\" status %s\n", maximal, u_errorName(status));
6192             status = U_ZERO_ERROR;
6193         }
6194         else if (uprv_strlen(minimal) == 0) {
6195             if (uprv_stricmp(maximal, buffer) != 0) {
6196                 log_err("  unexpected minimal value \"%s\" in uloc_minimizeSubtags(), maximal \"%s\" = \"%s\"\n", minimal, maximal, buffer);
6197             }
6198         }
6199         else if (uprv_stricmp(minimal, buffer) != 0) {
6200             log_err("  minimal doesn't match expected %s in uloc_MinimizeSubtags(), maximal \"%s\" = %s\n", minimal, maximal, buffer);
6201         }
6202     }
6203 
6204     for (i = 0; i < UPRV_LENGTHOF(full_data); ++i) {
6205 
6206         UErrorCode status = U_ZERO_ERROR;
6207         const char* const minimal = full_data[i][0];
6208         const char* const maximal = full_data[i][1];
6209 
6210         /* const int32_t length = */
6211             uloc_addLikelySubtags(
6212                 minimal,
6213                 buffer,
6214                 sizeof(buffer),
6215                 &status);
6216         if (U_FAILURE(status)) {
6217             log_err_status(status, "  unexpected failure of uloc_addLikelySubtags(), minimal \"%s\" status \"%s\"\n", minimal, u_errorName(status));
6218             status = U_ZERO_ERROR;
6219         }
6220         else if (uprv_strlen(maximal) == 0) {
6221             if (uprv_stricmp(minimal, buffer) != 0) {
6222                 log_err("  unexpected maximal value \"%s\" in uloc_addLikelySubtags(), minimal \"%s\" = \"%s\"\n", maximal, minimal, buffer);
6223             }
6224         }
6225         else if (uprv_stricmp(maximal, buffer) != 0) {
6226             log_err("2  maximal doesn't match expected \"%s\" in uloc_addLikelySubtags(), minimal \"%s\" = \"%s\"\n", maximal, minimal, buffer);
6227         }
6228     }
6229 
6230     for (i = 0; i < UPRV_LENGTHOF(full_data); ++i) {
6231 
6232         UErrorCode status = U_ZERO_ERROR;
6233         const char* const maximal = full_data[i][1];
6234         const char* const minimal = full_data[i][2];
6235 
6236         if (strlen(maximal) > 0) {
6237 
6238             /* const int32_t length = */
6239                 uloc_minimizeSubtags(
6240                     maximal,
6241                     buffer,
6242                     sizeof(buffer),
6243                     &status);
6244 
6245             if (U_FAILURE(status)) {
6246                 log_err_status(status, "  unexpected failure of uloc_minimizeSubtags(), maximal \"%s\" status %s\n", maximal, u_errorName(status));
6247                 status = U_ZERO_ERROR;
6248             }
6249             else if (uprv_strlen(minimal) == 0) {
6250                 if (uprv_stricmp(maximal, buffer) != 0) {
6251                     log_err("  unexpected minimal value \"%s\" in uloc_minimizeSubtags(), maximal \"%s\" = \"%s\"\n", minimal, maximal, buffer);
6252                 }
6253             }
6254             else if (uprv_stricmp(minimal, buffer) != 0) {
6255                 log_err("  minimal doesn't match expected %s in uloc_MinimizeSubtags(), maximal \"%s\" = %s\n", minimal, maximal, buffer);
6256             }
6257         }
6258     }
6259 
6260     for (i = 0; i < UPRV_LENGTHOF(maximizeErrors); ++i) {
6261 
6262         UErrorCode status = U_ZERO_ERROR;
6263         const char* const minimal = maximizeErrors[i].tag;
6264         const char* const maximal = maximizeErrors[i].expected;
6265         const UErrorCode expectedStatus = maximizeErrors[i].uerror;
6266         const int32_t expectedLength = getExpectedReturnValue(&maximizeErrors[i]);
6267         const int32_t bufferSize = getBufferSize(&maximizeErrors[i], sizeof(buffer));
6268 
6269         const int32_t length =
6270             uloc_addLikelySubtags(
6271                 minimal,
6272                 buffer,
6273                 bufferSize,
6274                 &status);
6275 
6276         if (status == U_ZERO_ERROR) {
6277             log_err("  unexpected U_ZERO_ERROR for uloc_addLikelySubtags(), minimal \"%s\" expected status %s\n", minimal, u_errorName(expectedStatus));
6278             status = U_ZERO_ERROR;
6279         }
6280         else if (status != expectedStatus) {
6281             log_err_status(status, "  unexpected status for uloc_addLikelySubtags(), minimal \"%s\" expected status %s, but got %s\n", minimal, u_errorName(expectedStatus), u_errorName(status));
6282         }
6283         else if (length != expectedLength) {
6284             log_err("  unexpected length for uloc_addLikelySubtags(), minimal \"%s\" expected length %d, but got %d\n", minimal, expectedLength, length);
6285         }
6286         else if (status == U_BUFFER_OVERFLOW_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) {
6287             if (uprv_strnicmp(maximal, buffer, bufferSize) != 0) {
6288                 log_err("3  maximal doesn't match expected %s in uloc_addLikelySubtags(), minimal \"%s\" = %*s\n",
6289                     maximal, minimal, (int)sizeof(buffer), buffer);
6290             }
6291         }
6292     }
6293 
6294     for (i = 0; i < UPRV_LENGTHOF(minimizeErrors); ++i) {
6295 
6296         UErrorCode status = U_ZERO_ERROR;
6297         const char* const maximal = minimizeErrors[i].tag;
6298         const char* const minimal = minimizeErrors[i].expected;
6299         const UErrorCode expectedStatus = minimizeErrors[i].uerror;
6300         const int32_t expectedLength = getExpectedReturnValue(&minimizeErrors[i]);
6301         const int32_t bufferSize = getBufferSize(&minimizeErrors[i], sizeof(buffer));
6302 
6303         const int32_t length =
6304             uloc_minimizeSubtags(
6305                 maximal,
6306                 buffer,
6307                 bufferSize,
6308                 &status);
6309 
6310         if (status == U_ZERO_ERROR) {
6311             log_err("  unexpected U_ZERO_ERROR for uloc_minimizeSubtags(), maximal \"%s\" expected status %s\n", maximal, u_errorName(expectedStatus));
6312             status = U_ZERO_ERROR;
6313         }
6314         else if (status != expectedStatus) {
6315             log_err_status(status, "  unexpected status for uloc_minimizeSubtags(), maximal \"%s\" expected status %s, but got %s\n", maximal, u_errorName(expectedStatus), u_errorName(status));
6316         }
6317         else if (length != expectedLength) {
6318             log_err("  unexpected length for uloc_minimizeSubtags(), maximal \"%s\" expected length %d, but got %d\n", maximal, expectedLength, length);
6319         }
6320         else if (status == U_BUFFER_OVERFLOW_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) {
6321             if (uprv_strnicmp(minimal, buffer, bufferSize) != 0) {
6322                 log_err("  minimal doesn't match expected \"%s\" in uloc_minimizeSubtags(), minimal \"%s\" = \"%*s\"\n",
6323                     minimal, maximal, (int)sizeof(buffer), buffer);
6324             }
6325         }
6326     }
6327 }
6328 
/*
 * Test vectors for uloc_toLanguageTag(), consumed by TestToLanguageTag().
 *
 *   [0]  ICU locale ID handed to uloc_toLanguageTag()
 *   [1]  tag expected from the non-strict call, or NULL if that call must fail
 *   [2]  tag expected from the strict call, or NULL if that call must fail
 *
 * The list ends with an all-NULL row.
 */
const char* const locale_to_langtag[][3] = {
    {"", "und", "und"},
    {"en", "en", "en"},
    {"en_US", "en-US", "en-US"},
    {"iw_IL", "he-IL", "he-IL"},
    {"sr_Latn_SR", "sr-Latn-SR", "sr-Latn-SR"},
    {"en__POSIX", "en-u-va-posix", "en-u-va-posix"},
    {"en_POSIX", "en-u-va-posix", "en-u-va-posix"},
    /* variant POSIX_VAR is processed as regular variant */
    {"en_US_POSIX_VAR", "en-US-posix-x-lvariant-var", NULL},
    /* variant VAR_POSIX is processed as regular variant */
    {"en_US_VAR_POSIX", "en-US-x-lvariant-var-posix", NULL},
    /* if keyword va=xxx already exists, variant POSIX is simply dropped */
    {"en_US_POSIX@va=posix2", "en-US-u-va-posix2", "en-US-u-va-posix2"},
    {"en_US_POSIX@ca=japanese", "en-US-u-ca-japanese-va-posix", "en-US-u-ca-japanese-va-posix"},
    {"und_555", "und-555", "und-555"},
    {"123", "und", NULL},
    {"%$#&", "und", NULL},
    {"_Latn", "und-Latn", "und-Latn"},
    {"_DE", "und-DE", "und-DE"},
    {"und_FR", "und-FR", "und-FR"},
    {"th_TH_TH", "th-TH-x-lvariant-th", NULL},
    {"bogus", "bogus", "bogus"},
    {"foooobarrr", "und", NULL},
    {"aa_BB_CYRL", "aa-BB-x-lvariant-cyrl", NULL},
    {"en_US_1234", "en-US-1234", "en-US-1234"},
    {"en_US_VARIANTA_VARIANTB", "en-US-varianta-variantb", "en-US-varianta-variantb"},

    /* ICU-20478 */
    {"en_US_VARIANTB_VARIANTA", "en-US-varianta-variantb", "en-US-varianta-variantb"},
    {"ja__9876_5432", "ja-5432-9876", "ja-5432-9876"},
    {"sl__ROZAJ_BISKE_1994", "sl-1994-biske-rozaj", "sl-1994-biske-rozaj"},
    {"en__SCOUSE_FONIPA", "en-fonipa-scouse", "en-fonipa-scouse"},

    {"zh_Hant__VAR", "zh-Hant-x-lvariant-var", NULL},
    {"es__BADVARIANT_GOODVAR", "es-goodvar", NULL},
    {"en@calendar=gregorian", "en-u-ca-gregory", "en-u-ca-gregory"},
    {"de@collation=phonebook;calendar=gregorian", "de-u-ca-gregory-co-phonebk", "de-u-ca-gregory-co-phonebk"},
    {"th@numbers=thai;z=extz;x=priv-use;a=exta", "th-a-exta-u-nu-thai-z-extz-x-priv-use", "th-a-exta-u-nu-thai-z-extz-x-priv-use"},
    {"en@timezone=America/New_York;calendar=japanese", "en-u-ca-japanese-tz-usnyc", "en-u-ca-japanese-tz-usnyc"},
    {"en@timezone=US/Eastern", "en-u-tz-usnyc", "en-u-tz-usnyc"},
    {"en@x=x-y-z;a=a-b-c", "en-x-x-y-z", NULL},
    {"it@collation=badcollationtype;colStrength=identical;cu=usd-eur", "it-u-cu-usd-eur-ks-identic", NULL},
    {"en_US_POSIX", "en-US-u-va-posix", "en-US-u-va-posix"},
    {"en_US_POSIX@calendar=japanese;currency=EUR", "en-US-u-ca-japanese-cu-eur-va-posix", "en-US-u-ca-japanese-cu-eur-va-posix"},
    {"@x=elmer", "und-x-elmer", "und-x-elmer"},
    {"en@x=elmer", "en-x-elmer", "en-x-elmer"},
    {"@x=elmer;a=exta", "und-a-exta-x-elmer", "und-a-exta-x-elmer"},
    {"en_US@attribute=attr1-attr2;calendar=gregorian", "en-US-u-attr1-attr2-ca-gregory", "en-US-u-attr1-attr2-ca-gregory"},

    /* #12671 */
    {"en@a=bar;attribute=baz", "en-a-bar-u-baz", "en-a-bar-u-baz"},
    {"en@a=bar;attribute=baz;x=u-foo", "en-a-bar-u-baz-x-u-foo", "en-a-bar-u-baz-x-u-foo"},
    {"en@attribute=baz", "en-u-baz", "en-u-baz"},
    {"en@attribute=baz;calendar=islamic-civil", "en-u-baz-ca-islamic-civil", "en-u-baz-ca-islamic-civil"},
    {"en@a=bar;calendar=islamic-civil;x=u-foo", "en-a-bar-u-ca-islamic-civil-x-u-foo", "en-a-bar-u-ca-islamic-civil-x-u-foo"},
    {"en@a=bar;attribute=baz;calendar=islamic-civil;x=u-foo", "en-a-bar-u-baz-ca-islamic-civil-x-u-foo", "en-a-bar-u-baz-ca-islamic-civil-x-u-foo"},
    {"en@9=efg;a=baz", "en-9-efg-a-baz", "en-9-efg-a-baz"},

    /*
     * Before ICU 64, ICU locale canonicalization had some additional mappings.
     * They were removed for ICU-20187 "drop support for long-obsolete locale
     * ID variants".  The following now uses standard canonicalization.
     */
    {"az_AZ_CYRL", "az-AZ-x-lvariant-cyrl", NULL},

    /* ICU-20310 */
    {"en-u-kn-true", "en-u-kn", "en-u-kn"},
    {"en-u-kn", "en-u-kn", "en-u-kn"},
    {"de-u-co-yes", "de-u-co", "de-u-co"},
    {"de-u-co", "de-u-co", "de-u-co"},
    {"de@collation=yes", "de-u-co", "de-u-co"},
    {"cmn-hans-cn-u-ca-t-ca-x-t-u", "cmn-Hans-CN-t-ca-u-ca-x-t-u", "cmn-Hans-CN-t-ca-u-ca-x-t-u"},

    /* end-of-table sentinel */
    {NULL, NULL, NULL}
};
6396 
TestToLanguageTag(void)6397 static void TestToLanguageTag(void) {
6398     char langtag[256];
6399     int32_t i;
6400     UErrorCode status;
6401     int32_t len;
6402     const char *inloc;
6403     const char *expected;
6404 
6405     for (i = 0; locale_to_langtag[i][0] != NULL; i++) {
6406         inloc = locale_to_langtag[i][0];
6407 
6408         /* testing non-strict mode */
6409         status = U_ZERO_ERROR;
6410         langtag[0] = 0;
6411         expected = locale_to_langtag[i][1];
6412 
6413         len = uloc_toLanguageTag(inloc, langtag, sizeof(langtag), false, &status);
6414         (void)len;    /* Suppress set but not used warning. */
6415         if (U_FAILURE(status)) {
6416             if (expected != NULL) {
6417                 log_err("Error returned by uloc_toLanguageTag for locale id [%s] - error: %s\n",
6418                     inloc, u_errorName(status));
6419             }
6420         } else {
6421             if (expected == NULL) {
6422                 log_err("Error should be returned by uloc_toLanguageTag for locale id [%s], but [%s] is returned without errors\n",
6423                     inloc, langtag);
6424             } else if (uprv_strcmp(langtag, expected) != 0) {
6425                 log_data_err("uloc_toLanguageTag returned language tag [%s] for input locale [%s] - expected: [%s]. Are you missing data?\n",
6426                     langtag, inloc, expected);
6427             }
6428         }
6429 
6430         /* testing strict mode */
6431         status = U_ZERO_ERROR;
6432         langtag[0] = 0;
6433         expected = locale_to_langtag[i][2];
6434 
6435         len = uloc_toLanguageTag(inloc, langtag, sizeof(langtag), true, &status);
6436         if (U_FAILURE(status)) {
6437             if (expected != NULL) {
6438                 log_data_err("Error returned by uloc_toLanguageTag {strict} for locale id [%s] - error: %s Are you missing data?\n",
6439                     inloc, u_errorName(status));
6440             }
6441         } else {
6442             if (expected == NULL) {
6443                 log_err("Error should be returned by uloc_toLanguageTag {strict} for locale id [%s], but [%s] is returned without errors\n",
6444                     inloc, langtag);
6445             } else if (uprv_strcmp(langtag, expected) != 0) {
6446                 log_err("uloc_toLanguageTag {strict} returned language tag [%s] for input locale [%s] - expected: [%s]\n",
6447                     langtag, inloc, expected);
6448             }
6449         }
6450     }
6451 }
6452 
TestBug20132(void)6453 static void TestBug20132(void) {
6454     char langtag[256];
6455     UErrorCode status;
6456     int32_t len;
6457 
6458     static const char inloc[] = "en-C";
6459     static const char expected[] = "en-x-lvariant-c";
6460     const int32_t expected_len = (int32_t)uprv_strlen(expected);
6461 
6462     /* Before ICU-20132 was fixed, calling uloc_toLanguageTag() with a too small
6463      * buffer would not immediately return the buffer size actually needed, but
6464      * instead require several iterations before getting the correct size. */
6465 
6466     status = U_ZERO_ERROR;
6467     len = uloc_toLanguageTag(inloc, langtag, 1, false, &status);
6468 
6469     if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
6470         log_data_err("Error returned by uloc_toLanguageTag for locale id [%s] - error: %s Are you missing data?\n",
6471             inloc, u_errorName(status));
6472     }
6473 
6474     if (len != expected_len) {
6475         log_err("Bad length returned by uloc_toLanguageTag for locale id [%s]: %i != %i\n", inloc, len, expected_len);
6476     }
6477 
6478     status = U_ZERO_ERROR;
6479     len = uloc_toLanguageTag(inloc, langtag, expected_len, false, &status);
6480 
6481     if (U_FAILURE(status)) {
6482         log_data_err("Error returned by uloc_toLanguageTag for locale id [%s] - error: %s Are you missing data?\n",
6483             inloc, u_errorName(status));
6484     }
6485 
6486     if (len != expected_len) {
6487         log_err("Bad length returned by uloc_toLanguageTag for locale id [%s]: %i != %i\n", inloc, len, expected_len);
6488     } else if (uprv_strncmp(langtag, expected, expected_len) != 0) {
6489         log_data_err("uloc_toLanguageTag returned language tag [%.*s] for input locale [%s] - expected: [%s]. Are you missing data?\n",
6490             len, langtag, inloc, expected);
6491     }
6492 }
6493 
/*
 * Marker for the `len` field below: the whole of bcpID is expected to be
 * parsed.  Parenthesised so that the negative literal stays a single operand
 * wherever the macro is expanded.
 */
#define FULL_LENGTH (-1)

/*
 * Test vectors for uloc_forLanguageTag(), consumed by TestForLanguageTag().
 *
 *   bcpID  language tag handed to uloc_forLanguageTag()
 *   locID  locale ID the call is expected to produce
 *   len    expected parsed length; FULL_LENGTH stands for strlen(bcpID)
 */
static const struct {
    const char  *bcpID;
    const char  *locID;
    int32_t     len;
} langtag_to_locale[] = {
    {"en",                  "en",                   FULL_LENGTH},
    {"en-us",               "en_US",                FULL_LENGTH},
    {"und-US",              "_US",                  FULL_LENGTH},
    {"und-latn",            "_Latn",                FULL_LENGTH},
    {"en-US-posix",         "en_US_POSIX",          FULL_LENGTH},
    {"de-de_euro",          "de",                   2},
    {"kok-IN",              "kok_IN",               FULL_LENGTH},
    {"123",                 "",                     0},
    {"en_us",               "",                     0},
    {"en-latn-x",           "en_Latn",              7},
    {"art-lojban",          "jbo",                  FULL_LENGTH},
    {"zh-hakka",            "hak",                  FULL_LENGTH},
    {"zh-cmn-CH",           "cmn_CH",               FULL_LENGTH},
    {"zh-cmn-CH-u-co-pinyin", "cmn_CH@collation=pinyin", FULL_LENGTH},
    {"xxx-yy",              "xxx_YY",               FULL_LENGTH},
    {"fr-234",              "fr_234",               FULL_LENGTH},
    {"i-default",           "en@x=i-default",       FULL_LENGTH},
    {"i-test",              "",                     0},
    {"ja-jp-jp",            "ja_JP",                5},
    {"bogus",               "bogus",                FULL_LENGTH},
    {"boguslang",           "",                     0},
    {"EN-lATN-us",          "en_Latn_US",           FULL_LENGTH},
    {"und-variant-1234",    "__1234_VARIANT",       FULL_LENGTH}, /* ICU-20478 */
    {"ja-9876-5432",    "ja__5432_9876",       FULL_LENGTH}, /* ICU-20478 */
    {"en-US-varianta-variantb",    "en_US_VARIANTA_VARIANTB",       FULL_LENGTH}, /* ICU-20478 */
    {"en-US-variantb-varianta",    "en_US_VARIANTA_VARIANTB",       FULL_LENGTH}, /* ICU-20478 */
    {"sl-rozaj-1994-biske",    "sl__1994_BISKE_ROZAJ",       FULL_LENGTH}, /* ICU-20478 */
    {"sl-biske-1994-rozaj",    "sl__1994_BISKE_ROZAJ",       FULL_LENGTH}, /* ICU-20478 */
    {"sl-1994-rozaj-biske",    "sl__1994_BISKE_ROZAJ",       FULL_LENGTH}, /* ICU-20478 */
    {"sl-rozaj-biske-1994",    "sl__1994_BISKE_ROZAJ",       FULL_LENGTH}, /* ICU-20478 */
    {"en-fonipa-scouse",    "en__FONIPA_SCOUSE",       FULL_LENGTH}, /* ICU-20478 */
    {"en-scouse-fonipa",    "en__FONIPA_SCOUSE",       FULL_LENGTH}, /* ICU-20478 */
    {"und-varzero-var1-vartwo", "__VARZERO",        11},
    {"en-u-ca-gregory",     "en@calendar=gregorian",    FULL_LENGTH},
    {"en-U-cu-USD",         "en@currency=usd",      FULL_LENGTH},
    {"en-US-u-va-posix",    "en_US_POSIX",          FULL_LENGTH},
    {"en-us-u-ca-gregory-va-posix", "en_US_POSIX@calendar=gregorian",   FULL_LENGTH},
    {"en-us-posix-u-va-posix",   "en_US_POSIX@va=posix",    FULL_LENGTH},
    {"en-us-u-va-posix2",        "en_US@va=posix2",         FULL_LENGTH},
    {"en-us-vari1-u-va-posix",   "en_US_VARI1@va=posix",    FULL_LENGTH},
    {"ar-x-1-2-3",          "ar@x=1-2-3",           FULL_LENGTH},
    {"fr-u-nu-latn-cu-eur", "fr@currency=eur;numbers=latn", FULL_LENGTH},
    {"de-k-kext-u-co-phonebk-nu-latn",  "de@collation=phonebook;k=kext;numbers=latn",   FULL_LENGTH},
    {"ja-u-cu-jpy-ca-jp",   "ja@calendar=yes;currency=jpy;jp=yes",  FULL_LENGTH},
    {"en-us-u-tz-usnyc",    "en_US@timezone=America/New_York",  FULL_LENGTH},
    {"und-a-abc-def",       "und@a=abc-def",        FULL_LENGTH},
    {"zh-u-ca-chinese-x-u-ca-chinese",  "zh@calendar=chinese;x=u-ca-chinese",   FULL_LENGTH},
    {"x-elmer",             "@x=elmer",             FULL_LENGTH},
    {"en-US-u-attr1-attr2-ca-gregory", "en_US@attribute=attr1-attr2;calendar=gregorian",    FULL_LENGTH},
    {"sr-u-kn",             "sr@colnumeric=yes",    FULL_LENGTH},
    {"de-u-kn-co-phonebk",  "de@collation=phonebook;colnumeric=yes",    FULL_LENGTH},
    {"en-u-attr2-attr1-kn-kb",  "en@attribute=attr1-attr2;colbackwards=yes;colnumeric=yes", FULL_LENGTH},
    {"ja-u-ijkl-efgh-abcd-ca-japanese-xx-yyy-zzz-kn",   "ja@attribute=abcd-efgh-ijkl;calendar=japanese;colnumeric=yes;xx=yyy-zzz",  FULL_LENGTH},
    {"de-u-xc-xphonebk-co-phonebk-ca-buddhist-mo-very-lo-extensi-xd-that-de-should-vc-probably-xz-killthebuffer",
     "de@calendar=buddhist;collation=phonebook;de=should;lo=extensi;mo=very;vc=probably;xc=xphonebk;xd=that;xz=yes", 91},
    {"de-1901-1901", "de__1901", 7},
    {"de-DE-1901-1901", "de_DE_1901", 10},
    {"en-a-bbb-a-ccc", "en@a=bbb", 8},
    /* #12761 */
    {"en-a-bar-u-baz",      "en@a=bar;attribute=baz",   FULL_LENGTH},
    {"en-a-bar-u-baz-x-u-foo",  "en@a=bar;attribute=baz;x=u-foo",   FULL_LENGTH},
    {"en-u-baz",            "en@attribute=baz",     FULL_LENGTH},
    {"en-u-baz-ca-islamic-civil",   "en@attribute=baz;calendar=islamic-civil",  FULL_LENGTH},
    {"en-a-bar-u-ca-islamic-civil-x-u-foo", "en@a=bar;calendar=islamic-civil;x=u-foo",  FULL_LENGTH},
    {"en-a-bar-u-baz-ca-islamic-civil-x-u-foo", "en@a=bar;attribute=baz;calendar=islamic-civil;x=u-foo",    FULL_LENGTH},
    {"und-Arab-u-em-emoji", "_Arab@em=emoji", FULL_LENGTH},
    {"und-Latn-u-em-emoji", "_Latn@em=emoji", FULL_LENGTH},
    {"und-Latn-DE-u-em-emoji", "_Latn_DE@em=emoji", FULL_LENGTH},
    {"und-Zzzz-DE-u-em-emoji", "_Zzzz_DE@em=emoji", FULL_LENGTH},
    {"und-DE-u-em-emoji", "_DE@em=emoji", FULL_LENGTH},
    /* #20098 */
    {"hant-cmn-cn", "hant", 4},
    {"zh-cmn-TW", "cmn_TW", FULL_LENGTH},
    {"zh-x_t-ab", "zh", 2},
    {"zh-hans-cn-u-ca-x_t-u", "zh_Hans_CN@calendar=yes", 15},
    /* #20140 dupe keys in U-extension */
    {"zh-u-ca-chinese-ca-gregory", "zh@calendar=chinese", FULL_LENGTH},
    {"zh-u-ca-gregory-co-pinyin-ca-chinese", "zh@calendar=gregorian;collation=pinyin", FULL_LENGTH},
    {"de-latn-DE-1901-u-co-phonebk-co-pinyin-ca-gregory", "de_Latn_DE_1901@calendar=gregorian;collation=phonebook", FULL_LENGTH},
    {"th-u-kf-nu-thai-kf-false", "th@colcasefirst=yes;numbers=thai", FULL_LENGTH},
    /* #9562 IANA language tag data update */
    {"en-gb-oed", "en_GB_OXENDICT", FULL_LENGTH},
    {"i-navajo", "nv", FULL_LENGTH},
    {"i-navajo-a-foo", "nv@a=foo", FULL_LENGTH},
    {"i-navajo-latn-us", "nv_Latn_US", FULL_LENGTH},
    {"sgn-br", "bzs", FULL_LENGTH},
    {"sgn-br-u-co-phonebk", "bzs@collation=phonebook", FULL_LENGTH},
    {"ja-latn-hepburn-heploc", "ja_Latn__ALALC97", FULL_LENGTH},
    {"ja-latn-hepburn-heploc-u-ca-japanese", "ja_Latn__ALALC97@calendar=japanese", FULL_LENGTH},
    {"en-a-bcde-0-fgh", "en@0=fgh;a=bcde", FULL_LENGTH},
};
6591 
TestForLanguageTag(void)6592 static void TestForLanguageTag(void) {
6593     char locale[256];
6594     int32_t i;
6595     UErrorCode status;
6596     int32_t parsedLen;
6597     int32_t expParsedLen;
6598 
6599     for (i = 0; i < UPRV_LENGTHOF(langtag_to_locale); i++) {
6600         status = U_ZERO_ERROR;
6601         locale[0] = 0;
6602         expParsedLen = langtag_to_locale[i].len;
6603         if (expParsedLen == FULL_LENGTH) {
6604             expParsedLen = (int32_t)uprv_strlen(langtag_to_locale[i].bcpID);
6605         }
6606         uloc_forLanguageTag(langtag_to_locale[i].bcpID, locale, sizeof(locale), &parsedLen, &status);
6607         if (U_FAILURE(status)) {
6608             log_err_status(status, "Error returned by uloc_forLanguageTag for language tag [%s] - error: %s\n",
6609                 langtag_to_locale[i].bcpID, u_errorName(status));
6610         } else {
6611             if (uprv_strcmp(langtag_to_locale[i].locID, locale) != 0) {
6612                 log_data_err("uloc_forLanguageTag returned locale [%s] for input language tag [%s] - expected: [%s]\n",
6613                     locale, langtag_to_locale[i].bcpID, langtag_to_locale[i].locID);
6614             }
6615             if (parsedLen != expParsedLen) {
6616                 log_err("uloc_forLanguageTag parsed length of %d for input language tag [%s] - expected parsed length: %d\n",
6617                     parsedLen, langtag_to_locale[i].bcpID, expParsedLen);
6618             }
6619         }
6620     }
6621 }
6622 
/*
 * Test vectors for TestLangAndRegionCanonicalize(): each `input` tag is
 * round-tripped through uloc_forLanguageTag() / uloc_toLanguageTag() and the
 * result is compared with `canonical`.
 */
static const struct {
    const char  *input;
    const char  *canonical;
} langtag_to_canonical[] = {
    { .input = "de-DD",                    .canonical = "de-DE" },
    { .input = "de-DD-u-co-phonebk",       .canonical = "de-DE-u-co-phonebk" },
    { .input = "jw-id",                    .canonical = "jv-ID" },
    { .input = "jw-id-u-ca-islamic-civil", .canonical = "jv-ID-u-ca-islamic-civil" },
    { .input = "mo-md",                    .canonical = "ro-MD" },
    { .input = "my-bu-u-nu-mymr",          .canonical = "my-MM-u-nu-mymr" },
    { .input = "yuu-ru",                   .canonical = "yug-RU" },
};
6635 
6636 
TestLangAndRegionCanonicalize(void)6637 static void TestLangAndRegionCanonicalize(void) {
6638     char locale[256];
6639     char canonical[256];
6640     int32_t i;
6641     UErrorCode status;
6642     for (i = 0; i < UPRV_LENGTHOF(langtag_to_canonical); i++) {
6643         status = U_ZERO_ERROR;
6644         const char* input = langtag_to_canonical[i].input;
6645         uloc_forLanguageTag(input, locale, sizeof(locale), NULL, &status);
6646         uloc_toLanguageTag(locale, canonical, sizeof(canonical), true, &status);
6647         if (U_FAILURE(status)) {
6648             log_err_status(status, "Error returned by uloc_forLanguageTag or uloc_toLanguageTag "
6649                            "for language tag [%s] - error: %s\n", input, u_errorName(status));
6650         } else {
6651             const char* expected_canonical = langtag_to_canonical[i].canonical;
6652             if (uprv_strcmp(expected_canonical, canonical) != 0) {
6653                 log_data_err("input language tag [%s] is canonicalized to [%s] - expected: [%s]\n",
6654                     input, canonical, expected_canonical);
6655             }
6656         }
6657     }
6658 }
6659 
/*
 * Checks uloc_toUnicodeLocaleKey() against a small table of keywords.
 *
 * In the table, an expectation of NULL means the call must return NULL, and
 * "$IN" means the call must return the very pointer it was given.
 */
static void TestToUnicodeLocaleKey(void)
{
    static const struct {
        const char *keyword;
        const char *want;
    } cases[] = {
        {"calendar",    "ca"},
        {"CALEndar",    "ca"},  /* difference casing */
        {"ca",          "ca"},  /* bcp key itself */
        {"kv",          "kv"},  /* no difference between legacy and bcp */
        {"foo",         NULL},  /* unknown, bcp ill-formed */
        {"ZZ",          "$IN"}, /* unknown, bcp well-formed */
        {NULL,          NULL}   /* sentinel */
    };

    int32_t idx;
    for (idx = 0; cases[idx].keyword != NULL; idx++) {
        const char *keyword = cases[idx].keyword;
        const char *want = cases[idx].want;
        const char *got = uloc_toUnicodeLocaleKey(keyword);

        if (want == NULL) {
            if (got != NULL) {
                log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=NULL\n", keyword, got);
            }
            continue;
        }
        if (got == NULL) {
            log_data_err("toUnicodeLocaleKey: keyword=%s => NULL, expected=%s\n", keyword, want);
            continue;
        }
        if (uprv_strcmp(want, "$IN") == 0) {
            /* Pointer identity, not string equality, is what is being checked. */
            if (got != keyword) {
                log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=%s(input pointer)\n", keyword, got, keyword);
            }
            continue;
        }
        if (uprv_strcmp(got, want) != 0) {
            log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=%s\n", keyword, got, want);
        }
    }
}
6695 
/*
 * Regression test for ICU-20321: uloc_toUnicodeLocaleKey() must return NULL
 * for strings that do not match the key syntax and non-NULL for strings that
 * do.
 */
static void TestBug20321UnicodeLocaleKey(void)
{
    // key = alphanum alpha ;
    // The invalid inputs come in alpha/digit pairs so that each malformed
    // shape is tried with both kinds of alphanum in the other position.
    static const char* const invalid[] = {
        "a0",   /* second character is a digit */
        "00",
        "a@",   /* second character is not alphanumeric */
        "0@",
        "@a",   /* first character is not alphanumeric */
        "@0",   /* was a second "@a", which left this case untested */
        "abc",  /* too long */
        "0bc",
    };
    for (int i = 0; i < UPRV_LENGTHOF(invalid); i++) {
        const char* bcpKey = uloc_toUnicodeLocaleKey(invalid[i]);
        if (bcpKey != NULL) {
            log_err("toUnicodeLocaleKey: keyword=%s => %s, expected=NULL\n", invalid[i], bcpKey);
        }
    }

    static const char* const valid[] = {
        "aa",
        "0a",
    };
    for (int i = 0; i < UPRV_LENGTHOF(valid); i++) {
        const char* bcpKey = uloc_toUnicodeLocaleKey(valid[i]);
        if (bcpKey == NULL) {
            log_err("toUnicodeLocaleKey: keyword=%s => NULL, expected!=NULL\n", valid[i]);
        }
    }
}
6728 
/*
 * Checks uloc_toLegacyKey() against a small table of keywords.
 *
 * In the table, an expectation of NULL means the call must return NULL, and
 * "$IN" means the call must return the very pointer it was given.
 */
static void TestToLegacyKey(void)
{
    /* $IN specifies the result should be the input pointer itself */
    static const char* DATA[][2] = {
        {"kb",          "colbackwards"},
        {"kB",          "colbackwards"},    /* different casing */
        {"Collation",   "collation"},   /* keyword itself with different casing */
        {"kv",          "kv"},  /* no difference between legacy and bcp */
        {"foo",         "$IN"}, /* unknown, bcp ill-formed */
        {"ZZ",          "$IN"}, /* unknown, bcp well-formed */
        {"e=mc2",       NULL},  /* unknown, bcp/legacy ill-formed */
        {NULL,          NULL}
    };

    int32_t i;
    for (i = 0; DATA[i][0] != NULL; i++) {
        const char* keyword = DATA[i][0];
        const char* expected = DATA[i][1];
        const char* legacyKey = uloc_toLegacyKey(keyword);

        if (expected == NULL) {
            if (legacyKey != NULL) {
                log_err("toLegacyKey: keyword=%s => %s, expected=NULL\n", keyword, legacyKey);
            }
        } else if (legacyKey == NULL) {
            log_err("toLegacyKey: keyword=%s => NULL, expected=%s\n", keyword, expected);
        } else if (uprv_strcmp(expected, "$IN") == 0) {
            /* Pointer identity, not string equality, is what is being checked. */
            if (legacyKey != keyword) {
                log_err("toLegacyKey: keyword=%s => %s, expected=%s(input pointer)\n", keyword, legacyKey, keyword);
            }
        } else if (uprv_strcmp(legacyKey, expected) != 0) {
            /* The message used to name toUnicodeLocaleKey and omit "=>", which
             * made failures here look like they came from a different test. */
            log_data_err("toLegacyKey: keyword=%s => %s, expected=%s\n", keyword, legacyKey, expected);
        }
    }
}
6765 
/*
 * Checks uloc_toUnicodeLocaleType() against a table of (keyword, value)
 * pairs.
 *
 * In the table, an expectation of NULL means the call must return NULL, and
 * "$IN" means the call must return the very `value` pointer it was given.
 */
static void TestToUnicodeLocaleType(void)
{
    static const struct {
        const char *keyword;
        const char *value;
        const char *want;
    } cases[] = {
        {"tz",              "Asia/Kolkata",     "inccu"},
        {"calendar",        "gregorian",        "gregory"},
        {"ca",              "gregorian",        "gregory"},
        {"ca",              "Gregorian",        "gregory"},
        {"ca",              "buddhist",         "buddhist"},
        {"Calendar",        "Japanese",         "japanese"},
        {"calendar",        "Islamic-Civil",    "islamic-civil"},
        {"calendar",        "islamicc",         "islamic-civil"},   /* bcp type alias */
        {"colalternate",    "NON-IGNORABLE",    "noignore"},
        {"colcaselevel",    "yes",              "true"},
        {"rg",              "GBzzzz",           "$IN"},
        {"tz",              "america/new_york", "usnyc"},
        {"tz",              "Asia/Kolkata",     "inccu"},
        {"timezone",        "navajo",           "usden"},
        {"ca",              "aaaa",             "$IN"},     /* unknown type, well-formed type */
        {"ca",              "gregory-japanese-islamic", "$IN"}, /* unknown type, well-formed type */
        {"zz",              "gregorian",        NULL},      /* unknown key, ill-formed type */
        {"co",              "foo-",             NULL},      /* unknown type, ill-formed type */
        {"variableTop",     "00A0",             "$IN"},     /* valid codepoints type */
        {"variableTop",     "wxyz",             "$IN"},     /* invalid codepoints type - return as is for now */
        {"kr",              "space-punct",      "space-punct"}, /* valid reordercode type */
        {"kr",              "digit-spacepunct", NULL},      /* invalid (bcp ill-formed) reordercode type */
        {NULL,              NULL,               NULL}       /* sentinel */
    };

    int32_t idx;
    for (idx = 0; cases[idx].keyword != NULL; idx++) {
        const char *keyword = cases[idx].keyword;
        const char *value = cases[idx].value;
        const char *want = cases[idx].want;
        const char *got = uloc_toUnicodeLocaleType(keyword, value);

        if (want == NULL) {
            if (got != NULL) {
                log_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=NULL\n", keyword, value, got);
            }
            continue;
        }
        if (got == NULL) {
            log_data_err("toUnicodeLocaleType: keyword=%s, value=%s => NULL, expected=%s\n", keyword, value, want);
            continue;
        }
        if (uprv_strcmp(want, "$IN") == 0) {
            /* Pointer identity, not string equality, is what is being checked. */
            if (got != value) {
                log_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=%s(input pointer)\n", keyword, value, got, value);
            }
            continue;
        }
        if (uprv_strcmp(got, want) != 0) {
            log_data_err("toUnicodeLocaleType: keyword=%s, value=%s => %s, expected=%s\n", keyword, value, got, want);
        }
    }
}
6818 
/*
 * Checks uloc_toLegacyType() against a table of (keyword, value) pairs.
 *
 * In the table, an expectation of NULL means the call must return NULL, and
 * "$IN" means the call must return the very `value` pointer it was given.
 * Rows whose result matches a literal expectation are reported via
 * log_verbose().
 */
static void TestToLegacyType(void)
{
    static const struct {
        const char *keyword;
        const char *value;
        const char *want;
    } cases[] = {
        {"calendar",        "gregory",          "gregorian"},
        {"ca",              "gregory",          "gregorian"},
        {"ca",              "Gregory",          "gregorian"},
        {"ca",              "buddhist",         "buddhist"},
        {"Calendar",        "Japanese",         "japanese"},
        {"calendar",        "Islamic-Civil",    "islamic-civil"},
        {"calendar",        "islamicc",         "islamic-civil"},   /* bcp type alias */
        {"colalternate",    "noignore",         "non-ignorable"},
        {"colcaselevel",    "true",             "yes"},
        {"rg",              "gbzzzz",           "gbzzzz"},
        {"tz",              "usnyc",            "America/New_York"},
        {"tz",              "inccu",            "Asia/Calcutta"},
        {"timezone",        "usden",            "America/Denver"},
        {"timezone",        "usnavajo",         "America/Denver"},  /* bcp type alias */
        {"colstrength",     "quarternary",      "quaternary"},  /* type alias */
        {"ca",              "aaaa",             "$IN"}, /* unknown type */
        {"calendar",        "gregory-japanese-islamic", "$IN"}, /* unknown type, well-formed type */
        {"zz",              "gregorian",        "$IN"}, /* unknown key, bcp ill-formed type */
        {"ca",              "gregorian-calendar",   "$IN"}, /* known key, bcp ill-formed type */
        {"co",              "e=mc2",            NULL},  /* known key, ill-formed bcp/legacy type */
        {"variableTop",     "00A0",             "$IN"},     /* valid codepoints type */
        {"variableTop",     "wxyz",             "$IN"},    /* invalid codepoints type - return as is for now */
        {"kr",              "space-punct",      "space-punct"}, /* valid reordercode type */
        {"kr",              "digit-spacepunct", "digit-spacepunct"},    /* invalid reordercode type, but ok for legacy syntax */
        {NULL,              NULL,               NULL}   /* sentinel */
    };

    int32_t idx;
    for (idx = 0; cases[idx].keyword != NULL; idx++) {
        const char *keyword = cases[idx].keyword;
        const char *value = cases[idx].value;
        const char *want = cases[idx].want;
        const char *got = uloc_toLegacyType(keyword, value);

        if (want == NULL) {
            if (got != NULL) {
                log_err("toLegacyType: keyword=%s, value=%s => %s, expected=NULL\n", keyword, value, got);
            }
            continue;
        }
        if (got == NULL) {
            log_err("toLegacyType: keyword=%s, value=%s => NULL, expected=%s\n", keyword, value, want);
            continue;
        }
        if (uprv_strcmp(want, "$IN") == 0) {
            /* Pointer identity, not string equality, is what is being checked. */
            if (got != value) {
                log_err("toLegacyType: keyword=%s, value=%s => %s, expected=%s(input pointer)\n", keyword, value, got, value);
            }
            continue;
        }
        if (uprv_strcmp(got, want) != 0) {
            log_data_err("toLegacyType: keyword=%s, value=%s => %s, expected=%s\n", keyword, value, got, want);
        } else {
            log_verbose("toLegacyType: keyword=%s, value=%s => %s\n", keyword, value, got);
        }
    }
}
6875 
6876 
6877 
/**
 * Verifies that the char constant `ch` converts, via u_charsToUChars(), to
 * the UChar constant `uch`.
 *
 * @param namech  printable name of the char constant (used in log output)
 * @param ch      the char value to convert
 * @param nameu   printable name of the UChar constant (used in log output)
 * @param uch     the UChar value the conversion is expected to produce
 */
static void test_unicode_define(const char *namech, char ch,
                                const char *nameu, UChar uch)
{
    UChar converted[1] = { 0 };

    log_verbose("Testing whether %s[\\x%02x,'%c'] == %s[U+%04X]\n",
                namech, ch,(int)ch, nameu, (int) uch);
    u_charsToUChars(&ch, converted, 1);

    if(converted[0] == uch) {
        log_verbose(" .. OK, == U+%04X\n", (int)converted[0]);
        return;
    }
    log_err("FAIL:  %s[\\x%02x,'%c'] maps to U+%04X, but %s = U+%04X\n",
            namech, ch, (int)ch, (int)converted[0], nameu, (int)uch);
}
6893 
checkTerminating(const char * locale,const char * inLocale)6894 static void checkTerminating(const char* locale, const char* inLocale)
6895 {
6896     UErrorCode status = U_ZERO_ERROR;
6897     int32_t preflight_length = uloc_getDisplayName(
6898         locale, inLocale, NULL, 0, &status);
6899     if (status != U_BUFFER_OVERFLOW_ERROR) {
6900         log_err("uloc_getDisplayName(%s, %s) preflight failed",
6901                 locale, inLocale);
6902     }
6903     UChar buff[256];
6904     const UChar sentinel1 = 0x6C38; // 永- a Han unicode as sentinel.
6905     const UChar sentinel2 = 0x92D2; // 鋒- a Han unicode as sentinel.
6906 
6907     // 1. Test when we set the maxResultSize to preflight_length + 1.
6908     // Set sentinel1 in the buff[preflight_length-1] to check it will be
6909     // replaced with display name.
6910     buff[preflight_length-1] = sentinel1;
6911     // Set sentinel2 in the buff[preflight_length] to check it will be
6912     // replaced by null.
6913     buff[preflight_length] = sentinel2;
6914     // It should be properly null terminated at buff[preflight_length].
6915     status = U_ZERO_ERROR;
6916     int32_t length = uloc_getDisplayName(
6917         locale, inLocale, buff, preflight_length + 1, &status);
6918     const char* result = U_SUCCESS(status) ?
6919         aescstrdup(buff, length) : "(undefined when failure)";
6920     if (length != preflight_length) {
6921         log_err("uloc_getDisplayName(%s, %s) w/ maxResultSize=length+1 returns "
6922                 "length %d different from preflight length %d. Returns '%s'\n",
6923                 locale, inLocale, length, preflight_length, result);
6924     }
6925     if (U_ZERO_ERROR != status) {
6926         log_err("uloc_getDisplayName(%s, %s) w/ maxResultSize=length+1 should "
6927                 "set status to U_ZERO_ERROR but got %d %s. Returns %s\n",
6928                 locale, inLocale, status, myErrorName(status), result);
6929     }
6930     if (buff[length-1] == sentinel1) {
6931         log_err("uloc_getDisplayName(%s, %s) w/ maxResultSize=length+1 does "
6932                 "not change memory in the end of buffer while it should. "
6933                 "Returns %s\n",
6934                 locale, inLocale, result);
6935     }
6936     if (buff[length] != 0x0000) {
6937         log_err("uloc_getDisplayName(%s, %s) w/ maxResultSize=length+1 should "
6938                 "null terminate at buff[length] but does not %x. Returns %s\n",
6939                 locale, inLocale, buff[length], result);
6940     }
6941 
6942     // 2. Test when we only set the maxResultSize to preflight_length.
6943 
6944     // Set sentinel1 in the buff[preflight_length-1] to check it will be
6945     // replaced with display name.
6946     buff[preflight_length-1] = sentinel1;
6947     // Set sentinel2 in the buff[preflight_length] to check it won't be replaced
6948     // by null.
6949     buff[preflight_length] = sentinel2;
6950     status = U_ZERO_ERROR;
6951     length = uloc_getDisplayName(
6952         locale, inLocale, buff, preflight_length, &status);
6953     result = U_SUCCESS(status) ?
6954         aescstrdup(buff, length) : "(undefined when failure)";
6955 
6956     if (length != preflight_length) {
6957         log_err("uloc_getDisplayName(%s, %s) w/ maxResultSize=length return "
6958                 "length %d different from preflight length %d. Returns '%s'\n",
6959                 locale, inLocale, length, preflight_length, result);
6960     }
6961     if (U_STRING_NOT_TERMINATED_WARNING != status) {
6962         log_err("uloc_getDisplayName(%s, %s) w/ maxResultSize=length should "
6963                 "set status to U_STRING_NOT_TERMINATED_WARNING but got %d %s. "
6964                 "Returns %s\n",
6965                 locale, inLocale, status, myErrorName(status), result);
6966     }
6967     if (buff[length-1] == sentinel1) {
6968         log_err("uloc_getDisplayName(%s, %s) w/ maxResultSize=length does not "
6969                 "change memory in the end of buffer while it should. Returns "
6970                 "'%s'\n",
6971                 locale, inLocale, result);
6972     }
6973     if (buff[length] != sentinel2) {
6974         log_err("uloc_getDisplayName(%s, %s) w/ maxResultSize=length change "
6975                 "memory beyond maxResultSize to %x. Returns '%s'\n",
6976                 locale, inLocale, buff[length], result);
6977     }
6978     if (buff[preflight_length - 1] == 0x0000) {
6979         log_err("uloc_getDisplayName(%s, %s) w/ maxResultSize=length null "
6980                 "terminated while it should not. Return '%s'\n",
6981                 locale, inLocale, result);
6982     }
6983 
6984     // 3. Test when we only set the maxResultSize to preflight_length-1.
6985     // Set sentinel1 in the buff[preflight_length-1] to check it will not be
6986     // replaced with display name.
6987     buff[preflight_length-1] = sentinel1;
6988     // Set sentinel2 in the buff[preflight_length] to check it won't be replaced
6989     // by null.
6990     buff[preflight_length] = sentinel2;
6991     status = U_ZERO_ERROR;
6992     length = uloc_getDisplayName(
6993         locale, inLocale, buff, preflight_length - 1, &status);
6994     result = U_SUCCESS(status) ?
6995         aescstrdup(buff, length) : "(undefined when failure)";
6996 
6997     if (length != preflight_length) {
6998         log_err("uloc_getDisplayName(%s, %s) w/ maxResultSize=length-1 return "
6999                 "length %d different from preflight length %d. Returns '%s'\n",
7000                 locale, inLocale, length, preflight_length, result);
7001     }
7002     if (U_BUFFER_OVERFLOW_ERROR != status) {
7003         log_err("uloc_getDisplayName(%s, %s) w/ maxResultSize=length-1 should "
7004                 "set status to U_BUFFER_OVERFLOW_ERROR but got %d %s. "
7005                 "Returns %s\n",
7006                 locale, inLocale, status, myErrorName(status), result);
7007     }
7008     if (buff[length-1] != sentinel1) {
7009         log_err("uloc_getDisplayName(%s, %s) w/ maxResultSize=length-1 should "
7010                 "not change memory in beyond the maxResultSize. Returns '%s'\n",
7011                 locale, inLocale, result);
7012     }
7013     if (buff[length] != sentinel2) {
7014         log_err("uloc_getDisplayName(%s, %s) w/ maxResultSize=length-1 change "
7015                 "memory beyond maxResultSize to %x. Returns '%s'\n",
7016                 locale, inLocale, buff[length], result);
7017     }
7018     if (buff[preflight_length - 2] == 0x0000) {
7019         log_err("uloc_getDisplayName(%s, %s) w/ maxResultSize=length-1 null "
7020                 "terminated while it should not. Return '%s'\n",
7021                 locale, inLocale, result);
7022     }
7023 }
7024 
/**
 * ICU-21157: runs checkTerminating() for a fixed list of locale IDs, always
 * asking for the display name in French ("fr").
 */
static void Test21157CorrectTerminating(void) {
    /* Checked in this exact order; "fr" intentionally appears twice. */
    static const char* const localesToName[] = {
        "fr",
        "fr_BE",
        "fr_Latn_BE",
        "fr_Latn",
        "fr",
        "fr-CN",
        "fr-Hant-CN",
        "fr-Hant",
        "zh-u-co-pinyin",
    };
    const int32_t count = (int32_t)(sizeof(localesToName) / sizeof(localesToName[0]));
    int32_t i;
    for (i = 0; i < count; i++) {
        checkTerminating(localesToName[i], "fr");
    }
}
7036 
7037 #define TEST_UNICODE_DEFINE(x,y) test_unicode_define(#x, (char)(x), #y, (UChar)(y))
7038 
TestUnicodeDefines(void)7039 static void TestUnicodeDefines(void) {
7040   TEST_UNICODE_DEFINE(ULOC_KEYWORD_SEPARATOR, ULOC_KEYWORD_SEPARATOR_UNICODE);
7041   TEST_UNICODE_DEFINE(ULOC_KEYWORD_ASSIGN, ULOC_KEYWORD_ASSIGN_UNICODE);
7042   TEST_UNICODE_DEFINE(ULOC_KEYWORD_ITEM_SEPARATOR, ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE);
7043 }
7044 
/**
 * API smoke test for uloc_isRightToLeft(): "root" must not be reported as
 * right-to-left, "EN-HEBR" must be. Two bogus locale IDs are then passed in
 * only to confirm that the call returns without crashing (ICU-22466); their
 * results are deliberately ignored.
 */
static void TestIsRightToLeft(void) {
    // API test only. More test cases in intltest/LocaleTest.
    if(uloc_isRightToLeft("root") || !uloc_isRightToLeft("EN-HEBR")) {
        // Newline-terminated like the other log messages in this file.
        log_err("uloc_isRightToLeft() failed\n");
    }
    // ICU-22466 Make sure no crash when locale is bogus
    uloc_isRightToLeft(
        "uF-Vd_u-VaapoPos-u1-Pos-u1-Pos-u1-Pos-u1-oPos-u1-Pufu1-PuosPos-u1-Pos-u1-Pos-u1-Pzghu1-Pos-u1-PoP-u1@osus-u1");
    uloc_isRightToLeft("-Xa");
}
7055 
7056 typedef struct {
7057     const char * badLocaleID;
7058     const char * displayLocale;
7059     const char * expectedName;
7060     UErrorCode   expectedStatus;
7061 } BadLocaleItem;
7062 
7063 static const BadLocaleItem badLocaleItems[] = {
7064     { "-9223372036854775808", "en", "Unknown language (9223372036854775808)", U_USING_DEFAULT_WARNING },
7065     /* add more in the future */
7066     { NULL, NULL, NULL, U_ZERO_ERROR } /* terminator */
7067 };
7068 
/* Buffer capacities used by TestBadLocaleIDs(). */
enum {
    kUBufDispNameMax = 128, /* UChar buffers holding display names */
    kBBufDispNameMax = 256  /* char buffers used when printing those names */
};
7070 
TestBadLocaleIDs(void)7071 static void TestBadLocaleIDs(void) {
7072     const BadLocaleItem* itemPtr;
7073     for (itemPtr = badLocaleItems; itemPtr->badLocaleID != NULL; itemPtr++) {
7074         UChar ubufExpect[kUBufDispNameMax], ubufGet[kUBufDispNameMax];
7075         UErrorCode status = U_ZERO_ERROR;
7076         int32_t ulenExpect = u_unescape(itemPtr->expectedName, ubufExpect, kUBufDispNameMax);
7077         int32_t ulenGet = uloc_getDisplayName(itemPtr->badLocaleID, itemPtr->displayLocale, ubufGet, kUBufDispNameMax, &status);
7078         if (status != itemPtr->expectedStatus ||
7079                 (U_SUCCESS(status) && (ulenGet != ulenExpect || u_strncmp(ubufGet, ubufExpect, ulenExpect) != 0))) {
7080             char bbufExpect[kBBufDispNameMax], bbufGet[kBBufDispNameMax];
7081             u_austrncpy(bbufExpect, ubufExpect, ulenExpect);
7082             u_austrncpy(bbufGet, ubufGet, ulenGet);
7083             log_err("FAIL: For localeID %s, displayLocale %s, calling uloc_getDisplayName:\n"
7084                     "    expected status %-26s, name (len %2d): %s\n"
7085                     "    got      status %-26s, name (len %2d): %s\n",
7086                     itemPtr->badLocaleID, itemPtr->displayLocale,
7087                     u_errorName(itemPtr->expectedStatus), ulenExpect, bbufExpect,
7088                     u_errorName(status), ulenGet, bbufGet );
7089         }
7090     }
7091 }
7092 
// Test case for ICU-20370.
// The issue shows as an Address Sanitizer failure.
// A very long private-use locale ID is passed to uloc_getLCID(); the call is
// expected to return 0 for it.
static void TestBug20370(void) {
    const char *localeID = "x-privatebutreallylongtagfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobarfoobar";
    uint32_t lcid = uloc_getLCID(localeID);
    if (lcid != 0) {
        // Newline-terminated, and reports the unexpected value for diagnosis.
        log_err("FAIL: Expected LCID value of 0 for invalid localeID input, got 0x%x.\n",
                (unsigned int)lcid);
    }
}
7102 
7103 // Test case for ICU-20149
7104 // Handle the duplicate U extension attribute
TestBug20149(void)7105 static void TestBug20149(void) {
7106     const char *localeID = "zh-u-foo-foo-co-pinyin";
7107     char locale[256];
7108     UErrorCode status = U_ZERO_ERROR;
7109     int32_t parsedLen;
7110     locale[0] = '\0';
7111     uloc_forLanguageTag(localeID, locale, sizeof(locale), &parsedLen, &status);
7112     if (U_FAILURE(status) ||
7113         0 !=strcmp("zh@attribute=foo;collation=pinyin", locale)) {
7114         log_err("ERROR: in uloc_forLanguageTag %s return %s\n", myErrorName(status), locale);
7115     }
7116 }
7117 
7118 #if !UCONFIG_NO_FORMATTING
/*
 * Selects which display-name API TestUldnNameVariants() calls for an item.
 * TEST_ULDN_* entries go through the uldn_* functions of an opened
 * ULocaleDisplayNames; TEST_ULOC_* entries go through the uloc_getDisplay*
 * functions.
 */
typedef enum UldnNameType {
    /* uldn_* based lookups */
    TEST_ULDN_LOCALE,
    TEST_ULDN_LANGUAGE,
    TEST_ULDN_SCRIPT,
    TEST_ULDN_REGION,
    /* uloc_* based lookups */
    TEST_ULOC_LOCALE,   // only valid with optStdMidLong
    TEST_ULOC_LANGUAGE, // only valid with optStdMidLong
    TEST_ULOC_SCRIPT,   // only valid with optStdMidLong
    TEST_ULOC_REGION,   // only valid with optStdMidLong
} UldnNameType;
7129 
7130 typedef struct {
7131     const char * localeToName; // NULL to terminate a list of these
7132     UldnNameType nameType;
7133     const UChar * expectResult;
7134 } UldnItem;
7135 
7136 typedef struct {
7137     const char *            displayLocale;
7138     const UDisplayContext * displayOptions; // set of 3 UDisplayContext items
7139     const UldnItem *        testItems;
7140     int32_t                 countItems;
7141 } UldnLocAndOpts;
7142 
7143 static const UDisplayContext optStdMidLong[3] = {UDISPCTX_STANDARD_NAMES, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,    UDISPCTX_LENGTH_FULL};
7144 static const UDisplayContext optStdMidShrt[3] = {UDISPCTX_STANDARD_NAMES, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,    UDISPCTX_LENGTH_SHORT};
7145 static const UDisplayContext optDiaMidLong[3] = {UDISPCTX_DIALECT_NAMES,  UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,    UDISPCTX_LENGTH_FULL};
7146 static const UDisplayContext optDiaMidShrt[3] = {UDISPCTX_DIALECT_NAMES,  UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,    UDISPCTX_LENGTH_SHORT};
7147 
7148 static const UldnItem en_StdMidLong[] = {
7149 	{ "en_US",                  TEST_ULDN_LOCALE, u"English (United States)" },
7150 	{ "en",                     TEST_ULDN_LANGUAGE, u"English" },
7151 	{ "en_US",                  TEST_ULOC_LOCALE, u"English (United States)" },
7152 	{ "en_US",                  TEST_ULOC_LANGUAGE, u"English" },
7153 	{ "en",                     TEST_ULOC_LANGUAGE, u"English" },
7154 	// https://unicode-org.atlassian.net/browse/ICU-20870
7155 	{ "fa_AF",                  TEST_ULDN_LOCALE, u"Persian (Afghanistan)" },
7156 	{ "prs",                    TEST_ULDN_LOCALE, u"Dari" },
7157 	{ "prs_AF",                 TEST_ULDN_LOCALE, u"Dari (Afghanistan)" },
7158 	{ "prs_TJ",                 TEST_ULDN_LOCALE, u"Dari (Tajikistan)" },
7159 	{ "prs",                    TEST_ULDN_LANGUAGE, u"Dari" },
7160 	{ "prs",                    TEST_ULOC_LANGUAGE, u"Dari" },
7161 	// https://unicode-org.atlassian.net/browse/ICU-21742
7162 	{ "ji",                     TEST_ULDN_LOCALE, u"Yiddish" },
7163 	{ "ji_US",                  TEST_ULDN_LOCALE, u"Yiddish (United States)" },
7164 	{ "ji",                     TEST_ULDN_LANGUAGE, u"Yiddish" },
7165 	{ "ji_US",                  TEST_ULOC_LOCALE, u"Yiddish (United States)" },
7166 	{ "ji",                     TEST_ULOC_LANGUAGE, u"Yiddish" },
7167 	// https://unicode-org.atlassian.net/browse/ICU-11563
7168 	{ "mo",                     TEST_ULDN_LOCALE, u"Romanian" },
7169 	{ "mo_MD",                  TEST_ULDN_LOCALE, u"Romanian (Moldova)" },
7170 	{ "mo",                     TEST_ULDN_LANGUAGE, u"Romanian" },
7171 	{ "mo_MD",                  TEST_ULOC_LOCALE, u"Romanian (Moldova)" },
7172 	{ "mo",                     TEST_ULOC_LANGUAGE, u"Romanian" },
7173 };
7174 
7175 static const UldnItem en_StdMidShrt[] = {
7176 	{ "en_US",                  TEST_ULDN_LOCALE, u"English (US)" },
7177 	{ "en",                     TEST_ULDN_LANGUAGE, u"English" },
7178 };
7179 
7180 static const UldnItem en_DiaMidLong[] = {
7181 	{ "en_US",                  TEST_ULDN_LOCALE, u"American English" },
7182 	{ "fa_AF",                  TEST_ULDN_LOCALE, u"Dari" },
7183 	{ "prs",                    TEST_ULDN_LOCALE, u"Dari" },
7184 	{ "prs_AF",                 TEST_ULDN_LOCALE, u"Dari (Afghanistan)" },
7185 	{ "prs_TJ",                 TEST_ULDN_LOCALE, u"Dari (Tajikistan)" },
7186 	{ "prs",                    TEST_ULDN_LANGUAGE, u"Dari" },
7187 	{ "mo",                     TEST_ULDN_LOCALE, u"Romanian" },
7188 	{ "mo",                     TEST_ULDN_LANGUAGE, u"Romanian" },
7189 };
7190 
7191 static const UldnItem en_DiaMidShrt[] = {
7192 	{ "en_US",                  TEST_ULDN_LOCALE, u"US English" },
7193 };
7194 
7195 static const UldnItem ro_StdMidLong[] = { // https://unicode-org.atlassian.net/browse/ICU-11563
7196 	{ "mo",                     TEST_ULDN_LOCALE, u"română" },
7197 	{ "mo_MD",                  TEST_ULDN_LOCALE, u"română (Republica Moldova)" },
7198 	{ "mo",                     TEST_ULDN_LANGUAGE, u"română" },
7199 	{ "mo_MD",                  TEST_ULOC_LOCALE, u"română (Republica Moldova)" },
7200 	{ "mo",                     TEST_ULOC_LANGUAGE, u"română" },
7201 };
7202 
7203 static const UldnItem yi_StdMidLong[] = { // https://unicode-org.atlassian.net/browse/ICU-21742
7204 	{ "ji",                     TEST_ULDN_LOCALE, u"ייִדיש" },
7205 	{ "ji_US",                  TEST_ULDN_LOCALE, u"ייִדיש (פֿאַראייניגטע שטאַטן)" },
7206 	{ "ji",                     TEST_ULDN_LANGUAGE, u"ייִדיש" },
7207 	{ "ji_US",                  TEST_ULOC_LOCALE, u"ייִדיש (פֿאַראייניגטע שטאַטן)" },
7208 	{ "ji",                     TEST_ULOC_LANGUAGE, u"ייִדיש" },
7209 };
7210 
7211 static const UldnItem zh_DiaMidLong[] = {
7212     // zh and zh_Hant both have dialect names for the following in ICU 73
7213     { "ar_001",                 TEST_ULDN_LOCALE, u"现代标准阿拉伯语" },
7214     { "nl_BE",                  TEST_ULDN_LOCALE, u"弗拉芒语" },
7215     { "ro_MD",                  TEST_ULDN_LOCALE, u"摩尔多瓦语" },
7216     // zh has dialect names for the following in ICU 73
7217     { "en_AU",                  TEST_ULDN_LOCALE, u"澳大利亚英语" },
7218     { "en_CA",                  TEST_ULDN_LOCALE, u"加拿大英语" },
7219     { "en_GB",                  TEST_ULDN_LOCALE, u"英国英语" },
7220     { "en_US",                  TEST_ULDN_LOCALE, u"美国英语" },
7221     { "es_419",                 TEST_ULDN_LOCALE, u"拉丁美洲西班牙语" },
7222     { "es_ES",                  TEST_ULDN_LOCALE, u"欧洲西班牙语" },
7223     { "es_MX",                  TEST_ULDN_LOCALE, u"墨西哥西班牙语" },
7224     { "fr_CA",                  TEST_ULDN_LOCALE, u"加拿大法语" },
7225     { "fr_CH",                  TEST_ULDN_LOCALE, u"瑞士法语" },
7226 };
7227 
7228 static const UldnItem zh_Hant_DiaMidLong[] = {
7229     // zh and zh_Hant both have dialect names for the following in ICU 73
7230     { "ar_001",                 TEST_ULDN_LOCALE, u"現代標準阿拉伯文" },
7231     { "nl_BE",                  TEST_ULDN_LOCALE, u"法蘭德斯文" },
7232     { "ro_MD",                  TEST_ULDN_LOCALE, u"摩爾多瓦文" },
7233     // zh_Hant no dialect names for the following in ICU-73,
7234     // use standard name
7235     { "en_AU",                  TEST_ULDN_LOCALE, u"英文(澳洲)" },
7236     { "en_CA",                  TEST_ULDN_LOCALE, u"英文(加拿大)" },
7237     { "en_GB",                  TEST_ULDN_LOCALE, u"英文(英國)" },
7238     { "en_US",                  TEST_ULDN_LOCALE, u"英文(美國)" },
7239     { "es_419",                 TEST_ULDN_LOCALE, u"西班牙文(拉丁美洲)" },
7240     { "es_ES",                  TEST_ULDN_LOCALE, u"西班牙文(西班牙)" },
7241     { "es_MX",                  TEST_ULDN_LOCALE, u"西班牙文(墨西哥)" },
7242     { "fr_CA",                  TEST_ULDN_LOCALE, u"法文(加拿大)" },
7243     { "fr_CH",                  TEST_ULDN_LOCALE, u"法文(瑞士)" },
7244 };
7245 
7246 static const UldnLocAndOpts uldnLocAndOpts[] = {
7247     { "en", optStdMidLong,      en_StdMidLong,      UPRV_LENGTHOF(en_StdMidLong) },
7248     { "en", optStdMidShrt,      en_StdMidShrt,      UPRV_LENGTHOF(en_StdMidShrt) },
7249     { "en", optDiaMidLong,      en_DiaMidLong,      UPRV_LENGTHOF(en_DiaMidLong) },
7250     { "en", optDiaMidShrt,      en_DiaMidShrt,      UPRV_LENGTHOF(en_DiaMidShrt) },
7251     { "ro", optStdMidLong,      ro_StdMidLong,      UPRV_LENGTHOF(ro_StdMidLong) },
7252     { "yi", optStdMidLong,      yi_StdMidLong,      UPRV_LENGTHOF(yi_StdMidLong) },
7253     { "zh", optDiaMidLong,      zh_DiaMidLong,      UPRV_LENGTHOF(zh_DiaMidLong) },
7254     { "zh_Hant", optDiaMidLong, zh_Hant_DiaMidLong, UPRV_LENGTHOF(zh_Hant_DiaMidLong) },
7255     { NULL, NULL, NULL, 0 }
7256 };
7257 
/* Buffer capacities used by TestUldnNameVariants(). */
enum {
    kUNameBuf = 128, /* UChar buffer receiving a display name */
    kBNameBuf = 256  /* char buffers holding UTF-8 text for log output */
};
7259 
TestUldnNameVariants(void)7260 static void TestUldnNameVariants(void) {
7261     const UldnLocAndOpts * uloPtr;
7262     for (uloPtr = uldnLocAndOpts; uloPtr->displayLocale != NULL; uloPtr++) {
7263         UErrorCode status = U_ZERO_ERROR;
7264         ULocaleDisplayNames * uldn = uldn_openForContext(uloPtr->displayLocale, (UDisplayContext*)uloPtr->displayOptions, 3, &status);
7265         if (U_FAILURE(status)) {
7266             log_data_err("uldn_openForContext fails, displayLocale %s, contexts %03X %03X %03X: %s - Are you missing data?\n",
7267                     uloPtr->displayLocale, uloPtr->displayOptions[0], uloPtr->displayOptions[1], uloPtr->displayOptions[2],
7268                     u_errorName(status) );
7269             continue;
7270         }
7271         // API coverage: Expect to get back the dialect handling which is
7272         // the first item in the displayOptions test data.
7273         UDialectHandling dh = uldn_getDialectHandling(uldn);
7274         UDisplayContext dhContext = (UDisplayContext)dh;  // same numeric values
7275         if (dhContext != uloPtr->displayOptions[0]) {
7276             log_err("uldn_getDialectHandling()=%03X != expected UDisplayContext %03X\n",
7277                     dhContext, uloPtr->displayOptions[0]);
7278         }
7279         const UldnItem * itemPtr = uloPtr->testItems;
7280         int32_t itemCount = uloPtr->countItems;
7281         for (; itemCount-- > 0; itemPtr++) {
7282             UChar uget[kUNameBuf];
7283             int32_t ulenget, ulenexp;
7284             const char* typeString;
7285             status = U_ZERO_ERROR;
7286             switch (itemPtr->nameType) {
7287                 case TEST_ULDN_LOCALE:
7288                     ulenget = uldn_localeDisplayName(uldn, itemPtr->localeToName, uget, kUNameBuf, &status);
7289                     typeString = "uldn_localeDisplayName";
7290                     break;
7291                 case TEST_ULDN_LANGUAGE:
7292                     ulenget = uldn_languageDisplayName(uldn, itemPtr->localeToName, uget, kUNameBuf, &status);
7293                     typeString = "uldn_languageDisplayName";
7294                   break;
7295                 case TEST_ULDN_SCRIPT:
7296                     ulenget = uldn_scriptDisplayName(uldn, itemPtr->localeToName, uget, kUNameBuf, &status);
7297                     typeString = "uldn_scriptDisplayName";
7298                     break;
7299                 case TEST_ULDN_REGION:
7300                     ulenget = uldn_regionDisplayName(uldn, itemPtr->localeToName, uget, kUNameBuf, &status);
7301                     typeString = "uldn_regionDisplayName";
7302                     break;
7303                 case TEST_ULOC_LOCALE:
7304                     ulenget = uloc_getDisplayName(itemPtr->localeToName, uloPtr->displayLocale, uget, kUNameBuf, &status);
7305                     typeString = "uloc_getDisplayName";
7306                     break;
7307                 case TEST_ULOC_LANGUAGE:
7308                     ulenget = uloc_getDisplayLanguage(itemPtr->localeToName, uloPtr->displayLocale, uget, kUNameBuf, &status);
7309                     typeString = "uloc_getDisplayLanguage";
7310                     break;
7311                 case TEST_ULOC_SCRIPT:
7312                     ulenget = uloc_getDisplayScript(itemPtr->localeToName, uloPtr->displayLocale, uget, kUNameBuf, &status);
7313                     typeString = "uloc_getDisplayScript";
7314                     break;
7315                 case TEST_ULOC_REGION:
7316                     ulenget = uloc_getDisplayCountry(itemPtr->localeToName, uloPtr->displayLocale, uget, kUNameBuf, &status);
7317                     typeString = "uloc_getDisplayCountry";
7318                     break;
7319                 default:
7320                     continue;
7321             }
7322             if (U_FAILURE(status)) {
7323                 log_data_err("%s fails, displayLocale %s, contexts %03X %03X %03X, localeToName %s: %s\n",
7324                         typeString, uloPtr->displayLocale, uloPtr->displayOptions[0], uloPtr->displayOptions[1], uloPtr->displayOptions[2],
7325                         itemPtr->localeToName, u_errorName(status) );
7326                 continue;
7327             }
7328             ulenexp = u_strlen(itemPtr->expectResult);
7329             if (ulenget != ulenexp || u_strncmp(uget, itemPtr->expectResult, ulenexp) != 0) {
7330                 char bexp[kBNameBuf], bget[kBNameBuf];
7331                 u_strToUTF8(bexp, kBNameBuf, NULL, itemPtr->expectResult, ulenexp, &status);
7332                 u_strToUTF8(bget, kBNameBuf, NULL, uget, ulenget, &status);
7333                 log_data_err("%s fails, displayLocale %s, contexts %03X %03X %03X, localeToName %s:\n    expect %2d: %s\n    get    %2d: %s\n",
7334                         typeString, uloPtr->displayLocale, uloPtr->displayOptions[0], uloPtr->displayOptions[1], uloPtr->displayOptions[2],
7335                         itemPtr->localeToName, ulenexp, bexp, ulenget, bget );
7336             }
7337         }
7338 
7339         uldn_close(uldn);
7340     }
7341 }
7342 #endif
7343 
TestUsingDefaultWarning(void)7344 static void TestUsingDefaultWarning(void) {
7345     UChar buff[256];
7346     char errorOutputBuff[256];
7347     UErrorCode status = U_ZERO_ERROR;
7348     const char* language = "jJj";
7349     int32_t length = uloc_getDisplayLanguage(language, "de", buff, 256, &status);
7350     if (status != U_USING_DEFAULT_WARNING ||
7351         u_strcmp(buff, u"jjj") != 0 ||
7352         length != 3) {
7353         u_UCharsToChars(buff, errorOutputBuff, length+1);
7354         log_err("ERROR: in uloc_getDisplayLanguage %s return len:%d %s with status %d %s\n",
7355                 language, length, errorOutputBuff, status, myErrorName(status));
7356     }
7357 
7358     status = U_ZERO_ERROR;
7359     const char* script = "und-lALA";
7360     length = uloc_getDisplayScript(script, "de", buff, 256, &status);
7361     if (status != U_USING_DEFAULT_WARNING ||
7362         u_strcmp(buff, u"Lala") != 0 ||
7363         length != 4) {
7364         u_UCharsToChars(buff, errorOutputBuff, length+1);
7365         log_err("ERROR: in uloc_getDisplayScript %s return len:%d %s with status %d %s\n",
7366                 script, length, errorOutputBuff, status, myErrorName(status));
7367     }
7368 
7369     status = U_ZERO_ERROR;
7370     const char* region = "und-wt";
7371     length = uloc_getDisplayCountry(region, "de", buff, 256, &status);
7372     if (status != U_USING_DEFAULT_WARNING ||
7373         u_strcmp(buff, u"WT") != 0 ||
7374         length != 2) {
7375         u_UCharsToChars(buff, errorOutputBuff, length+1);
7376         log_err("ERROR: in uloc_getDisplayCountry %s return len:%d %s with status %d %s\n",
7377                 region, length, errorOutputBuff, status, myErrorName(status));
7378     }
7379 
7380     status = U_ZERO_ERROR;
7381     const char* variant = "und-abcde";
7382     length = uloc_getDisplayVariant(variant, "de", buff, 256, &status);
7383     if (status != U_USING_DEFAULT_WARNING ||
7384         u_strcmp(buff, u"ABCDE") != 0 ||
7385         length != 5) {
7386         u_UCharsToChars(buff, errorOutputBuff, length+1);
7387         log_err("ERROR: in uloc_getDisplayVariant %s return len:%d %s with status %d %s\n",
7388                 variant, length, errorOutputBuff, status, myErrorName(status));
7389     }
7390 
7391     status = U_ZERO_ERROR;
7392     const char* keyword = "postCODE";
7393     length = uloc_getDisplayKeyword(keyword, "de", buff, 256, &status);
7394     if (status != U_USING_DEFAULT_WARNING ||
7395         u_strcmp(buff, u"postCODE") != 0 ||
7396         length != 8) {
7397         u_UCharsToChars(buff, errorOutputBuff, length+1);
7398         log_err("ERROR: in uloc_getDisplayKeyword %s return len:%d %s with status %d %s\n",
7399                 keyword, length, errorOutputBuff, status, myErrorName(status));
7400     }
7401 
7402     status = U_ZERO_ERROR;
7403     const char* keyword_value = "de_DE@postCode=fOObAR";
7404     length = uloc_getDisplayKeywordValue(keyword_value, keyword, "de", buff, 256, &status);
7405     if (status != U_USING_DEFAULT_WARNING ||
7406         u_strcmp(buff, u"fOObAR") != 0 ||
7407         length != 6) {
7408         u_UCharsToChars(buff, errorOutputBuff, length+1);
7409         log_err("ERROR: in uloc_getDisplayKeywordValue %s %s return len:%d %s with status %d %s\n",
7410                 keyword_value, keyword, length, errorOutputBuff, status, myErrorName(status));
7411       }
7412 }
7413 
// Test case for ICU-20575
// This test checks if the environment variable LANG is set,
// and if so ensures that both C and C.UTF-8 cause ICU's default locale to be en_US_POSIX.
// The test is skipped when LANG is unset or when LC_ALL is set.
static void TestCDefaultLocale(void) {
    const char *defaultLocale = uloc_getDefault();
    char *env_var = getenv("LANG");
    if (env_var == NULL) {
      // Messages are newline-terminated like the other log output in this file.
      log_verbose("Skipping TestCDefaultLocale test, as the LANG variable is not set.\n");
      return;
    }
    if (getenv("LC_ALL") != NULL) {
      log_verbose("Skipping TestCDefaultLocale test, as the LC_ALL variable is set.\n");
      return;
    }
    if ((strcmp(env_var, "C") == 0 || strcmp(env_var, "C.UTF-8") == 0) && strcmp(defaultLocale, "en_US_POSIX") != 0) {
      log_err("The default locale for LANG=%s should be en_US_POSIX, not %s\n", env_var, defaultLocale);
    }
}
7432 
7433 // Test case for ICU-21449
TestBug21449InfiniteLoop(void)7434 static void TestBug21449InfiniteLoop(void) {
7435     UErrorCode status = U_ZERO_ERROR;
7436     const char* invalidLocaleId = RES_PATH_SEPARATOR_S;
7437 
7438     // The issue causes an infinite loop to occur when looking up a non-existent resource for the invalid locale ID,
7439     // so the test is considered passed if the call to the API below returns anything at all.
7440     uloc_getDisplayLanguage(invalidLocaleId, invalidLocaleId, NULL, 0, &status);
7441 }
7442 
7443 // rdar://79296849 and https://unicode-org.atlassian.net/browse/ICU-21639
TestExcessivelyLongIDs(void)7444 static void TestExcessivelyLongIDs(void) {
7445     const char* reallyLongID =
7446         "de-u-cu-eur-em-default-hc-h23-ks-level1-lb-strict-lw-normal-ms-metric"
7447         "-nu-latn-rg-atzzzz-sd-atat1-ss-none-tz-atvie-va-posix";
7448     char minimizedID[ULOC_FULLNAME_CAPACITY];
7449     char maximizedID[ULOC_FULLNAME_CAPACITY];
7450     int32_t actualMinimizedLength = 0;
7451     int32_t actualMaximizedLength = 0;
7452     UErrorCode err = U_ZERO_ERROR;
7453 
7454     actualMinimizedLength = uloc_minimizeSubtags(reallyLongID, minimizedID, ULOC_FULLNAME_CAPACITY, &err);
7455     assertTrue("uloc_minimizeSubtags() with too-small buffer didn't fail as expected",
7456             U_FAILURE(err) && actualMinimizedLength > ULOC_FULLNAME_CAPACITY);
7457 
7458     err = U_ZERO_ERROR;
7459     actualMaximizedLength = uloc_addLikelySubtags(reallyLongID, maximizedID, ULOC_FULLNAME_CAPACITY, &err);
7460     assertTrue("uloc_addLikelySubtags() with too-small buffer didn't fail as expected",
7461             U_FAILURE(err) && actualMaximizedLength > ULOC_FULLNAME_CAPACITY);
7462 
7463     err = U_ZERO_ERROR;
7464     char* realMinimizedID = (char*)uprv_malloc(actualMinimizedLength + 1);
7465     uloc_minimizeSubtags(reallyLongID, realMinimizedID, actualMinimizedLength + 1, &err);
7466     if (assertSuccess("uloc_minimizeSubtags() failed", &err)) {
7467         assertEquals("Wrong result from uloc_minimizeSubtags()",
7468                      "de__POSIX@colstrength=primary;currency=eur;em=default;hours=h23;lb=strict;"
7469                          "lw=normal;measure=metric;numbers=latn;rg=atzzzz;sd=atat1;ss=none;timezone=Europe/Vienna",
7470                      realMinimizedID);
7471     }
7472     uprv_free(realMinimizedID);
7473 
7474     char* realMaximizedID = (char*)uprv_malloc(actualMaximizedLength + 1);
7475     uloc_addLikelySubtags(reallyLongID, realMaximizedID, actualMaximizedLength + 1, &err);
7476     if (assertSuccess("uloc_addLikelySubtags() failed", &err)) {
7477         assertEquals("Wrong result from uloc_addLikelySubtags()",
7478                      "de_Latn_DE_POSIX@colstrength=primary;currency=eur;em=default;hours=h23;lb=strict;"
7479                          "lw=normal;measure=metric;numbers=latn;rg=atzzzz;sd=atat1;ss=none;timezone=Europe/Vienna",
7480                      realMaximizedID);
7481     }
7482     uprv_free(realMaximizedID);
7483 }
7484