// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include <memory>
5 
6 #include "cmemory.h"
7 #include "cstring.h"
8 #include "localebuildertest.h"
9 #include "unicode/localebuilder.h"
10 #include "unicode/strenum.h"
11 
LocaleBuilderTest()12 LocaleBuilderTest::LocaleBuilderTest()
13 {
14 }
15 
~LocaleBuilderTest()16 LocaleBuilderTest::~LocaleBuilderTest()
17 {
18 }
19 
// Test-framework entry point that registers every test method of this suite.
//
// The body consists solely of TESTCASE_AUTO* macros, which are defined outside
// this file and consume the parameters of this function.
// NOTE(review): presumably each TESTCASE_AUTO entry binds the next index value
// to the named test method, so the order of the entries matters - confirm
// against the macro definitions before reordering.
//
// The entries are kept in alphabetical order of the test method names.
void LocaleBuilderTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
{
    TESTCASE_AUTO_BEGIN;
    TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttribute);
    TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttributeWellFormed);
    TESTCASE_AUTO(TestAddUnicodeLocaleAttributeIllFormed);
    TESTCASE_AUTO(TestLocaleBuilder);
    TESTCASE_AUTO(TestLocaleBuilderBasic);
    TESTCASE_AUTO(TestLocaleBuilderBasicWithExtensionsOnDefaultLocale);
    TESTCASE_AUTO(TestPosixCases);
    TESTCASE_AUTO(TestSetExtensionOthers);
    TESTCASE_AUTO(TestSetExtensionPU);
    TESTCASE_AUTO(TestSetExtensionT);
    TESTCASE_AUTO(TestSetExtensionU);
    TESTCASE_AUTO(TestSetExtensionValidateOthersIllFormed);
    TESTCASE_AUTO(TestSetExtensionValidateOthersWellFormed);
    TESTCASE_AUTO(TestSetExtensionValidatePUIllFormed);
    TESTCASE_AUTO(TestSetExtensionValidatePUWellFormed);
    TESTCASE_AUTO(TestSetExtensionValidateTIllFormed);
    TESTCASE_AUTO(TestSetExtensionValidateTWellFormed);
    TESTCASE_AUTO(TestSetExtensionValidateUIllFormed);
    TESTCASE_AUTO(TestSetExtensionValidateUWellFormed);
    TESTCASE_AUTO(TestSetLanguageIllFormed);
    TESTCASE_AUTO(TestSetLanguageWellFormed);
    TESTCASE_AUTO(TestSetLocale);
    TESTCASE_AUTO(TestSetRegionIllFormed);
    TESTCASE_AUTO(TestSetRegionWellFormed);
    TESTCASE_AUTO(TestSetScriptIllFormed);
    TESTCASE_AUTO(TestSetScriptWellFormed);
    TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedKey);
    TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedValue);
    TESTCASE_AUTO(TestSetUnicodeLocaleKeywordWellFormed);
    TESTCASE_AUTO(TestSetVariantIllFormed);
    TESTCASE_AUTO(TestSetVariantWellFormed);
    TESTCASE_AUTO_END;
}
56 
Verify(LocaleBuilder & bld,const char * expected,const char * msg)57 void LocaleBuilderTest::Verify(LocaleBuilder& bld, const char* expected, const char* msg) {
58     UErrorCode status = U_ZERO_ERROR;
59     UErrorCode copyStatus = U_ZERO_ERROR;
60     UErrorCode errorStatus = U_ILLEGAL_ARGUMENT_ERROR;
61     if (bld.copyErrorTo(copyStatus)) {
62         errln(msg, u_errorName(copyStatus));
63     }
64     if (!bld.copyErrorTo(errorStatus) || errorStatus != U_ILLEGAL_ARGUMENT_ERROR) {
65         errln("Should always get the previous error and return false");
66     }
67     Locale loc = bld.build(status);
68     if (U_FAILURE(status)) {
69         errln(msg, u_errorName(status));
70     }
71     if (status != copyStatus) {
72         errln(msg, u_errorName(status));
73     }
74     std::string tag = loc.toLanguageTag<std::string>(status);
75     if (U_FAILURE(status)) {
76         errln("loc.toLanguageTag() got Error: %s\n",
77               u_errorName(status));
78     }
79     if (tag != expected) {
80         errln("should get \"%s\", but got \"%s\"\n", expected, tag.c_str());
81     }
82 }
83 
TestLocaleBuilder()84 void LocaleBuilderTest::TestLocaleBuilder() {
85     // The following test data are copy from
86     // icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
87     // "L": +1 = language
88     // "S": +1 = script
89     // "R": +1 = region
90     // "V": +1 = variant
91     // "K": +1 = Unicode locale key / +2 = Unicode locale type
92     // "A": +1 = Unicode locale attribute
93     // "E": +1 = extension letter / +2 = extension value
94     // "P": +1 = private use
95     // "U": +1 = ULocale
96     // "B": +1 = BCP47 language tag
97     // "C": Clear all
98     // "N": Clear extensions
99     // "D": +1 = Unicode locale attribute to be removed
100     // "X": indicates an exception must be thrown
101     // "T": +1 = expected language tag / +2 = expected locale string
102     const char* TESTCASES[][14] = {
103         {"L", "en", "R", "us", "T", "en-US", "en_US"},
104         {"L", "en", "R", "CA", "L", nullptr, "T", "und-CA", "_CA"},
105         {"L", "en", "R", "CA", "L", "", "T", "und-CA", "_CA"},
106         {"L", "en", "R", "FR", "L", "fr", "T", "fr-FR", "fr_FR"},
107         {"L", "123", "X"},
108         {"R", "us", "T", "und-US", "_US"},
109         {"R", "usa", "X"},
110         {"R", "123", "L", "it", "R", nullptr, "T", "it", "it"},
111         {"R", "123", "L", "it", "R", "", "T", "it", "it"},
112         {"R", "123", "L", "en", "T", "en-123", "en_123"},
113         {"S", "LATN", "L", "DE", "T", "de-Latn", "de_Latn"},
114         {"L", "De", "S", "latn", "R", "de", "S", "", "T", "de-DE", "de_DE"},
115         {"L", "De", "S", "Arab", "R", "de", "S", nullptr, "T", "de-DE", "de_DE"},
116         {"S", "latin", "X"},
117         {"V", "1234", "L", "en", "T", "en-1234", "en__1234"},
118         {"V", "1234", "L", "en", "V", "5678", "T", "en-5678", "en__5678"},
119         {"V", "1234", "L", "en", "V", nullptr, "T", "en", "en"},
120         {"V", "1234", "L", "en", "V", "", "T", "en", "en"},
121         {"V", "123", "X"},
122         {"U", "en_US", "T", "en-US", "en_US"},
123         {"U", "en_US_WIN", "X"},
124         {"B", "fr-FR-1606nict-u-ca-gregory-x-test", "T",
125           "fr-FR-1606nict-u-ca-gregory-x-test",
126           "fr_FR_1606NICT@calendar=gregorian;x=test"},
127         {"B", "ab-cde-fghij", "T", "cde-fghij", "cde__FGHIJ"},
128         {"B", "und-CA", "T", "und-CA", "_CA"},
129         // Blocked by ICU-20327
130         // {"B", "en-US-x-test-lvariant-var", "T", "en-US-x-test-lvariant-var",
131         // "en_US_VAR@x=test"},
132         {"B", "en-US-VAR", "X"},
133         {"U", "ja_JP@calendar=japanese;currency=JPY", "L", "ko", "T",
134           "ko-JP-u-ca-japanese-cu-jpy", "ko_JP@calendar=japanese;currency=JPY"},
135         {"U", "ja_JP@calendar=japanese;currency=JPY", "K", "ca", nullptr, "T",
136           "ja-JP-u-cu-jpy", "ja_JP@currency=JPY"},
137         {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u",
138           "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory",
139           "ja_JP@attribute=attr1;calendar=gregorian"},
140         {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn",
141           "en@colnumeric=yes"},
142         {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai",
143           "th_TH@numbers=thai"},
144         {"U", "zh_Hans", "R", "sg", "K", "ca", "badcalendar", "X"},
145         {"U", "zh_Hans", "R", "sg", "K", "cal", "gregory", "X"},
146         {"E", "z", "ExtZ", "L", "en", "T", "en-z-extz", "en@z=extz"},
147         {"E", "z", "ExtZ", "L", "en", "E", "z", "", "T", "en", "en"},
148         {"E", "z", "ExtZ", "L", "en", "E", "z", nullptr, "T", "en", "en"},
149         {"E", "a", "x", "X"},
150         {"E", "a", "abc_def", "T", "und-a-abc-def", "@a=abc-def"},
151         // Design limitation - typeless u extension keyword 0a below is interpreted as a boolean value true/yes.
152         // With the legacy keyword syntax, "yes" is used for such boolean value instead of "true".
153         // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
154         // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
155         // key = alphanum alpha
156         {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a",
157          "en@0a=yes;attribute=aaa-bbb"},
158         {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu",
159           "fr_FR@x=yoshito-icu"},
160         {"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese",
161           "ja_JP@calendar=japanese"},
162         {"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T",
163           "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"},
164         {"E", "o", "OPQR", "E", "a", "aBcD", "T", "und-a-abcd-o-opqr", "@a=abcd;o=opqr"},
165         {"E", "u", "nu-thai-ca-gregory", "L", "TH", "T", "th-u-ca-gregory-nu-thai",
166           "th@calendar=gregorian;numbers=thai"},
167         {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc",
168           "en_US@timezone=America/New_York"},
169         {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk",
170           "true", "T", "de-u-co-phonebk-kk-ks-level1",
171           "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
172         {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory",
173           "en_US@calendar=gregorian"},
174         {"L", "en", "R", "US", "K", "cal", "gregory", "X"},
175         {"L", "en", "R", "US", "K", "ca", "gregorian", "X"},
176         {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn",
177           "en_US@colnumeric=yes"},
178         {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"},
179         {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"},
180         {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "T",
181           "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
182         {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "D", "def", "T",
183           "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
184         {"L", "en", "A", "aa", "X"},
185         {"B", "fr-u-attr1-cu-eur", "D", "attribute1", "X"},
186     };
187     UErrorCode status = U_ZERO_ERROR;
188     LocaleBuilder bld;
189     for (int tidx = 0; tidx < UPRV_LENGTHOF(TESTCASES); tidx++) {
190         const char* (&testCase)[14] = TESTCASES[tidx];
191         std::string actions;
192         for (int p = 0; p < UPRV_LENGTHOF(testCase); p++) {
193              if (testCase[p] == nullptr) {
194                  actions += " (nullptr)";
195                  break;
196              }
197              if (p > 0) actions += " ";
198              actions += testCase[p];
199         }
200         int i = 0;
201         const char* method;
202         status = U_ZERO_ERROR;
203         bld.clear();
204         while (true) {
205             status = U_ZERO_ERROR;
206             UErrorCode copyStatus = U_ZERO_ERROR;
207             method = testCase[i++];
208             if (strcmp("L", method) == 0) {
209                 bld.setLanguage(testCase[i++]);
210                 bld.copyErrorTo(copyStatus);
211                 bld.build(status);
212             } else if (strcmp("S", method) == 0) {
213                 bld.setScript(testCase[i++]);
214                 bld.copyErrorTo(copyStatus);
215                 bld.build(status);
216             } else if (strcmp("R", method) == 0) {
217                 bld.setRegion(testCase[i++]);
218                 bld.copyErrorTo(copyStatus);
219                 bld.build(status);
220             } else if (strcmp("V", method) == 0) {
221                 bld.setVariant(testCase[i++]);
222                 bld.copyErrorTo(copyStatus);
223                 bld.build(status);
224             } else if (strcmp("K", method) == 0) {
225                 const char* key = testCase[i++];
226                 const char* type = testCase[i++];
227                 bld.setUnicodeLocaleKeyword(key, type);
228                 bld.copyErrorTo(copyStatus);
229                 bld.build(status);
230             } else if (strcmp("A", method) == 0) {
231                 bld.addUnicodeLocaleAttribute(testCase[i++]);
232                 bld.copyErrorTo(copyStatus);
233                 bld.build(status);
234             } else if (strcmp("E", method) == 0) {
235                 const char* key = testCase[i++];
236                 const char* value = testCase[i++];
237                 bld.setExtension(key[0], value);
238                 bld.copyErrorTo(copyStatus);
239                 bld.build(status);
240             } else if (strcmp("P", method) == 0) {
241                 bld.setExtension('x', testCase[i++]);
242                 bld.copyErrorTo(copyStatus);
243                 bld.build(status);
244             } else if (strcmp("U", method) == 0) {
245                 bld.setLocale(Locale(testCase[i++]));
246                 bld.copyErrorTo(copyStatus);
247                 bld.build(status);
248             } else if (strcmp("B", method) == 0) {
249                 bld.setLanguageTag(testCase[i++]);
250                 bld.copyErrorTo(copyStatus);
251                 bld.build(status);
252             }
253             // clear / remove
254             else if (strcmp("C", method) == 0) {
255                 bld.clear();
256                 bld.copyErrorTo(copyStatus);
257                 bld.build(status);
258             } else if (strcmp("N", method) == 0) {
259                 bld.clearExtensions();
260                 bld.copyErrorTo(copyStatus);
261                 bld.build(status);
262             } else if (strcmp("D", method) == 0) {
263                 bld.removeUnicodeLocaleAttribute(testCase[i++]);
264                 bld.copyErrorTo(copyStatus);
265                 bld.build(status);
266             }
267             // result
268             else if (strcmp("X", method) == 0) {
269                 if (U_SUCCESS(status)) {
270                     errln("FAIL: No error return - test case: %s", actions.c_str());
271                 }
272             } else if (strcmp("T", method) == 0) {
273                 status = U_ZERO_ERROR;
274                 Locale loc = bld.build(status);
275                 if (status != copyStatus) {
276                     errln("copyErrorTo not matching");
277                 }
278                 if (U_FAILURE(status) ||
279                     strcmp(loc.getName(), testCase[i + 1]) != 0) {
280                     errln("FAIL: Wrong locale ID - %s %s %s", loc.getName(),
281                             " for test case: ", actions.c_str());
282                 }
283                 std::string langtag = loc.toLanguageTag<std::string>(status);
284                 if (U_FAILURE(status) || langtag != testCase[i]) {
285                     errln("FAIL: Wrong language tag - %s %s %s", langtag.c_str(),
286                             " for test case: ", actions.c_str());
287                 }
288                 break;
289             } else {
290                 // Unknown test method
291                 errln("Unknown test case method: There is an error in the test case data.");
292                 break;
293             }
294             if (status != copyStatus) {
295                 errln("copyErrorTo not matching");
296             }
297             if (U_FAILURE(status)) {
298                 if (strcmp("X", testCase[i]) == 0) {
299                     // This failure is expected
300                     break;
301                 } else {
302                     errln("FAIL: U_ILLEGAL_ARGUMENT_ERROR at offset %d %s %s", i,
303                           " in test case: ", actions.c_str());
304                     break;
305                 }
306             }
307             if (strcmp("T", method) == 0) {
308                 break;
309             }
310         }  // while(true)
311     }  // for TESTCASES
312 }
313 
TestLocaleBuilderBasic()314 void LocaleBuilderTest::TestLocaleBuilderBasic() {
315     LocaleBuilder bld;
316     bld.setLanguage("zh");
317     Verify(bld, "zh", "setLanguage('zh') got Error: %s\n");
318 
319     bld.setScript("Hant");
320     Verify(bld, "zh-Hant", "setScript('Hant') got Error: %s\n");
321 
322     bld.setRegion("SG");
323     Verify(bld, "zh-Hant-SG", "setRegion('SG') got Error: %s\n");
324 
325     bld.setRegion("HK");
326     bld.setScript("Hans");
327     Verify(bld, "zh-Hans-HK",
328            "setRegion('HK') and setScript('Hans') got Error: %s\n");
329 
330     bld.setVariant("revised");
331     Verify(bld, "zh-Hans-HK-revised",
332            "setVariant('revised') got Error: %s\n");
333 
334     bld.setUnicodeLocaleKeyword("nu", "thai");
335     Verify(bld, "zh-Hans-HK-revised-u-nu-thai",
336            "setUnicodeLocaleKeyword('nu', 'thai'') got Error: %s\n");
337 
338     bld.setUnicodeLocaleKeyword("co", "pinyin");
339     Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-thai",
340            "setUnicodeLocaleKeyword('co', 'pinyin'') got Error: %s\n");
341 
342     bld.setUnicodeLocaleKeyword("nu", "latn");
343     Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-latn",
344            "setUnicodeLocaleKeyword('nu', 'latn'') got Error: %s\n");
345 
346     bld.setUnicodeLocaleKeyword("nu", nullptr);
347     Verify(bld, "zh-Hans-HK-revised-u-co-pinyin",
348            "setUnicodeLocaleKeyword('nu', ''') got Error: %s\n");
349 
350     bld.setUnicodeLocaleKeyword("co", nullptr);
351     Verify(bld, "zh-Hans-HK-revised",
352            "setUnicodeLocaleKeyword('nu', nullptr) got Error: %s\n");
353 
354     bld.setScript("");
355     Verify(bld, "zh-HK-revised",
356            "setScript('') got Error: %s\n");
357 
358     bld.setVariant("");
359     Verify(bld, "zh-HK",
360            "setVariant('') got Error: %s\n");
361 
362     bld.setRegion("");
363     Verify(bld, "zh",
364            "setRegion('') got Error: %s\n");
365 }
366 
TestLocaleBuilderBasicWithExtensionsOnDefaultLocale()367 void LocaleBuilderTest::TestLocaleBuilderBasicWithExtensionsOnDefaultLocale() {
368     // Change the default locale to one with extension tags.
369     UErrorCode status = U_ZERO_ERROR;
370     Locale originalDefault;
371     Locale::setDefault(Locale::createFromName("en-US-u-hc-h12"), status);
372     if (U_FAILURE(status)) {
373         errln("ERROR: Could not change the default locale");
374         return;
375     }
376 
377     // Invoke the basic test now that the default locale has been changed.
378     TestLocaleBuilderBasic();
379 
380     Locale::setDefault(originalDefault, status);
381     if (U_FAILURE(status)) {
382         errln("ERROR: Could not restore the default locale");
383     }
384 }
385 
TestSetLanguageWellFormed()386 void LocaleBuilderTest::TestSetLanguageWellFormed() {
387     // http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag
388     // unicode_language_subtag = alpha{2,3} | alpha{5,8};
389     // ICUTC decided also support alpha{4}
390     static const char* wellFormedLanguages[] = {
391         "",
392 
393         // alpha{2}
394         "en",
395         "NE",
396         "eN",
397         "Ne",
398 
399         // alpha{3}
400         "aNe",
401         "zzz",
402         "AAA",
403 
404         // alpha{4}
405         "ABCD",
406         "abcd",
407 
408         // alpha{5}
409         "efgij",
410         "AbCAD",
411         "ZAASD",
412 
413         // alpha{6}
414         "efgijk",
415         "AADGFE",
416         "AkDfFz",
417 
418         // alpha{7}
419         "asdfads",
420         "ADSFADF",
421         "piSFkDk",
422 
423         // alpha{8}
424         "oieradfz",
425         "IADSFJKR",
426         "kkDSFJkR",
427     };
428     for (const char* lang : wellFormedLanguages) {
429         UErrorCode status = U_ZERO_ERROR;
430         LocaleBuilder bld;
431         bld.setLanguage(lang);
432         Locale loc = bld.build(status);
433         if (U_FAILURE(status)) {
434             errln("setLanguage(\"%s\") got Error: %s\n",
435                   lang, u_errorName(status));
436         }
437     }
438 }
439 
TestSetLanguageIllFormed()440 void LocaleBuilderTest::TestSetLanguageIllFormed() {
441     static const char* illFormed[] = {
442         "a",
443         "z",
444         "A",
445         "F",
446         "2",
447         "0",
448         "9",
449         "{",
450         ".",
451         "[",
452         "]",
453         "\\",
454 
455         "e1",
456         "N2",
457         "3N",
458         "4e",
459         "e:",
460         "43",
461         "a9",
462 
463         "aN0",
464         "z1z",
465         "2zz",
466         "3A3",
467         "456",
468         "af)",
469 
470         // Per 2019-01-23 ICUTC, we still accept 4alpha as tlang. see ICU-20321.
471         // "latn",
472         // "Arab",
473         // "LATN",
474 
475         "e)gij",
476         "Ab3AD",
477         "ZAAS8",
478 
479         "efgi[]",
480         "AA9GFE",
481         "7kD3Fz",
482         "as8fads",
483         "0DSFADF",
484         "'iSFkDk",
485 
486         "oieradf+",
487         "IADSFJK-",
488         "kkDSFJk0",
489 
490         // alpha{9}
491         "oieradfab",
492         "IADSFJKDE",
493         "kkDSFJkzf",
494     };
495     for (const char* ill : illFormed) {
496         UErrorCode status = U_ZERO_ERROR;
497         LocaleBuilder bld;
498         bld.setLanguage(ill);
499         Locale loc = bld.build(status);
500         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
501             errln("setLanguage(\"%s\") should fail but has no Error\n", ill);
502         }
503     }
504 }
505 
TestSetScriptWellFormed()506 void LocaleBuilderTest::TestSetScriptWellFormed() {
507     // http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag
508     // unicode_script_subtag = alpha{4} ;
509     static const char* wellFormedScripts[] = {
510         "",
511 
512         "Latn",
513         "latn",
514         "lATN",
515         "laTN",
516         "arBN",
517         "ARbn",
518         "adsf",
519         "aADF",
520         "BSVS",
521         "LATn",
522     };
523     for (const char* script : wellFormedScripts) {
524         UErrorCode status = U_ZERO_ERROR;
525         LocaleBuilder bld;
526         bld.setScript(script);
527         Locale loc = bld.build(status);
528         if (U_FAILURE(status)) {
529             errln("setScript(\"%s\") got Error: %s\n",
530                   script, u_errorName(status));
531         }
532     }
533 }
534 
TestSetScriptIllFormed()535 void LocaleBuilderTest::TestSetScriptIllFormed() {
536     static const char* illFormed[] = {
537         "a",
538         "z",
539         "A",
540         "F",
541         "2",
542         "0",
543         "9",
544         "{",
545         ".",
546         "[",
547         "]",
548         "\\",
549 
550         "e1",
551         "N2",
552         "3N",
553         "4e",
554         "e:",
555         "43",
556         "a9",
557 
558         "aN0",
559         "z1z",
560         "2zz",
561         "3A3",
562         "456",
563         "af)",
564 
565         "0atn",
566         "l1tn",
567         "lA2N",
568         "la4N",
569         "arB5",
570         "1234",
571 
572         "e)gij",
573         "Ab3AD",
574         "ZAAS8",
575 
576         "efgi[]",
577         "AA9GFE",
578         "7kD3Fz",
579 
580         "as8fads",
581         "0DSFADF",
582         "'iSFkDk",
583 
584         "oieradf+",
585         "IADSFJK-",
586         "kkDSFJk0",
587 
588         // alpha{9}
589         "oieradfab",
590         "IADSFJKDE",
591         "kkDSFJkzf",
592     };
593     for (const char* ill : illFormed) {
594         UErrorCode status = U_ZERO_ERROR;
595         LocaleBuilder bld;
596         bld.setScript(ill);
597         Locale loc = bld.build(status);
598         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
599             errln("setScript(\"%s\") should fail but has no Error\n", ill);
600         }
601     }
602 }
603 
TestSetRegionWellFormed()604 void LocaleBuilderTest::TestSetRegionWellFormed() {
605     // http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag
606     // unicode_region_subtag = (alpha{2} | digit{3})
607     static const char* wellFormedRegions[] = {
608         "",
609 
610         // alpha{2}
611         "en",
612         "NE",
613         "eN",
614         "Ne",
615 
616         // digit{3}
617         "000",
618         "999",
619         "123",
620         "987"
621     };
622     for (const char* region : wellFormedRegions) {
623         UErrorCode status = U_ZERO_ERROR;
624         LocaleBuilder bld;
625         bld.setRegion(region);
626         Locale loc = bld.build(status);
627         if (U_FAILURE(status)) {
628             errln("setRegion(\"%s\") got Error: %s\n",
629                   region, u_errorName(status));
630         }
631     }
632 }
633 
TestSetRegionIllFormed()634 void LocaleBuilderTest::TestSetRegionIllFormed() {
635     static const char* illFormed[] = {
636         "a",
637         "z",
638         "A",
639         "F",
640         "2",
641         "0",
642         "9",
643         "{",
644         ".",
645         "[",
646         "]",
647         "\\",
648 
649         "e1",
650         "N2",
651         "3N",
652         "4e",
653         "e:",
654         "43",
655         "a9",
656 
657         "aN0",
658         "z1z",
659         "2zz",
660         "3A3",
661         "4.6",
662         "af)",
663 
664         "0atn",
665         "l1tn",
666         "lA2N",
667         "la4N",
668         "arB5",
669         "1234",
670 
671         "e)gij",
672         "Ab3AD",
673         "ZAAS8",
674 
675         "efgi[]",
676         "AA9GFE",
677         "7kD3Fz",
678 
679         "as8fads",
680         "0DSFADF",
681         "'iSFkDk",
682 
683         "oieradf+",
684         "IADSFJK-",
685         "kkDSFJk0",
686 
687         // alpha{9}
688         "oieradfab",
689         "IADSFJKDE",
690         "kkDSFJkzf",
691     };
692     for (const char* ill : illFormed) {
693         UErrorCode status = U_ZERO_ERROR;
694         LocaleBuilder bld;
695         bld.setRegion(ill);
696         Locale loc = bld.build(status);
697         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
698             errln("setRegion(\"%s\") should fail but has no Error\n", ill);
699         }
700     }
701 }
702 
TestSetVariantWellFormed()703 void LocaleBuilderTest::TestSetVariantWellFormed() {
704     // http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag
705     // (sep unicode_variant_subtag)*
706     // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
707     static const char* wellFormedVariants[] = {
708         "",
709 
710         // alphanum{5}
711         "efgij",
712         "AbCAD",
713         "ZAASD",
714         "0AASD",
715         "A1CAD",
716         "ef2ij",
717         "ads3X",
718         "owqF4",
719 
720         // alphanum{6}
721         "efgijk",
722         "AADGFE",
723         "AkDfFz",
724         "0ADGFE",
725         "A9DfFz",
726         "AADG7E",
727 
728         // alphanum{7}
729         "asdfads",
730         "ADSFADF",
731         "piSFkDk",
732         "a0dfads",
733         "ADSF3DF",
734         "piSFkD9",
735 
736         // alphanum{8}
737         "oieradfz",
738         "IADSFJKR",
739         "kkDSFJkR",
740         "0ADSFJKR",
741         "12345679",
742 
743         // digit alphanum{3}
744         "0123",
745         "1abc",
746         "20EF",
747         "30EF",
748         "8A03",
749         "3Ax3",
750         "9Axy",
751 
752         // (sep unicode_variant_subtag)*
753         "0123-4567",
754         "0ab3-ABCDE",
755         "9ax3-xByD9",
756         "9ax3-xByD9-adfk934a",
757 
758         "0123_4567",
759         "0ab3_ABCDE",
760         "9ax3_xByD9",
761         "9ax3_xByD9_adfk934a",
762 
763         "9ax3-xByD9_adfk934a",
764         "9ax3_xByD9-adfk934a",
765     };
766     for (const char* variant : wellFormedVariants) {
767         UErrorCode status = U_ZERO_ERROR;
768         LocaleBuilder bld;
769         bld.setVariant(variant);
770         Locale loc = bld.build(status);
771         if (U_FAILURE(status)) {
772             errln("setVariant(\"%s\") got Error: %s\n",
773                   variant, u_errorName(status));
774         }
775     }
776 }
777 
TestSetVariantIllFormed()778 void LocaleBuilderTest::TestSetVariantIllFormed() {
779     static const char* illFormed[] = {
780         "a",
781         "z",
782         "A",
783         "F",
784         "2",
785         "0",
786         "9",
787         "{",
788         ".",
789         "[",
790         "]",
791         "\\",
792 
793         "e1",
794         "N2",
795         "3N",
796         "4e",
797         "e:",
798         "43",
799         "a9",
800         "en",
801         "NE",
802         "eN",
803         "Ne",
804 
805         "aNe",
806         "zzz",
807         "AAA",
808         "aN0",
809         "z1z",
810         "2zz",
811         "3A3",
812         "4.6",
813         "af)",
814         "345",
815         "923",
816 
817         "Latn",
818         "latn",
819         "lATN",
820         "laTN",
821         "arBN",
822         "ARbn",
823         "adsf",
824         "aADF",
825         "BSVS",
826         "LATn",
827         "l1tn",
828         "lA2N",
829         "la4N",
830         "arB5",
831         "abc3",
832         "A3BC",
833 
834         "e)gij",
835         "A+3AD",
836         "ZAA=8",
837 
838         "efgi[]",
839         "AA9]FE",
840         "7k[3Fz",
841 
842         "as8f/ds",
843         "0DSFAD{",
844         "'iSFkDk",
845 
846         "oieradf+",
847         "IADSFJK-",
848         "k}DSFJk0",
849 
850         // alpha{9}
851         "oieradfab",
852         "IADSFJKDE",
853         "kkDSFJkzf",
854         "123456789",
855 
856         "-0123",
857         "-0123-4567",
858         "0123-4567-",
859         "-123-4567",
860         "_0123",
861         "_0123_4567",
862         "0123_4567_",
863         "_123_4567",
864 
865         "-abcde-figjk",
866         "abcde-figjk-",
867         "-abcde-figjk-",
868         "_abcde_figjk",
869         "abcde_figjk_",
870         "_abcde_figjk_",
871     };
872     for (const char* ill : illFormed) {
873         UErrorCode status = U_ZERO_ERROR;
874         LocaleBuilder bld;
875         bld.setVariant(ill);
876         Locale loc = bld.build(status);
877         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
878             errln("setVariant(\"%s\") should fail but has no Error\n", ill);
879         }
880     }
881 }
882 
TestSetUnicodeLocaleKeywordWellFormed()883 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordWellFormed() {
884     // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
885     // keyword = key (sep type)? ;
886     // key = alphanum alpha ;
887     // type = alphanum{3,8} (sep alphanum{3,8})* ;
888     static const char* wellFormed_key_value[] = {
889         "aa", "123",
890         "3b", "zyzbcdef",
891         "0Z", "1ZB30zk9-abc",
892         "cZ", "2ck30zfZ-adsf023-234kcZ",
893         "ZZ", "Lant",
894         "ko", "",
895     };
896     for (int i = 0; i < UPRV_LENGTHOF(wellFormed_key_value); i += 2) {
897         UErrorCode status = U_ZERO_ERROR;
898         LocaleBuilder bld;
899         bld.setUnicodeLocaleKeyword(wellFormed_key_value[i],
900                                     wellFormed_key_value[i + 1]);
901         Locale loc = bld.build(status);
902         if (U_FAILURE(status)) {
903             errln("setUnicodeLocaleKeyword(\"%s\", \"%s\") got Error: %s\n",
904                   wellFormed_key_value[i],
905                   wellFormed_key_value[i + 1],
906                   u_errorName(status));
907         }
908     }
909 }
910 
TestSetUnicodeLocaleKeywordIllFormedKey()911 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedKey() {
912     static const char* illFormed[] = {
913         "34",
914         "ab-cde",
915         "123",
916         "b3",
917         "zyzabcdef",
918         "Z0",
919     };
920     for (const char* ill : illFormed) {
921         UErrorCode status = U_ZERO_ERROR;
922         LocaleBuilder bld;
923         bld.setUnicodeLocaleKeyword(ill, "abc");
924         Locale loc = bld.build(status);
925         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
926             errln("setUnicodeLocaleKeyword(\"%s\", \"abc\") should fail but has no Error\n",
927                   ill);
928         }
929     }
930 }
931 
TestSetUnicodeLocaleKeywordIllFormedValue()932 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedValue() {
933     static const char* illFormed[] = {
934         "34",
935         "ab-",
936         "-cd",
937         "-ef-",
938         "zyzabcdef",
939         "ab-abc",
940         "1ZB30zfk9-abc",
941         "2ck30zfk9-adsf023-234kcZ",
942     };
943     for (const char* ill : illFormed) {
944         UErrorCode status = U_ZERO_ERROR;
945         LocaleBuilder bld;
946         bld.setUnicodeLocaleKeyword("ab", ill);
947         Locale loc = bld.build(status);
948         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
949             errln("setUnicodeLocaleKeyword(\"ab\", \"%s\") should fail but has no Error\n",
950                   ill);
951         }
952     }
953 }
954 
TestAddRemoveUnicodeLocaleAttribute()955 void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttribute() {
956     LocaleBuilder bld;
957     UErrorCode status = U_ZERO_ERROR;
958     Locale loc = bld.setLanguage("fr")
959                     .addUnicodeLocaleAttribute("abc")
960                     .addUnicodeLocaleAttribute("aBc")
961                     .addUnicodeLocaleAttribute("EFG")
962                     .addUnicodeLocaleAttribute("efghi")
963                     .addUnicodeLocaleAttribute("efgh")
964                     .addUnicodeLocaleAttribute("efGhi")
965                     .addUnicodeLocaleAttribute("EFg")
966                     .addUnicodeLocaleAttribute("hijk")
967                     .addUnicodeLocaleAttribute("EFG")
968                     .addUnicodeLocaleAttribute("HiJK")
969                     .addUnicodeLocaleAttribute("aBc")
970                     .build(status);
971     if (U_FAILURE(status)) {
972         errln("addUnicodeLocaleAttribute() got Error: %s\n",
973               u_errorName(status));
974     }
975     std::string expected("fr-u-abc-efg-efgh-efghi-hijk");
976     std::string actual = loc.toLanguageTag<std::string>(status);
977     if (U_FAILURE(status) || expected != actual) {
978         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
979     }
980 
981     // remove "efgh" in the middle with different casing.
982     loc = bld.removeUnicodeLocaleAttribute("eFgH").build(status);
983     if (U_FAILURE(status)) {
984         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
985               u_errorName(status));
986     }
987     expected = "fr-u-abc-efg-efghi-hijk";
988     actual = loc.toLanguageTag<std::string>(status);
989     if (U_FAILURE(status) || expected != actual) {
990         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
991     }
992 
993     // remove non-existing attributes.
994     loc = bld.removeUnicodeLocaleAttribute("efgh").build(status);
995     if (U_FAILURE(status)) {
996         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
997               u_errorName(status));
998     }
999     actual = loc.toLanguageTag<std::string>(status);
1000     if (U_FAILURE(status) || expected != actual) {
1001         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1002     }
1003 
1004     // remove "abc" in the beginning with different casing.
1005     loc = bld.removeUnicodeLocaleAttribute("ABC").build(status);
1006     if (U_FAILURE(status)) {
1007         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1008               u_errorName(status));
1009     }
1010     expected = "fr-u-efg-efghi-hijk";
1011     actual = loc.toLanguageTag<std::string>(status);
1012     if (U_FAILURE(status) || expected != actual) {
1013         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1014     }
1015 
1016     // remove non-existing substring in the end.
1017     loc = bld.removeUnicodeLocaleAttribute("hij").build(status);
1018     if (U_FAILURE(status)) {
1019         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1020               u_errorName(status));
1021     }
1022     actual = loc.toLanguageTag<std::string>(status);
1023     if (U_FAILURE(status) || expected != actual) {
1024         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1025     }
1026 
1027     // remove "hijk" in the end with different casing.
1028     loc = bld.removeUnicodeLocaleAttribute("hIJK").build(status);
1029     if (U_FAILURE(status)) {
1030         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1031               u_errorName(status));
1032     }
1033     expected = "fr-u-efg-efghi";
1034     actual = loc.toLanguageTag<std::string>(status);
1035     if (U_FAILURE(status) || expected != actual) {
1036         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1037     }
1038 
1039     // remove "efghi" in the end with different casing.
1040     loc = bld.removeUnicodeLocaleAttribute("EFGhi").build(status);
1041     if (U_FAILURE(status)) {
1042         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1043               u_errorName(status));
1044     }
1045     expected = "fr-u-efg";
1046     actual = loc.toLanguageTag<std::string>(status);
1047     if (U_FAILURE(status) || expected != actual) {
1048         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1049     }
1050 
1051     // remove "efg" in as the only one, with different casing.
1052     loc = bld.removeUnicodeLocaleAttribute("EFG").build(status);
1053     if (U_FAILURE(status)) {
1054         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1055               u_errorName(status));
1056     }
1057     expected = "fr";
1058     actual = loc.toLanguageTag<std::string>(status);
1059     if (U_FAILURE(status) || expected != actual) {
1060         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1061     }
1062 
1063 }
1064 
TestAddRemoveUnicodeLocaleAttributeWellFormed()1065 void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttributeWellFormed() {
1066     // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
1067     // attribute = alphanum{3,8} ;
1068     static const char* wellFormedAttributes[] = {
1069         // alphanum{3}
1070         "AbC",
1071         "ZAA",
1072         "0AA",
1073         "x3A",
1074         "xa8",
1075 
1076         // alphanum{4}
1077         "AbCA",
1078         "ZASD",
1079         "0ASD",
1080         "A3a4",
1081         "zK90",
1082 
1083         // alphanum{5}
1084         "efgij",
1085         "AbCAD",
1086         "ZAASD",
1087         "0AASD",
1088         "A1CAD",
1089         "ef2ij",
1090         "ads3X",
1091         "owqF4",
1092 
1093         // alphanum{6}
1094         "efgijk",
1095         "AADGFE",
1096         "AkDfFz",
1097         "0ADGFE",
1098         "A9DfFz",
1099         "AADG7E",
1100 
1101         // alphanum{7}
1102         "asdfads",
1103         "ADSFADF",
1104         "piSFkDk",
1105         "a0dfads",
1106         "ADSF3DF",
1107         "piSFkD9",
1108 
1109         // alphanum{8}
1110         "oieradfz",
1111         "IADSFJKR",
1112         "kkDSFJkR",
1113     };
1114     LocaleBuilder bld;
1115     for (int i = 0; i < UPRV_LENGTHOF(wellFormedAttributes); i++) {
1116         if (i % 5 == 0) {
1117             bld.clear();
1118         }
1119         UErrorCode status = U_ZERO_ERROR;
1120         bld.addUnicodeLocaleAttribute(wellFormedAttributes[i]);
1121         Locale loc = bld.build(status);
1122         if (U_FAILURE(status)) {
1123             errln("addUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1124                   wellFormedAttributes[i], u_errorName(status));
1125         }
1126         if (i > 2) {
1127             bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 1]);
1128             loc = bld.build(status);
1129             if (U_FAILURE(status)) {
1130                 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1131                       wellFormedAttributes[i - 1], u_errorName(status));
1132             }
1133             bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 3]);
1134             loc = bld.build(status);
1135             if (U_FAILURE(status)) {
1136                 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1137                       wellFormedAttributes[i - 3], u_errorName(status));
1138             }
1139         }
1140     }
1141 }
1142 
TestAddUnicodeLocaleAttributeIllFormed()1143 void LocaleBuilderTest::TestAddUnicodeLocaleAttributeIllFormed() {
1144     static const char* illFormed[] = {
1145         "aa",
1146         "34",
1147         "ab-",
1148         "-cd",
1149         "-ef-",
1150         "zyzabcdef",
1151         "123456789",
1152         "ab-abc",
1153         "1ZB30zfk9-abc",
1154         "2ck30zfk9-adsf023-234kcZ",
1155     };
1156     for (const char* ill : illFormed) {
1157         UErrorCode status = U_ZERO_ERROR;
1158         LocaleBuilder bld;
1159         bld.addUnicodeLocaleAttribute(ill);
1160         Locale loc = bld.build(status);
1161         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1162             errln("addUnicodeLocaleAttribute(\"%s\") should fail but has no Error\n",
1163                   ill);
1164         }
1165     }
1166 }
1167 
TestSetExtensionU()1168 void LocaleBuilderTest::TestSetExtensionU() {
1169     LocaleBuilder bld;
1170     bld.setLanguage("zh");
1171     Verify(bld, "zh",
1172            "setLanguage(\"zh\") got Error: %s\n");
1173 
1174     bld.setExtension('u', "co-stroke");
1175     Verify(bld, "zh-u-co-stroke",
1176            "setExtension('u', \"co-stroke\") got Error: %s\n");
1177 
1178     bld.setExtension('U', "ca-islamic");
1179     Verify(bld, "zh-u-ca-islamic",
1180            "setExtension('U', \"zh-u-ca-islamic\") got Error: %s\n");
1181 
1182     bld.setExtension('u', "ca-chinese");
1183     Verify(bld, "zh-u-ca-chinese",
1184            "setExtension('u', \"ca-chinese\") got Error: %s\n");
1185 
1186     bld.setExtension('U', "co-pinyin");
1187     Verify(bld, "zh-u-co-pinyin",
1188            "setExtension('U', \"co-pinyin\") got Error: %s\n");
1189 
1190     bld.setRegion("TW");
1191     Verify(bld, "zh-TW-u-co-pinyin",
1192            "setRegion(\"TW\") got Error: %s\n");
1193 
1194     bld.setExtension('U', "");
1195     Verify(bld, "zh-TW",
1196            "setExtension('U', \"\") got Error: %s\n");
1197 
1198     bld.setExtension('u', "abc-defg-kr-face");
1199     Verify(bld, "zh-TW-u-abc-defg-kr-face",
1200            "setExtension('u', \"abc-defg-kr-face\") got Error: %s\n");
1201 
1202     bld.setExtension('U', "ca-japanese");
1203     Verify(bld, "zh-TW-u-ca-japanese",
1204            "setExtension('U', \"ca-japanese\") got Error: %s\n");
1205 
1206 }
1207 
TestSetExtensionValidateUWellFormed()1208 void LocaleBuilderTest::TestSetExtensionValidateUWellFormed() {
1209     static const char* wellFormedExtensions[] = {
1210         // keyword
1211         //   keyword = key (sep type)? ;
1212         //   key = alphanum alpha ;
1213         //   type = alphanum{3,8} (sep alphanum{3,8})* ;
1214         "3A",
1215         "ZA",
1216         "az-abc",
1217         "zz-123",
1218         "7z-12345678",
1219         "kb-A234567Z",
1220         // (sep keyword)+
1221         "1z-ZZ",
1222         "2z-ZZ-123",
1223         "3z-ZZ-123-cd",
1224         "0z-ZZ-123-cd-efghijkl",
1225         // attribute
1226         "abc",
1227         "456",
1228         "87654321",
1229         "ZABADFSD",
1230         // (sep attribute)+
1231         "abc-ZABADFSD",
1232         "123-ZABADFSD",
1233         "K2K-12345678",
1234         "K2K-12345678-zzz",
1235         // (sep attribute)+ (sep keyword)*
1236         "K2K-12345678-zz",
1237         "K2K-12345678-zz-0z",
1238         "K2K-12345678-9z-AZ-abc",
1239         "K2K-12345678-zz-9A-234",
1240         "K2K-12345678-zk0-abc-efg-zz-9k-234",
1241     };
1242     for (const char* extension : wellFormedExtensions) {
1243         UErrorCode status = U_ZERO_ERROR;
1244         LocaleBuilder bld;
1245         bld.setExtension('u', extension);
1246         Locale loc = bld.build(status);
1247         if (U_FAILURE(status)) {
1248             errln("setExtension('u', \"%s\") got Error: %s\n",
1249                   extension, u_errorName(status));
1250         }
1251     }
1252 }
1253 
TestSetExtensionValidateUIllFormed()1254 void LocaleBuilderTest::TestSetExtensionValidateUIllFormed() {
1255     static const char* illFormed[] = {
1256         // bad key
1257         "-",
1258         "-ab",
1259         "ab-",
1260         "abc-",
1261         "-abc",
1262         "0",
1263         "a",
1264         "A0",
1265         "z9",
1266         "09",
1267         "90",
1268         // bad keyword
1269         "AB-A0",
1270         "AB-efg-A0",
1271         "xy-123456789",
1272         "AB-Aa-",
1273         "AB-Aac-",
1274         // bad attribute
1275         "abcdefghi",
1276         "abcdefgh-",
1277         "abcdefgh-abcdefghi",
1278         "abcdefgh-1",
1279         "abcdefgh-a",
1280         "abcdefgh-a2345678z",
1281     };
1282     for (const char* ill : illFormed) {
1283         UErrorCode status = U_ZERO_ERROR;
1284         LocaleBuilder bld;
1285         bld.setExtension('u', ill);
1286         Locale loc = bld.build(status);
1287         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1288             errln("setExtension('u', \"%s\") should fail but has no Error\n",
1289                   ill);
1290         }
1291     }
1292 }
1293 
TestSetExtensionT()1294 void LocaleBuilderTest::TestSetExtensionT() {
1295     LocaleBuilder bld;
1296     bld.setLanguage("fr");
1297     Verify(bld, "fr",
1298            "setLanguage(\"fr\") got Error: %s\n");
1299 
1300     bld.setExtension('T', "zh");
1301     Verify(bld, "fr-t-zh",
1302            "setExtension('T', \"zh\") got Error: %s\n");
1303 
1304     bld.setExtension('t', "zh-Hant-TW-1234-A9-123-456ABCDE");
1305     Verify(bld, "fr-t-zh-hant-tw-1234-a9-123-456abcde",
1306            "setExtension('t', \"zh-Hant-TW-1234-A9-123-456ABCDE\") got Error: %s\n");
1307 
1308     bld.setExtension('T', "a9-123");
1309     Verify(bld, "fr-t-a9-123",
1310            "setExtension('T', \"a9-123\") got Error: %s\n");
1311 
1312     bld.setRegion("MX");
1313     Verify(bld, "fr-MX-t-a9-123",
1314            "setRegion(\"MX\") got Error: %s\n");
1315 
1316     bld.setScript("Hans");
1317     Verify(bld, "fr-Hans-MX-t-a9-123",
1318            "setScript(\"Hans\") got Error: %s\n");
1319 
1320     bld.setVariant("9abc-abcde");
1321     Verify(bld, "fr-Hans-MX-9abc-abcde-t-a9-123",
1322            "setVariant(\"9abc-abcde\") got Error: %s\n");
1323 
1324     bld.setExtension('T', "");
1325     Verify(bld, "fr-Hans-MX-9abc-abcde",
1326            "bld.setExtension('T', \"\") got Error: %s\n");
1327 }
1328 
TestSetExtensionValidateTWellFormed()1329 void LocaleBuilderTest::TestSetExtensionValidateTWellFormed() {
1330     // ((sep tlang (sep tfield)*) | (sep tfield)+)
1331     static const char* wellFormedExtensions[] = {
1332         // tlang
1333         //  tlang = unicode_language_subtag (sep unicode_script_subtag)?
1334         //          (sep unicode_region_subtag)?  (sep unicode_variant_subtag)* ;
1335         // unicode_language_subtag
1336         "en",
1337         "abc",
1338         "abcde",
1339         "ABCDEFGH",
1340         // unicode_language_subtag sep unicode_script_subtag
1341         "en-latn",
1342         "abc-arab",
1343         "ABCDEFGH-Thai",
1344         // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1345         "en-latn-ME",
1346         "abc-arab-RU",
1347         "ABCDEFGH-Thai-TH",
1348         "en-latn-409",
1349         "abc-arab-123",
1350         "ABCDEFGH-Thai-456",
1351         // unicode_language_subtag sep unicode_region_subtag
1352         "en-ME",
1353         "abc-RU",
1354         "ABCDEFGH-TH",
1355         "en-409",
1356         "abc-123",
1357         "ABCDEFGH-456",
1358         // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1359         // sep (sep unicode_variant_subtag)*
1360         "en-latn-ME-abcde",
1361         "abc-arab-RU-3abc-abcdef",
1362         "ABCDEFGH-Thai-TH-ADSFS-9xyz-abcdef",
1363         "en-latn-409-xafsa",
1364         "abc-arab-123-ADASDF",
1365         "ABCDEFGH-Thai-456-9sdf-ADASFAS",
1366         // (sep tfield)+
1367         "A0-abcde",
1368         "z9-abcde123",
1369         "z9-abcde123-a1-abcde",
1370         // tlang (sep tfield)*
1371         "fr-A0-abcde",
1372         "fr-FR-A0-abcde",
1373         "fr-123-z9-abcde123-a1-abcde",
1374         "fr-Latn-FR-z9-abcde123-a1-abcde",
1375         "gab-Thai-TH-abcde-z9-abcde123-a1-abcde",
1376         "gab-Thai-TH-0bde-z9-abcde123-a1-abcde",
1377     };
1378     for (const char* extension : wellFormedExtensions) {
1379         UErrorCode status = U_ZERO_ERROR;
1380         LocaleBuilder bld;
1381         bld.setExtension('t', extension);
1382         Locale loc = bld.build(status);
1383         if (U_FAILURE(status)) {
1384             errln("setExtension('t', \"%s\") got Error: %s\n",
1385                   extension, u_errorName(status));
1386         }
1387     }
1388 }
1389 
TestSetExtensionValidateTIllFormed()1390 void LocaleBuilderTest::TestSetExtensionValidateTIllFormed() {
1391     static const char* illFormed[] = {
1392         "a",
1393         "a-",
1394         "0",
1395         "9-",
1396         "-9",
1397         "-z",
1398         "Latn",
1399         "Latn-",
1400         "en-",
1401         "nob-",
1402         "-z9",
1403         "a3",
1404         "a3-",
1405         "3a",
1406         "0z-",
1407         "en-123-a1",
1408         "en-TH-a1",
1409         "gab-TH-a1",
1410         "gab-Thai-a1",
1411         "gab-Thai-TH-a1",
1412         "gab-Thai-TH-0bde-a1",
1413         "gab-Thai-TH-0bde-3b",
1414         "gab-Thai-TH-0bde-z9-a1",
1415         "gab-Thai-TH-0bde-z9-3b",
1416         "gab-Thai-TH-0bde-z9-abcde123-3b",
1417         "gab-Thai-TH-0bde-z9-abcde123-ab",
1418         "gab-Thai-TH-0bde-z9-abcde123-ab",
1419         "gab-Thai-TH-0bde-z9-abcde123-a1",
1420         "gab-Thai-TH-0bde-z9-abcde123-a1-",
1421         "gab-Thai-TH-0bde-z9-abcde123-a1-a",
1422         "gab-Thai-TH-0bde-z9-abcde123-a1-ab",
1423         // ICU-21408
1424         "root",
1425     };
1426     for (const char* ill : illFormed) {
1427         UErrorCode status = U_ZERO_ERROR;
1428         LocaleBuilder bld;
1429         bld.setExtension('t', ill);
1430         Locale loc = bld.build(status);
1431         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1432             errln("setExtension('t', \"%s\") should fail but has no Error\n",
1433                   ill);
1434         }
1435     }
1436 }
1437 
TestSetExtensionPU()1438 void LocaleBuilderTest::TestSetExtensionPU() {
1439     LocaleBuilder bld;
1440     bld.setLanguage("ar");
1441     Verify(bld, "ar",
1442            "setLanguage(\"ar\") got Error: %s\n");
1443 
1444     bld.setExtension('X', "a-b-c-d-e");
1445     Verify(bld, "ar-x-a-b-c-d-e",
1446            "setExtension('X', \"a-b-c-d-e\") got Error: %s\n");
1447 
1448     bld.setExtension('x', "0-1-2-3");
1449     Verify(bld, "ar-x-0-1-2-3",
1450            "setExtension('x', \"0-1-2-3\") got Error: %s\n");
1451 
1452     bld.setExtension('X', "0-12345678-x-x");
1453     Verify(bld, "ar-x-0-12345678-x-x",
1454            "setExtension('x', \"ar-x-0-12345678-x-x\") got Error: %s\n");
1455 
1456     bld.setRegion("TH");
1457     Verify(bld, "ar-TH-x-0-12345678-x-x",
1458            "setRegion(\"TH\") got Error: %s\n");
1459 
1460     bld.setExtension('X', "");
1461     Verify(bld, "ar-TH",
1462            "setExtension(\"X\") got Error: %s\n");
1463 }
1464 
TestSetExtensionValidatePUWellFormed()1465 void LocaleBuilderTest::TestSetExtensionValidatePUWellFormed() {
1466     // ((sep tlang (sep tfield)*) | (sep tfield)+)
1467     static const char* wellFormedExtensions[] = {
1468         "a",  // Short subtag
1469         "z",  // Short subtag
1470         "0",  // Short subtag, digit
1471         "9",  // Short subtag, digit
1472         "a-0",  // Two short subtag, alpha and digit
1473         "9-z",  // Two short subtag, digit and alpha
1474         "ab",
1475         "abc",
1476         "abcefghi",  // Long subtag
1477         "87654321",
1478         "01",
1479         "234",
1480         "0a-ab-87654321",  // Three subtags
1481         "87654321-ab-00-3A",  // Four subtabs
1482         "a-9-87654321",  // Three subtags with short and long subtags
1483         "87654321-ab-0-3A",
1484     };
1485     for (const char* extension : wellFormedExtensions) {
1486         UErrorCode status = U_ZERO_ERROR;
1487         LocaleBuilder bld;
1488         bld.setExtension('x', extension);
1489         Locale loc = bld.build(status);
1490         if (U_FAILURE(status)) {
1491             errln("setExtension('x', \"%s\") got Error: %s\n",
1492                   extension, u_errorName(status));
1493         }
1494     }
1495 }
1496 
TestSetExtensionValidatePUIllFormed()1497 void LocaleBuilderTest::TestSetExtensionValidatePUIllFormed() {
1498     static const char* illFormed[] = {
1499         "123456789",  // Too long
1500         "abcdefghi",  // Too long
1501         "ab-123456789",  // Second subtag too long
1502         "abcdefghi-12",  // First subtag too long
1503         "a-ab-987654321",  // Third subtag too long
1504         "987654321-a-0-3",  // First subtag too long
1505     };
1506     for (const char* ill : illFormed) {
1507         UErrorCode status = U_ZERO_ERROR;
1508         LocaleBuilder bld;
1509         bld.setExtension('x', ill);
1510         Locale loc = bld.build(status);
1511         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1512             errln("setExtension('x', \"%s\") should fail but has no Error\n",
1513                   ill);
1514         }
1515     }
1516 }
1517 
TestSetExtensionOthers()1518 void LocaleBuilderTest::TestSetExtensionOthers() {
1519     LocaleBuilder bld;
1520     bld.setLanguage("fr");
1521     Verify(bld, "fr",
1522            "setLanguage(\"fr\") got Error: %s\n");
1523 
1524     bld.setExtension('Z', "ab");
1525     Verify(bld, "fr-z-ab",
1526            "setExtension('Z', \"ab\") got Error: %s\n");
1527 
1528     bld.setExtension('0', "xyz12345-abcdefg");
1529     Verify(bld, "fr-0-xyz12345-abcdefg-z-ab",
1530            "setExtension('0', \"xyz12345-abcdefg\") got Error: %s\n");
1531 
1532     bld.setExtension('a', "01-12345678-ABcdef");
1533     Verify(bld, "fr-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1534            "setExtension('a', \"01-12345678-ABcdef\") got Error: %s\n");
1535 
1536     bld.setRegion("TH");
1537     Verify(bld, "fr-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1538            "setRegion(\"TH\") got Error: %s\n");
1539 
1540     bld.setScript("Arab");
1541     Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1542            "setRegion(\"Arab\") got Error: %s\n");
1543 
1544     bld.setExtension('A', "97");
1545     Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-97-z-ab",
1546            "setExtension('a', \"97\") got Error: %s\n");
1547 
1548     bld.setExtension('a', "");
1549     Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-z-ab",
1550            "setExtension('a', \"\") got Error: %s\n");
1551 
1552     bld.setExtension('0', "");
1553     Verify(bld, "fr-Arab-TH-z-ab",
1554            "setExtension('0', \"\") got Error: %s\n");
1555 }
1556 
TestSetExtensionValidateOthersWellFormed()1557 void LocaleBuilderTest::TestSetExtensionValidateOthersWellFormed() {
1558     static const char* wellFormedExtensions[] = {
1559         "ab",
1560         "abc",
1561         "abcefghi",
1562         "01",
1563         "234",
1564         "87654321",
1565         "0a-ab-87654321",
1566         "87654321-ab-00-3A",
1567     };
1568 
1569     const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1570     const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
1571     int32_t i = 0;
1572     for (const char* extension : wellFormedExtensions) {
1573         char ch = aToZ[i];
1574         i = (i + 1) % aToZLen;
1575         UErrorCode status = U_ZERO_ERROR;
1576         LocaleBuilder bld;
1577         bld.setExtension(ch, extension);
1578         Locale loc = bld.build(status);
1579         if (U_FAILURE(status)) {
1580             errln("setExtension('%c', \"%s\") got Error: %s\n",
1581                   ch, extension, u_errorName(status));
1582         }
1583     }
1584 
1585     const char* someChars =
1586         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+;:,.<>?";
1587     const int32_t someCharsLen = static_cast<int32_t>(uprv_strlen(someChars));
1588     for (int32_t i = 0; i < someCharsLen; i++) {
1589         char ch = someChars[i];
1590         UErrorCode status = U_ZERO_ERROR;
1591         LocaleBuilder bld;
1592         bld.setExtension(ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1593         Locale loc = bld.build(status);
1594         if (uprv_isASCIILetter(ch) || ('0' <= ch && ch <= '9')) {
1595             if (ch != 't' && ch != 'T' && ch != 'u' && ch != 'U' && ch != 'x' && ch != 'X') {
1596                 if (U_FAILURE(status)) {
1597                     errln("setExtension('%c', \"%s\") got Error: %s\n",
1598                           ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)], u_errorName(status));
1599                 }
1600             }
1601         } else {
1602             if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1603                 errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1604                       ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1605             }
1606         }
1607 
1608     }
1609 }
1610 
TestSetExtensionValidateOthersIllFormed()1611 void LocaleBuilderTest::TestSetExtensionValidateOthersIllFormed() {
1612     static const char* illFormed[] = {
1613         "0",  // Too short
1614         "a",  // Too short
1615         "123456789",  // Too long
1616         "abcdefghi",  // Too long
1617         "ab-123456789",  // Second subtag too long
1618         "abcdefghi-12",  // First subtag too long
1619         "a-ab-87654321",  // Third subtag too long
1620         "87654321-a-0-3",  // First subtag too long
1621     };
1622     const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1623     const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
1624     int32_t i = 0;
1625     for (const char* ill : illFormed) {
1626         char ch = aToZ[i];
1627         i = (i + 1) % aToZLen;
1628         UErrorCode status = U_ZERO_ERROR;
1629         LocaleBuilder bld;
1630         bld.setExtension(ch, ill);
1631         Locale loc = bld.build(status);
1632         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1633             errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1634                   ch, ill);
1635         }
1636     }
1637 }
1638 
TestSetLocale()1639 void LocaleBuilderTest::TestSetLocale() {
1640     LocaleBuilder bld1, bld2;
1641     UErrorCode status = U_ZERO_ERROR;
1642     Locale l1 = bld1.setLanguage("en")
1643         .setScript("Latn")
1644         .setRegion("MX")
1645         .setVariant("3456-abcde")
1646         .addUnicodeLocaleAttribute("456")
1647         .addUnicodeLocaleAttribute("123")
1648         .setUnicodeLocaleKeyword("nu", "thai")
1649         .setUnicodeLocaleKeyword("co", "stroke")
1650         .setUnicodeLocaleKeyword("ca", "chinese")
1651         .build(status);
1652     if (U_FAILURE(status) || l1.isBogus()) {
1653         errln("build got Error: %s\n", u_errorName(status));
1654     }
1655     status = U_ZERO_ERROR;
1656     Locale l2 = bld1.setLocale(l1).build(status);
1657     if (U_FAILURE(status) || l2.isBogus()) {
1658         errln("build got Error: %s\n", u_errorName(status));
1659     }
1660 
1661     if (l1 != l2) {
1662         errln("Two locales should be the same, but one is '%s' and the other is '%s'",
1663               l1.getName(), l2.getName());
1664     }
1665 }
1666 
TestPosixCases()1667 void LocaleBuilderTest::TestPosixCases() {
1668     UErrorCode status = U_ZERO_ERROR;
1669     Locale l1 = Locale::forLanguageTag("en-US-u-va-posix", status);
1670     if (U_FAILURE(status) || l1.isBogus()) {
1671         errln("build got Error: %s\n", u_errorName(status));
1672     }
1673     LocaleBuilder bld;
1674     bld.setLanguage("en")
1675         .setRegion("MX")
1676         .setScript("Arab")
1677         .setUnicodeLocaleKeyword("nu", "Thai")
1678         .setExtension('x', "1");
1679     // All of above should be cleared by the setLocale call.
1680     Locale l2 = bld.setLocale(l1).build(status);
1681     if (U_FAILURE(status) || l2.isBogus()) {
1682         errln("build got Error: %s\n", u_errorName(status));
1683     }
1684     if (l1 != l2) {
1685         errln("The result locale should be the set as the setLocale %s but got %s\n",
1686               l1.toLanguageTag<std::string>(status).c_str(),
1687               l2.toLanguageTag<std::string>(status).c_str());
1688     }
1689     Locale posix("en-US-POSIX");
1690     if (posix != l2) {
1691         errln("The result locale should be the set as %s but got %s\n",
1692               posix.getName(), l2.getName());
1693     }
1694 }
1695