• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include <memory>
5 
6 #include "cmemory.h"
7 #include "cstring.h"
8 #include "localebuildertest.h"
9 #include "unicode/localebuilder.h"
10 #include "unicode/strenum.h"
11 
LocaleBuilderTest()12 LocaleBuilderTest::LocaleBuilderTest()
13 {
14 }
15 
~LocaleBuilderTest()16 LocaleBuilderTest::~LocaleBuilderTest()
17 {
18 }
19 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)20 void LocaleBuilderTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
21 {
22     TESTCASE_AUTO_BEGIN;
23     TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttribute);
24     TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttributeWellFormed);
25     TESTCASE_AUTO(TestAddUnicodeLocaleAttributeIllFormed);
26     TESTCASE_AUTO(TestLocaleBuilder);
27     TESTCASE_AUTO(TestLocaleBuilderBasic);
28     TESTCASE_AUTO(TestPosixCases);
29     TESTCASE_AUTO(TestSetExtensionOthers);
30     TESTCASE_AUTO(TestSetExtensionPU);
31     TESTCASE_AUTO(TestSetExtensionT);
32     TESTCASE_AUTO(TestSetExtensionU);
33     TESTCASE_AUTO(TestSetExtensionValidateOthersIllFormed);
34     TESTCASE_AUTO(TestSetExtensionValidateOthersWellFormed);
35     TESTCASE_AUTO(TestSetExtensionValidatePUIllFormed);
36     TESTCASE_AUTO(TestSetExtensionValidatePUWellFormed);
37     TESTCASE_AUTO(TestSetExtensionValidateTIllFormed);
38     TESTCASE_AUTO(TestSetExtensionValidateTWellFormed);
39     TESTCASE_AUTO(TestSetExtensionValidateUIllFormed);
40     TESTCASE_AUTO(TestSetExtensionValidateUWellFormed);
41     TESTCASE_AUTO(TestSetLanguageIllFormed);
42     TESTCASE_AUTO(TestSetLanguageWellFormed);
43     TESTCASE_AUTO(TestSetLocale);
44     TESTCASE_AUTO(TestSetRegionIllFormed);
45     TESTCASE_AUTO(TestSetRegionWellFormed);
46     TESTCASE_AUTO(TestSetScriptIllFormed);
47     TESTCASE_AUTO(TestSetScriptWellFormed);
48     TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedKey);
49     TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedValue);
50     TESTCASE_AUTO(TestSetUnicodeLocaleKeywordWellFormed);
51     TESTCASE_AUTO(TestSetVariantIllFormed);
52     TESTCASE_AUTO(TestSetVariantWellFormed);
53     TESTCASE_AUTO_END;
54 }
55 
Verify(LocaleBuilder & bld,const char * expected,const char * msg)56 void LocaleBuilderTest::Verify(LocaleBuilder& bld, const char* expected, const char* msg) {
57     UErrorCode status = U_ZERO_ERROR;
58     UErrorCode copyStatus = U_ZERO_ERROR;
59     UErrorCode errorStatus = U_ILLEGAL_ARGUMENT_ERROR;
60     if (bld.copyErrorTo(copyStatus)) {
61         errln(msg, u_errorName(copyStatus));
62     }
63     if (!bld.copyErrorTo(errorStatus) || errorStatus != U_ILLEGAL_ARGUMENT_ERROR) {
64         errln("Should always get the previous error and return FALSE");
65     }
66     Locale loc = bld.build(status);
67     if (U_FAILURE(status)) {
68         errln(msg, u_errorName(status));
69     }
70     if (status != copyStatus) {
71         errln(msg, u_errorName(status));
72     }
73     std::string tag = loc.toLanguageTag<std::string>(status);
74     if (U_FAILURE(status)) {
75         errln("loc.toLanguageTag() got Error: %s\n",
76               u_errorName(status));
77     }
78     if (tag != expected) {
79         errln("should get \"%s\", but got \"%s\"\n", expected, tag.c_str());
80     }
81 }
82 
TestLocaleBuilder()83 void LocaleBuilderTest::TestLocaleBuilder() {
84     // The following test data are copy from
85     // icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
86     // "L": +1 = language
87     // "S": +1 = script
88     // "R": +1 = region
89     // "V": +1 = variant
90     // "K": +1 = Unicode locale key / +2 = Unicode locale type
91     // "A": +1 = Unicode locale attribute
92     // "E": +1 = extension letter / +2 = extension value
93     // "P": +1 = private use
94     // "U": +1 = ULocale
95     // "B": +1 = BCP47 language tag
96     // "C": Clear all
97     // "N": Clear extensions
98     // "D": +1 = Unicode locale attribute to be removed
99     // "X": indicates an exception must be thrown
100     // "T": +1 = expected language tag / +2 = expected locale string
101     const char* TESTCASES[][14] = {
102         {"L", "en", "R", "us", "T", "en-US", "en_US"},
103         {"L", "en", "R", "CA", "L", nullptr, "T", "und-CA", "_CA"},
104         {"L", "en", "R", "CA", "L", "", "T", "und-CA", "_CA"},
105         {"L", "en", "R", "FR", "L", "fr", "T", "fr-FR", "fr_FR"},
106         {"L", "123", "X"},
107         {"R", "us", "T", "und-US", "_US"},
108         {"R", "usa", "X"},
109         {"R", "123", "L", "it", "R", nullptr, "T", "it", "it"},
110         {"R", "123", "L", "it", "R", "", "T", "it", "it"},
111         {"R", "123", "L", "en", "T", "en-123", "en_123"},
112         {"S", "LATN", "L", "DE", "T", "de-Latn", "de_Latn"},
113         {"L", "De", "S", "latn", "R", "de", "S", "", "T", "de-DE", "de_DE"},
114         {"L", "De", "S", "Arab", "R", "de", "S", nullptr, "T", "de-DE", "de_DE"},
115         {"S", "latin", "X"},
116         {"V", "1234", "L", "en", "T", "en-1234", "en__1234"},
117         {"V", "1234", "L", "en", "V", "5678", "T", "en-5678", "en__5678"},
118         {"V", "1234", "L", "en", "V", nullptr, "T", "en", "en"},
119         {"V", "1234", "L", "en", "V", "", "T", "en", "en"},
120         {"V", "123", "X"},
121         {"U", "en_US", "T", "en-US", "en_US"},
122         {"U", "en_US_WIN", "X"},
123         {"B", "fr-FR-1606nict-u-ca-gregory-x-test", "T",
124           "fr-FR-1606nict-u-ca-gregory-x-test",
125           "fr_FR_1606NICT@calendar=gregorian;x=test"},
126         {"B", "ab-cde-fghij", "T", "cde-fghij", "cde__FGHIJ"},
127         {"B", "und-CA", "T", "und-CA", "_CA"},
128         // Blocked by ICU-20327
129         // {"B", "en-US-x-test-lvariant-var", "T", "en-US-x-test-lvariant-var",
130         // "en_US_VAR@x=test"},
131         {"B", "en-US-VAR", "X"},
132         {"U", "ja_JP@calendar=japanese;currency=JPY", "L", "ko", "T",
133           "ko-JP-u-ca-japanese-cu-jpy", "ko_JP@calendar=japanese;currency=JPY"},
134         {"U", "ja_JP@calendar=japanese;currency=JPY", "K", "ca", nullptr, "T",
135           "ja-JP-u-cu-jpy", "ja_JP@currency=JPY"},
136         {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u",
137           "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory",
138           "ja_JP@attribute=attr1;calendar=gregorian"},
139         {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn",
140           "en@colnumeric=yes"},
141         {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai",
142           "th_TH@numbers=thai"},
143         {"U", "zh_Hans", "R", "sg", "K", "ca", "badcalendar", "X"},
144         {"U", "zh_Hans", "R", "sg", "K", "cal", "gregory", "X"},
145         {"E", "z", "ExtZ", "L", "en", "T", "en-z-extz", "en@z=extz"},
146         {"E", "z", "ExtZ", "L", "en", "E", "z", "", "T", "en", "en"},
147         {"E", "z", "ExtZ", "L", "en", "E", "z", nullptr, "T", "en", "en"},
148         {"E", "a", "x", "X"},
149         {"E", "a", "abc_def", "T", "und-a-abc-def", "@a=abc-def"},
150         // Design limitation - typeless u extension keyword 0a below is interpreted as a boolean value true/yes.
151         // With the legacy keyword syntax, "yes" is used for such boolean value instead of "true".
152         // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
153         // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
154         // key = alphanum alpha
155         {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a",
156          "en@0a=yes;attribute=aaa-bbb"},
157         {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu",
158           "fr_FR@x=yoshito-icu"},
159         {"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese",
160           "ja_JP@calendar=japanese"},
161         {"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T",
162           "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"},
163         {"E", "o", "OPQR", "E", "a", "aBcD", "T", "und-a-abcd-o-opqr", "@a=abcd;o=opqr"},
164         {"E", "u", "nu-thai-ca-gregory", "L", "TH", "T", "th-u-ca-gregory-nu-thai",
165           "th@calendar=gregorian;numbers=thai"},
166         {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc",
167           "en_US@timezone=America/New_York"},
168         {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk",
169           "true", "T", "de-u-co-phonebk-kk-ks-level1",
170           "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
171         {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory",
172           "en_US@calendar=gregorian"},
173         {"L", "en", "R", "US", "K", "cal", "gregory", "X"},
174         {"L", "en", "R", "US", "K", "ca", "gregorian", "X"},
175         {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn",
176           "en_US@colnumeric=yes"},
177         {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"},
178         {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"},
179         {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "T",
180           "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
181         {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "D", "def", "T",
182           "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
183         {"L", "en", "A", "aa", "X"},
184         {"B", "fr-u-attr1-cu-eur", "D", "attribute1", "X"},
185     };
186     UErrorCode status = U_ZERO_ERROR;
187     LocaleBuilder bld;
188     for (int tidx = 0; tidx < UPRV_LENGTHOF(TESTCASES); tidx++) {
189         const char* (&testCase)[14] = TESTCASES[tidx];
190         std::string actions;
191         for (int p = 0; p < UPRV_LENGTHOF(testCase); p++) {
192              if (testCase[p] == nullptr) {
193                  actions += " (nullptr)";
194                  break;
195              }
196              if (p > 0) actions += " ";
197              actions += testCase[p];
198         }
199         int i = 0;
200         const char* method;
201         status = U_ZERO_ERROR;
202         bld.clear();
203         while (true) {
204             status = U_ZERO_ERROR;
205             UErrorCode copyStatus = U_ZERO_ERROR;
206             method = testCase[i++];
207             if (strcmp("L", method) == 0) {
208                 bld.setLanguage(testCase[i++]);
209                 bld.copyErrorTo(copyStatus);
210                 bld.build(status);
211             } else if (strcmp("S", method) == 0) {
212                 bld.setScript(testCase[i++]);
213                 bld.copyErrorTo(copyStatus);
214                 bld.build(status);
215             } else if (strcmp("R", method) == 0) {
216                 bld.setRegion(testCase[i++]);
217                 bld.copyErrorTo(copyStatus);
218                 bld.build(status);
219             } else if (strcmp("V", method) == 0) {
220                 bld.setVariant(testCase[i++]);
221                 bld.copyErrorTo(copyStatus);
222                 bld.build(status);
223             } else if (strcmp("K", method) == 0) {
224                 const char* key = testCase[i++];
225                 const char* type = testCase[i++];
226                 bld.setUnicodeLocaleKeyword(key, type);
227                 bld.copyErrorTo(copyStatus);
228                 bld.build(status);
229             } else if (strcmp("A", method) == 0) {
230                 bld.addUnicodeLocaleAttribute(testCase[i++]);
231                 bld.copyErrorTo(copyStatus);
232                 bld.build(status);
233             } else if (strcmp("E", method) == 0) {
234                 const char* key = testCase[i++];
235                 const char* value = testCase[i++];
236                 bld.setExtension(key[0], value);
237                 bld.copyErrorTo(copyStatus);
238                 bld.build(status);
239             } else if (strcmp("P", method) == 0) {
240                 bld.setExtension('x', testCase[i++]);
241                 bld.copyErrorTo(copyStatus);
242                 bld.build(status);
243             } else if (strcmp("U", method) == 0) {
244                 bld.setLocale(Locale(testCase[i++]));
245                 bld.copyErrorTo(copyStatus);
246                 bld.build(status);
247             } else if (strcmp("B", method) == 0) {
248                 bld.setLanguageTag(testCase[i++]);
249                 bld.copyErrorTo(copyStatus);
250                 bld.build(status);
251             }
252             // clear / remove
253             else if (strcmp("C", method) == 0) {
254                 bld.clear();
255                 bld.copyErrorTo(copyStatus);
256                 bld.build(status);
257             } else if (strcmp("N", method) == 0) {
258                 bld.clearExtensions();
259                 bld.copyErrorTo(copyStatus);
260                 bld.build(status);
261             } else if (strcmp("D", method) == 0) {
262                 bld.removeUnicodeLocaleAttribute(testCase[i++]);
263                 bld.copyErrorTo(copyStatus);
264                 bld.build(status);
265             }
266             // result
267             else if (strcmp("X", method) == 0) {
268                 if (U_SUCCESS(status)) {
269                     errln("FAIL: No error return - test case: %s", actions.c_str());
270                 }
271             } else if (strcmp("T", method) == 0) {
272                 status = U_ZERO_ERROR;
273                 Locale loc = bld.build(status);
274                 if (status != copyStatus) {
275                     errln("copyErrorTo not matching");
276                 }
277                 if (U_FAILURE(status) ||
278                     strcmp(loc.getName(), testCase[i + 1]) != 0) {
279                     errln("FAIL: Wrong locale ID - %s %s %s", loc.getName(),
280                             " for test case: ", actions.c_str());
281                 }
282                 std::string langtag = loc.toLanguageTag<std::string>(status);
283                 if (U_FAILURE(status) || langtag != testCase[i]) {
284                     errln("FAIL: Wrong language tag - %s %s %s", langtag.c_str(),
285                             " for test case: ", actions.c_str());
286                 }
287                 break;
288             } else {
289                 // Unknow test method
290                 errln("Unknown test case method: There is an error in the test case data.");
291                 break;
292             }
293             if (status != copyStatus) {
294                 errln("copyErrorTo not matching");
295             }
296             if (U_FAILURE(status)) {
297                 if (strcmp("X", testCase[i]) == 0) {
298                     // This failure is expected
299                     break;
300                 } else {
301                     errln("FAIL: U_ILLEGAL_ARGUMENT_ERROR at offset %d %s %s", i,
302                           " in test case: ", actions.c_str());
303                     break;
304                 }
305             }
306             if (strcmp("T", method) == 0) {
307                 break;
308             }
309         }  // while(true)
310     }  // for TESTCASES
311 }
312 
TestLocaleBuilderBasic()313 void LocaleBuilderTest::TestLocaleBuilderBasic() {
314     LocaleBuilder bld;
315     bld.setLanguage("zh");
316     Verify(bld, "zh", "setLanguage('zh') got Error: %s\n");
317 
318     bld.setScript("Hant");
319     Verify(bld, "zh-Hant", "setScript('Hant') got Error: %s\n");
320 
321     bld.setRegion("SG");
322     Verify(bld, "zh-Hant-SG", "setRegion('SG') got Error: %s\n");
323 
324     bld.setRegion("HK");
325     bld.setScript("Hans");
326     Verify(bld, "zh-Hans-HK",
327            "setRegion('HK') and setScript('Hans') got Error: %s\n");
328 
329     bld.setVariant("revised");
330     Verify(bld, "zh-Hans-HK-revised",
331            "setVariant('revised') got Error: %s\n");
332 
333     bld.setUnicodeLocaleKeyword("nu", "thai");
334     Verify(bld, "zh-Hans-HK-revised-u-nu-thai",
335            "setUnicodeLocaleKeyword('nu', 'thai'') got Error: %s\n");
336 
337     bld.setUnicodeLocaleKeyword("co", "pinyin");
338     Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-thai",
339            "setUnicodeLocaleKeyword('co', 'pinyin'') got Error: %s\n");
340 
341     bld.setUnicodeLocaleKeyword("nu", "latn");
342     Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-latn",
343            "setUnicodeLocaleKeyword('nu', 'latn'') got Error: %s\n");
344 
345     bld.setUnicodeLocaleKeyword("nu", nullptr);
346     Verify(bld, "zh-Hans-HK-revised-u-co-pinyin",
347            "setUnicodeLocaleKeyword('nu', ''') got Error: %s\n");
348 
349     bld.setUnicodeLocaleKeyword("co", nullptr);
350     Verify(bld, "zh-Hans-HK-revised",
351            "setUnicodeLocaleKeyword('nu', nullptr) got Error: %s\n");
352 
353     bld.setScript("");
354     Verify(bld, "zh-HK-revised",
355            "setScript('') got Error: %s\n");
356 
357     bld.setVariant("");
358     Verify(bld, "zh-HK",
359            "setVariant('') got Error: %s\n");
360 
361     bld.setRegion("");
362     Verify(bld, "zh",
363            "setRegion('') got Error: %s\n");
364 }
365 
TestSetLanguageWellFormed()366 void LocaleBuilderTest::TestSetLanguageWellFormed() {
367     // http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag
368     // unicode_language_subtag = alpha{2,3} | alpha{5,8};
369     // ICUTC decided also support alpha{4}
370     static const char* wellFormedLanguages[] = {
371         "",
372 
373         // alpha{2}
374         "en",
375         "NE",
376         "eN",
377         "Ne",
378 
379         // alpha{3}
380         "aNe",
381         "zzz",
382         "AAA",
383 
384         // alpha{4}
385         "ABCD",
386         "abcd",
387 
388         // alpha{5}
389         "efgij",
390         "AbCAD",
391         "ZAASD",
392 
393         // alpha{6}
394         "efgijk",
395         "AADGFE",
396         "AkDfFz",
397 
398         // alpha{7}
399         "asdfads",
400         "ADSFADF",
401         "piSFkDk",
402 
403         // alpha{8}
404         "oieradfz",
405         "IADSFJKR",
406         "kkDSFJkR",
407     };
408     for (const char* lang : wellFormedLanguages) {
409         UErrorCode status = U_ZERO_ERROR;
410         LocaleBuilder bld;
411         bld.setLanguage(lang);
412         Locale loc = bld.build(status);
413         if (U_FAILURE(status)) {
414             errln("setLanguage(\"%s\") got Error: %s\n",
415                   lang, u_errorName(status));
416         }
417     }
418 }
419 
TestSetLanguageIllFormed()420 void LocaleBuilderTest::TestSetLanguageIllFormed() {
421     static const char* illFormed[] = {
422         "a",
423         "z",
424         "A",
425         "F",
426         "2",
427         "0",
428         "9"
429         "{",
430         ".",
431         "[",
432         "]",
433         "\\",
434 
435         "e1",
436         "N2",
437         "3N",
438         "4e",
439         "e:",
440         "43",
441         "a9",
442 
443         "aN0",
444         "z1z",
445         "2zz",
446         "3A3",
447         "456",
448         "af)",
449 
450         // Per 2019-01-23 ICUTC, we still accept 4alpha as tlang. see ICU-20321.
451         // "latn",
452         // "Arab",
453         // "LATN",
454 
455         "e)gij",
456         "Ab3AD",
457         "ZAAS8",
458 
459         "efgi[]",
460         "AA9GFE",
461         "7kD3Fz",
462         "as8fads",
463         "0DSFADF",
464         "'iSFkDk",
465 
466         "oieradf+",
467         "IADSFJK-",
468         "kkDSFJk0",
469 
470         // alpha{9}
471         "oieradfab",
472         "IADSFJKDE",
473         "kkDSFJkzf",
474     };
475     for (const char* ill : illFormed) {
476         UErrorCode status = U_ZERO_ERROR;
477         LocaleBuilder bld;
478         bld.setLanguage(ill);
479         Locale loc = bld.build(status);
480         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
481             errln("setLanguage(\"%s\") should fail but has no Error\n", ill);
482         }
483     }
484 }
485 
TestSetScriptWellFormed()486 void LocaleBuilderTest::TestSetScriptWellFormed() {
487     // http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag
488     // unicode_script_subtag = alpha{4} ;
489     static const char* wellFormedScripts[] = {
490         "",
491 
492         "Latn",
493         "latn",
494         "lATN",
495         "laTN",
496         "arBN",
497         "ARbn",
498         "adsf",
499         "aADF",
500         "BSVS",
501         "LATn",
502     };
503     for (const char* script : wellFormedScripts) {
504         UErrorCode status = U_ZERO_ERROR;
505         LocaleBuilder bld;
506         bld.setScript(script);
507         Locale loc = bld.build(status);
508         if (U_FAILURE(status)) {
509             errln("setScript(\"%s\") got Error: %s\n",
510                   script, u_errorName(status));
511         }
512     }
513 }
514 
TestSetScriptIllFormed()515 void LocaleBuilderTest::TestSetScriptIllFormed() {
516     static const char* illFormed[] = {
517         "a",
518         "z",
519         "A",
520         "F",
521         "2",
522         "0",
523         "9"
524         "{",
525         ".",
526         "[",
527         "]",
528         "\\",
529 
530         "e1",
531         "N2",
532         "3N",
533         "4e",
534         "e:",
535         "43",
536         "a9",
537 
538         "aN0",
539         "z1z",
540         "2zz",
541         "3A3",
542         "456",
543         "af)",
544 
545         "0atn",
546         "l1tn",
547         "lA2N",
548         "la4N",
549         "arB5",
550         "1234",
551 
552         "e)gij",
553         "Ab3AD",
554         "ZAAS8",
555 
556         "efgi[]",
557         "AA9GFE",
558         "7kD3Fz",
559 
560         "as8fads",
561         "0DSFADF",
562         "'iSFkDk",
563 
564         "oieradf+",
565         "IADSFJK-",
566         "kkDSFJk0",
567 
568         // alpha{9}
569         "oieradfab",
570         "IADSFJKDE",
571         "kkDSFJkzf",
572     };
573     for (const char* ill : illFormed) {
574         UErrorCode status = U_ZERO_ERROR;
575         LocaleBuilder bld;
576         bld.setScript(ill);
577         Locale loc = bld.build(status);
578         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
579             errln("setScript(\"%s\") should fail but has no Error\n", ill);
580         }
581     }
582 }
583 
TestSetRegionWellFormed()584 void LocaleBuilderTest::TestSetRegionWellFormed() {
585     // http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag
586     // unicode_region_subtag = (alpha{2} | digit{3})
587     static const char* wellFormedRegions[] = {
588         "",
589 
590         // alpha{2}
591         "en",
592         "NE",
593         "eN",
594         "Ne",
595 
596         // digit{3}
597         "000",
598         "999",
599         "123",
600         "987"
601     };
602     for (const char* region : wellFormedRegions) {
603         UErrorCode status = U_ZERO_ERROR;
604         LocaleBuilder bld;
605         bld.setRegion(region);
606         Locale loc = bld.build(status);
607         if (U_FAILURE(status)) {
608             errln("setRegion(\"%s\") got Error: %s\n",
609                   region, u_errorName(status));
610         }
611     }
612 }
613 
TestSetRegionIllFormed()614 void LocaleBuilderTest::TestSetRegionIllFormed() {
615     static const char* illFormed[] = {
616         "a",
617         "z",
618         "A",
619         "F",
620         "2",
621         "0",
622         "9"
623         "{",
624         ".",
625         "[",
626         "]",
627         "\\",
628 
629         "e1",
630         "N2",
631         "3N",
632         "4e",
633         "e:",
634         "43",
635         "a9",
636 
637         "aN0",
638         "z1z",
639         "2zz",
640         "3A3",
641         "4.6",
642         "af)",
643 
644         "0atn",
645         "l1tn",
646         "lA2N",
647         "la4N",
648         "arB5",
649         "1234",
650 
651         "e)gij",
652         "Ab3AD",
653         "ZAAS8",
654 
655         "efgi[]",
656         "AA9GFE",
657         "7kD3Fz",
658 
659         "as8fads",
660         "0DSFADF",
661         "'iSFkDk",
662 
663         "oieradf+",
664         "IADSFJK-",
665         "kkDSFJk0",
666 
667         // alpha{9}
668         "oieradfab",
669         "IADSFJKDE",
670         "kkDSFJkzf",
671     };
672     for (const char* ill : illFormed) {
673         UErrorCode status = U_ZERO_ERROR;
674         LocaleBuilder bld;
675         bld.setRegion(ill);
676         Locale loc = bld.build(status);
677         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
678             errln("setRegion(\"%s\") should fail but has no Error\n", ill);
679         }
680     }
681 }
682 
TestSetVariantWellFormed()683 void LocaleBuilderTest::TestSetVariantWellFormed() {
684     // http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag
685     // (sep unicode_variant_subtag)*
686     // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
687     static const char* wellFormedVariants[] = {
688         "",
689 
690         // alphanum{5}
691         "efgij",
692         "AbCAD",
693         "ZAASD",
694         "0AASD",
695         "A1CAD",
696         "ef2ij",
697         "ads3X",
698         "owqF4",
699 
700         // alphanum{6}
701         "efgijk",
702         "AADGFE",
703         "AkDfFz",
704         "0ADGFE",
705         "A9DfFz",
706         "AADG7E",
707 
708         // alphanum{7}
709         "asdfads",
710         "ADSFADF",
711         "piSFkDk",
712         "a0dfads",
713         "ADSF3DF",
714         "piSFkD9",
715 
716         // alphanum{8}
717         "oieradfz",
718         "IADSFJKR",
719         "kkDSFJkR",
720         "0ADSFJKR",
721         "12345679",
722 
723         // digit alphanum{3}
724         "0123",
725         "1abc",
726         "20EF",
727         "30EF",
728         "8A03",
729         "3Ax3",
730         "9Axy",
731 
732         // (sep unicode_variant_subtag)*
733         "0123-4567",
734         "0ab3-ABCDE",
735         "9ax3-xByD9",
736         "9ax3-xByD9-adfk934a",
737 
738         "0123_4567",
739         "0ab3_ABCDE",
740         "9ax3_xByD9",
741         "9ax3_xByD9_adfk934a",
742 
743         "9ax3-xByD9_adfk934a",
744         "9ax3_xByD9-adfk934a",
745     };
746     for (const char* variant : wellFormedVariants) {
747         UErrorCode status = U_ZERO_ERROR;
748         LocaleBuilder bld;
749         bld.setVariant(variant);
750         Locale loc = bld.build(status);
751         if (U_FAILURE(status)) {
752             errln("setVariant(\"%s\") got Error: %s\n",
753                   variant, u_errorName(status));
754         }
755     }
756 }
757 
TestSetVariantIllFormed()758 void LocaleBuilderTest::TestSetVariantIllFormed() {
759     static const char* illFormed[] = {
760         "a",
761         "z",
762         "A",
763         "F",
764         "2",
765         "0",
766         "9"
767         "{",
768         ".",
769         "[",
770         "]",
771         "\\",
772 
773         "e1",
774         "N2",
775         "3N",
776         "4e",
777         "e:",
778         "43",
779         "a9",
780         "en",
781         "NE",
782         "eN",
783         "Ne",
784 
785         "aNe",
786         "zzz",
787         "AAA",
788         "aN0",
789         "z1z",
790         "2zz",
791         "3A3",
792         "4.6",
793         "af)",
794         "345",
795         "923",
796 
797         "Latn",
798         "latn",
799         "lATN",
800         "laTN",
801         "arBN",
802         "ARbn",
803         "adsf",
804         "aADF",
805         "BSVS",
806         "LATn",
807         "l1tn",
808         "lA2N",
809         "la4N",
810         "arB5",
811         "abc3",
812         "A3BC",
813 
814         "e)gij",
815         "A+3AD",
816         "ZAA=8",
817 
818         "efgi[]",
819         "AA9]FE",
820         "7k[3Fz",
821 
822         "as8f/ds",
823         "0DSFAD{",
824         "'iSFkDk",
825 
826         "oieradf+",
827         "IADSFJK-",
828         "k}DSFJk0",
829 
830         // alpha{9}
831         "oieradfab",
832         "IADSFJKDE",
833         "kkDSFJkzf",
834         "123456789",
835 
836         "-0123",
837         "-0123-4567",
838         "0123-4567-",
839         "-123-4567",
840         "_0123",
841         "_0123_4567",
842         "0123_4567_",
843         "_123_4567",
844 
845         "-abcde-figjk",
846         "abcde-figjk-",
847         "-abcde-figjk-",
848         "_abcde_figjk",
849         "abcde_figjk_",
850         "_abcde_figjk_",
851     };
852     for (const char* ill : illFormed) {
853         UErrorCode status = U_ZERO_ERROR;
854         LocaleBuilder bld;
855         bld.setVariant(ill);
856         Locale loc = bld.build(status);
857         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
858             errln("setVariant(\"%s\") should fail but has no Error\n", ill);
859         }
860     }
861 }
862 
TestSetUnicodeLocaleKeywordWellFormed()863 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordWellFormed() {
864     // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
865     // keyword = key (sep type)? ;
866     // key = alphanum alpha ;
867     // type = alphanum{3,8} (sep alphanum{3,8})* ;
868     static const char* wellFormed_key_value[] = {
869         "aa", "123",
870         "3b", "zyzbcdef",
871         "0Z", "1ZB30zk9-abc",
872         "cZ", "2ck30zfZ-adsf023-234kcZ",
873         "ZZ", "Lant",
874         "ko", "",
875     };
876     for (int i = 0; i < UPRV_LENGTHOF(wellFormed_key_value); i += 2) {
877         UErrorCode status = U_ZERO_ERROR;
878         LocaleBuilder bld;
879         bld.setUnicodeLocaleKeyword(wellFormed_key_value[i],
880                                     wellFormed_key_value[i + 1]);
881         Locale loc = bld.build(status);
882         if (U_FAILURE(status)) {
883             errln("setUnicodeLocaleKeyword(\"%s\", \"%s\") got Error: %s\n",
884                   wellFormed_key_value[i],
885                   wellFormed_key_value[i + 1],
886                   u_errorName(status));
887         }
888     }
889 }
890 
TestSetUnicodeLocaleKeywordIllFormedKey()891 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedKey() {
892     static const char* illFormed[] = {
893         "34",
894         "ab-cde",
895         "123",
896         "b3",
897         "zyzabcdef",
898         "Z0",
899     };
900     for (const char* ill : illFormed) {
901         UErrorCode status = U_ZERO_ERROR;
902         LocaleBuilder bld;
903         bld.setUnicodeLocaleKeyword(ill, "abc");
904         Locale loc = bld.build(status);
905         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
906             errln("setUnicodeLocaleKeyword(\"%s\", \"abc\") should fail but has no Error\n",
907                   ill);
908         }
909     }
910 }
911 
TestSetUnicodeLocaleKeywordIllFormedValue()912 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedValue() {
913     static const char* illFormed[] = {
914         "34",
915         "ab-",
916         "-cd",
917         "-ef-",
918         "zyzabcdef",
919         "ab-abc",
920         "1ZB30zfk9-abc",
921         "2ck30zfk9-adsf023-234kcZ",
922     };
923     for (const char* ill : illFormed) {
924         UErrorCode status = U_ZERO_ERROR;
925         LocaleBuilder bld;
926         bld.setUnicodeLocaleKeyword("ab", ill);
927         Locale loc = bld.build(status);
928         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
929             errln("setUnicodeLocaleKeyword(\"ab\", \"%s\") should fail but has no Error\n",
930                   ill);
931         }
932     }
933 }
934 
TestAddRemoveUnicodeLocaleAttribute()935 void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttribute() {
936     LocaleBuilder bld;
937     UErrorCode status = U_ZERO_ERROR;
938     Locale loc = bld.setLanguage("fr")
939                     .addUnicodeLocaleAttribute("abc")
940                     .addUnicodeLocaleAttribute("aBc")
941                     .addUnicodeLocaleAttribute("EFG")
942                     .addUnicodeLocaleAttribute("efghi")
943                     .addUnicodeLocaleAttribute("efgh")
944                     .addUnicodeLocaleAttribute("efGhi")
945                     .addUnicodeLocaleAttribute("EFg")
946                     .addUnicodeLocaleAttribute("hijk")
947                     .addUnicodeLocaleAttribute("EFG")
948                     .addUnicodeLocaleAttribute("HiJK")
949                     .addUnicodeLocaleAttribute("aBc")
950                     .build(status);
951     if (U_FAILURE(status)) {
952         errln("addUnicodeLocaleAttribute() got Error: %s\n",
953               u_errorName(status));
954     }
955     std::string expected("fr-u-abc-efg-efgh-efghi-hijk");
956     std::string actual = loc.toLanguageTag<std::string>(status);
957     if (U_FAILURE(status) || expected != actual) {
958         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
959     }
960 
961     // remove "efgh" in the middle with different casing.
962     loc = bld.removeUnicodeLocaleAttribute("eFgH").build(status);
963     if (U_FAILURE(status)) {
964         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
965               u_errorName(status));
966     }
967     expected = "fr-u-abc-efg-efghi-hijk";
968     actual = loc.toLanguageTag<std::string>(status);
969     if (U_FAILURE(status) || expected != actual) {
970         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
971     }
972 
973     // remove non-existing attributes.
974     loc = bld.removeUnicodeLocaleAttribute("efgh").build(status);
975     if (U_FAILURE(status)) {
976         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
977               u_errorName(status));
978     }
979     actual = loc.toLanguageTag<std::string>(status);
980     if (U_FAILURE(status) || expected != actual) {
981         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
982     }
983 
984     // remove "abc" in the beginning with different casing.
985     loc = bld.removeUnicodeLocaleAttribute("ABC").build(status);
986     if (U_FAILURE(status)) {
987         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
988               u_errorName(status));
989     }
990     expected = "fr-u-efg-efghi-hijk";
991     actual = loc.toLanguageTag<std::string>(status);
992     if (U_FAILURE(status) || expected != actual) {
993         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
994     }
995 
996     // remove non-existing substring in the end.
997     loc = bld.removeUnicodeLocaleAttribute("hij").build(status);
998     if (U_FAILURE(status)) {
999         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1000               u_errorName(status));
1001     }
1002     actual = loc.toLanguageTag<std::string>(status);
1003     if (U_FAILURE(status) || expected != actual) {
1004         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1005     }
1006 
1007     // remove "hijk" in the end with different casing.
1008     loc = bld.removeUnicodeLocaleAttribute("hIJK").build(status);
1009     if (U_FAILURE(status)) {
1010         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1011               u_errorName(status));
1012     }
1013     expected = "fr-u-efg-efghi";
1014     actual = loc.toLanguageTag<std::string>(status);
1015     if (U_FAILURE(status) || expected != actual) {
1016         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1017     }
1018 
1019     // remove "efghi" in the end with different casing.
1020     loc = bld.removeUnicodeLocaleAttribute("EFGhi").build(status);
1021     if (U_FAILURE(status)) {
1022         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1023               u_errorName(status));
1024     }
1025     expected = "fr-u-efg";
1026     actual = loc.toLanguageTag<std::string>(status);
1027     if (U_FAILURE(status) || expected != actual) {
1028         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1029     }
1030 
1031     // remove "efg" in as the only one, with different casing.
1032     loc = bld.removeUnicodeLocaleAttribute("EFG").build(status);
1033     if (U_FAILURE(status)) {
1034         errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1035               u_errorName(status));
1036     }
1037     expected = "fr";
1038     actual = loc.toLanguageTag<std::string>(status);
1039     if (U_FAILURE(status) || expected != actual) {
1040         errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1041     }
1042 
1043 }
1044 
TestAddRemoveUnicodeLocaleAttributeWellFormed()1045 void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttributeWellFormed() {
1046     // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
1047     // attribute = alphanum{3,8} ;
1048     static const char* wellFormedAttributes[] = {
1049         // alphanum{3}
1050         "AbC",
1051         "ZAA",
1052         "0AA",
1053         "x3A",
1054         "xa8",
1055 
1056         // alphanum{4}
1057         "AbCA",
1058         "ZASD",
1059         "0ASD",
1060         "A3a4",
1061         "zK90",
1062 
1063         // alphanum{5}
1064         "efgij",
1065         "AbCAD",
1066         "ZAASD",
1067         "0AASD",
1068         "A1CAD",
1069         "ef2ij",
1070         "ads3X",
1071         "owqF4",
1072 
1073         // alphanum{6}
1074         "efgijk",
1075         "AADGFE",
1076         "AkDfFz",
1077         "0ADGFE",
1078         "A9DfFz",
1079         "AADG7E",
1080 
1081         // alphanum{7}
1082         "asdfads",
1083         "ADSFADF",
1084         "piSFkDk",
1085         "a0dfads",
1086         "ADSF3DF",
1087         "piSFkD9",
1088 
1089         // alphanum{8}
1090         "oieradfz",
1091         "IADSFJKR",
1092         "kkDSFJkR",
1093     };
1094     LocaleBuilder bld;
1095     for (int i = 0; i < UPRV_LENGTHOF(wellFormedAttributes); i++) {
1096         if (i % 5 == 0) {
1097             bld.clear();
1098         }
1099         UErrorCode status = U_ZERO_ERROR;
1100         bld.addUnicodeLocaleAttribute(wellFormedAttributes[i]);
1101         Locale loc = bld.build(status);
1102         if (U_FAILURE(status)) {
1103             errln("addUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1104                   wellFormedAttributes[i], u_errorName(status));
1105         }
1106         if (i > 2) {
1107             bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 1]);
1108             loc = bld.build(status);
1109             if (U_FAILURE(status)) {
1110                 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1111                       wellFormedAttributes[i - 1], u_errorName(status));
1112             }
1113             bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 3]);
1114             loc = bld.build(status);
1115             if (U_FAILURE(status)) {
1116                 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1117                       wellFormedAttributes[i - 3], u_errorName(status));
1118             }
1119         }
1120     }
1121 }
1122 
TestAddUnicodeLocaleAttributeIllFormed()1123 void LocaleBuilderTest::TestAddUnicodeLocaleAttributeIllFormed() {
1124     static const char* illFormed[] = {
1125         "aa",
1126         "34",
1127         "ab-",
1128         "-cd",
1129         "-ef-",
1130         "zyzabcdef",
1131         "123456789",
1132         "ab-abc",
1133         "1ZB30zfk9-abc",
1134         "2ck30zfk9-adsf023-234kcZ",
1135     };
1136     for (const char* ill : illFormed) {
1137         UErrorCode status = U_ZERO_ERROR;
1138         LocaleBuilder bld;
1139         bld.addUnicodeLocaleAttribute(ill);
1140         Locale loc = bld.build(status);
1141         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1142             errln("addUnicodeLocaleAttribute(\"%s\") should fail but has no Error\n",
1143                   ill);
1144         }
1145     }
1146 }
1147 
TestSetExtensionU()1148 void LocaleBuilderTest::TestSetExtensionU() {
1149     LocaleBuilder bld;
1150     bld.setLanguage("zh");
1151     Verify(bld, "zh",
1152            "setLanguage(\"zh\") got Error: %s\n");
1153 
1154     bld.setExtension('u', "co-stroke");
1155     Verify(bld, "zh-u-co-stroke",
1156            "setExtension('u', \"co-stroke\") got Error: %s\n");
1157 
1158     bld.setExtension('U', "ca-islamic");
1159     Verify(bld, "zh-u-ca-islamic",
1160            "setExtension('U', \"zh-u-ca-islamic\") got Error: %s\n");
1161 
1162     bld.setExtension('u', "ca-chinese");
1163     Verify(bld, "zh-u-ca-chinese",
1164            "setExtension('u', \"ca-chinese\") got Error: %s\n");
1165 
1166     bld.setExtension('U', "co-pinyin");
1167     Verify(bld, "zh-u-co-pinyin",
1168            "setExtension('U', \"co-pinyin\") got Error: %s\n");
1169 
1170     bld.setRegion("TW");
1171     Verify(bld, "zh-TW-u-co-pinyin",
1172            "setRegion(\"TW\") got Error: %s\n");
1173 
1174     bld.setExtension('U', "");
1175     Verify(bld, "zh-TW",
1176            "setExtension('U', \"\") got Error: %s\n");
1177 
1178     bld.setExtension('u', "abc-defg-kr-face");
1179     Verify(bld, "zh-TW-u-abc-defg-kr-face",
1180            "setExtension('u', \"abc-defg-kr-face\") got Error: %s\n");
1181 
1182     bld.setExtension('U', "ca-japanese");
1183     Verify(bld, "zh-TW-u-ca-japanese",
1184            "setExtension('U', \"ca-japanese\") got Error: %s\n");
1185 
1186 }
1187 
TestSetExtensionValidateUWellFormed()1188 void LocaleBuilderTest::TestSetExtensionValidateUWellFormed() {
1189     static const char* wellFormedExtensions[] = {
1190         // keyword
1191         //   keyword = key (sep type)? ;
1192         //   key = alphanum alpha ;
1193         //   type = alphanum{3,8} (sep alphanum{3,8})* ;
1194         "3A",
1195         "ZA",
1196         "az-abc",
1197         "zz-123",
1198         "7z-12345678",
1199         "kb-A234567Z",
1200         // (sep keyword)+
1201         "1z-ZZ",
1202         "2z-ZZ-123",
1203         "3z-ZZ-123-cd",
1204         "0z-ZZ-123-cd-efghijkl",
1205         // attribute
1206         "abc",
1207         "456",
1208         "87654321",
1209         "ZABADFSD",
1210         // (sep attribute)+
1211         "abc-ZABADFSD",
1212         "123-ZABADFSD",
1213         "K2K-12345678",
1214         "K2K-12345678-zzz",
1215         // (sep attribute)+ (sep keyword)*
1216         "K2K-12345678-zz",
1217         "K2K-12345678-zz-0z",
1218         "K2K-12345678-9z-AZ-abc",
1219         "K2K-12345678-zz-9A-234",
1220         "K2K-12345678-zk0-abc-efg-zz-9k-234",
1221     };
1222     for (const char* extension : wellFormedExtensions) {
1223         UErrorCode status = U_ZERO_ERROR;
1224         LocaleBuilder bld;
1225         bld.setExtension('u', extension);
1226         Locale loc = bld.build(status);
1227         if (U_FAILURE(status)) {
1228             errln("setExtension('u', \"%s\") got Error: %s\n",
1229                   extension, u_errorName(status));
1230         }
1231     }
1232 }
1233 
TestSetExtensionValidateUIllFormed()1234 void LocaleBuilderTest::TestSetExtensionValidateUIllFormed() {
1235     static const char* illFormed[] = {
1236         // bad key
1237         "-",
1238         "-ab",
1239         "ab-",
1240         "abc-",
1241         "-abc",
1242         "0",
1243         "a",
1244         "A0",
1245         "z9",
1246         "09",
1247         "90",
1248         // bad keyword
1249         "AB-A0",
1250         "AB-efg-A0",
1251         "xy-123456789",
1252         "AB-Aa-",
1253         "AB-Aac-",
1254         // bad attribute
1255         "abcdefghi",
1256         "abcdefgh-",
1257         "abcdefgh-abcdefghi",
1258         "abcdefgh-1",
1259         "abcdefgh-a",
1260         "abcdefgh-a2345678z",
1261     };
1262     for (const char* ill : illFormed) {
1263         UErrorCode status = U_ZERO_ERROR;
1264         LocaleBuilder bld;
1265         bld.setExtension('u', ill);
1266         Locale loc = bld.build(status);
1267         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1268             errln("setExtension('u', \"%s\") should fail but has no Error\n",
1269                   ill);
1270         }
1271     }
1272 }
1273 
TestSetExtensionT()1274 void LocaleBuilderTest::TestSetExtensionT() {
1275     LocaleBuilder bld;
1276     bld.setLanguage("fr");
1277     Verify(bld, "fr",
1278            "setLanguage(\"fr\") got Error: %s\n");
1279 
1280     bld.setExtension('T', "zh");
1281     Verify(bld, "fr-t-zh",
1282            "setExtension('T', \"zh\") got Error: %s\n");
1283 
1284     bld.setExtension('t', "zh-Hant-TW-1234-A9-123-456ABCDE");
1285     Verify(bld, "fr-t-zh-hant-tw-1234-a9-123-456abcde",
1286            "setExtension('t', \"zh-Hant-TW-1234-A9-123-456ABCDE\") got Error: %s\n");
1287 
1288     bld.setExtension('T', "a9-123");
1289     Verify(bld, "fr-t-a9-123",
1290            "setExtension('T', \"a9-123\") got Error: %s\n");
1291 
1292     bld.setRegion("MX");
1293     Verify(bld, "fr-MX-t-a9-123",
1294            "setRegion(\"MX\") got Error: %s\n");
1295 
1296     bld.setScript("Hans");
1297     Verify(bld, "fr-Hans-MX-t-a9-123",
1298            "setScript(\"Hans\") got Error: %s\n");
1299 
1300     bld.setVariant("9abc-abcde");
1301     Verify(bld, "fr-Hans-MX-9abc-abcde-t-a9-123",
1302            "setVariant(\"9abc-abcde\") got Error: %s\n");
1303 
1304     bld.setExtension('T', "");
1305     Verify(bld, "fr-Hans-MX-9abc-abcde",
1306            "bld.setExtension('T', \"\") got Error: %s\n");
1307 }
1308 
TestSetExtensionValidateTWellFormed()1309 void LocaleBuilderTest::TestSetExtensionValidateTWellFormed() {
1310     // ((sep tlang (sep tfield)*) | (sep tfield)+)
1311     static const char* wellFormedExtensions[] = {
1312         // tlang
1313         //  tlang = unicode_language_subtag (sep unicode_script_subtag)?
1314         //          (sep unicode_region_subtag)?  (sep unicode_variant_subtag)* ;
1315         // unicode_language_subtag
1316         "en",
1317         "abc",
1318         "abcde",
1319         "ABCDEFGH",
1320         // unicode_language_subtag sep unicode_script_subtag
1321         "en-latn",
1322         "abc-arab",
1323         "ABCDEFGH-Thai",
1324         // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1325         "en-latn-ME",
1326         "abc-arab-RU",
1327         "ABCDEFGH-Thai-TH",
1328         "en-latn-409",
1329         "abc-arab-123",
1330         "ABCDEFGH-Thai-456",
1331         // unicode_language_subtag sep unicode_region_subtag
1332         "en-ME",
1333         "abc-RU",
1334         "ABCDEFGH-TH",
1335         "en-409",
1336         "abc-123",
1337         "ABCDEFGH-456",
1338         // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1339         // sep (sep unicode_variant_subtag)*
1340         "en-latn-ME-abcde",
1341         "abc-arab-RU-3abc-abcdef",
1342         "ABCDEFGH-Thai-TH-ADSFS-9xyz-abcdef",
1343         "en-latn-409-xafsa",
1344         "abc-arab-123-ADASDF",
1345         "ABCDEFGH-Thai-456-9sdf-ADASFAS",
1346         // (sep tfield)+
1347         "A0-abcde",
1348         "z9-abcde123",
1349         "z9-abcde123-a1-abcde",
1350         // tlang (sep tfield)*
1351         "fr-A0-abcde",
1352         "fr-FR-A0-abcde",
1353         "fr-123-z9-abcde123-a1-abcde",
1354         "fr-Latn-FR-z9-abcde123-a1-abcde",
1355         "gab-Thai-TH-abcde-z9-abcde123-a1-abcde",
1356         "gab-Thai-TH-0bde-z9-abcde123-a1-abcde",
1357     };
1358     for (const char* extension : wellFormedExtensions) {
1359         UErrorCode status = U_ZERO_ERROR;
1360         LocaleBuilder bld;
1361         bld.setExtension('t', extension);
1362         Locale loc = bld.build(status);
1363         if (U_FAILURE(status)) {
1364             errln("setExtension('t', \"%s\") got Error: %s\n",
1365                   extension, u_errorName(status));
1366         }
1367     }
1368 }
1369 
TestSetExtensionValidateTIllFormed()1370 void LocaleBuilderTest::TestSetExtensionValidateTIllFormed() {
1371     static const char* illFormed[] = {
1372         "a",
1373         "a-",
1374         "0",
1375         "9-",
1376         "-9",
1377         "-z",
1378         "Latn",
1379         "Latn-",
1380         "en-",
1381         "nob-",
1382         "-z9",
1383         "a3",
1384         "a3-",
1385         "3a",
1386         "0z-",
1387         "en-123-a1",
1388         "en-TH-a1",
1389         "gab-TH-a1",
1390         "gab-Thai-a1",
1391         "gab-Thai-TH-a1",
1392         "gab-Thai-TH-0bde-a1",
1393         "gab-Thai-TH-0bde-3b",
1394         "gab-Thai-TH-0bde-z9-a1",
1395         "gab-Thai-TH-0bde-z9-3b",
1396         "gab-Thai-TH-0bde-z9-abcde123-3b",
1397         "gab-Thai-TH-0bde-z9-abcde123-ab",
1398         "gab-Thai-TH-0bde-z9-abcde123-ab",
1399         "gab-Thai-TH-0bde-z9-abcde123-a1",
1400         "gab-Thai-TH-0bde-z9-abcde123-a1-",
1401         "gab-Thai-TH-0bde-z9-abcde123-a1-a",
1402         "gab-Thai-TH-0bde-z9-abcde123-a1-ab",
1403         // ICU-21408
1404         "root",
1405     };
1406     for (const char* ill : illFormed) {
1407         UErrorCode status = U_ZERO_ERROR;
1408         LocaleBuilder bld;
1409         bld.setExtension('t', ill);
1410         Locale loc = bld.build(status);
1411         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1412             errln("setExtension('t', \"%s\") should fail but has no Error\n",
1413                   ill);
1414         }
1415     }
1416 }
1417 
TestSetExtensionPU()1418 void LocaleBuilderTest::TestSetExtensionPU() {
1419     LocaleBuilder bld;
1420     bld.setLanguage("ar");
1421     Verify(bld, "ar",
1422            "setLanguage(\"ar\") got Error: %s\n");
1423 
1424     bld.setExtension('X', "a-b-c-d-e");
1425     Verify(bld, "ar-x-a-b-c-d-e",
1426            "setExtension('X', \"a-b-c-d-e\") got Error: %s\n");
1427 
1428     bld.setExtension('x', "0-1-2-3");
1429     Verify(bld, "ar-x-0-1-2-3",
1430            "setExtension('x', \"0-1-2-3\") got Error: %s\n");
1431 
1432     bld.setExtension('X', "0-12345678-x-x");
1433     Verify(bld, "ar-x-0-12345678-x-x",
1434            "setExtension('x', \"ar-x-0-12345678-x-x\") got Error: %s\n");
1435 
1436     bld.setRegion("TH");
1437     Verify(bld, "ar-TH-x-0-12345678-x-x",
1438            "setRegion(\"TH\") got Error: %s\n");
1439 
1440     bld.setExtension('X', "");
1441     Verify(bld, "ar-TH",
1442            "setExtension(\"X\") got Error: %s\n");
1443 }
1444 
TestSetExtensionValidatePUWellFormed()1445 void LocaleBuilderTest::TestSetExtensionValidatePUWellFormed() {
1446     // ((sep tlang (sep tfield)*) | (sep tfield)+)
1447     static const char* wellFormedExtensions[] = {
1448         "a",  // Short subtag
1449         "z",  // Short subtag
1450         "0",  // Short subtag, digit
1451         "9",  // Short subtag, digit
1452         "a-0",  // Two short subtag, alpha and digit
1453         "9-z",  // Two short subtag, digit and alpha
1454         "ab",
1455         "abc",
1456         "abcefghi",  // Long subtag
1457         "87654321",
1458         "01",
1459         "234",
1460         "0a-ab-87654321",  // Three subtags
1461         "87654321-ab-00-3A",  // Four subtabs
1462         "a-9-87654321",  // Three subtags with short and long subtags
1463         "87654321-ab-0-3A",
1464     };
1465     for (const char* extension : wellFormedExtensions) {
1466         UErrorCode status = U_ZERO_ERROR;
1467         LocaleBuilder bld;
1468         bld.setExtension('x', extension);
1469         Locale loc = bld.build(status);
1470         if (U_FAILURE(status)) {
1471             errln("setExtension('x', \"%s\") got Error: %s\n",
1472                   extension, u_errorName(status));
1473         }
1474     }
1475 }
1476 
TestSetExtensionValidatePUIllFormed()1477 void LocaleBuilderTest::TestSetExtensionValidatePUIllFormed() {
1478     static const char* illFormed[] = {
1479         "123456789",  // Too long
1480         "abcdefghi",  // Too long
1481         "ab-123456789",  // Second subtag too long
1482         "abcdefghi-12",  // First subtag too long
1483         "a-ab-987654321",  // Third subtag too long
1484         "987654321-a-0-3",  // First subtag too long
1485     };
1486     for (const char* ill : illFormed) {
1487         UErrorCode status = U_ZERO_ERROR;
1488         LocaleBuilder bld;
1489         bld.setExtension('x', ill);
1490         Locale loc = bld.build(status);
1491         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1492             errln("setExtension('x', \"%s\") should fail but has no Error\n",
1493                   ill);
1494         }
1495     }
1496 }
1497 
TestSetExtensionOthers()1498 void LocaleBuilderTest::TestSetExtensionOthers() {
1499     LocaleBuilder bld;
1500     bld.setLanguage("fr");
1501     Verify(bld, "fr",
1502            "setLanguage(\"fr\") got Error: %s\n");
1503 
1504     bld.setExtension('Z', "ab");
1505     Verify(bld, "fr-z-ab",
1506            "setExtension('Z', \"ab\") got Error: %s\n");
1507 
1508     bld.setExtension('0', "xyz12345-abcdefg");
1509     Verify(bld, "fr-0-xyz12345-abcdefg-z-ab",
1510            "setExtension('0', \"xyz12345-abcdefg\") got Error: %s\n");
1511 
1512     bld.setExtension('a', "01-12345678-ABcdef");
1513     Verify(bld, "fr-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1514            "setExtension('a', \"01-12345678-ABcdef\") got Error: %s\n");
1515 
1516     bld.setRegion("TH");
1517     Verify(bld, "fr-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1518            "setRegion(\"TH\") got Error: %s\n");
1519 
1520     bld.setScript("Arab");
1521     Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1522            "setRegion(\"Arab\") got Error: %s\n");
1523 
1524     bld.setExtension('A', "97");
1525     Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-97-z-ab",
1526            "setExtension('a', \"97\") got Error: %s\n");
1527 
1528     bld.setExtension('a', "");
1529     Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-z-ab",
1530            "setExtension('a', \"\") got Error: %s\n");
1531 
1532     bld.setExtension('0', "");
1533     Verify(bld, "fr-Arab-TH-z-ab",
1534            "setExtension('0', \"\") got Error: %s\n");
1535 }
1536 
TestSetExtensionValidateOthersWellFormed()1537 void LocaleBuilderTest::TestSetExtensionValidateOthersWellFormed() {
1538     static const char* wellFormedExtensions[] = {
1539         "ab",
1540         "abc",
1541         "abcefghi",
1542         "01",
1543         "234",
1544         "87654321",
1545         "0a-ab-87654321",
1546         "87654321-ab-00-3A",
1547     };
1548 
1549     const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1550     const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
1551     int32_t i = 0;
1552     for (const char* extension : wellFormedExtensions) {
1553         char ch = aToZ[i];
1554         i = (i + 1) % aToZLen;
1555         UErrorCode status = U_ZERO_ERROR;
1556         LocaleBuilder bld;
1557         bld.setExtension(ch, extension);
1558         Locale loc = bld.build(status);
1559         if (U_FAILURE(status)) {
1560             errln("setExtension('%c', \"%s\") got Error: %s\n",
1561                   ch, extension, u_errorName(status));
1562         }
1563     }
1564 
1565     const char* someChars =
1566         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+;:,.<>?";
1567     const int32_t someCharsLen = static_cast<int32_t>(uprv_strlen(someChars));
1568     for (int32_t i = 0; i < someCharsLen; i++) {
1569         char ch = someChars[i];
1570         UErrorCode status = U_ZERO_ERROR;
1571         LocaleBuilder bld;
1572         bld.setExtension(ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1573         Locale loc = bld.build(status);
1574         if (uprv_isASCIILetter(ch) || ('0' <= ch && ch <= '9')) {
1575             if (ch != 't' && ch != 'T' && ch != 'u' && ch != 'U' && ch != 'x' && ch != 'X') {
1576                 if (U_FAILURE(status)) {
1577                     errln("setExtension('%c', \"%s\") got Error: %s\n",
1578                           ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)], u_errorName(status));
1579                 }
1580             }
1581         } else {
1582             if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1583                 errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1584                       ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1585             }
1586         }
1587 
1588     }
1589 }
1590 
TestSetExtensionValidateOthersIllFormed()1591 void LocaleBuilderTest::TestSetExtensionValidateOthersIllFormed() {
1592     static const char* illFormed[] = {
1593         "0",  // Too short
1594         "a",  // Too short
1595         "123456789",  // Too long
1596         "abcdefghi",  // Too long
1597         "ab-123456789",  // Second subtag too long
1598         "abcdefghi-12",  // First subtag too long
1599         "a-ab-87654321",  // Third subtag too long
1600         "87654321-a-0-3",  // First subtag too long
1601     };
1602     const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1603     const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
1604     int32_t i = 0;
1605     for (const char* ill : illFormed) {
1606         char ch = aToZ[i];
1607         i = (i + 1) % aToZLen;
1608         UErrorCode status = U_ZERO_ERROR;
1609         LocaleBuilder bld;
1610         bld.setExtension(ch, ill);
1611         Locale loc = bld.build(status);
1612         if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1613             errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1614                   ch, ill);
1615         }
1616     }
1617 }
1618 
TestSetLocale()1619 void LocaleBuilderTest::TestSetLocale() {
1620     LocaleBuilder bld1, bld2;
1621     UErrorCode status = U_ZERO_ERROR;
1622     Locale l1 = bld1.setLanguage("en")
1623         .setScript("Latn")
1624         .setRegion("MX")
1625         .setVariant("3456-abcde")
1626         .addUnicodeLocaleAttribute("456")
1627         .addUnicodeLocaleAttribute("123")
1628         .setUnicodeLocaleKeyword("nu", "thai")
1629         .setUnicodeLocaleKeyword("co", "stroke")
1630         .setUnicodeLocaleKeyword("ca", "chinese")
1631         .build(status);
1632     if (U_FAILURE(status) || l1.isBogus()) {
1633         errln("build got Error: %s\n", u_errorName(status));
1634     }
1635     status = U_ZERO_ERROR;
1636     Locale l2 = bld1.setLocale(l1).build(status);
1637     if (U_FAILURE(status) || l2.isBogus()) {
1638         errln("build got Error: %s\n", u_errorName(status));
1639     }
1640 
1641     if (l1 != l2) {
1642         errln("Two locales should be the same, but one is '%s' and the other is '%s'",
1643               l1.getName(), l2.getName());
1644     }
1645 }
1646 
TestPosixCases()1647 void LocaleBuilderTest::TestPosixCases() {
1648     UErrorCode status = U_ZERO_ERROR;
1649     Locale l1 = Locale::forLanguageTag("en-US-u-va-posix", status);
1650     if (U_FAILURE(status) || l1.isBogus()) {
1651         errln("build got Error: %s\n", u_errorName(status));
1652     }
1653     LocaleBuilder bld;
1654     bld.setLanguage("en")
1655         .setRegion("MX")
1656         .setScript("Arab")
1657         .setUnicodeLocaleKeyword("nu", "Thai")
1658         .setExtension('x', "1");
1659     // All of above should be cleared by the setLocale call.
1660     Locale l2 = bld.setLocale(l1).build(status);
1661     if (U_FAILURE(status) || l2.isBogus()) {
1662         errln("build got Error: %s\n", u_errorName(status));
1663     }
1664     if (l1 != l2) {
1665         errln("The result locale should be the set as the setLocale %s but got %s\n",
1666               l1.toLanguageTag<std::string>(status).c_str(),
1667               l2.toLanguageTag<std::string>(status).c_str());
1668     }
1669     Locale posix("en-US-POSIX");
1670     if (posix != l2) {
1671         errln("The result locale should be the set as %s but got %s\n",
1672               posix.getName(), l2.getName());
1673     }
1674 }
1675