1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include <memory>
5
6 #include "cmemory.h"
7 #include "cstring.h"
8 #include "localebuildertest.h"
9 #include "unicode/localebuilder.h"
10 #include "unicode/strenum.h"
11
LocaleBuilderTest()12 LocaleBuilderTest::LocaleBuilderTest()
13 {
14 }
15
~LocaleBuilderTest()16 LocaleBuilderTest::~LocaleBuilderTest()
17 {
18 }
19
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)20 void LocaleBuilderTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
21 {
22 TESTCASE_AUTO_BEGIN;
23 TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttribute);
24 TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttributeWellFormed);
25 TESTCASE_AUTO(TestAddUnicodeLocaleAttributeIllFormed);
26 TESTCASE_AUTO(TestLocaleBuilder);
27 TESTCASE_AUTO(TestLocaleBuilderBasic);
28 TESTCASE_AUTO(TestLocaleBuilderBasicWithExtensionsOnDefaultLocale);
29 TESTCASE_AUTO(TestPosixCases);
30 TESTCASE_AUTO(TestSetExtensionOthers);
31 TESTCASE_AUTO(TestSetExtensionPU);
32 TESTCASE_AUTO(TestSetExtensionT);
33 TESTCASE_AUTO(TestSetExtensionU);
34 TESTCASE_AUTO(TestSetExtensionValidateOthersIllFormed);
35 TESTCASE_AUTO(TestSetExtensionValidateOthersWellFormed);
36 TESTCASE_AUTO(TestSetExtensionValidatePUIllFormed);
37 TESTCASE_AUTO(TestSetExtensionValidatePUWellFormed);
38 TESTCASE_AUTO(TestSetExtensionValidateTIllFormed);
39 TESTCASE_AUTO(TestSetExtensionValidateTWellFormed);
40 TESTCASE_AUTO(TestSetExtensionValidateUIllFormed);
41 TESTCASE_AUTO(TestSetExtensionValidateUWellFormed);
42 TESTCASE_AUTO(TestSetLanguageIllFormed);
43 TESTCASE_AUTO(TestSetLanguageWellFormed);
44 TESTCASE_AUTO(TestSetLocale);
45 TESTCASE_AUTO(TestSetRegionIllFormed);
46 TESTCASE_AUTO(TestSetRegionWellFormed);
47 TESTCASE_AUTO(TestSetScriptIllFormed);
48 TESTCASE_AUTO(TestSetScriptWellFormed);
49 TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedKey);
50 TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedValue);
51 TESTCASE_AUTO(TestSetUnicodeLocaleKeywordWellFormed);
52 TESTCASE_AUTO(TestSetVariantIllFormed);
53 TESTCASE_AUTO(TestSetVariantWellFormed);
54 TESTCASE_AUTO_END;
55 }
56
Verify(LocaleBuilder & bld,const char * expected,const char * msg)57 void LocaleBuilderTest::Verify(LocaleBuilder& bld, const char* expected, const char* msg) {
58 UErrorCode status = U_ZERO_ERROR;
59 UErrorCode copyStatus = U_ZERO_ERROR;
60 UErrorCode errorStatus = U_ILLEGAL_ARGUMENT_ERROR;
61 if (bld.copyErrorTo(copyStatus)) {
62 errln(msg, u_errorName(copyStatus));
63 }
64 if (!bld.copyErrorTo(errorStatus) || errorStatus != U_ILLEGAL_ARGUMENT_ERROR) {
65 errln("Should always get the previous error and return false");
66 }
67 Locale loc = bld.build(status);
68 if (U_FAILURE(status)) {
69 errln(msg, u_errorName(status));
70 }
71 if (status != copyStatus) {
72 errln(msg, u_errorName(status));
73 }
74 std::string tag = loc.toLanguageTag<std::string>(status);
75 if (U_FAILURE(status)) {
76 errln("loc.toLanguageTag() got Error: %s\n",
77 u_errorName(status));
78 }
79 if (tag != expected) {
80 errln("should get \"%s\", but got \"%s\"\n", expected, tag.c_str());
81 }
82 }
83
TestLocaleBuilder()84 void LocaleBuilderTest::TestLocaleBuilder() {
85 // The following test data are copy from
86 // icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
87 // "L": +1 = language
88 // "S": +1 = script
89 // "R": +1 = region
90 // "V": +1 = variant
91 // "K": +1 = Unicode locale key / +2 = Unicode locale type
92 // "A": +1 = Unicode locale attribute
93 // "E": +1 = extension letter / +2 = extension value
94 // "P": +1 = private use
95 // "U": +1 = ULocale
96 // "B": +1 = BCP47 language tag
97 // "C": Clear all
98 // "N": Clear extensions
99 // "D": +1 = Unicode locale attribute to be removed
100 // "X": indicates an exception must be thrown
101 // "T": +1 = expected language tag / +2 = expected locale string
102 const char* TESTCASES[][14] = {
103 {"L", "en", "R", "us", "T", "en-US", "en_US"},
104 {"L", "en", "R", "CA", "L", nullptr, "T", "und-CA", "_CA"},
105 {"L", "en", "R", "CA", "L", "", "T", "und-CA", "_CA"},
106 {"L", "en", "R", "FR", "L", "fr", "T", "fr-FR", "fr_FR"},
107 {"L", "123", "X"},
108 {"R", "us", "T", "und-US", "_US"},
109 {"R", "usa", "X"},
110 {"R", "123", "L", "it", "R", nullptr, "T", "it", "it"},
111 {"R", "123", "L", "it", "R", "", "T", "it", "it"},
112 {"R", "123", "L", "en", "T", "en-123", "en_123"},
113 {"S", "LATN", "L", "DE", "T", "de-Latn", "de_Latn"},
114 {"L", "De", "S", "latn", "R", "de", "S", "", "T", "de-DE", "de_DE"},
115 {"L", "De", "S", "Arab", "R", "de", "S", nullptr, "T", "de-DE", "de_DE"},
116 {"S", "latin", "X"},
117 {"V", "1234", "L", "en", "T", "en-1234", "en__1234"},
118 {"V", "1234", "L", "en", "V", "5678", "T", "en-5678", "en__5678"},
119 {"V", "1234", "L", "en", "V", nullptr, "T", "en", "en"},
120 {"V", "1234", "L", "en", "V", "", "T", "en", "en"},
121 {"V", "123", "X"},
122 {"U", "en_US", "T", "en-US", "en_US"},
123 {"U", "en_US_WIN", "X"},
124 {"B", "fr-FR-1606nict-u-ca-gregory-x-test", "T",
125 "fr-FR-1606nict-u-ca-gregory-x-test",
126 "fr_FR_1606NICT@calendar=gregorian;x=test"},
127 {"B", "ab-cde-fghij", "T", "cde-fghij", "cde__FGHIJ"},
128 {"B", "und-CA", "T", "und-CA", "_CA"},
129 // Blocked by ICU-20327
130 // {"B", "en-US-x-test-lvariant-var", "T", "en-US-x-test-lvariant-var",
131 // "en_US_VAR@x=test"},
132 {"B", "en-US-VAR", "X"},
133 {"U", "ja_JP@calendar=japanese;currency=JPY", "L", "ko", "T",
134 "ko-JP-u-ca-japanese-cu-jpy", "ko_JP@calendar=japanese;currency=JPY"},
135 {"U", "ja_JP@calendar=japanese;currency=JPY", "K", "ca", nullptr, "T",
136 "ja-JP-u-cu-jpy", "ja_JP@currency=JPY"},
137 {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u",
138 "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory",
139 "ja_JP@attribute=attr1;calendar=gregorian"},
140 {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn",
141 "en@colnumeric=yes"},
142 {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai",
143 "th_TH@numbers=thai"},
144 {"U", "zh_Hans", "R", "sg", "K", "ca", "badcalendar", "X"},
145 {"U", "zh_Hans", "R", "sg", "K", "cal", "gregory", "X"},
146 {"E", "z", "ExtZ", "L", "en", "T", "en-z-extz", "en@z=extz"},
147 {"E", "z", "ExtZ", "L", "en", "E", "z", "", "T", "en", "en"},
148 {"E", "z", "ExtZ", "L", "en", "E", "z", nullptr, "T", "en", "en"},
149 {"E", "a", "x", "X"},
150 {"E", "a", "abc_def", "T", "und-a-abc-def", "@a=abc-def"},
151 // Design limitation - typeless u extension keyword 0a below is interpreted as a boolean value true/yes.
152 // With the legacy keyword syntax, "yes" is used for such boolean value instead of "true".
153 // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
154 // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
155 // key = alphanum alpha
156 {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a",
157 "en@0a=yes;attribute=aaa-bbb"},
158 {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu",
159 "fr_FR@x=yoshito-icu"},
160 {"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese",
161 "ja_JP@calendar=japanese"},
162 {"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T",
163 "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"},
164 {"E", "o", "OPQR", "E", "a", "aBcD", "T", "und-a-abcd-o-opqr", "@a=abcd;o=opqr"},
165 {"E", "u", "nu-thai-ca-gregory", "L", "TH", "T", "th-u-ca-gregory-nu-thai",
166 "th@calendar=gregorian;numbers=thai"},
167 {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc",
168 "en_US@timezone=America/New_York"},
169 {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk",
170 "true", "T", "de-u-co-phonebk-kk-ks-level1",
171 "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
172 {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory",
173 "en_US@calendar=gregorian"},
174 {"L", "en", "R", "US", "K", "cal", "gregory", "X"},
175 {"L", "en", "R", "US", "K", "ca", "gregorian", "X"},
176 {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn",
177 "en_US@colnumeric=yes"},
178 {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"},
179 {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"},
180 {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "T",
181 "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
182 {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "D", "def", "T",
183 "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
184 {"L", "en", "A", "aa", "X"},
185 {"B", "fr-u-attr1-cu-eur", "D", "attribute1", "X"},
186 };
187 UErrorCode status = U_ZERO_ERROR;
188 LocaleBuilder bld;
189 for (int tidx = 0; tidx < UPRV_LENGTHOF(TESTCASES); tidx++) {
190 const char* (&testCase)[14] = TESTCASES[tidx];
191 std::string actions;
192 for (int p = 0; p < UPRV_LENGTHOF(testCase); p++) {
193 if (testCase[p] == nullptr) {
194 actions += " (nullptr)";
195 break;
196 }
197 if (p > 0) actions += " ";
198 actions += testCase[p];
199 }
200 int i = 0;
201 const char* method;
202 status = U_ZERO_ERROR;
203 bld.clear();
204 while (true) {
205 status = U_ZERO_ERROR;
206 UErrorCode copyStatus = U_ZERO_ERROR;
207 method = testCase[i++];
208 if (strcmp("L", method) == 0) {
209 bld.setLanguage(testCase[i++]);
210 bld.copyErrorTo(copyStatus);
211 bld.build(status);
212 } else if (strcmp("S", method) == 0) {
213 bld.setScript(testCase[i++]);
214 bld.copyErrorTo(copyStatus);
215 bld.build(status);
216 } else if (strcmp("R", method) == 0) {
217 bld.setRegion(testCase[i++]);
218 bld.copyErrorTo(copyStatus);
219 bld.build(status);
220 } else if (strcmp("V", method) == 0) {
221 bld.setVariant(testCase[i++]);
222 bld.copyErrorTo(copyStatus);
223 bld.build(status);
224 } else if (strcmp("K", method) == 0) {
225 const char* key = testCase[i++];
226 const char* type = testCase[i++];
227 bld.setUnicodeLocaleKeyword(key, type);
228 bld.copyErrorTo(copyStatus);
229 bld.build(status);
230 } else if (strcmp("A", method) == 0) {
231 bld.addUnicodeLocaleAttribute(testCase[i++]);
232 bld.copyErrorTo(copyStatus);
233 bld.build(status);
234 } else if (strcmp("E", method) == 0) {
235 const char* key = testCase[i++];
236 const char* value = testCase[i++];
237 bld.setExtension(key[0], value);
238 bld.copyErrorTo(copyStatus);
239 bld.build(status);
240 } else if (strcmp("P", method) == 0) {
241 bld.setExtension('x', testCase[i++]);
242 bld.copyErrorTo(copyStatus);
243 bld.build(status);
244 } else if (strcmp("U", method) == 0) {
245 bld.setLocale(Locale(testCase[i++]));
246 bld.copyErrorTo(copyStatus);
247 bld.build(status);
248 } else if (strcmp("B", method) == 0) {
249 bld.setLanguageTag(testCase[i++]);
250 bld.copyErrorTo(copyStatus);
251 bld.build(status);
252 }
253 // clear / remove
254 else if (strcmp("C", method) == 0) {
255 bld.clear();
256 bld.copyErrorTo(copyStatus);
257 bld.build(status);
258 } else if (strcmp("N", method) == 0) {
259 bld.clearExtensions();
260 bld.copyErrorTo(copyStatus);
261 bld.build(status);
262 } else if (strcmp("D", method) == 0) {
263 bld.removeUnicodeLocaleAttribute(testCase[i++]);
264 bld.copyErrorTo(copyStatus);
265 bld.build(status);
266 }
267 // result
268 else if (strcmp("X", method) == 0) {
269 if (U_SUCCESS(status)) {
270 errln("FAIL: No error return - test case: %s", actions.c_str());
271 }
272 } else if (strcmp("T", method) == 0) {
273 status = U_ZERO_ERROR;
274 Locale loc = bld.build(status);
275 if (status != copyStatus) {
276 errln("copyErrorTo not matching");
277 }
278 if (U_FAILURE(status) ||
279 strcmp(loc.getName(), testCase[i + 1]) != 0) {
280 errln("FAIL: Wrong locale ID - %s %s %s", loc.getName(),
281 " for test case: ", actions.c_str());
282 }
283 std::string langtag = loc.toLanguageTag<std::string>(status);
284 if (U_FAILURE(status) || langtag != testCase[i]) {
285 errln("FAIL: Wrong language tag - %s %s %s", langtag.c_str(),
286 " for test case: ", actions.c_str());
287 }
288 break;
289 } else {
290 // Unknown test method
291 errln("Unknown test case method: There is an error in the test case data.");
292 break;
293 }
294 if (status != copyStatus) {
295 errln("copyErrorTo not matching");
296 }
297 if (U_FAILURE(status)) {
298 if (strcmp("X", testCase[i]) == 0) {
299 // This failure is expected
300 break;
301 } else {
302 errln("FAIL: U_ILLEGAL_ARGUMENT_ERROR at offset %d %s %s", i,
303 " in test case: ", actions.c_str());
304 break;
305 }
306 }
307 if (strcmp("T", method) == 0) {
308 break;
309 }
310 } // while(true)
311 } // for TESTCASES
312 }
313
TestLocaleBuilderBasic()314 void LocaleBuilderTest::TestLocaleBuilderBasic() {
315 LocaleBuilder bld;
316 bld.setLanguage("zh");
317 Verify(bld, "zh", "setLanguage('zh') got Error: %s\n");
318
319 bld.setScript("Hant");
320 Verify(bld, "zh-Hant", "setScript('Hant') got Error: %s\n");
321
322 bld.setRegion("SG");
323 Verify(bld, "zh-Hant-SG", "setRegion('SG') got Error: %s\n");
324
325 bld.setRegion("HK");
326 bld.setScript("Hans");
327 Verify(bld, "zh-Hans-HK",
328 "setRegion('HK') and setScript('Hans') got Error: %s\n");
329
330 bld.setVariant("revised");
331 Verify(bld, "zh-Hans-HK-revised",
332 "setVariant('revised') got Error: %s\n");
333
334 bld.setUnicodeLocaleKeyword("nu", "thai");
335 Verify(bld, "zh-Hans-HK-revised-u-nu-thai",
336 "setUnicodeLocaleKeyword('nu', 'thai'') got Error: %s\n");
337
338 bld.setUnicodeLocaleKeyword("co", "pinyin");
339 Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-thai",
340 "setUnicodeLocaleKeyword('co', 'pinyin'') got Error: %s\n");
341
342 bld.setUnicodeLocaleKeyword("nu", "latn");
343 Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-latn",
344 "setUnicodeLocaleKeyword('nu', 'latn'') got Error: %s\n");
345
346 bld.setUnicodeLocaleKeyword("nu", nullptr);
347 Verify(bld, "zh-Hans-HK-revised-u-co-pinyin",
348 "setUnicodeLocaleKeyword('nu', ''') got Error: %s\n");
349
350 bld.setUnicodeLocaleKeyword("co", nullptr);
351 Verify(bld, "zh-Hans-HK-revised",
352 "setUnicodeLocaleKeyword('nu', nullptr) got Error: %s\n");
353
354 bld.setScript("");
355 Verify(bld, "zh-HK-revised",
356 "setScript('') got Error: %s\n");
357
358 bld.setVariant("");
359 Verify(bld, "zh-HK",
360 "setVariant('') got Error: %s\n");
361
362 bld.setRegion("");
363 Verify(bld, "zh",
364 "setRegion('') got Error: %s\n");
365 }
366
TestLocaleBuilderBasicWithExtensionsOnDefaultLocale()367 void LocaleBuilderTest::TestLocaleBuilderBasicWithExtensionsOnDefaultLocale() {
368 // Change the default locale to one with extension tags.
369 UErrorCode status = U_ZERO_ERROR;
370 Locale originalDefault;
371 Locale::setDefault(Locale::createFromName("en-US-u-hc-h12"), status);
372 if (U_FAILURE(status)) {
373 errln("ERROR: Could not change the default locale");
374 return;
375 }
376
377 // Invoke the basic test now that the default locale has been changed.
378 TestLocaleBuilderBasic();
379
380 Locale::setDefault(originalDefault, status);
381 if (U_FAILURE(status)) {
382 errln("ERROR: Could not restore the default locale");
383 }
384 }
385
TestSetLanguageWellFormed()386 void LocaleBuilderTest::TestSetLanguageWellFormed() {
387 // http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag
388 // unicode_language_subtag = alpha{2,3} | alpha{5,8};
389 // ICUTC decided also support alpha{4}
390 static const char* wellFormedLanguages[] = {
391 "",
392
393 // alpha{2}
394 "en",
395 "NE",
396 "eN",
397 "Ne",
398
399 // alpha{3}
400 "aNe",
401 "zzz",
402 "AAA",
403
404 // alpha{4}
405 "ABCD",
406 "abcd",
407
408 // alpha{5}
409 "efgij",
410 "AbCAD",
411 "ZAASD",
412
413 // alpha{6}
414 "efgijk",
415 "AADGFE",
416 "AkDfFz",
417
418 // alpha{7}
419 "asdfads",
420 "ADSFADF",
421 "piSFkDk",
422
423 // alpha{8}
424 "oieradfz",
425 "IADSFJKR",
426 "kkDSFJkR",
427 };
428 for (const char* lang : wellFormedLanguages) {
429 UErrorCode status = U_ZERO_ERROR;
430 LocaleBuilder bld;
431 bld.setLanguage(lang);
432 Locale loc = bld.build(status);
433 if (U_FAILURE(status)) {
434 errln("setLanguage(\"%s\") got Error: %s\n",
435 lang, u_errorName(status));
436 }
437 }
438 }
439
TestSetLanguageIllFormed()440 void LocaleBuilderTest::TestSetLanguageIllFormed() {
441 static const char* illFormed[] = {
442 "a",
443 "z",
444 "A",
445 "F",
446 "2",
447 "0",
448 "9",
449 "{",
450 ".",
451 "[",
452 "]",
453 "\\",
454
455 "e1",
456 "N2",
457 "3N",
458 "4e",
459 "e:",
460 "43",
461 "a9",
462
463 "aN0",
464 "z1z",
465 "2zz",
466 "3A3",
467 "456",
468 "af)",
469
470 // Per 2019-01-23 ICUTC, we still accept 4alpha as tlang. see ICU-20321.
471 // "latn",
472 // "Arab",
473 // "LATN",
474
475 "e)gij",
476 "Ab3AD",
477 "ZAAS8",
478
479 "efgi[]",
480 "AA9GFE",
481 "7kD3Fz",
482 "as8fads",
483 "0DSFADF",
484 "'iSFkDk",
485
486 "oieradf+",
487 "IADSFJK-",
488 "kkDSFJk0",
489
490 // alpha{9}
491 "oieradfab",
492 "IADSFJKDE",
493 "kkDSFJkzf",
494 };
495 for (const char* ill : illFormed) {
496 UErrorCode status = U_ZERO_ERROR;
497 LocaleBuilder bld;
498 bld.setLanguage(ill);
499 Locale loc = bld.build(status);
500 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
501 errln("setLanguage(\"%s\") should fail but has no Error\n", ill);
502 }
503 }
504 }
505
TestSetScriptWellFormed()506 void LocaleBuilderTest::TestSetScriptWellFormed() {
507 // http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag
508 // unicode_script_subtag = alpha{4} ;
509 static const char* wellFormedScripts[] = {
510 "",
511
512 "Latn",
513 "latn",
514 "lATN",
515 "laTN",
516 "arBN",
517 "ARbn",
518 "adsf",
519 "aADF",
520 "BSVS",
521 "LATn",
522 };
523 for (const char* script : wellFormedScripts) {
524 UErrorCode status = U_ZERO_ERROR;
525 LocaleBuilder bld;
526 bld.setScript(script);
527 Locale loc = bld.build(status);
528 if (U_FAILURE(status)) {
529 errln("setScript(\"%s\") got Error: %s\n",
530 script, u_errorName(status));
531 }
532 }
533 }
534
TestSetScriptIllFormed()535 void LocaleBuilderTest::TestSetScriptIllFormed() {
536 static const char* illFormed[] = {
537 "a",
538 "z",
539 "A",
540 "F",
541 "2",
542 "0",
543 "9",
544 "{",
545 ".",
546 "[",
547 "]",
548 "\\",
549
550 "e1",
551 "N2",
552 "3N",
553 "4e",
554 "e:",
555 "43",
556 "a9",
557
558 "aN0",
559 "z1z",
560 "2zz",
561 "3A3",
562 "456",
563 "af)",
564
565 "0atn",
566 "l1tn",
567 "lA2N",
568 "la4N",
569 "arB5",
570 "1234",
571
572 "e)gij",
573 "Ab3AD",
574 "ZAAS8",
575
576 "efgi[]",
577 "AA9GFE",
578 "7kD3Fz",
579
580 "as8fads",
581 "0DSFADF",
582 "'iSFkDk",
583
584 "oieradf+",
585 "IADSFJK-",
586 "kkDSFJk0",
587
588 // alpha{9}
589 "oieradfab",
590 "IADSFJKDE",
591 "kkDSFJkzf",
592 };
593 for (const char* ill : illFormed) {
594 UErrorCode status = U_ZERO_ERROR;
595 LocaleBuilder bld;
596 bld.setScript(ill);
597 Locale loc = bld.build(status);
598 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
599 errln("setScript(\"%s\") should fail but has no Error\n", ill);
600 }
601 }
602 }
603
TestSetRegionWellFormed()604 void LocaleBuilderTest::TestSetRegionWellFormed() {
605 // http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag
606 // unicode_region_subtag = (alpha{2} | digit{3})
607 static const char* wellFormedRegions[] = {
608 "",
609
610 // alpha{2}
611 "en",
612 "NE",
613 "eN",
614 "Ne",
615
616 // digit{3}
617 "000",
618 "999",
619 "123",
620 "987"
621 };
622 for (const char* region : wellFormedRegions) {
623 UErrorCode status = U_ZERO_ERROR;
624 LocaleBuilder bld;
625 bld.setRegion(region);
626 Locale loc = bld.build(status);
627 if (U_FAILURE(status)) {
628 errln("setRegion(\"%s\") got Error: %s\n",
629 region, u_errorName(status));
630 }
631 }
632 }
633
TestSetRegionIllFormed()634 void LocaleBuilderTest::TestSetRegionIllFormed() {
635 static const char* illFormed[] = {
636 "a",
637 "z",
638 "A",
639 "F",
640 "2",
641 "0",
642 "9",
643 "{",
644 ".",
645 "[",
646 "]",
647 "\\",
648
649 "e1",
650 "N2",
651 "3N",
652 "4e",
653 "e:",
654 "43",
655 "a9",
656
657 "aN0",
658 "z1z",
659 "2zz",
660 "3A3",
661 "4.6",
662 "af)",
663
664 "0atn",
665 "l1tn",
666 "lA2N",
667 "la4N",
668 "arB5",
669 "1234",
670
671 "e)gij",
672 "Ab3AD",
673 "ZAAS8",
674
675 "efgi[]",
676 "AA9GFE",
677 "7kD3Fz",
678
679 "as8fads",
680 "0DSFADF",
681 "'iSFkDk",
682
683 "oieradf+",
684 "IADSFJK-",
685 "kkDSFJk0",
686
687 // alpha{9}
688 "oieradfab",
689 "IADSFJKDE",
690 "kkDSFJkzf",
691 };
692 for (const char* ill : illFormed) {
693 UErrorCode status = U_ZERO_ERROR;
694 LocaleBuilder bld;
695 bld.setRegion(ill);
696 Locale loc = bld.build(status);
697 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
698 errln("setRegion(\"%s\") should fail but has no Error\n", ill);
699 }
700 }
701 }
702
TestSetVariantWellFormed()703 void LocaleBuilderTest::TestSetVariantWellFormed() {
704 // http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag
705 // (sep unicode_variant_subtag)*
706 // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
707 static const char* wellFormedVariants[] = {
708 "",
709
710 // alphanum{5}
711 "efgij",
712 "AbCAD",
713 "ZAASD",
714 "0AASD",
715 "A1CAD",
716 "ef2ij",
717 "ads3X",
718 "owqF4",
719
720 // alphanum{6}
721 "efgijk",
722 "AADGFE",
723 "AkDfFz",
724 "0ADGFE",
725 "A9DfFz",
726 "AADG7E",
727
728 // alphanum{7}
729 "asdfads",
730 "ADSFADF",
731 "piSFkDk",
732 "a0dfads",
733 "ADSF3DF",
734 "piSFkD9",
735
736 // alphanum{8}
737 "oieradfz",
738 "IADSFJKR",
739 "kkDSFJkR",
740 "0ADSFJKR",
741 "12345679",
742
743 // digit alphanum{3}
744 "0123",
745 "1abc",
746 "20EF",
747 "30EF",
748 "8A03",
749 "3Ax3",
750 "9Axy",
751
752 // (sep unicode_variant_subtag)*
753 "0123-4567",
754 "0ab3-ABCDE",
755 "9ax3-xByD9",
756 "9ax3-xByD9-adfk934a",
757
758 "0123_4567",
759 "0ab3_ABCDE",
760 "9ax3_xByD9",
761 "9ax3_xByD9_adfk934a",
762
763 "9ax3-xByD9_adfk934a",
764 "9ax3_xByD9-adfk934a",
765 };
766 for (const char* variant : wellFormedVariants) {
767 UErrorCode status = U_ZERO_ERROR;
768 LocaleBuilder bld;
769 bld.setVariant(variant);
770 Locale loc = bld.build(status);
771 if (U_FAILURE(status)) {
772 errln("setVariant(\"%s\") got Error: %s\n",
773 variant, u_errorName(status));
774 }
775 }
776 }
777
TestSetVariantIllFormed()778 void LocaleBuilderTest::TestSetVariantIllFormed() {
779 static const char* illFormed[] = {
780 "a",
781 "z",
782 "A",
783 "F",
784 "2",
785 "0",
786 "9",
787 "{",
788 ".",
789 "[",
790 "]",
791 "\\",
792
793 "e1",
794 "N2",
795 "3N",
796 "4e",
797 "e:",
798 "43",
799 "a9",
800 "en",
801 "NE",
802 "eN",
803 "Ne",
804
805 "aNe",
806 "zzz",
807 "AAA",
808 "aN0",
809 "z1z",
810 "2zz",
811 "3A3",
812 "4.6",
813 "af)",
814 "345",
815 "923",
816
817 "Latn",
818 "latn",
819 "lATN",
820 "laTN",
821 "arBN",
822 "ARbn",
823 "adsf",
824 "aADF",
825 "BSVS",
826 "LATn",
827 "l1tn",
828 "lA2N",
829 "la4N",
830 "arB5",
831 "abc3",
832 "A3BC",
833
834 "e)gij",
835 "A+3AD",
836 "ZAA=8",
837
838 "efgi[]",
839 "AA9]FE",
840 "7k[3Fz",
841
842 "as8f/ds",
843 "0DSFAD{",
844 "'iSFkDk",
845
846 "oieradf+",
847 "IADSFJK-",
848 "k}DSFJk0",
849
850 // alpha{9}
851 "oieradfab",
852 "IADSFJKDE",
853 "kkDSFJkzf",
854 "123456789",
855
856 "-0123",
857 "-0123-4567",
858 "0123-4567-",
859 "-123-4567",
860 "_0123",
861 "_0123_4567",
862 "0123_4567_",
863 "_123_4567",
864
865 "-abcde-figjk",
866 "abcde-figjk-",
867 "-abcde-figjk-",
868 "_abcde_figjk",
869 "abcde_figjk_",
870 "_abcde_figjk_",
871 };
872 for (const char* ill : illFormed) {
873 UErrorCode status = U_ZERO_ERROR;
874 LocaleBuilder bld;
875 bld.setVariant(ill);
876 Locale loc = bld.build(status);
877 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
878 errln("setVariant(\"%s\") should fail but has no Error\n", ill);
879 }
880 }
881 }
882
TestSetUnicodeLocaleKeywordWellFormed()883 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordWellFormed() {
884 // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
885 // keyword = key (sep type)? ;
886 // key = alphanum alpha ;
887 // type = alphanum{3,8} (sep alphanum{3,8})* ;
888 static const char* wellFormed_key_value[] = {
889 "aa", "123",
890 "3b", "zyzbcdef",
891 "0Z", "1ZB30zk9-abc",
892 "cZ", "2ck30zfZ-adsf023-234kcZ",
893 "ZZ", "Lant",
894 "ko", "",
895 };
896 for (int i = 0; i < UPRV_LENGTHOF(wellFormed_key_value); i += 2) {
897 UErrorCode status = U_ZERO_ERROR;
898 LocaleBuilder bld;
899 bld.setUnicodeLocaleKeyword(wellFormed_key_value[i],
900 wellFormed_key_value[i + 1]);
901 Locale loc = bld.build(status);
902 if (U_FAILURE(status)) {
903 errln("setUnicodeLocaleKeyword(\"%s\", \"%s\") got Error: %s\n",
904 wellFormed_key_value[i],
905 wellFormed_key_value[i + 1],
906 u_errorName(status));
907 }
908 }
909 }
910
TestSetUnicodeLocaleKeywordIllFormedKey()911 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedKey() {
912 static const char* illFormed[] = {
913 "34",
914 "ab-cde",
915 "123",
916 "b3",
917 "zyzabcdef",
918 "Z0",
919 };
920 for (const char* ill : illFormed) {
921 UErrorCode status = U_ZERO_ERROR;
922 LocaleBuilder bld;
923 bld.setUnicodeLocaleKeyword(ill, "abc");
924 Locale loc = bld.build(status);
925 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
926 errln("setUnicodeLocaleKeyword(\"%s\", \"abc\") should fail but has no Error\n",
927 ill);
928 }
929 }
930 }
931
TestSetUnicodeLocaleKeywordIllFormedValue()932 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedValue() {
933 static const char* illFormed[] = {
934 "34",
935 "ab-",
936 "-cd",
937 "-ef-",
938 "zyzabcdef",
939 "ab-abc",
940 "1ZB30zfk9-abc",
941 "2ck30zfk9-adsf023-234kcZ",
942 };
943 for (const char* ill : illFormed) {
944 UErrorCode status = U_ZERO_ERROR;
945 LocaleBuilder bld;
946 bld.setUnicodeLocaleKeyword("ab", ill);
947 Locale loc = bld.build(status);
948 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
949 errln("setUnicodeLocaleKeyword(\"ab\", \"%s\") should fail but has no Error\n",
950 ill);
951 }
952 }
953 }
954
TestAddRemoveUnicodeLocaleAttribute()955 void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttribute() {
956 LocaleBuilder bld;
957 UErrorCode status = U_ZERO_ERROR;
958 Locale loc = bld.setLanguage("fr")
959 .addUnicodeLocaleAttribute("abc")
960 .addUnicodeLocaleAttribute("aBc")
961 .addUnicodeLocaleAttribute("EFG")
962 .addUnicodeLocaleAttribute("efghi")
963 .addUnicodeLocaleAttribute("efgh")
964 .addUnicodeLocaleAttribute("efGhi")
965 .addUnicodeLocaleAttribute("EFg")
966 .addUnicodeLocaleAttribute("hijk")
967 .addUnicodeLocaleAttribute("EFG")
968 .addUnicodeLocaleAttribute("HiJK")
969 .addUnicodeLocaleAttribute("aBc")
970 .build(status);
971 if (U_FAILURE(status)) {
972 errln("addUnicodeLocaleAttribute() got Error: %s\n",
973 u_errorName(status));
974 }
975 std::string expected("fr-u-abc-efg-efgh-efghi-hijk");
976 std::string actual = loc.toLanguageTag<std::string>(status);
977 if (U_FAILURE(status) || expected != actual) {
978 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
979 }
980
981 // remove "efgh" in the middle with different casing.
982 loc = bld.removeUnicodeLocaleAttribute("eFgH").build(status);
983 if (U_FAILURE(status)) {
984 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
985 u_errorName(status));
986 }
987 expected = "fr-u-abc-efg-efghi-hijk";
988 actual = loc.toLanguageTag<std::string>(status);
989 if (U_FAILURE(status) || expected != actual) {
990 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
991 }
992
993 // remove non-existing attributes.
994 loc = bld.removeUnicodeLocaleAttribute("efgh").build(status);
995 if (U_FAILURE(status)) {
996 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
997 u_errorName(status));
998 }
999 actual = loc.toLanguageTag<std::string>(status);
1000 if (U_FAILURE(status) || expected != actual) {
1001 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1002 }
1003
1004 // remove "abc" in the beginning with different casing.
1005 loc = bld.removeUnicodeLocaleAttribute("ABC").build(status);
1006 if (U_FAILURE(status)) {
1007 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1008 u_errorName(status));
1009 }
1010 expected = "fr-u-efg-efghi-hijk";
1011 actual = loc.toLanguageTag<std::string>(status);
1012 if (U_FAILURE(status) || expected != actual) {
1013 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1014 }
1015
1016 // remove non-existing substring in the end.
1017 loc = bld.removeUnicodeLocaleAttribute("hij").build(status);
1018 if (U_FAILURE(status)) {
1019 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1020 u_errorName(status));
1021 }
1022 actual = loc.toLanguageTag<std::string>(status);
1023 if (U_FAILURE(status) || expected != actual) {
1024 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1025 }
1026
1027 // remove "hijk" in the end with different casing.
1028 loc = bld.removeUnicodeLocaleAttribute("hIJK").build(status);
1029 if (U_FAILURE(status)) {
1030 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1031 u_errorName(status));
1032 }
1033 expected = "fr-u-efg-efghi";
1034 actual = loc.toLanguageTag<std::string>(status);
1035 if (U_FAILURE(status) || expected != actual) {
1036 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1037 }
1038
1039 // remove "efghi" in the end with different casing.
1040 loc = bld.removeUnicodeLocaleAttribute("EFGhi").build(status);
1041 if (U_FAILURE(status)) {
1042 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1043 u_errorName(status));
1044 }
1045 expected = "fr-u-efg";
1046 actual = loc.toLanguageTag<std::string>(status);
1047 if (U_FAILURE(status) || expected != actual) {
1048 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1049 }
1050
1051 // remove "efg" in as the only one, with different casing.
1052 loc = bld.removeUnicodeLocaleAttribute("EFG").build(status);
1053 if (U_FAILURE(status)) {
1054 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1055 u_errorName(status));
1056 }
1057 expected = "fr";
1058 actual = loc.toLanguageTag<std::string>(status);
1059 if (U_FAILURE(status) || expected != actual) {
1060 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1061 }
1062
1063 }
1064
TestAddRemoveUnicodeLocaleAttributeWellFormed()1065 void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttributeWellFormed() {
1066 // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
1067 // attribute = alphanum{3,8} ;
1068 static const char* wellFormedAttributes[] = {
1069 // alphanum{3}
1070 "AbC",
1071 "ZAA",
1072 "0AA",
1073 "x3A",
1074 "xa8",
1075
1076 // alphanum{4}
1077 "AbCA",
1078 "ZASD",
1079 "0ASD",
1080 "A3a4",
1081 "zK90",
1082
1083 // alphanum{5}
1084 "efgij",
1085 "AbCAD",
1086 "ZAASD",
1087 "0AASD",
1088 "A1CAD",
1089 "ef2ij",
1090 "ads3X",
1091 "owqF4",
1092
1093 // alphanum{6}
1094 "efgijk",
1095 "AADGFE",
1096 "AkDfFz",
1097 "0ADGFE",
1098 "A9DfFz",
1099 "AADG7E",
1100
1101 // alphanum{7}
1102 "asdfads",
1103 "ADSFADF",
1104 "piSFkDk",
1105 "a0dfads",
1106 "ADSF3DF",
1107 "piSFkD9",
1108
1109 // alphanum{8}
1110 "oieradfz",
1111 "IADSFJKR",
1112 "kkDSFJkR",
1113 };
1114 LocaleBuilder bld;
1115 for (int i = 0; i < UPRV_LENGTHOF(wellFormedAttributes); i++) {
1116 if (i % 5 == 0) {
1117 bld.clear();
1118 }
1119 UErrorCode status = U_ZERO_ERROR;
1120 bld.addUnicodeLocaleAttribute(wellFormedAttributes[i]);
1121 Locale loc = bld.build(status);
1122 if (U_FAILURE(status)) {
1123 errln("addUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1124 wellFormedAttributes[i], u_errorName(status));
1125 }
1126 if (i > 2) {
1127 bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 1]);
1128 loc = bld.build(status);
1129 if (U_FAILURE(status)) {
1130 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1131 wellFormedAttributes[i - 1], u_errorName(status));
1132 }
1133 bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 3]);
1134 loc = bld.build(status);
1135 if (U_FAILURE(status)) {
1136 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1137 wellFormedAttributes[i - 3], u_errorName(status));
1138 }
1139 }
1140 }
1141 }
1142
TestAddUnicodeLocaleAttributeIllFormed()1143 void LocaleBuilderTest::TestAddUnicodeLocaleAttributeIllFormed() {
1144 static const char* illFormed[] = {
1145 "aa",
1146 "34",
1147 "ab-",
1148 "-cd",
1149 "-ef-",
1150 "zyzabcdef",
1151 "123456789",
1152 "ab-abc",
1153 "1ZB30zfk9-abc",
1154 "2ck30zfk9-adsf023-234kcZ",
1155 };
1156 for (const char* ill : illFormed) {
1157 UErrorCode status = U_ZERO_ERROR;
1158 LocaleBuilder bld;
1159 bld.addUnicodeLocaleAttribute(ill);
1160 Locale loc = bld.build(status);
1161 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1162 errln("addUnicodeLocaleAttribute(\"%s\") should fail but has no Error\n",
1163 ill);
1164 }
1165 }
1166 }
1167
TestSetExtensionU()1168 void LocaleBuilderTest::TestSetExtensionU() {
1169 LocaleBuilder bld;
1170 bld.setLanguage("zh");
1171 Verify(bld, "zh",
1172 "setLanguage(\"zh\") got Error: %s\n");
1173
1174 bld.setExtension('u', "co-stroke");
1175 Verify(bld, "zh-u-co-stroke",
1176 "setExtension('u', \"co-stroke\") got Error: %s\n");
1177
1178 bld.setExtension('U', "ca-islamic");
1179 Verify(bld, "zh-u-ca-islamic",
1180 "setExtension('U', \"zh-u-ca-islamic\") got Error: %s\n");
1181
1182 bld.setExtension('u', "ca-chinese");
1183 Verify(bld, "zh-u-ca-chinese",
1184 "setExtension('u', \"ca-chinese\") got Error: %s\n");
1185
1186 bld.setExtension('U', "co-pinyin");
1187 Verify(bld, "zh-u-co-pinyin",
1188 "setExtension('U', \"co-pinyin\") got Error: %s\n");
1189
1190 bld.setRegion("TW");
1191 Verify(bld, "zh-TW-u-co-pinyin",
1192 "setRegion(\"TW\") got Error: %s\n");
1193
1194 bld.setExtension('U', "");
1195 Verify(bld, "zh-TW",
1196 "setExtension('U', \"\") got Error: %s\n");
1197
1198 bld.setExtension('u', "abc-defg-kr-face");
1199 Verify(bld, "zh-TW-u-abc-defg-kr-face",
1200 "setExtension('u', \"abc-defg-kr-face\") got Error: %s\n");
1201
1202 bld.setExtension('U', "ca-japanese");
1203 Verify(bld, "zh-TW-u-ca-japanese",
1204 "setExtension('U', \"ca-japanese\") got Error: %s\n");
1205
1206 }
1207
TestSetExtensionValidateUWellFormed()1208 void LocaleBuilderTest::TestSetExtensionValidateUWellFormed() {
1209 static const char* wellFormedExtensions[] = {
1210 // keyword
1211 // keyword = key (sep type)? ;
1212 // key = alphanum alpha ;
1213 // type = alphanum{3,8} (sep alphanum{3,8})* ;
1214 "3A",
1215 "ZA",
1216 "az-abc",
1217 "zz-123",
1218 "7z-12345678",
1219 "kb-A234567Z",
1220 // (sep keyword)+
1221 "1z-ZZ",
1222 "2z-ZZ-123",
1223 "3z-ZZ-123-cd",
1224 "0z-ZZ-123-cd-efghijkl",
1225 // attribute
1226 "abc",
1227 "456",
1228 "87654321",
1229 "ZABADFSD",
1230 // (sep attribute)+
1231 "abc-ZABADFSD",
1232 "123-ZABADFSD",
1233 "K2K-12345678",
1234 "K2K-12345678-zzz",
1235 // (sep attribute)+ (sep keyword)*
1236 "K2K-12345678-zz",
1237 "K2K-12345678-zz-0z",
1238 "K2K-12345678-9z-AZ-abc",
1239 "K2K-12345678-zz-9A-234",
1240 "K2K-12345678-zk0-abc-efg-zz-9k-234",
1241 };
1242 for (const char* extension : wellFormedExtensions) {
1243 UErrorCode status = U_ZERO_ERROR;
1244 LocaleBuilder bld;
1245 bld.setExtension('u', extension);
1246 Locale loc = bld.build(status);
1247 if (U_FAILURE(status)) {
1248 errln("setExtension('u', \"%s\") got Error: %s\n",
1249 extension, u_errorName(status));
1250 }
1251 }
1252 }
1253
TestSetExtensionValidateUIllFormed()1254 void LocaleBuilderTest::TestSetExtensionValidateUIllFormed() {
1255 static const char* illFormed[] = {
1256 // bad key
1257 "-",
1258 "-ab",
1259 "ab-",
1260 "abc-",
1261 "-abc",
1262 "0",
1263 "a",
1264 "A0",
1265 "z9",
1266 "09",
1267 "90",
1268 // bad keyword
1269 "AB-A0",
1270 "AB-efg-A0",
1271 "xy-123456789",
1272 "AB-Aa-",
1273 "AB-Aac-",
1274 // bad attribute
1275 "abcdefghi",
1276 "abcdefgh-",
1277 "abcdefgh-abcdefghi",
1278 "abcdefgh-1",
1279 "abcdefgh-a",
1280 "abcdefgh-a2345678z",
1281 };
1282 for (const char* ill : illFormed) {
1283 UErrorCode status = U_ZERO_ERROR;
1284 LocaleBuilder bld;
1285 bld.setExtension('u', ill);
1286 Locale loc = bld.build(status);
1287 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1288 errln("setExtension('u', \"%s\") should fail but has no Error\n",
1289 ill);
1290 }
1291 }
1292 }
1293
TestSetExtensionT()1294 void LocaleBuilderTest::TestSetExtensionT() {
1295 LocaleBuilder bld;
1296 bld.setLanguage("fr");
1297 Verify(bld, "fr",
1298 "setLanguage(\"fr\") got Error: %s\n");
1299
1300 bld.setExtension('T', "zh");
1301 Verify(bld, "fr-t-zh",
1302 "setExtension('T', \"zh\") got Error: %s\n");
1303
1304 bld.setExtension('t', "zh-Hant-TW-1234-A9-123-456ABCDE");
1305 Verify(bld, "fr-t-zh-hant-tw-1234-a9-123-456abcde",
1306 "setExtension('t', \"zh-Hant-TW-1234-A9-123-456ABCDE\") got Error: %s\n");
1307
1308 bld.setExtension('T', "a9-123");
1309 Verify(bld, "fr-t-a9-123",
1310 "setExtension('T', \"a9-123\") got Error: %s\n");
1311
1312 bld.setRegion("MX");
1313 Verify(bld, "fr-MX-t-a9-123",
1314 "setRegion(\"MX\") got Error: %s\n");
1315
1316 bld.setScript("Hans");
1317 Verify(bld, "fr-Hans-MX-t-a9-123",
1318 "setScript(\"Hans\") got Error: %s\n");
1319
1320 bld.setVariant("9abc-abcde");
1321 Verify(bld, "fr-Hans-MX-9abc-abcde-t-a9-123",
1322 "setVariant(\"9abc-abcde\") got Error: %s\n");
1323
1324 bld.setExtension('T', "");
1325 Verify(bld, "fr-Hans-MX-9abc-abcde",
1326 "bld.setExtension('T', \"\") got Error: %s\n");
1327 }
1328
TestSetExtensionValidateTWellFormed()1329 void LocaleBuilderTest::TestSetExtensionValidateTWellFormed() {
1330 // ((sep tlang (sep tfield)*) | (sep tfield)+)
1331 static const char* wellFormedExtensions[] = {
1332 // tlang
1333 // tlang = unicode_language_subtag (sep unicode_script_subtag)?
1334 // (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
1335 // unicode_language_subtag
1336 "en",
1337 "abc",
1338 "abcde",
1339 "ABCDEFGH",
1340 // unicode_language_subtag sep unicode_script_subtag
1341 "en-latn",
1342 "abc-arab",
1343 "ABCDEFGH-Thai",
1344 // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1345 "en-latn-ME",
1346 "abc-arab-RU",
1347 "ABCDEFGH-Thai-TH",
1348 "en-latn-409",
1349 "abc-arab-123",
1350 "ABCDEFGH-Thai-456",
1351 // unicode_language_subtag sep unicode_region_subtag
1352 "en-ME",
1353 "abc-RU",
1354 "ABCDEFGH-TH",
1355 "en-409",
1356 "abc-123",
1357 "ABCDEFGH-456",
1358 // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1359 // sep (sep unicode_variant_subtag)*
1360 "en-latn-ME-abcde",
1361 "abc-arab-RU-3abc-abcdef",
1362 "ABCDEFGH-Thai-TH-ADSFS-9xyz-abcdef",
1363 "en-latn-409-xafsa",
1364 "abc-arab-123-ADASDF",
1365 "ABCDEFGH-Thai-456-9sdf-ADASFAS",
1366 // (sep tfield)+
1367 "A0-abcde",
1368 "z9-abcde123",
1369 "z9-abcde123-a1-abcde",
1370 // tlang (sep tfield)*
1371 "fr-A0-abcde",
1372 "fr-FR-A0-abcde",
1373 "fr-123-z9-abcde123-a1-abcde",
1374 "fr-Latn-FR-z9-abcde123-a1-abcde",
1375 "gab-Thai-TH-abcde-z9-abcde123-a1-abcde",
1376 "gab-Thai-TH-0bde-z9-abcde123-a1-abcde",
1377 };
1378 for (const char* extension : wellFormedExtensions) {
1379 UErrorCode status = U_ZERO_ERROR;
1380 LocaleBuilder bld;
1381 bld.setExtension('t', extension);
1382 Locale loc = bld.build(status);
1383 if (U_FAILURE(status)) {
1384 errln("setExtension('t', \"%s\") got Error: %s\n",
1385 extension, u_errorName(status));
1386 }
1387 }
1388 }
1389
TestSetExtensionValidateTIllFormed()1390 void LocaleBuilderTest::TestSetExtensionValidateTIllFormed() {
1391 static const char* illFormed[] = {
1392 "a",
1393 "a-",
1394 "0",
1395 "9-",
1396 "-9",
1397 "-z",
1398 "Latn",
1399 "Latn-",
1400 "en-",
1401 "nob-",
1402 "-z9",
1403 "a3",
1404 "a3-",
1405 "3a",
1406 "0z-",
1407 "en-123-a1",
1408 "en-TH-a1",
1409 "gab-TH-a1",
1410 "gab-Thai-a1",
1411 "gab-Thai-TH-a1",
1412 "gab-Thai-TH-0bde-a1",
1413 "gab-Thai-TH-0bde-3b",
1414 "gab-Thai-TH-0bde-z9-a1",
1415 "gab-Thai-TH-0bde-z9-3b",
1416 "gab-Thai-TH-0bde-z9-abcde123-3b",
1417 "gab-Thai-TH-0bde-z9-abcde123-ab",
1418 "gab-Thai-TH-0bde-z9-abcde123-ab",
1419 "gab-Thai-TH-0bde-z9-abcde123-a1",
1420 "gab-Thai-TH-0bde-z9-abcde123-a1-",
1421 "gab-Thai-TH-0bde-z9-abcde123-a1-a",
1422 "gab-Thai-TH-0bde-z9-abcde123-a1-ab",
1423 // ICU-21408
1424 "root",
1425 };
1426 for (const char* ill : illFormed) {
1427 UErrorCode status = U_ZERO_ERROR;
1428 LocaleBuilder bld;
1429 bld.setExtension('t', ill);
1430 Locale loc = bld.build(status);
1431 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1432 errln("setExtension('t', \"%s\") should fail but has no Error\n",
1433 ill);
1434 }
1435 }
1436 }
1437
TestSetExtensionPU()1438 void LocaleBuilderTest::TestSetExtensionPU() {
1439 LocaleBuilder bld;
1440 bld.setLanguage("ar");
1441 Verify(bld, "ar",
1442 "setLanguage(\"ar\") got Error: %s\n");
1443
1444 bld.setExtension('X', "a-b-c-d-e");
1445 Verify(bld, "ar-x-a-b-c-d-e",
1446 "setExtension('X', \"a-b-c-d-e\") got Error: %s\n");
1447
1448 bld.setExtension('x', "0-1-2-3");
1449 Verify(bld, "ar-x-0-1-2-3",
1450 "setExtension('x', \"0-1-2-3\") got Error: %s\n");
1451
1452 bld.setExtension('X', "0-12345678-x-x");
1453 Verify(bld, "ar-x-0-12345678-x-x",
1454 "setExtension('x', \"ar-x-0-12345678-x-x\") got Error: %s\n");
1455
1456 bld.setRegion("TH");
1457 Verify(bld, "ar-TH-x-0-12345678-x-x",
1458 "setRegion(\"TH\") got Error: %s\n");
1459
1460 bld.setExtension('X', "");
1461 Verify(bld, "ar-TH",
1462 "setExtension(\"X\") got Error: %s\n");
1463 }
1464
TestSetExtensionValidatePUWellFormed()1465 void LocaleBuilderTest::TestSetExtensionValidatePUWellFormed() {
1466 // ((sep tlang (sep tfield)*) | (sep tfield)+)
1467 static const char* wellFormedExtensions[] = {
1468 "a", // Short subtag
1469 "z", // Short subtag
1470 "0", // Short subtag, digit
1471 "9", // Short subtag, digit
1472 "a-0", // Two short subtag, alpha and digit
1473 "9-z", // Two short subtag, digit and alpha
1474 "ab",
1475 "abc",
1476 "abcefghi", // Long subtag
1477 "87654321",
1478 "01",
1479 "234",
1480 "0a-ab-87654321", // Three subtags
1481 "87654321-ab-00-3A", // Four subtabs
1482 "a-9-87654321", // Three subtags with short and long subtags
1483 "87654321-ab-0-3A",
1484 };
1485 for (const char* extension : wellFormedExtensions) {
1486 UErrorCode status = U_ZERO_ERROR;
1487 LocaleBuilder bld;
1488 bld.setExtension('x', extension);
1489 Locale loc = bld.build(status);
1490 if (U_FAILURE(status)) {
1491 errln("setExtension('x', \"%s\") got Error: %s\n",
1492 extension, u_errorName(status));
1493 }
1494 }
1495 }
1496
TestSetExtensionValidatePUIllFormed()1497 void LocaleBuilderTest::TestSetExtensionValidatePUIllFormed() {
1498 static const char* illFormed[] = {
1499 "123456789", // Too long
1500 "abcdefghi", // Too long
1501 "ab-123456789", // Second subtag too long
1502 "abcdefghi-12", // First subtag too long
1503 "a-ab-987654321", // Third subtag too long
1504 "987654321-a-0-3", // First subtag too long
1505 };
1506 for (const char* ill : illFormed) {
1507 UErrorCode status = U_ZERO_ERROR;
1508 LocaleBuilder bld;
1509 bld.setExtension('x', ill);
1510 Locale loc = bld.build(status);
1511 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1512 errln("setExtension('x', \"%s\") should fail but has no Error\n",
1513 ill);
1514 }
1515 }
1516 }
1517
TestSetExtensionOthers()1518 void LocaleBuilderTest::TestSetExtensionOthers() {
1519 LocaleBuilder bld;
1520 bld.setLanguage("fr");
1521 Verify(bld, "fr",
1522 "setLanguage(\"fr\") got Error: %s\n");
1523
1524 bld.setExtension('Z', "ab");
1525 Verify(bld, "fr-z-ab",
1526 "setExtension('Z', \"ab\") got Error: %s\n");
1527
1528 bld.setExtension('0', "xyz12345-abcdefg");
1529 Verify(bld, "fr-0-xyz12345-abcdefg-z-ab",
1530 "setExtension('0', \"xyz12345-abcdefg\") got Error: %s\n");
1531
1532 bld.setExtension('a', "01-12345678-ABcdef");
1533 Verify(bld, "fr-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1534 "setExtension('a', \"01-12345678-ABcdef\") got Error: %s\n");
1535
1536 bld.setRegion("TH");
1537 Verify(bld, "fr-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1538 "setRegion(\"TH\") got Error: %s\n");
1539
1540 bld.setScript("Arab");
1541 Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1542 "setRegion(\"Arab\") got Error: %s\n");
1543
1544 bld.setExtension('A', "97");
1545 Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-97-z-ab",
1546 "setExtension('a', \"97\") got Error: %s\n");
1547
1548 bld.setExtension('a', "");
1549 Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-z-ab",
1550 "setExtension('a', \"\") got Error: %s\n");
1551
1552 bld.setExtension('0', "");
1553 Verify(bld, "fr-Arab-TH-z-ab",
1554 "setExtension('0', \"\") got Error: %s\n");
1555 }
1556
TestSetExtensionValidateOthersWellFormed()1557 void LocaleBuilderTest::TestSetExtensionValidateOthersWellFormed() {
1558 static const char* wellFormedExtensions[] = {
1559 "ab",
1560 "abc",
1561 "abcefghi",
1562 "01",
1563 "234",
1564 "87654321",
1565 "0a-ab-87654321",
1566 "87654321-ab-00-3A",
1567 };
1568
1569 const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1570 const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
1571 int32_t i = 0;
1572 for (const char* extension : wellFormedExtensions) {
1573 char ch = aToZ[i];
1574 i = (i + 1) % aToZLen;
1575 UErrorCode status = U_ZERO_ERROR;
1576 LocaleBuilder bld;
1577 bld.setExtension(ch, extension);
1578 Locale loc = bld.build(status);
1579 if (U_FAILURE(status)) {
1580 errln("setExtension('%c', \"%s\") got Error: %s\n",
1581 ch, extension, u_errorName(status));
1582 }
1583 }
1584
1585 const char* someChars =
1586 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+;:,.<>?";
1587 const int32_t someCharsLen = static_cast<int32_t>(uprv_strlen(someChars));
1588 for (int32_t i = 0; i < someCharsLen; i++) {
1589 char ch = someChars[i];
1590 UErrorCode status = U_ZERO_ERROR;
1591 LocaleBuilder bld;
1592 bld.setExtension(ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1593 Locale loc = bld.build(status);
1594 if (uprv_isASCIILetter(ch) || ('0' <= ch && ch <= '9')) {
1595 if (ch != 't' && ch != 'T' && ch != 'u' && ch != 'U' && ch != 'x' && ch != 'X') {
1596 if (U_FAILURE(status)) {
1597 errln("setExtension('%c', \"%s\") got Error: %s\n",
1598 ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)], u_errorName(status));
1599 }
1600 }
1601 } else {
1602 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1603 errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1604 ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1605 }
1606 }
1607
1608 }
1609 }
1610
TestSetExtensionValidateOthersIllFormed()1611 void LocaleBuilderTest::TestSetExtensionValidateOthersIllFormed() {
1612 static const char* illFormed[] = {
1613 "0", // Too short
1614 "a", // Too short
1615 "123456789", // Too long
1616 "abcdefghi", // Too long
1617 "ab-123456789", // Second subtag too long
1618 "abcdefghi-12", // First subtag too long
1619 "a-ab-87654321", // Third subtag too long
1620 "87654321-a-0-3", // First subtag too long
1621 };
1622 const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1623 const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
1624 int32_t i = 0;
1625 for (const char* ill : illFormed) {
1626 char ch = aToZ[i];
1627 i = (i + 1) % aToZLen;
1628 UErrorCode status = U_ZERO_ERROR;
1629 LocaleBuilder bld;
1630 bld.setExtension(ch, ill);
1631 Locale loc = bld.build(status);
1632 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1633 errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1634 ch, ill);
1635 }
1636 }
1637 }
1638
TestSetLocale()1639 void LocaleBuilderTest::TestSetLocale() {
1640 LocaleBuilder bld1, bld2;
1641 UErrorCode status = U_ZERO_ERROR;
1642 Locale l1 = bld1.setLanguage("en")
1643 .setScript("Latn")
1644 .setRegion("MX")
1645 .setVariant("3456-abcde")
1646 .addUnicodeLocaleAttribute("456")
1647 .addUnicodeLocaleAttribute("123")
1648 .setUnicodeLocaleKeyword("nu", "thai")
1649 .setUnicodeLocaleKeyword("co", "stroke")
1650 .setUnicodeLocaleKeyword("ca", "chinese")
1651 .build(status);
1652 if (U_FAILURE(status) || l1.isBogus()) {
1653 errln("build got Error: %s\n", u_errorName(status));
1654 }
1655 status = U_ZERO_ERROR;
1656 Locale l2 = bld1.setLocale(l1).build(status);
1657 if (U_FAILURE(status) || l2.isBogus()) {
1658 errln("build got Error: %s\n", u_errorName(status));
1659 }
1660
1661 if (l1 != l2) {
1662 errln("Two locales should be the same, but one is '%s' and the other is '%s'",
1663 l1.getName(), l2.getName());
1664 }
1665 }
1666
TestPosixCases()1667 void LocaleBuilderTest::TestPosixCases() {
1668 UErrorCode status = U_ZERO_ERROR;
1669 Locale l1 = Locale::forLanguageTag("en-US-u-va-posix", status);
1670 if (U_FAILURE(status) || l1.isBogus()) {
1671 errln("build got Error: %s\n", u_errorName(status));
1672 }
1673 LocaleBuilder bld;
1674 bld.setLanguage("en")
1675 .setRegion("MX")
1676 .setScript("Arab")
1677 .setUnicodeLocaleKeyword("nu", "Thai")
1678 .setExtension('x', "1");
1679 // All of above should be cleared by the setLocale call.
1680 Locale l2 = bld.setLocale(l1).build(status);
1681 if (U_FAILURE(status) || l2.isBogus()) {
1682 errln("build got Error: %s\n", u_errorName(status));
1683 }
1684 if (l1 != l2) {
1685 errln("The result locale should be the set as the setLocale %s but got %s\n",
1686 l1.toLanguageTag<std::string>(status).c_str(),
1687 l2.toLanguageTag<std::string>(status).c_str());
1688 }
1689 Locale posix("en-US-POSIX");
1690 if (posix != l2) {
1691 errln("The result locale should be the set as %s but got %s\n",
1692 posix.getName(), l2.getName());
1693 }
1694 }
1695