1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include <memory>
5
6 #include "cmemory.h"
7 #include "cstring.h"
8 #include "localebuildertest.h"
9 #include "unicode/localebuilder.h"
10 #include "unicode/strenum.h"
11
LocaleBuilderTest()12 LocaleBuilderTest::LocaleBuilderTest()
13 {
14 }
15
~LocaleBuilderTest()16 LocaleBuilderTest::~LocaleBuilderTest()
17 {
18 }
19
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)20 void LocaleBuilderTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par*/ )
21 {
22 TESTCASE_AUTO_BEGIN;
23 TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttribute);
24 TESTCASE_AUTO(TestAddRemoveUnicodeLocaleAttributeWellFormed);
25 TESTCASE_AUTO(TestAddUnicodeLocaleAttributeIllFormed);
26 TESTCASE_AUTO(TestLocaleBuilder);
27 TESTCASE_AUTO(TestLocaleBuilderBasic);
28 TESTCASE_AUTO(TestPosixCases);
29 TESTCASE_AUTO(TestSetExtensionOthers);
30 TESTCASE_AUTO(TestSetExtensionPU);
31 TESTCASE_AUTO(TestSetExtensionT);
32 TESTCASE_AUTO(TestSetExtensionU);
33 TESTCASE_AUTO(TestSetExtensionValidateOthersIllFormed);
34 TESTCASE_AUTO(TestSetExtensionValidateOthersWellFormed);
35 TESTCASE_AUTO(TestSetExtensionValidatePUIllFormed);
36 TESTCASE_AUTO(TestSetExtensionValidatePUWellFormed);
37 TESTCASE_AUTO(TestSetExtensionValidateTIllFormed);
38 TESTCASE_AUTO(TestSetExtensionValidateTWellFormed);
39 TESTCASE_AUTO(TestSetExtensionValidateUIllFormed);
40 TESTCASE_AUTO(TestSetExtensionValidateUWellFormed);
41 TESTCASE_AUTO(TestSetLanguageIllFormed);
42 TESTCASE_AUTO(TestSetLanguageWellFormed);
43 TESTCASE_AUTO(TestSetLocale);
44 TESTCASE_AUTO(TestSetRegionIllFormed);
45 TESTCASE_AUTO(TestSetRegionWellFormed);
46 TESTCASE_AUTO(TestSetScriptIllFormed);
47 TESTCASE_AUTO(TestSetScriptWellFormed);
48 TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedKey);
49 TESTCASE_AUTO(TestSetUnicodeLocaleKeywordIllFormedValue);
50 TESTCASE_AUTO(TestSetUnicodeLocaleKeywordWellFormed);
51 TESTCASE_AUTO(TestSetVariantIllFormed);
52 TESTCASE_AUTO(TestSetVariantWellFormed);
53 TESTCASE_AUTO_END;
54 }
55
Verify(LocaleBuilder & bld,const char * expected,const char * msg)56 void LocaleBuilderTest::Verify(LocaleBuilder& bld, const char* expected, const char* msg) {
57 UErrorCode status = U_ZERO_ERROR;
58 UErrorCode copyStatus = U_ZERO_ERROR;
59 UErrorCode errorStatus = U_ILLEGAL_ARGUMENT_ERROR;
60 if (bld.copyErrorTo(copyStatus)) {
61 errln(msg, u_errorName(copyStatus));
62 }
63 if (!bld.copyErrorTo(errorStatus) || errorStatus != U_ILLEGAL_ARGUMENT_ERROR) {
64 errln("Should always get the previous error and return FALSE");
65 }
66 Locale loc = bld.build(status);
67 if (U_FAILURE(status)) {
68 errln(msg, u_errorName(status));
69 }
70 if (status != copyStatus) {
71 errln(msg, u_errorName(status));
72 }
73 std::string tag = loc.toLanguageTag<std::string>(status);
74 if (U_FAILURE(status)) {
75 errln("loc.toLanguageTag() got Error: %s\n",
76 u_errorName(status));
77 }
78 if (tag != expected) {
79 errln("should get \"%s\", but got \"%s\"\n", expected, tag.c_str());
80 }
81 }
82
TestLocaleBuilder()83 void LocaleBuilderTest::TestLocaleBuilder() {
84 // The following test data are copy from
85 // icu4j/main/tests/core/src/com/ibm/icu/dev/test/util/LocaleBuilderTest.java
86 // "L": +1 = language
87 // "S": +1 = script
88 // "R": +1 = region
89 // "V": +1 = variant
90 // "K": +1 = Unicode locale key / +2 = Unicode locale type
91 // "A": +1 = Unicode locale attribute
92 // "E": +1 = extension letter / +2 = extension value
93 // "P": +1 = private use
94 // "U": +1 = ULocale
95 // "B": +1 = BCP47 language tag
96 // "C": Clear all
97 // "N": Clear extensions
98 // "D": +1 = Unicode locale attribute to be removed
99 // "X": indicates an exception must be thrown
100 // "T": +1 = expected language tag / +2 = expected locale string
101 const char* TESTCASES[][14] = {
102 {"L", "en", "R", "us", "T", "en-US", "en_US"},
103 {"L", "en", "R", "CA", "L", nullptr, "T", "und-CA", "_CA"},
104 {"L", "en", "R", "CA", "L", "", "T", "und-CA", "_CA"},
105 {"L", "en", "R", "FR", "L", "fr", "T", "fr-FR", "fr_FR"},
106 {"L", "123", "X"},
107 {"R", "us", "T", "und-US", "_US"},
108 {"R", "usa", "X"},
109 {"R", "123", "L", "it", "R", nullptr, "T", "it", "it"},
110 {"R", "123", "L", "it", "R", "", "T", "it", "it"},
111 {"R", "123", "L", "en", "T", "en-123", "en_123"},
112 {"S", "LATN", "L", "DE", "T", "de-Latn", "de_Latn"},
113 {"L", "De", "S", "latn", "R", "de", "S", "", "T", "de-DE", "de_DE"},
114 {"L", "De", "S", "Arab", "R", "de", "S", nullptr, "T", "de-DE", "de_DE"},
115 {"S", "latin", "X"},
116 {"V", "1234", "L", "en", "T", "en-1234", "en__1234"},
117 {"V", "1234", "L", "en", "V", "5678", "T", "en-5678", "en__5678"},
118 {"V", "1234", "L", "en", "V", nullptr, "T", "en", "en"},
119 {"V", "1234", "L", "en", "V", "", "T", "en", "en"},
120 {"V", "123", "X"},
121 {"U", "en_US", "T", "en-US", "en_US"},
122 {"U", "en_US_WIN", "X"},
123 {"B", "fr-FR-1606nict-u-ca-gregory-x-test", "T",
124 "fr-FR-1606nict-u-ca-gregory-x-test",
125 "fr_FR_1606NICT@calendar=gregorian;x=test"},
126 {"B", "ab-cde-fghij", "T", "cde-fghij", "cde__FGHIJ"},
127 {"B", "und-CA", "T", "und-CA", "_CA"},
128 // Blocked by ICU-20327
129 // {"B", "en-US-x-test-lvariant-var", "T", "en-US-x-test-lvariant-var",
130 // "en_US_VAR@x=test"},
131 {"B", "en-US-VAR", "X"},
132 {"U", "ja_JP@calendar=japanese;currency=JPY", "L", "ko", "T",
133 "ko-JP-u-ca-japanese-cu-jpy", "ko_JP@calendar=japanese;currency=JPY"},
134 {"U", "ja_JP@calendar=japanese;currency=JPY", "K", "ca", nullptr, "T",
135 "ja-JP-u-cu-jpy", "ja_JP@currency=JPY"},
136 {"U", "ja_JP@calendar=japanese;currency=JPY", "E", "u",
137 "attr1-ca-gregory", "T", "ja-JP-u-attr1-ca-gregory",
138 "ja_JP@attribute=attr1;calendar=gregorian"},
139 {"U", "en@colnumeric=yes", "K", "kn", "true", "T", "en-u-kn",
140 "en@colnumeric=yes"},
141 {"L", "th", "R", "th", "K", "nu", "thai", "T", "th-TH-u-nu-thai",
142 "th_TH@numbers=thai"},
143 {"U", "zh_Hans", "R", "sg", "K", "ca", "badcalendar", "X"},
144 {"U", "zh_Hans", "R", "sg", "K", "cal", "gregory", "X"},
145 {"E", "z", "ExtZ", "L", "en", "T", "en-z-extz", "en@z=extz"},
146 {"E", "z", "ExtZ", "L", "en", "E", "z", "", "T", "en", "en"},
147 {"E", "z", "ExtZ", "L", "en", "E", "z", nullptr, "T", "en", "en"},
148 {"E", "a", "x", "X"},
149 {"E", "a", "abc_def", "T", "und-a-abc-def", "@a=abc-def"},
150 // Design limitation - typeless u extension keyword 0a below is interpreted as a boolean value true/yes.
151 // With the legacy keyword syntax, "yes" is used for such boolean value instead of "true".
152 // However, once the legacy keyword is translated back to BCP 47 u extension, key "0a" is unknown,
153 // so "yes" is preserved - not mapped to "true". We could change the code to automatically transform
154 // key = alphanum alpha
155 {"L", "en", "E", "u", "bbb-aaa-0a", "T", "en-u-aaa-bbb-0a",
156 "en@0a=yes;attribute=aaa-bbb"},
157 {"L", "fr", "R", "FR", "P", "Yoshito-ICU", "T", "fr-FR-x-yoshito-icu",
158 "fr_FR@x=yoshito-icu"},
159 {"L", "ja", "R", "jp", "K", "ca", "japanese", "T", "ja-JP-u-ca-japanese",
160 "ja_JP@calendar=japanese"},
161 {"K", "co", "PHONEBK", "K", "ca", "gregory", "L", "De", "T",
162 "de-u-ca-gregory-co-phonebk", "de@calendar=gregorian;collation=phonebook"},
163 {"E", "o", "OPQR", "E", "a", "aBcD", "T", "und-a-abcd-o-opqr", "@a=abcd;o=opqr"},
164 {"E", "u", "nu-thai-ca-gregory", "L", "TH", "T", "th-u-ca-gregory-nu-thai",
165 "th@calendar=gregorian;numbers=thai"},
166 {"L", "en", "K", "tz", "usnyc", "R", "US", "T", "en-US-u-tz-usnyc",
167 "en_US@timezone=America/New_York"},
168 {"L", "de", "K", "co", "phonebk", "K", "ks", "level1", "K", "kk",
169 "true", "T", "de-u-co-phonebk-kk-ks-level1",
170 "de@collation=phonebook;colnormalization=yes;colstrength=primary"},
171 {"L", "en", "R", "US", "K", "ca", "gregory", "T", "en-US-u-ca-gregory",
172 "en_US@calendar=gregorian"},
173 {"L", "en", "R", "US", "K", "cal", "gregory", "X"},
174 {"L", "en", "R", "US", "K", "ca", "gregorian", "X"},
175 {"L", "en", "R", "US", "K", "kn", "true", "T", "en-US-u-kn",
176 "en_US@colnumeric=yes"},
177 {"B", "de-DE-u-co-phonebk", "C", "L", "pt", "T", "pt", "pt"},
178 {"B", "ja-jp-u-ca-japanese", "N", "T", "ja-JP", "ja_JP"},
179 {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "T",
180 "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
181 {"B", "es-u-def-abc-co-trad", "A", "hij", "D", "def", "D", "def", "T",
182 "es-u-abc-hij-co-trad", "es@attribute=abc-hij;collation=traditional"},
183 {"L", "en", "A", "aa", "X"},
184 {"B", "fr-u-attr1-cu-eur", "D", "attribute1", "X"},
185 };
186 UErrorCode status = U_ZERO_ERROR;
187 LocaleBuilder bld;
188 for (int tidx = 0; tidx < UPRV_LENGTHOF(TESTCASES); tidx++) {
189 const char* (&testCase)[14] = TESTCASES[tidx];
190 std::string actions;
191 for (int p = 0; p < UPRV_LENGTHOF(testCase); p++) {
192 if (testCase[p] == nullptr) {
193 actions += " (nullptr)";
194 break;
195 }
196 if (p > 0) actions += " ";
197 actions += testCase[p];
198 }
199 int i = 0;
200 const char* method;
201 status = U_ZERO_ERROR;
202 bld.clear();
203 while (true) {
204 status = U_ZERO_ERROR;
205 UErrorCode copyStatus = U_ZERO_ERROR;
206 method = testCase[i++];
207 if (strcmp("L", method) == 0) {
208 bld.setLanguage(testCase[i++]);
209 bld.copyErrorTo(copyStatus);
210 bld.build(status);
211 } else if (strcmp("S", method) == 0) {
212 bld.setScript(testCase[i++]);
213 bld.copyErrorTo(copyStatus);
214 bld.build(status);
215 } else if (strcmp("R", method) == 0) {
216 bld.setRegion(testCase[i++]);
217 bld.copyErrorTo(copyStatus);
218 bld.build(status);
219 } else if (strcmp("V", method) == 0) {
220 bld.setVariant(testCase[i++]);
221 bld.copyErrorTo(copyStatus);
222 bld.build(status);
223 } else if (strcmp("K", method) == 0) {
224 const char* key = testCase[i++];
225 const char* type = testCase[i++];
226 bld.setUnicodeLocaleKeyword(key, type);
227 bld.copyErrorTo(copyStatus);
228 bld.build(status);
229 } else if (strcmp("A", method) == 0) {
230 bld.addUnicodeLocaleAttribute(testCase[i++]);
231 bld.copyErrorTo(copyStatus);
232 bld.build(status);
233 } else if (strcmp("E", method) == 0) {
234 const char* key = testCase[i++];
235 const char* value = testCase[i++];
236 bld.setExtension(key[0], value);
237 bld.copyErrorTo(copyStatus);
238 bld.build(status);
239 } else if (strcmp("P", method) == 0) {
240 bld.setExtension('x', testCase[i++]);
241 bld.copyErrorTo(copyStatus);
242 bld.build(status);
243 } else if (strcmp("U", method) == 0) {
244 bld.setLocale(Locale(testCase[i++]));
245 bld.copyErrorTo(copyStatus);
246 bld.build(status);
247 } else if (strcmp("B", method) == 0) {
248 bld.setLanguageTag(testCase[i++]);
249 bld.copyErrorTo(copyStatus);
250 bld.build(status);
251 }
252 // clear / remove
253 else if (strcmp("C", method) == 0) {
254 bld.clear();
255 bld.copyErrorTo(copyStatus);
256 bld.build(status);
257 } else if (strcmp("N", method) == 0) {
258 bld.clearExtensions();
259 bld.copyErrorTo(copyStatus);
260 bld.build(status);
261 } else if (strcmp("D", method) == 0) {
262 bld.removeUnicodeLocaleAttribute(testCase[i++]);
263 bld.copyErrorTo(copyStatus);
264 bld.build(status);
265 }
266 // result
267 else if (strcmp("X", method) == 0) {
268 if (U_SUCCESS(status)) {
269 errln("FAIL: No error return - test case: %s", actions.c_str());
270 }
271 } else if (strcmp("T", method) == 0) {
272 status = U_ZERO_ERROR;
273 Locale loc = bld.build(status);
274 if (status != copyStatus) {
275 errln("copyErrorTo not matching");
276 }
277 if (U_FAILURE(status) ||
278 strcmp(loc.getName(), testCase[i + 1]) != 0) {
279 errln("FAIL: Wrong locale ID - %s %s %s", loc.getName(),
280 " for test case: ", actions.c_str());
281 }
282 std::string langtag = loc.toLanguageTag<std::string>(status);
283 if (U_FAILURE(status) || langtag != testCase[i]) {
284 errln("FAIL: Wrong language tag - %s %s %s", langtag.c_str(),
285 " for test case: ", actions.c_str());
286 }
287 break;
288 } else {
289 // Unknow test method
290 errln("Unknown test case method: There is an error in the test case data.");
291 break;
292 }
293 if (status != copyStatus) {
294 errln("copyErrorTo not matching");
295 }
296 if (U_FAILURE(status)) {
297 if (strcmp("X", testCase[i]) == 0) {
298 // This failure is expected
299 break;
300 } else {
301 errln("FAIL: U_ILLEGAL_ARGUMENT_ERROR at offset %d %s %s", i,
302 " in test case: ", actions.c_str());
303 break;
304 }
305 }
306 if (strcmp("T", method) == 0) {
307 break;
308 }
309 } // while(true)
310 } // for TESTCASES
311 }
312
TestLocaleBuilderBasic()313 void LocaleBuilderTest::TestLocaleBuilderBasic() {
314 LocaleBuilder bld;
315 bld.setLanguage("zh");
316 Verify(bld, "zh", "setLanguage('zh') got Error: %s\n");
317
318 bld.setScript("Hant");
319 Verify(bld, "zh-Hant", "setScript('Hant') got Error: %s\n");
320
321 bld.setRegion("SG");
322 Verify(bld, "zh-Hant-SG", "setRegion('SG') got Error: %s\n");
323
324 bld.setRegion("HK");
325 bld.setScript("Hans");
326 Verify(bld, "zh-Hans-HK",
327 "setRegion('HK') and setScript('Hans') got Error: %s\n");
328
329 bld.setVariant("revised");
330 Verify(bld, "zh-Hans-HK-revised",
331 "setVariant('revised') got Error: %s\n");
332
333 bld.setUnicodeLocaleKeyword("nu", "thai");
334 Verify(bld, "zh-Hans-HK-revised-u-nu-thai",
335 "setUnicodeLocaleKeyword('nu', 'thai'') got Error: %s\n");
336
337 bld.setUnicodeLocaleKeyword("co", "pinyin");
338 Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-thai",
339 "setUnicodeLocaleKeyword('co', 'pinyin'') got Error: %s\n");
340
341 bld.setUnicodeLocaleKeyword("nu", "latn");
342 Verify(bld, "zh-Hans-HK-revised-u-co-pinyin-nu-latn",
343 "setUnicodeLocaleKeyword('nu', 'latn'') got Error: %s\n");
344
345 bld.setUnicodeLocaleKeyword("nu", nullptr);
346 Verify(bld, "zh-Hans-HK-revised-u-co-pinyin",
347 "setUnicodeLocaleKeyword('nu', ''') got Error: %s\n");
348
349 bld.setUnicodeLocaleKeyword("co", nullptr);
350 Verify(bld, "zh-Hans-HK-revised",
351 "setUnicodeLocaleKeyword('nu', nullptr) got Error: %s\n");
352
353 bld.setScript("");
354 Verify(bld, "zh-HK-revised",
355 "setScript('') got Error: %s\n");
356
357 bld.setVariant("");
358 Verify(bld, "zh-HK",
359 "setVariant('') got Error: %s\n");
360
361 bld.setRegion("");
362 Verify(bld, "zh",
363 "setRegion('') got Error: %s\n");
364 }
365
TestSetLanguageWellFormed()366 void LocaleBuilderTest::TestSetLanguageWellFormed() {
367 // http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag
368 // unicode_language_subtag = alpha{2,3} | alpha{5,8};
369 // ICUTC decided also support alpha{4}
370 static const char* wellFormedLanguages[] = {
371 "",
372
373 // alpha{2}
374 "en",
375 "NE",
376 "eN",
377 "Ne",
378
379 // alpha{3}
380 "aNe",
381 "zzz",
382 "AAA",
383
384 // alpha{4}
385 "ABCD",
386 "abcd",
387
388 // alpha{5}
389 "efgij",
390 "AbCAD",
391 "ZAASD",
392
393 // alpha{6}
394 "efgijk",
395 "AADGFE",
396 "AkDfFz",
397
398 // alpha{7}
399 "asdfads",
400 "ADSFADF",
401 "piSFkDk",
402
403 // alpha{8}
404 "oieradfz",
405 "IADSFJKR",
406 "kkDSFJkR",
407 };
408 for (const char* lang : wellFormedLanguages) {
409 UErrorCode status = U_ZERO_ERROR;
410 LocaleBuilder bld;
411 bld.setLanguage(lang);
412 Locale loc = bld.build(status);
413 if (U_FAILURE(status)) {
414 errln("setLanguage(\"%s\") got Error: %s\n",
415 lang, u_errorName(status));
416 }
417 }
418 }
419
TestSetLanguageIllFormed()420 void LocaleBuilderTest::TestSetLanguageIllFormed() {
421 static const char* illFormed[] = {
422 "a",
423 "z",
424 "A",
425 "F",
426 "2",
427 "0",
428 "9"
429 "{",
430 ".",
431 "[",
432 "]",
433 "\\",
434
435 "e1",
436 "N2",
437 "3N",
438 "4e",
439 "e:",
440 "43",
441 "a9",
442
443 "aN0",
444 "z1z",
445 "2zz",
446 "3A3",
447 "456",
448 "af)",
449
450 // Per 2019-01-23 ICUTC, we still accept 4alpha as tlang. see ICU-20321.
451 // "latn",
452 // "Arab",
453 // "LATN",
454
455 "e)gij",
456 "Ab3AD",
457 "ZAAS8",
458
459 "efgi[]",
460 "AA9GFE",
461 "7kD3Fz",
462 "as8fads",
463 "0DSFADF",
464 "'iSFkDk",
465
466 "oieradf+",
467 "IADSFJK-",
468 "kkDSFJk0",
469
470 // alpha{9}
471 "oieradfab",
472 "IADSFJKDE",
473 "kkDSFJkzf",
474 };
475 for (const char* ill : illFormed) {
476 UErrorCode status = U_ZERO_ERROR;
477 LocaleBuilder bld;
478 bld.setLanguage(ill);
479 Locale loc = bld.build(status);
480 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
481 errln("setLanguage(\"%s\") should fail but has no Error\n", ill);
482 }
483 }
484 }
485
TestSetScriptWellFormed()486 void LocaleBuilderTest::TestSetScriptWellFormed() {
487 // http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag
488 // unicode_script_subtag = alpha{4} ;
489 static const char* wellFormedScripts[] = {
490 "",
491
492 "Latn",
493 "latn",
494 "lATN",
495 "laTN",
496 "arBN",
497 "ARbn",
498 "adsf",
499 "aADF",
500 "BSVS",
501 "LATn",
502 };
503 for (const char* script : wellFormedScripts) {
504 UErrorCode status = U_ZERO_ERROR;
505 LocaleBuilder bld;
506 bld.setScript(script);
507 Locale loc = bld.build(status);
508 if (U_FAILURE(status)) {
509 errln("setScript(\"%s\") got Error: %s\n",
510 script, u_errorName(status));
511 }
512 }
513 }
514
TestSetScriptIllFormed()515 void LocaleBuilderTest::TestSetScriptIllFormed() {
516 static const char* illFormed[] = {
517 "a",
518 "z",
519 "A",
520 "F",
521 "2",
522 "0",
523 "9"
524 "{",
525 ".",
526 "[",
527 "]",
528 "\\",
529
530 "e1",
531 "N2",
532 "3N",
533 "4e",
534 "e:",
535 "43",
536 "a9",
537
538 "aN0",
539 "z1z",
540 "2zz",
541 "3A3",
542 "456",
543 "af)",
544
545 "0atn",
546 "l1tn",
547 "lA2N",
548 "la4N",
549 "arB5",
550 "1234",
551
552 "e)gij",
553 "Ab3AD",
554 "ZAAS8",
555
556 "efgi[]",
557 "AA9GFE",
558 "7kD3Fz",
559
560 "as8fads",
561 "0DSFADF",
562 "'iSFkDk",
563
564 "oieradf+",
565 "IADSFJK-",
566 "kkDSFJk0",
567
568 // alpha{9}
569 "oieradfab",
570 "IADSFJKDE",
571 "kkDSFJkzf",
572 };
573 for (const char* ill : illFormed) {
574 UErrorCode status = U_ZERO_ERROR;
575 LocaleBuilder bld;
576 bld.setScript(ill);
577 Locale loc = bld.build(status);
578 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
579 errln("setScript(\"%s\") should fail but has no Error\n", ill);
580 }
581 }
582 }
583
TestSetRegionWellFormed()584 void LocaleBuilderTest::TestSetRegionWellFormed() {
585 // http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag
586 // unicode_region_subtag = (alpha{2} | digit{3})
587 static const char* wellFormedRegions[] = {
588 "",
589
590 // alpha{2}
591 "en",
592 "NE",
593 "eN",
594 "Ne",
595
596 // digit{3}
597 "000",
598 "999",
599 "123",
600 "987"
601 };
602 for (const char* region : wellFormedRegions) {
603 UErrorCode status = U_ZERO_ERROR;
604 LocaleBuilder bld;
605 bld.setRegion(region);
606 Locale loc = bld.build(status);
607 if (U_FAILURE(status)) {
608 errln("setRegion(\"%s\") got Error: %s\n",
609 region, u_errorName(status));
610 }
611 }
612 }
613
TestSetRegionIllFormed()614 void LocaleBuilderTest::TestSetRegionIllFormed() {
615 static const char* illFormed[] = {
616 "a",
617 "z",
618 "A",
619 "F",
620 "2",
621 "0",
622 "9"
623 "{",
624 ".",
625 "[",
626 "]",
627 "\\",
628
629 "e1",
630 "N2",
631 "3N",
632 "4e",
633 "e:",
634 "43",
635 "a9",
636
637 "aN0",
638 "z1z",
639 "2zz",
640 "3A3",
641 "4.6",
642 "af)",
643
644 "0atn",
645 "l1tn",
646 "lA2N",
647 "la4N",
648 "arB5",
649 "1234",
650
651 "e)gij",
652 "Ab3AD",
653 "ZAAS8",
654
655 "efgi[]",
656 "AA9GFE",
657 "7kD3Fz",
658
659 "as8fads",
660 "0DSFADF",
661 "'iSFkDk",
662
663 "oieradf+",
664 "IADSFJK-",
665 "kkDSFJk0",
666
667 // alpha{9}
668 "oieradfab",
669 "IADSFJKDE",
670 "kkDSFJkzf",
671 };
672 for (const char* ill : illFormed) {
673 UErrorCode status = U_ZERO_ERROR;
674 LocaleBuilder bld;
675 bld.setRegion(ill);
676 Locale loc = bld.build(status);
677 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
678 errln("setRegion(\"%s\") should fail but has no Error\n", ill);
679 }
680 }
681 }
682
TestSetVariantWellFormed()683 void LocaleBuilderTest::TestSetVariantWellFormed() {
684 // http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag
685 // (sep unicode_variant_subtag)*
686 // unicode_variant_subtag = (alphanum{5,8} | digit alphanum{3}) ;
687 static const char* wellFormedVariants[] = {
688 "",
689
690 // alphanum{5}
691 "efgij",
692 "AbCAD",
693 "ZAASD",
694 "0AASD",
695 "A1CAD",
696 "ef2ij",
697 "ads3X",
698 "owqF4",
699
700 // alphanum{6}
701 "efgijk",
702 "AADGFE",
703 "AkDfFz",
704 "0ADGFE",
705 "A9DfFz",
706 "AADG7E",
707
708 // alphanum{7}
709 "asdfads",
710 "ADSFADF",
711 "piSFkDk",
712 "a0dfads",
713 "ADSF3DF",
714 "piSFkD9",
715
716 // alphanum{8}
717 "oieradfz",
718 "IADSFJKR",
719 "kkDSFJkR",
720 "0ADSFJKR",
721 "12345679",
722
723 // digit alphanum{3}
724 "0123",
725 "1abc",
726 "20EF",
727 "30EF",
728 "8A03",
729 "3Ax3",
730 "9Axy",
731
732 // (sep unicode_variant_subtag)*
733 "0123-4567",
734 "0ab3-ABCDE",
735 "9ax3-xByD9",
736 "9ax3-xByD9-adfk934a",
737
738 "0123_4567",
739 "0ab3_ABCDE",
740 "9ax3_xByD9",
741 "9ax3_xByD9_adfk934a",
742
743 "9ax3-xByD9_adfk934a",
744 "9ax3_xByD9-adfk934a",
745 };
746 for (const char* variant : wellFormedVariants) {
747 UErrorCode status = U_ZERO_ERROR;
748 LocaleBuilder bld;
749 bld.setVariant(variant);
750 Locale loc = bld.build(status);
751 if (U_FAILURE(status)) {
752 errln("setVariant(\"%s\") got Error: %s\n",
753 variant, u_errorName(status));
754 }
755 }
756 }
757
TestSetVariantIllFormed()758 void LocaleBuilderTest::TestSetVariantIllFormed() {
759 static const char* illFormed[] = {
760 "a",
761 "z",
762 "A",
763 "F",
764 "2",
765 "0",
766 "9"
767 "{",
768 ".",
769 "[",
770 "]",
771 "\\",
772
773 "e1",
774 "N2",
775 "3N",
776 "4e",
777 "e:",
778 "43",
779 "a9",
780 "en",
781 "NE",
782 "eN",
783 "Ne",
784
785 "aNe",
786 "zzz",
787 "AAA",
788 "aN0",
789 "z1z",
790 "2zz",
791 "3A3",
792 "4.6",
793 "af)",
794 "345",
795 "923",
796
797 "Latn",
798 "latn",
799 "lATN",
800 "laTN",
801 "arBN",
802 "ARbn",
803 "adsf",
804 "aADF",
805 "BSVS",
806 "LATn",
807 "l1tn",
808 "lA2N",
809 "la4N",
810 "arB5",
811 "abc3",
812 "A3BC",
813
814 "e)gij",
815 "A+3AD",
816 "ZAA=8",
817
818 "efgi[]",
819 "AA9]FE",
820 "7k[3Fz",
821
822 "as8f/ds",
823 "0DSFAD{",
824 "'iSFkDk",
825
826 "oieradf+",
827 "IADSFJK-",
828 "k}DSFJk0",
829
830 // alpha{9}
831 "oieradfab",
832 "IADSFJKDE",
833 "kkDSFJkzf",
834 "123456789",
835
836 "-0123",
837 "-0123-4567",
838 "0123-4567-",
839 "-123-4567",
840 "_0123",
841 "_0123_4567",
842 "0123_4567_",
843 "_123_4567",
844
845 "-abcde-figjk",
846 "abcde-figjk-",
847 "-abcde-figjk-",
848 "_abcde_figjk",
849 "abcde_figjk_",
850 "_abcde_figjk_",
851 };
852 for (const char* ill : illFormed) {
853 UErrorCode status = U_ZERO_ERROR;
854 LocaleBuilder bld;
855 bld.setVariant(ill);
856 Locale loc = bld.build(status);
857 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
858 errln("setVariant(\"%s\") should fail but has no Error\n", ill);
859 }
860 }
861 }
862
TestSetUnicodeLocaleKeywordWellFormed()863 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordWellFormed() {
864 // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
865 // keyword = key (sep type)? ;
866 // key = alphanum alpha ;
867 // type = alphanum{3,8} (sep alphanum{3,8})* ;
868 static const char* wellFormed_key_value[] = {
869 "aa", "123",
870 "3b", "zyzbcdef",
871 "0Z", "1ZB30zk9-abc",
872 "cZ", "2ck30zfZ-adsf023-234kcZ",
873 "ZZ", "Lant",
874 "ko", "",
875 };
876 for (int i = 0; i < UPRV_LENGTHOF(wellFormed_key_value); i += 2) {
877 UErrorCode status = U_ZERO_ERROR;
878 LocaleBuilder bld;
879 bld.setUnicodeLocaleKeyword(wellFormed_key_value[i],
880 wellFormed_key_value[i + 1]);
881 Locale loc = bld.build(status);
882 if (U_FAILURE(status)) {
883 errln("setUnicodeLocaleKeyword(\"%s\", \"%s\") got Error: %s\n",
884 wellFormed_key_value[i],
885 wellFormed_key_value[i + 1],
886 u_errorName(status));
887 }
888 }
889 }
890
TestSetUnicodeLocaleKeywordIllFormedKey()891 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedKey() {
892 static const char* illFormed[] = {
893 "34",
894 "ab-cde",
895 "123",
896 "b3",
897 "zyzabcdef",
898 "Z0",
899 };
900 for (const char* ill : illFormed) {
901 UErrorCode status = U_ZERO_ERROR;
902 LocaleBuilder bld;
903 bld.setUnicodeLocaleKeyword(ill, "abc");
904 Locale loc = bld.build(status);
905 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
906 errln("setUnicodeLocaleKeyword(\"%s\", \"abc\") should fail but has no Error\n",
907 ill);
908 }
909 }
910 }
911
TestSetUnicodeLocaleKeywordIllFormedValue()912 void LocaleBuilderTest::TestSetUnicodeLocaleKeywordIllFormedValue() {
913 static const char* illFormed[] = {
914 "34",
915 "ab-",
916 "-cd",
917 "-ef-",
918 "zyzabcdef",
919 "ab-abc",
920 "1ZB30zfk9-abc",
921 "2ck30zfk9-adsf023-234kcZ",
922 };
923 for (const char* ill : illFormed) {
924 UErrorCode status = U_ZERO_ERROR;
925 LocaleBuilder bld;
926 bld.setUnicodeLocaleKeyword("ab", ill);
927 Locale loc = bld.build(status);
928 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
929 errln("setUnicodeLocaleKeyword(\"ab\", \"%s\") should fail but has no Error\n",
930 ill);
931 }
932 }
933 }
934
TestAddRemoveUnicodeLocaleAttribute()935 void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttribute() {
936 LocaleBuilder bld;
937 UErrorCode status = U_ZERO_ERROR;
938 Locale loc = bld.setLanguage("fr")
939 .addUnicodeLocaleAttribute("abc")
940 .addUnicodeLocaleAttribute("aBc")
941 .addUnicodeLocaleAttribute("EFG")
942 .addUnicodeLocaleAttribute("efghi")
943 .addUnicodeLocaleAttribute("efgh")
944 .addUnicodeLocaleAttribute("efGhi")
945 .addUnicodeLocaleAttribute("EFg")
946 .addUnicodeLocaleAttribute("hijk")
947 .addUnicodeLocaleAttribute("EFG")
948 .addUnicodeLocaleAttribute("HiJK")
949 .addUnicodeLocaleAttribute("aBc")
950 .build(status);
951 if (U_FAILURE(status)) {
952 errln("addUnicodeLocaleAttribute() got Error: %s\n",
953 u_errorName(status));
954 }
955 std::string expected("fr-u-abc-efg-efgh-efghi-hijk");
956 std::string actual = loc.toLanguageTag<std::string>(status);
957 if (U_FAILURE(status) || expected != actual) {
958 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
959 }
960
961 // remove "efgh" in the middle with different casing.
962 loc = bld.removeUnicodeLocaleAttribute("eFgH").build(status);
963 if (U_FAILURE(status)) {
964 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
965 u_errorName(status));
966 }
967 expected = "fr-u-abc-efg-efghi-hijk";
968 actual = loc.toLanguageTag<std::string>(status);
969 if (U_FAILURE(status) || expected != actual) {
970 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
971 }
972
973 // remove non-existing attributes.
974 loc = bld.removeUnicodeLocaleAttribute("efgh").build(status);
975 if (U_FAILURE(status)) {
976 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
977 u_errorName(status));
978 }
979 actual = loc.toLanguageTag<std::string>(status);
980 if (U_FAILURE(status) || expected != actual) {
981 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
982 }
983
984 // remove "abc" in the beginning with different casing.
985 loc = bld.removeUnicodeLocaleAttribute("ABC").build(status);
986 if (U_FAILURE(status)) {
987 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
988 u_errorName(status));
989 }
990 expected = "fr-u-efg-efghi-hijk";
991 actual = loc.toLanguageTag<std::string>(status);
992 if (U_FAILURE(status) || expected != actual) {
993 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
994 }
995
996 // remove non-existing substring in the end.
997 loc = bld.removeUnicodeLocaleAttribute("hij").build(status);
998 if (U_FAILURE(status)) {
999 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1000 u_errorName(status));
1001 }
1002 actual = loc.toLanguageTag<std::string>(status);
1003 if (U_FAILURE(status) || expected != actual) {
1004 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1005 }
1006
1007 // remove "hijk" in the end with different casing.
1008 loc = bld.removeUnicodeLocaleAttribute("hIJK").build(status);
1009 if (U_FAILURE(status)) {
1010 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1011 u_errorName(status));
1012 }
1013 expected = "fr-u-efg-efghi";
1014 actual = loc.toLanguageTag<std::string>(status);
1015 if (U_FAILURE(status) || expected != actual) {
1016 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1017 }
1018
1019 // remove "efghi" in the end with different casing.
1020 loc = bld.removeUnicodeLocaleAttribute("EFGhi").build(status);
1021 if (U_FAILURE(status)) {
1022 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1023 u_errorName(status));
1024 }
1025 expected = "fr-u-efg";
1026 actual = loc.toLanguageTag<std::string>(status);
1027 if (U_FAILURE(status) || expected != actual) {
1028 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1029 }
1030
1031 // remove "efg" in as the only one, with different casing.
1032 loc = bld.removeUnicodeLocaleAttribute("EFG").build(status);
1033 if (U_FAILURE(status)) {
1034 errln("removeUnicodeLocaleAttribute() got Error: %s\n",
1035 u_errorName(status));
1036 }
1037 expected = "fr";
1038 actual = loc.toLanguageTag<std::string>(status);
1039 if (U_FAILURE(status) || expected != actual) {
1040 errln("Should get \"%s\" but get \"%s\"\n", expected.c_str(), actual.c_str());
1041 }
1042
1043 }
1044
TestAddRemoveUnicodeLocaleAttributeWellFormed()1045 void LocaleBuilderTest::TestAddRemoveUnicodeLocaleAttributeWellFormed() {
1046 // http://www.unicode.org/reports/tr35/tr35.html#unicode_locale_extensions
1047 // attribute = alphanum{3,8} ;
1048 static const char* wellFormedAttributes[] = {
1049 // alphanum{3}
1050 "AbC",
1051 "ZAA",
1052 "0AA",
1053 "x3A",
1054 "xa8",
1055
1056 // alphanum{4}
1057 "AbCA",
1058 "ZASD",
1059 "0ASD",
1060 "A3a4",
1061 "zK90",
1062
1063 // alphanum{5}
1064 "efgij",
1065 "AbCAD",
1066 "ZAASD",
1067 "0AASD",
1068 "A1CAD",
1069 "ef2ij",
1070 "ads3X",
1071 "owqF4",
1072
1073 // alphanum{6}
1074 "efgijk",
1075 "AADGFE",
1076 "AkDfFz",
1077 "0ADGFE",
1078 "A9DfFz",
1079 "AADG7E",
1080
1081 // alphanum{7}
1082 "asdfads",
1083 "ADSFADF",
1084 "piSFkDk",
1085 "a0dfads",
1086 "ADSF3DF",
1087 "piSFkD9",
1088
1089 // alphanum{8}
1090 "oieradfz",
1091 "IADSFJKR",
1092 "kkDSFJkR",
1093 };
1094 LocaleBuilder bld;
1095 for (int i = 0; i < UPRV_LENGTHOF(wellFormedAttributes); i++) {
1096 if (i % 5 == 0) {
1097 bld.clear();
1098 }
1099 UErrorCode status = U_ZERO_ERROR;
1100 bld.addUnicodeLocaleAttribute(wellFormedAttributes[i]);
1101 Locale loc = bld.build(status);
1102 if (U_FAILURE(status)) {
1103 errln("addUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1104 wellFormedAttributes[i], u_errorName(status));
1105 }
1106 if (i > 2) {
1107 bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 1]);
1108 loc = bld.build(status);
1109 if (U_FAILURE(status)) {
1110 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1111 wellFormedAttributes[i - 1], u_errorName(status));
1112 }
1113 bld.removeUnicodeLocaleAttribute(wellFormedAttributes[i - 3]);
1114 loc = bld.build(status);
1115 if (U_FAILURE(status)) {
1116 errln("removeUnicodeLocaleAttribute(\"%s\") got Error: %s\n",
1117 wellFormedAttributes[i - 3], u_errorName(status));
1118 }
1119 }
1120 }
1121 }
1122
TestAddUnicodeLocaleAttributeIllFormed()1123 void LocaleBuilderTest::TestAddUnicodeLocaleAttributeIllFormed() {
1124 static const char* illFormed[] = {
1125 "aa",
1126 "34",
1127 "ab-",
1128 "-cd",
1129 "-ef-",
1130 "zyzabcdef",
1131 "123456789",
1132 "ab-abc",
1133 "1ZB30zfk9-abc",
1134 "2ck30zfk9-adsf023-234kcZ",
1135 };
1136 for (const char* ill : illFormed) {
1137 UErrorCode status = U_ZERO_ERROR;
1138 LocaleBuilder bld;
1139 bld.addUnicodeLocaleAttribute(ill);
1140 Locale loc = bld.build(status);
1141 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1142 errln("addUnicodeLocaleAttribute(\"%s\") should fail but has no Error\n",
1143 ill);
1144 }
1145 }
1146 }
1147
TestSetExtensionU()1148 void LocaleBuilderTest::TestSetExtensionU() {
1149 LocaleBuilder bld;
1150 bld.setLanguage("zh");
1151 Verify(bld, "zh",
1152 "setLanguage(\"zh\") got Error: %s\n");
1153
1154 bld.setExtension('u', "co-stroke");
1155 Verify(bld, "zh-u-co-stroke",
1156 "setExtension('u', \"co-stroke\") got Error: %s\n");
1157
1158 bld.setExtension('U', "ca-islamic");
1159 Verify(bld, "zh-u-ca-islamic",
1160 "setExtension('U', \"zh-u-ca-islamic\") got Error: %s\n");
1161
1162 bld.setExtension('u', "ca-chinese");
1163 Verify(bld, "zh-u-ca-chinese",
1164 "setExtension('u', \"ca-chinese\") got Error: %s\n");
1165
1166 bld.setExtension('U', "co-pinyin");
1167 Verify(bld, "zh-u-co-pinyin",
1168 "setExtension('U', \"co-pinyin\") got Error: %s\n");
1169
1170 bld.setRegion("TW");
1171 Verify(bld, "zh-TW-u-co-pinyin",
1172 "setRegion(\"TW\") got Error: %s\n");
1173
1174 bld.setExtension('U', "");
1175 Verify(bld, "zh-TW",
1176 "setExtension('U', \"\") got Error: %s\n");
1177
1178 bld.setExtension('u', "abc-defg-kr-face");
1179 Verify(bld, "zh-TW-u-abc-defg-kr-face",
1180 "setExtension('u', \"abc-defg-kr-face\") got Error: %s\n");
1181
1182 bld.setExtension('U', "ca-japanese");
1183 Verify(bld, "zh-TW-u-ca-japanese",
1184 "setExtension('U', \"ca-japanese\") got Error: %s\n");
1185
1186 }
1187
TestSetExtensionValidateUWellFormed()1188 void LocaleBuilderTest::TestSetExtensionValidateUWellFormed() {
1189 static const char* wellFormedExtensions[] = {
1190 // keyword
1191 // keyword = key (sep type)? ;
1192 // key = alphanum alpha ;
1193 // type = alphanum{3,8} (sep alphanum{3,8})* ;
1194 "3A",
1195 "ZA",
1196 "az-abc",
1197 "zz-123",
1198 "7z-12345678",
1199 "kb-A234567Z",
1200 // (sep keyword)+
1201 "1z-ZZ",
1202 "2z-ZZ-123",
1203 "3z-ZZ-123-cd",
1204 "0z-ZZ-123-cd-efghijkl",
1205 // attribute
1206 "abc",
1207 "456",
1208 "87654321",
1209 "ZABADFSD",
1210 // (sep attribute)+
1211 "abc-ZABADFSD",
1212 "123-ZABADFSD",
1213 "K2K-12345678",
1214 "K2K-12345678-zzz",
1215 // (sep attribute)+ (sep keyword)*
1216 "K2K-12345678-zz",
1217 "K2K-12345678-zz-0z",
1218 "K2K-12345678-9z-AZ-abc",
1219 "K2K-12345678-zz-9A-234",
1220 "K2K-12345678-zk0-abc-efg-zz-9k-234",
1221 };
1222 for (const char* extension : wellFormedExtensions) {
1223 UErrorCode status = U_ZERO_ERROR;
1224 LocaleBuilder bld;
1225 bld.setExtension('u', extension);
1226 Locale loc = bld.build(status);
1227 if (U_FAILURE(status)) {
1228 errln("setExtension('u', \"%s\") got Error: %s\n",
1229 extension, u_errorName(status));
1230 }
1231 }
1232 }
1233
TestSetExtensionValidateUIllFormed()1234 void LocaleBuilderTest::TestSetExtensionValidateUIllFormed() {
1235 static const char* illFormed[] = {
1236 // bad key
1237 "-",
1238 "-ab",
1239 "ab-",
1240 "abc-",
1241 "-abc",
1242 "0",
1243 "a",
1244 "A0",
1245 "z9",
1246 "09",
1247 "90",
1248 // bad keyword
1249 "AB-A0",
1250 "AB-efg-A0",
1251 "xy-123456789",
1252 "AB-Aa-",
1253 "AB-Aac-",
1254 // bad attribute
1255 "abcdefghi",
1256 "abcdefgh-",
1257 "abcdefgh-abcdefghi",
1258 "abcdefgh-1",
1259 "abcdefgh-a",
1260 "abcdefgh-a2345678z",
1261 };
1262 for (const char* ill : illFormed) {
1263 UErrorCode status = U_ZERO_ERROR;
1264 LocaleBuilder bld;
1265 bld.setExtension('u', ill);
1266 Locale loc = bld.build(status);
1267 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1268 errln("setExtension('u', \"%s\") should fail but has no Error\n",
1269 ill);
1270 }
1271 }
1272 }
1273
TestSetExtensionT()1274 void LocaleBuilderTest::TestSetExtensionT() {
1275 LocaleBuilder bld;
1276 bld.setLanguage("fr");
1277 Verify(bld, "fr",
1278 "setLanguage(\"fr\") got Error: %s\n");
1279
1280 bld.setExtension('T', "zh");
1281 Verify(bld, "fr-t-zh",
1282 "setExtension('T', \"zh\") got Error: %s\n");
1283
1284 bld.setExtension('t', "zh-Hant-TW-1234-A9-123-456ABCDE");
1285 Verify(bld, "fr-t-zh-hant-tw-1234-a9-123-456abcde",
1286 "setExtension('t', \"zh-Hant-TW-1234-A9-123-456ABCDE\") got Error: %s\n");
1287
1288 bld.setExtension('T', "a9-123");
1289 Verify(bld, "fr-t-a9-123",
1290 "setExtension('T', \"a9-123\") got Error: %s\n");
1291
1292 bld.setRegion("MX");
1293 Verify(bld, "fr-MX-t-a9-123",
1294 "setRegion(\"MX\") got Error: %s\n");
1295
1296 bld.setScript("Hans");
1297 Verify(bld, "fr-Hans-MX-t-a9-123",
1298 "setScript(\"Hans\") got Error: %s\n");
1299
1300 bld.setVariant("9abc-abcde");
1301 Verify(bld, "fr-Hans-MX-9abc-abcde-t-a9-123",
1302 "setVariant(\"9abc-abcde\") got Error: %s\n");
1303
1304 bld.setExtension('T', "");
1305 Verify(bld, "fr-Hans-MX-9abc-abcde",
1306 "bld.setExtension('T', \"\") got Error: %s\n");
1307 }
1308
TestSetExtensionValidateTWellFormed()1309 void LocaleBuilderTest::TestSetExtensionValidateTWellFormed() {
1310 // ((sep tlang (sep tfield)*) | (sep tfield)+)
1311 static const char* wellFormedExtensions[] = {
1312 // tlang
1313 // tlang = unicode_language_subtag (sep unicode_script_subtag)?
1314 // (sep unicode_region_subtag)? (sep unicode_variant_subtag)* ;
1315 // unicode_language_subtag
1316 "en",
1317 "abc",
1318 "abcde",
1319 "ABCDEFGH",
1320 // unicode_language_subtag sep unicode_script_subtag
1321 "en-latn",
1322 "abc-arab",
1323 "ABCDEFGH-Thai",
1324 // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1325 "en-latn-ME",
1326 "abc-arab-RU",
1327 "ABCDEFGH-Thai-TH",
1328 "en-latn-409",
1329 "abc-arab-123",
1330 "ABCDEFGH-Thai-456",
1331 // unicode_language_subtag sep unicode_region_subtag
1332 "en-ME",
1333 "abc-RU",
1334 "ABCDEFGH-TH",
1335 "en-409",
1336 "abc-123",
1337 "ABCDEFGH-456",
1338 // unicode_language_subtag sep unicode_script_subtag sep unicode_region_subtag
1339 // sep (sep unicode_variant_subtag)*
1340 "en-latn-ME-abcde",
1341 "abc-arab-RU-3abc-abcdef",
1342 "ABCDEFGH-Thai-TH-ADSFS-9xyz-abcdef",
1343 "en-latn-409-xafsa",
1344 "abc-arab-123-ADASDF",
1345 "ABCDEFGH-Thai-456-9sdf-ADASFAS",
1346 // (sep tfield)+
1347 "A0-abcde",
1348 "z9-abcde123",
1349 "z9-abcde123-a1-abcde",
1350 // tlang (sep tfield)*
1351 "fr-A0-abcde",
1352 "fr-FR-A0-abcde",
1353 "fr-123-z9-abcde123-a1-abcde",
1354 "fr-Latn-FR-z9-abcde123-a1-abcde",
1355 "gab-Thai-TH-abcde-z9-abcde123-a1-abcde",
1356 "gab-Thai-TH-0bde-z9-abcde123-a1-abcde",
1357 };
1358 for (const char* extension : wellFormedExtensions) {
1359 UErrorCode status = U_ZERO_ERROR;
1360 LocaleBuilder bld;
1361 bld.setExtension('t', extension);
1362 Locale loc = bld.build(status);
1363 if (U_FAILURE(status)) {
1364 errln("setExtension('t', \"%s\") got Error: %s\n",
1365 extension, u_errorName(status));
1366 }
1367 }
1368 }
1369
TestSetExtensionValidateTIllFormed()1370 void LocaleBuilderTest::TestSetExtensionValidateTIllFormed() {
1371 static const char* illFormed[] = {
1372 "a",
1373 "a-",
1374 "0",
1375 "9-",
1376 "-9",
1377 "-z",
1378 "Latn",
1379 "Latn-",
1380 "en-",
1381 "nob-",
1382 "-z9",
1383 "a3",
1384 "a3-",
1385 "3a",
1386 "0z-",
1387 "en-123-a1",
1388 "en-TH-a1",
1389 "gab-TH-a1",
1390 "gab-Thai-a1",
1391 "gab-Thai-TH-a1",
1392 "gab-Thai-TH-0bde-a1",
1393 "gab-Thai-TH-0bde-3b",
1394 "gab-Thai-TH-0bde-z9-a1",
1395 "gab-Thai-TH-0bde-z9-3b",
1396 "gab-Thai-TH-0bde-z9-abcde123-3b",
1397 "gab-Thai-TH-0bde-z9-abcde123-ab",
1398 "gab-Thai-TH-0bde-z9-abcde123-ab",
1399 "gab-Thai-TH-0bde-z9-abcde123-a1",
1400 "gab-Thai-TH-0bde-z9-abcde123-a1-",
1401 "gab-Thai-TH-0bde-z9-abcde123-a1-a",
1402 "gab-Thai-TH-0bde-z9-abcde123-a1-ab",
1403 // ICU-21408
1404 "root",
1405 };
1406 for (const char* ill : illFormed) {
1407 UErrorCode status = U_ZERO_ERROR;
1408 LocaleBuilder bld;
1409 bld.setExtension('t', ill);
1410 Locale loc = bld.build(status);
1411 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1412 errln("setExtension('t', \"%s\") should fail but has no Error\n",
1413 ill);
1414 }
1415 }
1416 }
1417
TestSetExtensionPU()1418 void LocaleBuilderTest::TestSetExtensionPU() {
1419 LocaleBuilder bld;
1420 bld.setLanguage("ar");
1421 Verify(bld, "ar",
1422 "setLanguage(\"ar\") got Error: %s\n");
1423
1424 bld.setExtension('X', "a-b-c-d-e");
1425 Verify(bld, "ar-x-a-b-c-d-e",
1426 "setExtension('X', \"a-b-c-d-e\") got Error: %s\n");
1427
1428 bld.setExtension('x', "0-1-2-3");
1429 Verify(bld, "ar-x-0-1-2-3",
1430 "setExtension('x', \"0-1-2-3\") got Error: %s\n");
1431
1432 bld.setExtension('X', "0-12345678-x-x");
1433 Verify(bld, "ar-x-0-12345678-x-x",
1434 "setExtension('x', \"ar-x-0-12345678-x-x\") got Error: %s\n");
1435
1436 bld.setRegion("TH");
1437 Verify(bld, "ar-TH-x-0-12345678-x-x",
1438 "setRegion(\"TH\") got Error: %s\n");
1439
1440 bld.setExtension('X', "");
1441 Verify(bld, "ar-TH",
1442 "setExtension(\"X\") got Error: %s\n");
1443 }
1444
TestSetExtensionValidatePUWellFormed()1445 void LocaleBuilderTest::TestSetExtensionValidatePUWellFormed() {
1446 // ((sep tlang (sep tfield)*) | (sep tfield)+)
1447 static const char* wellFormedExtensions[] = {
1448 "a", // Short subtag
1449 "z", // Short subtag
1450 "0", // Short subtag, digit
1451 "9", // Short subtag, digit
1452 "a-0", // Two short subtag, alpha and digit
1453 "9-z", // Two short subtag, digit and alpha
1454 "ab",
1455 "abc",
1456 "abcefghi", // Long subtag
1457 "87654321",
1458 "01",
1459 "234",
1460 "0a-ab-87654321", // Three subtags
1461 "87654321-ab-00-3A", // Four subtabs
1462 "a-9-87654321", // Three subtags with short and long subtags
1463 "87654321-ab-0-3A",
1464 };
1465 for (const char* extension : wellFormedExtensions) {
1466 UErrorCode status = U_ZERO_ERROR;
1467 LocaleBuilder bld;
1468 bld.setExtension('x', extension);
1469 Locale loc = bld.build(status);
1470 if (U_FAILURE(status)) {
1471 errln("setExtension('x', \"%s\") got Error: %s\n",
1472 extension, u_errorName(status));
1473 }
1474 }
1475 }
1476
TestSetExtensionValidatePUIllFormed()1477 void LocaleBuilderTest::TestSetExtensionValidatePUIllFormed() {
1478 static const char* illFormed[] = {
1479 "123456789", // Too long
1480 "abcdefghi", // Too long
1481 "ab-123456789", // Second subtag too long
1482 "abcdefghi-12", // First subtag too long
1483 "a-ab-987654321", // Third subtag too long
1484 "987654321-a-0-3", // First subtag too long
1485 };
1486 for (const char* ill : illFormed) {
1487 UErrorCode status = U_ZERO_ERROR;
1488 LocaleBuilder bld;
1489 bld.setExtension('x', ill);
1490 Locale loc = bld.build(status);
1491 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1492 errln("setExtension('x', \"%s\") should fail but has no Error\n",
1493 ill);
1494 }
1495 }
1496 }
1497
TestSetExtensionOthers()1498 void LocaleBuilderTest::TestSetExtensionOthers() {
1499 LocaleBuilder bld;
1500 bld.setLanguage("fr");
1501 Verify(bld, "fr",
1502 "setLanguage(\"fr\") got Error: %s\n");
1503
1504 bld.setExtension('Z', "ab");
1505 Verify(bld, "fr-z-ab",
1506 "setExtension('Z', \"ab\") got Error: %s\n");
1507
1508 bld.setExtension('0', "xyz12345-abcdefg");
1509 Verify(bld, "fr-0-xyz12345-abcdefg-z-ab",
1510 "setExtension('0', \"xyz12345-abcdefg\") got Error: %s\n");
1511
1512 bld.setExtension('a', "01-12345678-ABcdef");
1513 Verify(bld, "fr-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1514 "setExtension('a', \"01-12345678-ABcdef\") got Error: %s\n");
1515
1516 bld.setRegion("TH");
1517 Verify(bld, "fr-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1518 "setRegion(\"TH\") got Error: %s\n");
1519
1520 bld.setScript("Arab");
1521 Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-01-12345678-abcdef-z-ab",
1522 "setRegion(\"Arab\") got Error: %s\n");
1523
1524 bld.setExtension('A', "97");
1525 Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-a-97-z-ab",
1526 "setExtension('a', \"97\") got Error: %s\n");
1527
1528 bld.setExtension('a', "");
1529 Verify(bld, "fr-Arab-TH-0-xyz12345-abcdefg-z-ab",
1530 "setExtension('a', \"\") got Error: %s\n");
1531
1532 bld.setExtension('0', "");
1533 Verify(bld, "fr-Arab-TH-z-ab",
1534 "setExtension('0', \"\") got Error: %s\n");
1535 }
1536
TestSetExtensionValidateOthersWellFormed()1537 void LocaleBuilderTest::TestSetExtensionValidateOthersWellFormed() {
1538 static const char* wellFormedExtensions[] = {
1539 "ab",
1540 "abc",
1541 "abcefghi",
1542 "01",
1543 "234",
1544 "87654321",
1545 "0a-ab-87654321",
1546 "87654321-ab-00-3A",
1547 };
1548
1549 const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1550 const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
1551 int32_t i = 0;
1552 for (const char* extension : wellFormedExtensions) {
1553 char ch = aToZ[i];
1554 i = (i + 1) % aToZLen;
1555 UErrorCode status = U_ZERO_ERROR;
1556 LocaleBuilder bld;
1557 bld.setExtension(ch, extension);
1558 Locale loc = bld.build(status);
1559 if (U_FAILURE(status)) {
1560 errln("setExtension('%c', \"%s\") got Error: %s\n",
1561 ch, extension, u_errorName(status));
1562 }
1563 }
1564
1565 const char* someChars =
1566 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789`~!@#$%^&*()-_=+;:,.<>?";
1567 const int32_t someCharsLen = static_cast<int32_t>(uprv_strlen(someChars));
1568 for (int32_t i = 0; i < someCharsLen; i++) {
1569 char ch = someChars[i];
1570 UErrorCode status = U_ZERO_ERROR;
1571 LocaleBuilder bld;
1572 bld.setExtension(ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1573 Locale loc = bld.build(status);
1574 if (uprv_isASCIILetter(ch) || ('0' <= ch && ch <= '9')) {
1575 if (ch != 't' && ch != 'T' && ch != 'u' && ch != 'U' && ch != 'x' && ch != 'X') {
1576 if (U_FAILURE(status)) {
1577 errln("setExtension('%c', \"%s\") got Error: %s\n",
1578 ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)], u_errorName(status));
1579 }
1580 }
1581 } else {
1582 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1583 errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1584 ch, wellFormedExtensions[ch % UPRV_LENGTHOF(wellFormedExtensions)]);
1585 }
1586 }
1587
1588 }
1589 }
1590
TestSetExtensionValidateOthersIllFormed()1591 void LocaleBuilderTest::TestSetExtensionValidateOthersIllFormed() {
1592 static const char* illFormed[] = {
1593 "0", // Too short
1594 "a", // Too short
1595 "123456789", // Too long
1596 "abcdefghi", // Too long
1597 "ab-123456789", // Second subtag too long
1598 "abcdefghi-12", // First subtag too long
1599 "a-ab-87654321", // Third subtag too long
1600 "87654321-a-0-3", // First subtag too long
1601 };
1602 const char * aToZ = "abcdefghijklmnopqrstuvwxyz";
1603 const int32_t aToZLen = static_cast<int32_t>(uprv_strlen(aToZ));
1604 int32_t i = 0;
1605 for (const char* ill : illFormed) {
1606 char ch = aToZ[i];
1607 i = (i + 1) % aToZLen;
1608 UErrorCode status = U_ZERO_ERROR;
1609 LocaleBuilder bld;
1610 bld.setExtension(ch, ill);
1611 Locale loc = bld.build(status);
1612 if (status != U_ILLEGAL_ARGUMENT_ERROR) {
1613 errln("setExtension('%c', \"%s\") should fail but has no Error\n",
1614 ch, ill);
1615 }
1616 }
1617 }
1618
TestSetLocale()1619 void LocaleBuilderTest::TestSetLocale() {
1620 LocaleBuilder bld1, bld2;
1621 UErrorCode status = U_ZERO_ERROR;
1622 Locale l1 = bld1.setLanguage("en")
1623 .setScript("Latn")
1624 .setRegion("MX")
1625 .setVariant("3456-abcde")
1626 .addUnicodeLocaleAttribute("456")
1627 .addUnicodeLocaleAttribute("123")
1628 .setUnicodeLocaleKeyword("nu", "thai")
1629 .setUnicodeLocaleKeyword("co", "stroke")
1630 .setUnicodeLocaleKeyword("ca", "chinese")
1631 .build(status);
1632 if (U_FAILURE(status) || l1.isBogus()) {
1633 errln("build got Error: %s\n", u_errorName(status));
1634 }
1635 status = U_ZERO_ERROR;
1636 Locale l2 = bld1.setLocale(l1).build(status);
1637 if (U_FAILURE(status) || l2.isBogus()) {
1638 errln("build got Error: %s\n", u_errorName(status));
1639 }
1640
1641 if (l1 != l2) {
1642 errln("Two locales should be the same, but one is '%s' and the other is '%s'",
1643 l1.getName(), l2.getName());
1644 }
1645 }
1646
TestPosixCases()1647 void LocaleBuilderTest::TestPosixCases() {
1648 UErrorCode status = U_ZERO_ERROR;
1649 Locale l1 = Locale::forLanguageTag("en-US-u-va-posix", status);
1650 if (U_FAILURE(status) || l1.isBogus()) {
1651 errln("build got Error: %s\n", u_errorName(status));
1652 }
1653 LocaleBuilder bld;
1654 bld.setLanguage("en")
1655 .setRegion("MX")
1656 .setScript("Arab")
1657 .setUnicodeLocaleKeyword("nu", "Thai")
1658 .setExtension('x', "1");
1659 // All of above should be cleared by the setLocale call.
1660 Locale l2 = bld.setLocale(l1).build(status);
1661 if (U_FAILURE(status) || l2.isBogus()) {
1662 errln("build got Error: %s\n", u_errorName(status));
1663 }
1664 if (l1 != l2) {
1665 errln("The result locale should be the set as the setLocale %s but got %s\n",
1666 l1.toLanguageTag<std::string>(status).c_str(),
1667 l2.toLanguageTag<std::string>(status).c_str());
1668 }
1669 Locale posix("en-US-POSIX");
1670 if (posix != l2) {
1671 errln("The result locale should be the set as %s but got %s\n",
1672 posix.getName(), l2.getName());
1673 }
1674 }
1675